Diffstat (limited to 'contrib/llvm/lib'; each row lists file mode, path, and lines changed)
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp402
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp304
-rw-r--r--contrib/llvm/lib/Analysis/AliasDebugger.cpp138
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp652
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp104
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp1213
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequency.cpp59
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp363
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp165
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp148
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp1412
-rw-r--r--contrib/llvm/lib/Analysis/DIBuilder.cpp839
-rw-r--r--contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp224
-rw-r--r--contrib/llvm/lib/Analysis/DebugInfo.cpp948
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp232
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraph.cpp340
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp608
-rw-r--r--contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp101
-rw-r--r--contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp609
-rw-r--r--contrib/llvm/lib/Analysis/IPA/IPA.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp272
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp647
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp2526
-rw-r--r--contrib/llvm/lib/Analysis/Interval.cpp58
-rw-r--r--contrib/llvm/lib/Analysis/IntervalPartition.cpp114
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp1128
-rw-r--r--contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/LibCallSemantics.cpp63
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp655
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp236
-rw-r--r--contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp358
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp419
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp422
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp176
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp213
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp1469
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp88
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp442
-rw-r--r--contrib/llvm/lib/Analysis/PathNumbering.cpp522
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileInfo.cpp434
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileVerifier.cpp207
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp426
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfo.cpp1105
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp157
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp267
-rw-r--r--contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp382
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp851
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp275
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp220
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp6432
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp1403
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp184
-rw-r--r--contrib/llvm/lib/Analysis/SparsePropagation.cpp347
-rw-r--r--contrib/llvm/lib/Analysis/Trace.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp300
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp1798
-rw-r--r--contrib/llvm/lib/Archive/Archive.cpp261
-rw-r--r--contrib/llvm/lib/Archive/ArchiveInternals.h89
-rw-r--r--contrib/llvm/lib/Archive/ArchiveReader.cpp630
-rw-r--r--contrib/llvm/lib/Archive/ArchiveWriter.cpp489
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp830
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.h93
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp3742
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h373
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h149
-rw-r--r--contrib/llvm/lib/AsmParser/Parser.cpp62
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitReader.cpp88
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp2824
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h278
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp40
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp1628
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp41
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp494
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h153
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp962
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h184
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.cpp79
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.h73
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp303
-rw-r--r--contrib/llvm/lib/CodeGen/AntiDepBreaker.h71
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp87
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp1996
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp233
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp419
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp368
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h433
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp152
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp1054
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h280
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp2755
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h531
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp739
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h242
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp166
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp1671
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.h123
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp219
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp182
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp425
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp663
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h110
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp201
-rw-r--r--contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp685
-rw-r--r--contrib/llvm/lib/CodeGen/ELF.h227
-rw-r--r--contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp205
-rw-r--r--contrib/llvm/lib/CodeGen/ELFCodeEmitter.h78
-rw-r--r--contrib/llvm/lib/CodeGen/ELFWriter.cpp1105
-rw-r--r--contrib/llvm/lib/CodeGen/ELFWriter.h251
-rw-r--r--contrib/llvm/lib/CodeGen/EdgeBundles.cpp97
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp213
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/GCStrategy.cpp416
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp1513
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp1058
-rw-r--r--contrib/llvm/lib/CodeGen/InterferenceCache.cpp166
-rw-r--r--contrib/llvm/lib/CodeGen/InterferenceCache.h204
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp570
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp498
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp152
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp966
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.h70
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp775
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp2171
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp325
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.h264
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp327
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.h206
-rw-r--r--contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp770
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp359
-rw-r--r--contrib/llvm/lib/CodeGen/LowerSubregs.cpp223
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp791
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp113
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp535
-rw-r--r--contrib/llvm/lib/CodeGen/MachineDominators.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp744
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp58
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp56
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp60
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp1743
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp1211
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp83
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp567
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp41
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp229
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp372
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp610
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp1218
-rw-r--r--contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp141
-rw-r--r--contrib/llvm/lib/CodeGen/OcamlGC.cpp37
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp190
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp434
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.h25
-rw-r--r--contrib/llvm/lib/CodeGen/Passes.cpp73
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp465
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp712
-rw-r--r--contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp295
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp852
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.h177
-rw-r--r--contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp134
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.h193
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp587
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp1073
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp1429
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp1544
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp723
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp112
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterClassInfo.h121
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp1797
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.h243
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp395
-rw-r--r--contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp1012
-rw-r--r--contrib/llvm/lib/CodeGen/RenderMachineFunction.h338
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp607
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp68
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp697
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h211
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp99
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp243
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp7976
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp1373
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp456
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp900
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h142
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3893
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp1456
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp2887
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp1153
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h720
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp480
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp337
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2568
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h114
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h54
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp644
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp265
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp2994
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp802
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h165
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp6655
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp6602
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h548
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp2805
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp302
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp3370
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/CodeGen/ShadowStackGC.cpp441
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp1152
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp588
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp179
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.cpp352
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.h142
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.cpp241
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.h46
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.cpp1386
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h469
-rw-r--r--contrib/llvm/lib/CodeGen/Splitter.cpp827
-rw-r--r--contrib/llvm/lib/CodeGen/Splitter.h101
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp260
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp768
-rw-r--r--contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp829
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp938
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp443
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp1160
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp1530
-rw-r--r--contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp215
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp371
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.h528
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp2633
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegRewriter.h32
-rw-r--r--contrib/llvm/lib/CompilerDriver/Action.cpp134
-rw-r--r--contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp61
-rw-r--r--contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp655
-rw-r--r--contrib/llvm/lib/CompilerDriver/Main.cpp146
-rw-r--r--contrib/llvm/lib/CompilerDriver/Tool.cpp95
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp1130
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp254
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp1350
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp491
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp98
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h242
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp161
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp821
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.h226
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp211
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h116
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp598
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h73
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp1308
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp727
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp192
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp161
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp221
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h91
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h69
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp95
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h152
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp524
-rw-r--r--contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp90
-rw-r--r--contrib/llvm/lib/Linker/LinkArchives.cpp198
-rw-r--r--contrib/llvm/lib/Linker/LinkItems.cpp241
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp968
-rw-r--r--contrib/llvm/lib/Linker/Linker.cpp174
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp1730
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.h414
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfo.cpp137
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp37
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp61
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp1256
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp977
-rw-r--r--contrib/llvm/lib/MC/MCCodeEmitter.cpp18
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp315
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler.cpp14
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp164
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.h96
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp419
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h269
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInfo.h84
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp212
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInst.h182
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp315
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDOperand.h91
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp210
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDToken.h139
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp1086
-rw-r--r--contrib/llvm/lib/MC/MCELF.cpp72
-rw-r--r--contrib/llvm/lib/MC/MCELF.h35
-rw-r--r--contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp23
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp386
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.h140
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp605
-rw-r--r--contrib/llvm/lib/MC/MCInst.cpp66
-rw-r--r--contrib/llvm/lib/MC/MCInstPrinter.cpp25
-rw-r--r--contrib/llvm/lib/MC/MCLabel.cpp21
-rw-r--r--contrib/llvm/lib/MC/MCLoggingStreamer.cpp249
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp420
-rw-r--r--contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp104
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp254
-rw-r--r--contrib/llvm/lib/MC/MCObjectWriter.cpp80
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp441
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp2731
-rw-r--r--contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp442
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp668
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp533
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp27
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp48
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp19
-rw-r--r--contrib/llvm/lib/MC/MCPureStreamer.cpp234
-rw-r--r--contrib/llvm/lib/MC/MCSection.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCSectionCOFF.cpp84
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp150
-rw-r--r--contrib/llvm/lib/MC/MCSectionMachO.cpp303
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp534
-rw-r--r--contrib/llvm/lib/MC/MCSubtargetInfo.cpp96
-rw-r--r--contrib/llvm/lib/MC/MCSymbol.cpp84
-rw-r--r--contrib/llvm/lib/MC/MCValue.cpp36
-rw-r--r--contrib/llvm/lib/MC/MCWin64EH.cpp258
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp785
-rw-r--r--contrib/llvm/lib/MC/SubtargetFeature.cpp397
-rw-r--r--contrib/llvm/lib/MC/TargetAsmBackend.cpp37
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp887
-rw-r--r--contrib/llvm/lib/MC/WinCOFFStreamer.cpp406
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp96
-rw-r--r--contrib/llvm/lib/Object/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp455
-rw-r--r--contrib/llvm/lib/Object/ELFObjectFile.cpp740
-rw-r--r--contrib/llvm/lib/Object/Error.cpp57
-rw-r--r--contrib/llvm/lib/Object/MachOObject.cpp370
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp469
-rw-r--r--contrib/llvm/lib/Object/Makefile14
-rw-r--r--contrib/llvm/lib/Object/Object.cpp68
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp61
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp3616
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp2947
-rw-r--r--contrib/llvm/lib/Support/APSInt.cpp23
-rw-r--r--contrib/llvm/lib/Support/Allocator.cpp188
-rw-r--r--contrib/llvm/lib/Support/Atomic.cpp112
-rw-r--r--contrib/llvm/lib/Support/BranchProbability.cpp44
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp1426
-rw-r--r--contrib/llvm/lib/Support/ConstantRange.cpp702
-rw-r--r--contrib/llvm/lib/Support/CrashRecoveryContext.cpp281
-rw-r--r--contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp357
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp134
-rw-r--r--contrib/llvm/lib/Support/DeltaAlgorithm.cpp114
-rw-r--r--contrib/llvm/lib/Support/Disassembler.cpp75
-rw-r--r--contrib/llvm/lib/Support/Dwarf.cpp658
-rw-r--r--contrib/llvm/lib/Support/DynamicLibrary.cpp170
-rw-r--r--contrib/llvm/lib/Support/Errno.cpp74
-rw-r--r--contrib/llvm/lib/Support/ErrorHandling.cpp99
-rw-r--r--contrib/llvm/lib/Support/FileUtilities.cpp281
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp426
-rw-r--r--contrib/llvm/lib/Support/FormattedStream.cpp101
-rw-r--r--contrib/llvm/lib/Support/GraphWriter.cpp200
-rw-r--r--contrib/llvm/lib/Support/Host.cpp315
-rw-r--r--contrib/llvm/lib/Support/IncludeFile.cpp20
-rw-r--r--contrib/llvm/lib/Support/IntEqClasses.cpp70
-rw-r--r--contrib/llvm/lib/Support/IntervalMap.cpp161
-rw-r--r--contrib/llvm/lib/Support/IsInf.cpp49
-rw-r--r--contrib/llvm/lib/Support/IsNAN.cpp33
-rw-r--r--contrib/llvm/lib/Support/ManagedStatic.cpp75
-rw-r--r--contrib/llvm/lib/Support/Memory.cpp74
-rw-r--r--contrib/llvm/lib/Support/MemoryBuffer.cpp376
-rw-r--r--contrib/llvm/lib/Support/MemoryObject.cpp34
-rw-r--r--contrib/llvm/lib/Support/Mutex.cpp157
-rw-r--r--contrib/llvm/lib/Support/Path.cpp303
-rw-r--r--contrib/llvm/lib/Support/PathV2.cpp774
-rw-r--r--contrib/llvm/lib/Support/PluginLoader.cpp47
-rw-r--r--contrib/llvm/lib/Support/PrettyStackTrace.cpp133
-rw-r--r--contrib/llvm/lib/Support/Process.cpp33
-rw-r--r--contrib/llvm/lib/Support/Program.cpp56
-rw-r--r--contrib/llvm/lib/Support/RWMutex.cpp157
-rw-r--r--contrib/llvm/lib/Support/Regex.cpp168
-rw-r--r--contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp73
-rw-r--r--contrib/llvm/lib/Support/Signals.cpp34
-rw-r--r--contrib/llvm/lib/Support/SmallPtrSet.cpp229
-rw-r--r--contrib/llvm/lib/Support/SmallVector.cpp40
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp232
-rw-r--r--contrib/llvm/lib/Support/Statistic.cpp152
-rw-r--r--contrib/llvm/lib/Support/StringExtras.cpp81
-rw-r--r--contrib/llvm/lib/Support/StringMap.cpp230
-rw-r--r--contrib/llvm/lib/Support/StringPool.cpp35
-rw-r--r--contrib/llvm/lib/Support/StringRef.cpp415
-rw-r--r--contrib/llvm/lib/Support/SystemUtils.cpp55
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp92
-rw-r--r--contrib/llvm/lib/Support/ThreadLocal.cpp84
-rw-r--r--contrib/llvm/lib/Support/Threading.cpp116
-rw-r--r--contrib/llvm/lib/Support/TimeValue.cpp57
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp393
-rw-r--r--contrib/llvm/lib/Support/ToolOutputFile.cpp43
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp640
-rw-r--r--contrib/llvm/lib/Support/Twine.cpp165
-rw-r--r--contrib/llvm/lib/Support/Unix/Host.inc69
-rw-r--r--contrib/llvm/lib/Support/Unix/Memory.inc151
-rw-r--r--contrib/llvm/lib/Support/Unix/Mutex.inc43
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc890
-rw-r--r--contrib/llvm/lib/Support/Unix/PathV2.inc507
-rw-r--r--contrib/llvm/lib/Support/Unix/Process.inc295
-rw-r--r--contrib/llvm/lib/Support/Unix/Program.inc430
-rw-r--r--contrib/llvm/lib/Support/Unix/README.txt16
-rw-r--r--contrib/llvm/lib/Support/Unix/RWMutex.inc43
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc303
-rw-r--r--contrib/llvm/lib/Support/Unix/ThreadLocal.inc26
-rw-r--r--contrib/llvm/lib/Support/Unix/TimeValue.inc56
-rw-r--r--contrib/llvm/lib/Support/Unix/Unix.h87
-rw-r--r--contrib/llvm/lib/Support/Unix/system_error.inc34
-rw-r--r--contrib/llvm/lib/Support/Valgrind.cpp54
-rw-r--r--contrib/llvm/lib/Support/Windows/DynamicLibrary.inc137
-rw-r--r--contrib/llvm/lib/Support/Windows/Host.inc23
-rw-r--r--contrib/llvm/lib/Support/Windows/Memory.inc73
-rw-r--r--contrib/llvm/lib/Support/Windows/Mutex.inc58
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc931
-rw-r--r--contrib/llvm/lib/Support/Windows/PathV2.inc757
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc222
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc405
-rw-r--r--contrib/llvm/lib/Support/Windows/RWMutex.inc58
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc328
-rw-r--r--contrib/llvm/lib/Support/Windows/ThreadLocal.inc54
-rw-r--r--contrib/llvm/lib/Support/Windows/TimeValue.inc51
-rw-r--r--contrib/llvm/lib/Support/Windows/Windows.h120
-rw-r--r--contrib/llvm/lib/Support/Windows/explicit_symbols.inc66
-rw-r--r--contrib/llvm/lib/Support/Windows/system_error.inc142
-rw-r--r--contrib/llvm/lib/Support/circular_raw_ostream.cpp45
-rw-r--r--contrib/llvm/lib/Support/raw_os_ostream.cpp30
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp763
-rw-r--r--contrib/llvm/lib/Support/regcclass.h70
-rw-r--r--contrib/llvm/lib/Support/regcname.h139
-rw-r--r--contrib/llvm/lib/Support/regcomp.c1525
-rw-r--r--contrib/llvm/lib/Support/regengine.inc1034
-rw-r--r--contrib/llvm/lib/Support/regerror.c135
-rw-r--r--contrib/llvm/lib/Support/regex2.h157
-rw-r--r--contrib/llvm/lib/Support/regex_impl.h108
-rw-r--r--contrib/llvm/lib/Support/regexec.c162
-rw-r--r--contrib/llvm/lib/Support/regfree.c72
-rw-r--r--contrib/llvm/lib/Support/regstrlcpy.c52
-rw-r--r--contrib/llvm/lib/Support/regutils.h53
-rw-r--r--contrib/llvm/lib/Support/system_error.cpp130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td251
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAddressingModes.h595
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp516
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp1834
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h127
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInfo.h294
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp2584
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h503
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp1287
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h216
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h131
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h160
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td164
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp1898
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp1906
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h122
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1326
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp2099
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFixupKinds.h97
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp1086
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp219
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp120
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp3011
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp7978
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h509
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td1930
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp60
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td4076
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td4937
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1397
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td3467
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1128
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.h183
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp1839
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp1314
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp125
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp389
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h250
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td332
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRelocations.h62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td261
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td1034
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td1827
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td294
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp198
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h51
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp214
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h252
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp189
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h143
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp47
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h38
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp154
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp2390
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp589
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h99
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp3818
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h336
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h2459
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp759
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h120
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp78
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp144
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp331
-rw-r--r--contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp149
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp361
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp100
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h59
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp714
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h69
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp252
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp610
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h78
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h43
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp881
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.td68
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp166
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp66
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td38
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp143
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp426
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp956
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h142
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td268
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp383
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h85
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td1157
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp158
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h62
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp215
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h60
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td133
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRelocations.h31
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSchedule.td85
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp36
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h49
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp53
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h65
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp57
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Blackfin/Blackfin.h31
-rw-r--r--contrib/llvm/lib/Target/Blackfin/Blackfin.td202
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp156
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td30
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp130
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h47
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp180
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp643
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h83
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td34
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp256
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h81
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td862
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp104
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h32
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td34
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp360
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h81
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td277
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp44
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h49
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp41
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h67
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp60
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/CBackend/CBackend.cpp3611
-rw-r--r--contrib/llvm/lib/Target/CBackend/CTargetMachine.h41
-rw-r--r--contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td449
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp56
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.h31
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.td66
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td408
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp334
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td57
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp275
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h94
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp141
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp1203
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp3257
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h186
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td320
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp451
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h84
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td4482
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h49
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNodes.td159
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUOperands.td664
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp373
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h107
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td183
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h19
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSchedule.td59
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp66
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h97
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp67
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h90
-rw-r--r--contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp1971
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h43
-rw-r--r--contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp128
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp567
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp707
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h55
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp69
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.h42
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.td73
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp162
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp335
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td28
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp258
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp111
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp450
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp277
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp1147
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h185
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td219
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td229
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td204
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp296
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h296
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td884
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp113
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h33
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td131
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp222
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp166
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h50
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h170
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp363
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h85
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td148
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h47
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td55
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td236
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td267
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp63
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h75
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp102
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h84
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp90
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h40
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp65
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp113
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h47
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td66
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp179
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp180
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td37
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp223
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp492
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp1197
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h182
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h92
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp150
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h50
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h46
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp254
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h71
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td85
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp32
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h42
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp51
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h69
-rw-r--r--contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp235
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp127
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h100
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h39
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h34
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td101
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp440
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h71
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td86
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp82
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp94
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp117
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp331
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h50
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp482
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp2371
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h191
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td374
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td227
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp461
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h160
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td881
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp118
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.h43
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h114
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp278
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h73
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td198
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h128
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp88
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h82
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp102
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h41
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/PTX/CMakeLists.txt26
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp35
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp60
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h48
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td135
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp605
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXCallingConv.td29
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.h44
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp182
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp347
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h70
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp410
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.h133
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td1102
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td84
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp541
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp92
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h91
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp51
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h65
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td555
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp66
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h121
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp87
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h77
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/Makefile15
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp25
-rwxr-xr-xcontrib/llvm/lib/Target/PTX/generate-register-td.py163
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp295
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp62
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp70
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h41
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h85
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td112
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp123
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp696
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp174
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp261
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h45
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp978
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h322
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp308
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h73
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp1087
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp5768
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h486
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td749
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td695
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td907
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp655
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h149
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td1476
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp468
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h49
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp194
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp173
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPredicates.h39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp714
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h78
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td326
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td505
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td64
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td74
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td80
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td84
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp141
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h151
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp135
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h94
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp323
-rw-r--r--contrib/llvm/lib/Target/Sparc/FPMover.cpp141
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp57
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.h109
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td72
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp251
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp80
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp212
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp1273
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h103
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td114
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp346
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h100
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td825
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h47
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp140
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h62
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td159
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp44
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h58
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp65
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h79
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp32
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.h52
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.td61
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp223
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td46
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp386
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h57
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp779
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp867
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h145
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h128
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td340
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td133
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp439
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h113
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td1147
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h51
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td325
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp158
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h64
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td205
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp54
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h48
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h67
-rw-r--r--contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/Target.cpp102
-rw-r--r--contrib/llvm/lib/Target/TargetAsmInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/TargetAsmLexer.cpp14
-rw-r--r--contrib/llvm/lib/Target/TargetData.cpp589
-rw-r--r--contrib/llvm/lib/Target/TargetELFWriterInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/TargetFrameLowering.cpp53
-rw-r--r--contrib/llvm/lib/Target/TargetInstrInfo.cpp174
-rw-r--r--contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/TargetLibraryInfo.cpp74
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp360
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp315
-rw-r--r--contrib/llvm/lib/Target/TargetRegisterInfo.cpp141
-rw-r--r--contrib/llvm/lib/Target/TargetSubtargetInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp165
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp1141
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp560
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h155
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c1610
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h575
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h379
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp140
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h86
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp260
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h25
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp143
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h96
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp138
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp185
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h60
-rw-r--r--contrib/llvm/lib/Target/X86/SSEDomainFix.cpp506
-rw-r--r--contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp190
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h87
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h91
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td234
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmBackend.cpp453
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp728
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h87
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td401
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp1005
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp153
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp2133
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupKinds.h33
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp1709
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp1210
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h69
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp2253
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp13157
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h955
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td102
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td1125
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h184
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td104
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td1675
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td304
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td151
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td60
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td648
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td535
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td467
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp3240
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h886
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td1648
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td454
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td5865
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td746
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td429
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVMX.td54
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.cpp574
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.h81
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp1044
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp692
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp554
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h135
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp973
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h157
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td467
-rw-r--r--contrib/llvm/lib/Target/X86/X86Relocations.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp259
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h56
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp323
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h259
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp245
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h135
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp121
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h60
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp29
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp56
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.td46
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp280
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp387
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h59
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp295
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp1608
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h201
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td120
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp398
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h88
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td1210
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h69
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp340
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h85
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td56
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp28
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.h42
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp44
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h62
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp65
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h25
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp906
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp190
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp1003
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp380
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp211
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp2856
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp279
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp112
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp85
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp118
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp571
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp191
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp248
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp547
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp868
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp182
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp256
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp73
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp413
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombine.h367
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp697
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp2306
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp1317
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp1771
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp2919
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp615
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp731
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp900
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp876
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp753
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp1153
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp575
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h106
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp1677
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp117
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp672
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp33
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h108
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp225
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp1425
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp170
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h36
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp97
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp152
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp1161
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp91
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp207
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp135
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp727
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp470
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp2287
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp1871
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp1594
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp808
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp248
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp634
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp170
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp353
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp3911
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp193
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp1063
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp139
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp986
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp3595
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp1123
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp134
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp2010
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp189
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp2674
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp331
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp2391
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp274
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp373
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp634
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp582
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp550
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp182
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp386
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp479
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp538
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp127
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp795
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp146
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp1182
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp279
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp875
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp753
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp395
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp166
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp603
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp323
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp1134
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp520
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp2779
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp94
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp142
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp201
-rw-r--r--contrib/llvm/lib/VMCore/AsmWriter.cpp2038
-rw-r--r--contrib/llvm/lib/VMCore/Attributes.cpp353
-rw-r--r--contrib/llvm/lib/VMCore/AutoUpgrade.cpp281
-rw-r--r--contrib/llvm/lib/VMCore/BasicBlock.cpp346
-rw-r--r--contrib/llvm/lib/VMCore/ConstantFold.cpp2344
-rw-r--r--contrib/llvm/lib/VMCore/ConstantFold.h56
-rw-r--r--contrib/llvm/lib/VMCore/Constants.cpp2173
-rw-r--r--contrib/llvm/lib/VMCore/ConstantsContext.h709
-rw-r--r--contrib/llvm/lib/VMCore/Core.cpp2217
-rw-r--r--contrib/llvm/lib/VMCore/DebugInfoProbe.cpp225
-rw-r--r--contrib/llvm/lib/VMCore/DebugLoc.cpp344
-rw-r--r--contrib/llvm/lib/VMCore/Dominators.cpp106
-rw-r--r--contrib/llvm/lib/VMCore/Function.cpp445
-rw-r--r--contrib/llvm/lib/VMCore/GVMaterializer.cpp18
-rw-r--r--contrib/llvm/lib/VMCore/Globals.cpp263
-rw-r--r--contrib/llvm/lib/VMCore/IRBuilder.cpp149
-rw-r--r--contrib/llvm/lib/VMCore/InlineAsm.cpp294
-rw-r--r--contrib/llvm/lib/VMCore/Instruction.cpp414
-rw-r--r--contrib/llvm/lib/VMCore/Instructions.cpp3187
-rw-r--r--contrib/llvm/lib/VMCore/IntrinsicInst.cpp73
-rw-r--r--contrib/llvm/lib/VMCore/LLVMContext.cpp147
-rw-r--r--contrib/llvm/lib/VMCore/LLVMContextImpl.cpp94
-rw-r--r--contrib/llvm/lib/VMCore/LLVMContextImpl.h238
-rw-r--r--contrib/llvm/lib/VMCore/LeakDetector.cpp69
-rw-r--r--contrib/llvm/lib/VMCore/LeaksContext.h92
-rw-r--r--contrib/llvm/lib/VMCore/Metadata.cpp562
-rw-r--r--contrib/llvm/lib/VMCore/Module.cpp553
-rw-r--r--contrib/llvm/lib/VMCore/Pass.cpp293
-rw-r--r--contrib/llvm/lib/VMCore/PassManager.cpp1882
-rw-r--r--contrib/llvm/lib/VMCore/PassRegistry.cpp208
-rw-r--r--contrib/llvm/lib/VMCore/PrintModulePass.cpp101
-rw-r--r--contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h118
-rw-r--r--contrib/llvm/lib/VMCore/Type.cpp687
-rw-r--r--contrib/llvm/lib/VMCore/Use.cpp144
-rw-r--r--contrib/llvm/lib/VMCore/User.cpp79
-rw-r--r--contrib/llvm/lib/VMCore/Value.cpp632
-rw-r--r--contrib/llvm/lib/VMCore/ValueSymbolTable.cpp117
-rw-r--r--contrib/llvm/lib/VMCore/ValueTypes.cpp212
-rw-r--r--contrib/llvm/lib/VMCore/Verifier.cpp1870
1235 files changed, 564076 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 000000000000..c189a0042928
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,402 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface, which is the
+// common interface used by all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified. This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation. Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+// Register the AliasAnalysis interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->alias(LocA, LocB);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->pointsToConstantMemory(Loc, OrLocal);
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->copyValue(From, To);
+}
+
+void AliasAnalysis::addEscapingUse(Use &U) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->addEscapingUse(U);
+}
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+
+ ModRefResult Mask = ModRef;
+ if (onlyReadsMemory(MRB))
+ Mask = Ref;
+
+ if (onlyAccessesArgPointees(MRB)) {
+ bool doesAlias = false;
+ if (doesAccessArgPointees(MRB)) {
+ MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ const Value *Arg = *AI;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CSLoc(Arg, UnknownSize, CSTag);
+ if (!isNoAlias(CSLoc, Loc)) {
+ doesAlias = true;
+ break;
+ }
+ }
+ }
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ // If Loc is a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && pointsToConstantMemory(Loc))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ return NoModRef;
+
+ AliasAnalysis::ModRefResult Mask = ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (onlyReadsMemory(CS1B))
+ Mask = ModRefResult(Mask & Ref);
+
+ // If CS2 only access memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (onlyAccessesArgPointees(CS2B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS2B)) {
+ MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS2Loc(Arg, UnknownSize, CS2Tag);
+ R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
+ if (R == Mask)
+ break;
+ }
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(CS1B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS1B)) {
+ MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS1Loc(Arg, UnknownSize, CS1Tag);
+ if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
+ R = Mask;
+ break;
+ }
+ }
+ }
+ if (R == NoModRef)
+ return R;
+ }
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // Call back into the alias analysis with the other form of getModRefBehavior
+ // to see if it can give a better response.
+ if (const Function *F = CS.getCalledFunction())
+ Min = getModRefBehavior(F);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Min;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any result we've managed to compute.
+ return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getModRefBehavior(F);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ return Location(LI->getPointerOperand(),
+ getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ return Location(SI->getPointerOperand(),
+ getTypeStoreSize(SI->getValueOperand()->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+ return Location(VI->getPointerOperand(),
+ UnknownSize,
+ VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
+ // Be conservative in the face of volatile.
+ if (L->isVolatile())
+ return ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (!alias(getLocation(L), Loc))
+ return NoModRef;
+
+ // Otherwise, a load just reads.
+ return Ref;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
+ // Be conservative in the face of volatile.
+ if (S->isVolatile())
+ return ModRef;
+
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a store just writes.
+ return Mod;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(getLocation(V), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a va_arg reads and writes.
+ return ModRef;
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
+/// AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+ TD = P->getAnalysisIfAvailable<TargetData>();
+ AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>(); // All AA's chain
+}
+
+/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the memory location Loc.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+ const Location &Loc) {
+ return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+}
+
+/// canInstructionRangeModify - Return true if it is possible for the execution
+/// of the specified instructions to modify the memory location Loc. The
+/// instructions to consider are all of the instructions in the range of [I1,I2]
+/// INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+ const Instruction &I2,
+ const Location &Loc) {
+ assert(I1.getParent() == I2.getParent() &&
+ "Instructions not in same basic block!");
+ BasicBlock::const_iterator I = &I1;
+ BasicBlock::const_iterator E = &I2;
+ ++E; // Convert from inclusive to exclusive range.
+
+ for (; I != E; ++I) // Check every instruction in range
+ if (getModRefInfo(I, Loc) & Mod)
+ return true;
+ return false;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return ImmutableCallSite(cast<Instruction>(V))
+ .paramHasAttr(0, Attribute::NoAlias);
+ return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+/// Global Variables and Functions (but not Global Aliases)
+/// Allocas and Mallocs
+/// ByVal and NoAlias Arguments
+/// NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
+ return true;
+ if (isNoAliasCall(V))
+ return true;
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr() || A->hasByValAttr();
+ return false;
+}
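
The interface added above is deliberately a chain: each implementation answers what it can and forwards the rest to the next AliasAnalysis in the group. For orientation, here is a minimal sketch of a client pass driving that interface; the pass name and the counting logic are hypothetical, and only the AliasAnalysis calls (getAnalysis<AliasAnalysis>, getLocation, alias) come from the file above.

// Hypothetical client pass; only the AliasAnalysis API usage mirrors the
// interface added above.
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  struct AliasQueryDemo : public FunctionPass {
    static char ID;
    AliasQueryDemo() : FunctionPass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AliasAnalysis>();  // whichever AA is in the chain
      AU.setPreservesAll();
    }

    virtual bool runOnFunction(Function &F) {
      AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
      // Count load/store pairs the chained AA can prove independent.
      unsigned Independent = 0;
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
        if (LoadInst *LI = dyn_cast<LoadInst>(&*I))
          for (inst_iterator J = inst_begin(F); J != E; ++J)
            if (StoreInst *SI = dyn_cast<StoreInst>(&*J))
              if (AA.alias(AA.getLocation(LI), AA.getLocation(SI)) ==
                    AliasAnalysis::NoAlias)
                ++Independent;
      errs() << F.getName() << ": " << Independent
             << " provably independent load/store pairs\n";
      return false;
    }
  };
}

char AliasQueryDemo::ID = 0;
static RegisterPass<AliasQueryDemo>
X("aa-query-demo", "Demonstrate chained AliasAnalysis queries", false, true);
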
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 000000000000..d947220e078d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,173 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias queries
+// are being made and how the alias analysis implementation being used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+ class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
+ unsigned No, May, Partial, Must;
+ unsigned NoMR, JustRef, JustMod, MR;
+ Module *M;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasAnalysisCounter() : ModulePass(ID) {
+ initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+ No = May = Partial = Must = 0;
+ NoMR = JustRef = JustMod = MR = 0;
+ }
+
+ void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+ errs() << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
+ }
+ ~AliasAnalysisCounter() {
+ unsigned AASum = No+May+Partial+Must;
+ unsigned MRSum = NoMR+JustRef+JustMod+MR;
+ if (AASum + MRSum) { // Print a report if any counted queries occurred...
+ errs() << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted:\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
+ if (AASum) {
+ printLine("no alias", No, AASum);
+ printLine("may alias", May, AASum);
+ printLine("partial alias", Partial, AASum);
+ printLine("must alias", Must, AASum);
+ errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/"
+ << Partial*100/AASum << "%/"
+ << Must*100/AASum<<"%\n\n";
+ }
+
+ errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ if (MRSum) {
+ printLine("no mod/ref", NoMR, MRSum);
+ printLine("ref", JustRef, MRSum);
+ printLine("mod", JustMod, MRSum);
+ printLine("mod/ref", MR, MRSum);
+ errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
+ << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+ << "%/" << MR*100/MRSum <<"%\n\n";
+ }
+ }
+ }
+
+ bool runOnModule(Module &M) {
+ this->M = &M;
+ InitializeAliasAnalysis(this);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ // FIXME: We could count these too...
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // Forwarding functions: just delegate to a real AA implementation, counting
+ // the number of responses...
+ AliasResult alias(const Location &LocA, const Location &LocB);
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+ };
+}
+
+char AliasAnalysisCounter::ID = 0;
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+ "Count Alias Analysis Query Responses", false, true, false)
+
+ModulePass *llvm::createAliasAnalysisCounterPass() {
+ return new AliasAnalysisCounter();
+}
+
+AliasAnalysis::AliasResult
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+ AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
+
+ const char *AliasString;
+ switch (R) {
+ default: llvm_unreachable("Unknown alias type!");
+ case NoAlias: No++; AliasString = "No alias"; break;
+ case MayAlias: May++; AliasString = "May alias"; break;
+ case PartialAlias: Partial++; AliasString = "Partial alias"; break;
+ case MustAlias: Must++; AliasString = "Must alias"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == MayAlias)) {
+ errs() << AliasString << ":\t";
+ errs() << "[" << LocA.Size << "B] ";
+ WriteAsOperand(errs(), LocA.Ptr, true, M);
+ errs() << ", ";
+ errs() << "[" << LocB.Size << "B] ";
+ WriteAsOperand(errs(), LocB.Ptr, true, M);
+ errs() << "\n";
+ }
+
+ return R;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
+
+ const char *MRString;
+ switch (R) {
+ default: llvm_unreachable("Unknown mod/ref type!");
+ case NoModRef: NoMR++; MRString = "NoModRef"; break;
+ case Ref: JustRef++; MRString = "JustRef"; break;
+ case Mod: JustMod++; MRString = "JustMod"; break;
+ case ModRef: MR++; MRString = "ModRef"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == ModRef)) {
+ errs() << MRString << ": Ptr: ";
+ errs() << "[" << Loc.Size << "B] ";
+ WriteAsOperand(errs(), Loc.Ptr, true, M);
+ errs() << "\t<->" << *CS.getInstruction() << '\n';
+ }
+ return R;
+}
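
AliasAnalysisCounter joins the AliasAnalysis analysis group, answers nothing itself, and forwards every query while tallying the responses. The same multiple-inheritance pattern is used by every chained AA in this import; the sketch below shows its skeleton with a hypothetical pass (TrivialAA) that resolves only the trivial identical-location case and defers everything else. Registration boilerplate (INITIALIZE_AG_PASS) is omitted.

// Hypothetical chained AA implementation (TrivialAA); the structure mirrors
// AliasAnalysisCounter above, registration boilerplate omitted.
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

namespace {
  struct TrivialAA : public ImmutablePass, public AliasAnalysis {
    static char ID;
    TrivialAA() : ImmutablePass(ID) {}

    virtual void initializePass() {
      InitializeAliasAnalysis(this);          // hook into the AA chain
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AliasAnalysis::getAnalysisUsage(AU);    // require the next AA
    }

    // Required because of the Pass + AliasAnalysis multiple inheritance;
    // same idiom as getAdjustedAnalysisPointer above.
    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
      if (PI == &AliasAnalysis::ID)
        return (AliasAnalysis*)this;
      return this;
    }

    virtual AliasResult alias(const Location &LocA, const Location &LocB) {
      // The one case this pass decides itself: identical locations.
      if (LocA.Ptr == LocB.Ptr && LocA.Size == LocB.Size)
        return MustAlias;
      return AliasAnalysis::alias(LocA, LocB); // otherwise ask the chain
    }
  };
}

char TrivialAA::ID = 0;
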
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 000000000000..1afc1b71d93e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,304 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// For each function in the program, it queries the alias analysis
+// implementation with every pair of pointers in the function and records
+// how those queries are answered.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+namespace {
+ class AAEval : public FunctionPass {
+ unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
+ unsigned NoModRef, Mod, Ref, ModRef;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AAEval() : FunctionPass(ID) {
+ initializeAAEvalPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) {
+ NoAlias = MayAlias = PartialAlias = MustAlias = 0;
+ NoModRef = Mod = Ref = ModRef = 0;
+
+ if (PrintAll) {
+ PrintNoAlias = PrintMayAlias = true;
+ PrintPartialAlias = PrintMustAlias = true;
+ PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+ }
+ return false;
+ }
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+}
+
+char AAEval::ID = 0;
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
+
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ WriteAsOperand(os1, V1, true, M);
+ WriteAsOperand(os2, V2, true, M);
+ }
+
+ if (o2 < o1)
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": Ptr: ";
+ WriteAsOperand(errs(), Ptr, true, M);
+ errs() << "\t<->" << *I << '\n';
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
+static inline bool isInterestingPointer(Value *V) {
+ return V->getType()->isPointerTy()
+ && !isa<ConstantPointerNull>(V);
+}
+
+bool AAEval::runOnFunction(Function &F) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ SetVector<Value *> Pointers;
+ SetVector<CallSite> CallSites;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ if (I->getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(I);
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->getType()->isPointerTy()) // Add all pointer instructions.
+ Pointers.insert(&*I);
+ Instruction &Inst = *I;
+ if (CallSite CS = cast<Value>(&Inst)) {
+ Value *Callee = CS.getCalledValue();
+ // Skip actual functions for direct function calls.
+ if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+ Pointers.insert(Callee);
+      // Consider the call arguments.
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (isInterestingPointer(*AI))
+ Pointers.insert(*AI);
+ CallSites.insert(CS);
+ } else {
+ // Consider all operands.
+ for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+ OI != OE; ++OI)
+ if (isInterestingPointer(*OI))
+ Pointers.insert(*OI);
+ }
+ }
+
+ if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
+ PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
+
+ // iterate over the worklist, and run the full (n^2)/2 disambiguations
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ I1 != E; ++I1) {
+ uint64_t I1Size = AliasAnalysis::UnknownSize;
+ const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ uint64_t I2Size = AliasAnalysis::UnknownSize;
+      const Type *I2ElTy =
+        cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+
+ switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ case AliasAnalysis::NoAlias:
+ PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ ++MustAlias; break;
+ default:
+ errs() << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls and values
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ Instruction *I = C->getInstruction();
+
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ V != Ve; ++V) {
+ uint64_t Size = AliasAnalysis::UnknownSize;
+ const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+
+ switch (AA.getModRefInfo(*C, *V, Size)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
+ ++ModRef; break;
+ default:
+ errs() << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
+
+bool AAEval::doFinalization(Module &M) {
+ unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
+ if (AliasSum == 0) {
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
+ } else {
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAlias << " no alias responses ";
+ PrintPercent(NoAlias, AliasSum);
+ errs() << " " << MayAlias << " may alias responses ";
+ PrintPercent(MayAlias, AliasSum);
+ errs() << " " << PartialAlias << " partial alias responses ";
+ PrintPercent(PartialAlias, AliasSum);
+ errs() << " " << MustAlias << " must alias responses ";
+ PrintPercent(MustAlias, AliasSum);
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << PartialAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
+ }
+
+ // Display the summary for mod/ref analysis
+ unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+ if (ModRefSum == 0) {
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ } else {
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRef << " no mod/ref responses ";
+ PrintPercent(NoModRef, ModRefSum);
+ errs() << " " << Mod << " mod responses ";
+ PrintPercent(Mod, ModRefSum);
+ errs() << " " << Ref << " ref responses ";
+ PrintPercent(Ref, ModRefSum);
+ errs() << " " << ModRef << " mod & ref responses ";
+ PrintPercent(ModRef, ModRefSum);
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ }
+
+ return false;
+}
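
PrintPercent above formats one fractional digit using only integer arithmetic. Below is a standalone check of that trick, with arbitrarily chosen numbers; note the digit is truncated, not rounded.

// Standalone illustration of the fixed-point trick used by PrintPercent
// above; the fractional digit is truncated, not rounded.
#include <cassert>

int main() {
  unsigned Num = 37, Sum = 160;                   // exact value: 23.125%
  unsigned Whole = Num * 100ULL / Sum;            // 23
  unsigned Tenth = (Num * 1000ULL / Sum) % 10;    // 1 -> prints "23.1%"
  assert(Whole == 23 && Tenth == 1);
  return 0;
}
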
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
new file mode 100644
index 000000000000..f15c05153e10
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -0,0 +1,138 @@
+//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass checks alias analysis users to ensure that if they
+// create a new value, they do not query AA without informing it of the value.
+// It acts as a shim over any other AA pass you want.
+//
+// Yes, keeping track of every value in the program is expensive, but this is
+// a debugging pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+ class AliasDebugger : public ModulePass, public AliasAnalysis {
+
+    // What we do is simple.  Keep track of every value the AA could
+    // know about, and verify that queries are one of those.
+    // A query to a value that didn't exist when the AA was created
+    // means someone forgot to update the AA when creating new values.
+
+ std::set<const Value*> Vals;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasDebugger() : ModulePass(ID) {
+ initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+
+      for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ Vals.insert(&*I);
+ for (User::const_op_iterator OI = I->op_begin(),
+ OE = I->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+
+    for (Module::iterator I = M.begin(),
+          E = M.end(); I != E; ++I){
+      Vals.insert(&*I);
+      if (!I->isDeclaration()) {
+ for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+ AI != AE; ++AI)
+ Vals.insert(&*AI);
+ for (Function::const_iterator FI = I->begin(), FE = I->end();
+ FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ Vals.insert(&*BI);
+ for (User::const_op_iterator OI = BI->op_begin(),
+ OE = BI->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+ }
+
+ }
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB) {
+ assert(Vals.find(LocA.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ assert(Vals.find(LocB.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ return AliasAnalysis::alias(LocA, LocB);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ virtual void deleteValue(Value *V) {
+ assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
+ AliasAnalysis::deleteValue(V);
+ }
+ virtual void copyValue(Value *From, Value *To) {
+ Vals.insert(To);
+ AliasAnalysis::copyValue(From, To);
+ }
+
+ };
+}
+
+char AliasDebugger::ID = 0;
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+ "AA use debugger", false, true, false)
+
+Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
+
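
AliasDebugger records every value visible when it is initialized and asserts if a query mentions anything else. The fragment below is a hypothetical client bug of the kind -debug-aa catches: cloning a pointer-producing instruction and querying the analysis before announcing the new value via copyValue.

// Hypothetical transform fragment; the AA calls are the ones defined in
// this import (copyValue, alias, Location).
#include "llvm/Instruction.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

static bool mayAliasAfterClone(Instruction *OldPtr, AliasAnalysis &AA,
                               const AliasAnalysis::Location &OtherLoc) {
  Instruction *NewPtr = OldPtr->clone();   // AA has never seen NewPtr
  NewPtr->insertAfter(OldPtr);

  // Querying AA with NewPtr at this point is exactly the bug -debug-aa
  // reports ("Never seen value in AA before"). Announce the value first:
  AA.copyValue(OldPtr, NewPtr);

  return AA.alias(AliasAnalysis::Location(NewPtr), OtherLoc) !=
         AliasAnalysis::NoAlias;
}
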
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 000000000000..2ed694941212
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,652 @@
+//===- AliasSetTracker.cpp - Alias Sets Tracker implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+ assert(!AS.Forward && "Alias set is already forwarding!");
+ assert(!Forward && "This set is a forwarding set!!");
+
+ // Update the alias and access types of this set...
+ AccessTy |= AS.AccessTy;
+ AliasTy |= AS.AliasTy;
+ Volatile |= AS.Volatile;
+
+ if (AliasTy == MustAlias) {
+ // Check that these two merged sets really are must aliases. Since both
+ // used to be must-alias sets, we can just check any pointer from each set
+ // for aliasing.
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ PointerRec *L = getSomePointer();
+ PointerRec *R = AS.getSomePointer();
+
+ // If the pointers are not a must-alias pair, this set becomes a may alias.
+ if (AA.alias(AliasAnalysis::Location(L->getValue(),
+ L->getSize(),
+ L->getTBAAInfo()),
+ AliasAnalysis::Location(R->getValue(),
+ R->getSize(),
+ R->getTBAAInfo()))
+ != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ }
+
+ if (CallSites.empty()) { // Merge call sites...
+ if (!AS.CallSites.empty())
+ std::swap(CallSites, AS.CallSites);
+ } else if (!AS.CallSites.empty()) {
+ CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
+ AS.CallSites.clear();
+ }
+
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
+
+ // Merge the list of constituent pointers...
+ if (AS.PtrList) {
+ *PtrListEnd = AS.PtrList;
+ AS.PtrList->setPrevInList(PtrListEnd);
+ PtrListEnd = AS.PtrListEnd;
+
+ AS.PtrList = 0;
+ AS.PtrListEnd = &AS.PtrList;
+ assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+ }
+}
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+ if (AliasSet *Fwd = AS->Forward) {
+ Fwd->dropRef(*this);
+ AS->Forward = 0;
+ }
+ AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+ assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+ AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+ uint64_t Size, const MDNode *TBAAInfo,
+ bool KnownMustAlias) {
+ assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+ // Check to see if we have to downgrade to _may_ alias.
+ if (isMustAlias() && !KnownMustAlias)
+ if (PointerRec *P = getSomePointer()) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasAnalysis::AliasResult Result =
+ AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+ P->getTBAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ if (Result != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ else // First entry of must alias must have maximum size!
+ P->updateSizeAndTBAAInfo(Size, TBAAInfo);
+ assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
+ }
+
+ Entry.setAliasSet(this);
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+
+ // Add it to the end of the list...
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ *PtrListEnd = &Entry;
+ PtrListEnd = Entry.setPrevInList(PtrListEnd);
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ addRef(); // Entry points to alias set.
+}
+
+void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
+ CallSites.push_back(CS.getInstruction());
+
+ AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return;
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ AliasTy = MayAlias;
+ AccessTy |= Refs;
+ return;
+ }
+
+ // FIXME: This should use mod/ref information to make this not suck so bad
+ AliasTy = MayAlias;
+ AccessTy = ModRef;
+}
+
+/// aliasesPointer - Return true if the specified pointer "may" (or must)
+/// alias one of the members in the set.
+///
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo,
+ AliasAnalysis &AA) const {
+ if (AliasTy == MustAlias) {
+ assert(CallSites.empty() && "Illegal must alias set!");
+
+ // If this is a set of MustAliases, only check to see if the pointer aliases
+ // SOME value in the set.
+ PointerRec *SomePtr = getSomePointer();
+ assert(SomePtr && "Empty must-alias set??");
+ return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+ SomePtr->getSize(),
+ SomePtr->getTBAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo));
+ }
+
+  // If this is a may-alias set, we have to check each pointer in the set to
+  // see whether the given pointer aliases any of them...
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ AliasAnalysis::Location(I.getPointer(), I.getSize(),
+ I.getTBAAInfo())))
+ return true;
+
+ // Check the call sites list and invoke list...
+ if (!CallSites.empty()) {
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
+ if (AA.getModRefInfo(CallSites[i],
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ return false;
+}
+
+bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
+ if (AA.doesNotAccessMemory(CS))
+ return false;
+
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+ AliasAnalysis::NoModRef)
+ return true;
+
+ return false;
+}
+
+void AliasSetTracker::clear() {
+ // Delete all the PointerRec entries.
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
+ I->second->eraseFromList();
+
+ PointerMap.clear();
+
+ // The alias sets should all be clear now.
+ AliasSets.clear();
+}
+
+
+/// findAliasSetForPointer - Given a pointer, find the one alias set to put the
+/// instruction referring to the pointer into. If there are multiple alias sets
+/// that may alias the pointer, merge them together and return the unified set.
+///
+AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
+ uint64_t Size,
+ const MDNode *TBAAInfo) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// containsPointer - Return true if the specified location is represented by
+/// this alias set, false otherwise. This does not modify the AST object or
+/// alias sets.
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
+ return true;
+ return false;
+}
+
+
+
+AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesCallSite(CS, AA))
+ continue;
+
+ if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ else if (!I->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ return FoundSet;
+}
+
+
+
+
+/// getAliasSetForPointer - Return the alias set that the specified pointer
+/// lives in.
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+ const MDNode *TBAAInfo,
+ bool *New) {
+ AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+
+ // Check to see if the pointer is already known.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ // Return the set!
+ return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
+ // Add it to the alias set it aliases.
+ AS->addPointer(*this, Entry, Size, TBAAInfo);
+ return *AS;
+ }
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
+ return AliasSets.back();
+}
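+
+// A minimal client-side sketch of the query above (the helper name and the
+// printing choice are illustrative, not prescribed by this file):
+//
+//   static void inspect(Value *Ptr, uint64_t Size, const MDNode *TBAA,
+//                       AliasSetTracker &AST) {
+//     bool New = false;
+//     AliasSet &AS = AST.getAliasSetForPointer(Ptr, Size, TBAA, &New);
+//     if (New)        // Ptr didn't alias anything tracked so far.
+//       AS.print(errs());
+//     // AS.isMustAlias(), AS.isVolatile(), etc. can now be queried.
+//   }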
+
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ bool NewPtr;
+ addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(LoadInst *LI) {
+ bool NewPtr;
+ AliasSet &AS = addPointer(LI->getOperand(0),
+ AA.getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::Refs, NewPtr);
+ if (LI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(StoreInst *SI) {
+ bool NewPtr;
+ Value *Val = SI->getOperand(0);
+ AliasSet &AS = addPointer(SI->getOperand(1),
+ AA.getTypeStoreSize(Val->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::Mods, NewPtr);
+ if (SI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(VAArgInst *VAAI) {
+ bool NewPtr;
+ addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::ModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(CallSite CS) {
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
+ return true; // Ignore DbgInfo Intrinsics.
+ if (AA.doesNotAccessMemory(CS))
+ return true; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (AS) {
+ AS->addCallSite(CS, AA);
+ return false;
+ }
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addCallSite(CS, AA);
+ return true;
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+ // Dispatch to one of the other add methods.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return add(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return add(SI);
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return add(CI);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ return add(II);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return add(VAAI);
+ return true;
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ add(I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+ assert(&AA == &AST.AA &&
+ "Merging AliasSetTracker objects with different Alias Analyses!");
+
+ // Loop over all of the alias sets in AST, adding the pointers contained
+ // therein into the current alias sets. This can cause alias sets to be
+ // merged together in the current AST.
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+ add(AS.CallSites[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ ASI.getTBAAInfo(),
+ (AliasSet::AccessType)AS.AccessTy, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
+ }
+ }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+ // Drop all call sites.
+ AS.CallSites.clear();
+
+ // Clear the alias set.
+ unsigned NumRefs = 0;
+ while (!AS.empty()) {
+ AliasSet::PointerRec *P = AS.PtrList;
+
+ Value *ValToRemove = P->getValue();
+
+ // Unlink and delete entry from the list of values.
+ P->eraseFromList();
+
+ // Remember how many references need to be dropped.
+ ++NumRefs;
+
+ // Finally, remove the entry.
+ PointerMap.erase(ValToRemove);
+ }
+
+ // Stop using the alias set, removing it.
+ AS.RefCount -= NumRefs;
+ if (AS.RefCount == 0)
+ AS.removeFromTracker(*this);
+}
+
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+ uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+ uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+ AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa));
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(CallSite CS) {
+ if (AA.doesNotAccessMemory(CS))
+ return false; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+ // Dispatch to one of the other remove methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return remove(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return remove(SI);
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return remove(CI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return remove(VAAI);
+ return true;
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely. It should be used when an instruction is deleted
+// from the program to update the AST. If you don't use this, you will end up
+// with dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+ // Notify the alias analysis implementation that this value is gone.
+ AA.deleteValue(PtrVal);
+
+ // If this is a call instruction, remove the callsite from the appropriate
+ // AliasSet (if present).
+ if (CallSite CS = PtrVal) {
+ if (!AA.doesNotAccessMemory(CS)) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward) continue;
+
+ I->removeCallSite(CS);
+ }
+ }
+ }
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(PtrVal);
+ if (I == PointerMap.end()) return; // Noop
+
+ // If we found one, remove the pointer from the alias set it is in.
+ AliasSet::PointerRec *PtrValEnt = I->second;
+ AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+ // Unlink and delete from the list of values.
+ PtrValEnt->eraseFromList();
+
+ // Stop using the alias set.
+ AS->dropRef(*this);
+
+ PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value. Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+ // Notify the alias analysis implementation that this value is copied.
+ AA.copyValue(From, To);
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(From);
+ if (I == PointerMap.end())
+ return; // Noop
+ assert(I->second->hasAliasSet() && "Dead entry?");
+
+ AliasSet::PointerRec &Entry = getEntryFor(To);
+ if (Entry.hasAliasSet()) return; // Already in the tracker!
+
+ // Add it to the alias set it aliases...
+ I = PointerMap.find(From);
+ AliasSet *AS = I->second->getAliasSet(*this);
+ AS->addPointer(*this, Entry, I->second->getSize(),
+ I->second->getTBAAInfo(),
+ true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << (void*)this << ", " << RefCount << "] ";
+ OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
+ switch (AccessTy) {
+ case NoModRef: OS << "No access "; break;
+ case Refs : OS << "Ref "; break;
+ case Mods : OS << "Mod "; break;
+ case ModRef : OS << "Mod/Ref "; break;
+ default: llvm_unreachable("Bad value for AccessTy!");
+ }
+ if (isVolatile()) OS << "[volatile] ";
+ if (Forward)
+ OS << " forwarding to " << (void*)Forward;
+
+
+ if (!empty()) {
+ OS << "Pointers: ";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I != begin()) OS << ", ";
+ WriteAsOperand(OS << "(", I.getPointer());
+ OS << ", " << I.getSize() << ")";
+ }
+ }
+ if (!CallSites.empty()) {
+ OS << "\n " << CallSites.size() << " Call Sites: ";
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ WriteAsOperand(OS, CallSites[i]);
+ }
+ }
+ OS << "\n";
+}
+
+void AliasSetTracker::print(raw_ostream &OS) const {
+ OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+ << PointerMap.size() << " pointer values.\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ I->print(OS);
+ OS << "\n";
+}
+
+void AliasSet::dump() const { print(dbgs()); }
+void AliasSetTracker::dump() const { print(dbgs()); }
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+ // this now dangles!
+}
+
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+ AST->copyValue(getValPtr(), V);
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class AliasSetPrinter : public FunctionPass {
+ AliasSetTracker *Tracker;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AliasSetPrinter() : FunctionPass(ID) {
+ initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker->add(&*I);
+ Tracker->print(errs());
+ delete Tracker;
+ return false;
+ }
+ };
+}
+
+char AliasSetPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
new file mode 100644
index 000000000000..71e0a832696c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -0,0 +1,104 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/Verifier.h"
+#include <cstring>
+
+using namespace llvm;
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+ initializeAliasAnalysisAnalysisGroup(Registry);
+ initializeAliasAnalysisCounterPass(Registry);
+ initializeAAEvalPass(Registry);
+ initializeAliasDebuggerPass(Registry);
+ initializeAliasSetPrinterPass(Registry);
+ initializeNoAAPass(Registry);
+ initializeBasicAliasAnalysisPass(Registry);
+ initializeBlockFrequencyPass(Registry);
+ initializeBranchProbabilityInfoPass(Registry);
+ initializeCFGViewerPass(Registry);
+ initializeCFGPrinterPass(Registry);
+ initializeCFGOnlyViewerPass(Registry);
+ initializeCFGOnlyPrinterPass(Registry);
+ initializePrintDbgInfoPass(Registry);
+ initializeDominanceFrontierPass(Registry);
+ initializeDomViewerPass(Registry);
+ initializeDomPrinterPass(Registry);
+ initializeDomOnlyViewerPass(Registry);
+ initializePostDomViewerPass(Registry);
+ initializeDomOnlyPrinterPass(Registry);
+ initializePostDomPrinterPass(Registry);
+ initializePostDomOnlyViewerPass(Registry);
+ initializePostDomOnlyPrinterPass(Registry);
+ initializeIVUsersPass(Registry);
+ initializeInstCountPass(Registry);
+ initializeIntervalPartitionPass(Registry);
+ initializeLazyValueInfoPass(Registry);
+ initializeLibCallAliasAnalysisPass(Registry);
+ initializeLintPass(Registry);
+ initializeLoopDependenceAnalysisPass(Registry);
+ initializeLoopInfoPass(Registry);
+ initializeMemDepPrinterPass(Registry);
+ initializeMemoryDependenceAnalysisPass(Registry);
+ initializeModuleDebugInfoPrinterPass(Registry);
+ initializePostDominatorTreePass(Registry);
+ initializeProfileEstimatorPassPass(Registry);
+ initializeNoProfileInfoPass(Registry);
+ initializeNoPathProfileInfoPass(Registry);
+ initializeProfileInfoAnalysisGroup(Registry);
+ initializePathProfileInfoAnalysisGroup(Registry);
+ initializeLoaderPassPass(Registry);
+ initializePathProfileLoaderPassPass(Registry);
+ initializeProfileVerifierPassPass(Registry);
+ initializePathProfileVerifierPass(Registry);
+ initializeRegionInfoPass(Registry);
+ initializeRegionViewerPass(Registry);
+ initializeRegionPrinterPass(Registry);
+ initializeRegionOnlyViewerPass(Registry);
+ initializeRegionOnlyPrinterPass(Registry);
+ initializeScalarEvolutionPass(Registry);
+ initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
+ std::string Messages;
+
+ LLVMBool Result = verifyModule(*unwrap(M),
+ static_cast<VerifierFailureAction>(Action),
+ OutMessages? &Messages : 0);
+
+ if (OutMessages)
+ *OutMessages = strdup(Messages.c_str());
+
+ return Result;
+}
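+
+// A minimal sketch of driving the C binding above (module setup elided; the
+// variable names are illustrative):
+//
+//   char *Msg = NULL;
+//   if (LLVMVerifyModule(M, LLVMReturnStatusAction, &Msg))
+//     fprintf(stderr, "module verification failed: %s\n", Msg);
+//   LLVMDisposeMessage(Msg);   // Msg was strdup'd by LLVMVerifyModule.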
+
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+ return verifyFunction(*unwrap<Function>(Fn),
+ static_cast<VerifierFailureAction>(Action));
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFGOnly();
+}
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 000000000000..8330ea7c7036
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,1213 @@
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc), but does no stateful analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+static bool isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
+
+/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
+/// object that never escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ // Set StoreCaptures to True so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
+ // Don't bother analyzing arguments already known not to escape.
+ if (A->hasNoCaptureAttr())
+ return true;
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ }
+ return false;
+}
+
+/// isEscapeSource - Return true if the pointer is one which would have
+/// been considered an escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ return true;
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ return false;
+}
+
+/// getObjectSize - Return the size of the object specified by V, or
+/// UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
+ const Type *AccessTy;
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->hasDefinitiveInitializer())
+ return AliasAnalysis::UnknownSize;
+ AccessTy = GV->getType()->getElementType();
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ if (!AI->isArrayAllocation())
+ AccessTy = AI->getType()->getElementType();
+ else
+ return AliasAnalysis::UnknownSize;
+ } else if (const CallInst* CI = extractMallocCall(V)) {
+ if (!isArrayMalloc(V, &TD))
+ // The size is the argument to the malloc call.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
+ return C->getZExtValue();
+ return AliasAnalysis::UnknownSize;
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ AccessTy = cast<PointerType>(A->getType())->getElementType();
+ else
+ return AliasAnalysis::UnknownSize;
+ } else {
+ return AliasAnalysis::UnknownSize;
+ }
+
+ if (AccessTy->isSized())
+ return TD.getTypeAllocSize(AccessTy);
+ return AliasAnalysis::UnknownSize;
+}
+
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+}
+
+/// isObjectSize - Return true if we can prove that the object specified
+/// by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+namespace {
+ enum ExtensionKind {
+ EK_NotExtended,
+ EK_SignExt,
+ EK_ZeroExt
+ };
+
+ struct VariableGEPIndex {
+ const Value *V;
+ ExtensionKind Extension;
+ int64_t Scale;
+ };
+}
+
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*, recording in Extension whether
+/// we looked through a sign or zero extend. The incoming Value is known to
+/// have IntegerType and it may already be sign or zero extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ ExtensionKind &Extension,
+ const TargetData &TD, unsigned Depth) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+ (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale = Scale.trunc(SmallWidth);
+ Offset = Offset.trunc(SmallWidth);
+ Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+ TD, Depth+1);
+ Scale = Scale.zext(OldWidth);
+ Offset = Offset.zext(OldWidth);
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
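+
+// Worked example for the routine above: for V = add (shl %x, 2), 12 the
+// recursion bottoms out at %x and folds the constants on the way back out,
+// yielding Scale = 4 and Offset = 12, i.e. the linear form 4*%x + 12.  (The
+// name %x is purely illustrative.)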
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that GetUnderlyingObject can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ const TargetData *TD) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+      // The only non-operator case we can handle is a GlobalAlias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0) {
+ // If it's not a GEP, hand it off to SimplifyInstruction to see if it
+ // can come up with something. This matches what GetUnderlyingObject does.
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and use it here.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+    // If we are lacking TargetData information, we can't compute the offsets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (TD == 0) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+ ExtensionKind Extension = EK_NotExtended;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ if (TD->getPointerSizeInBits() > Width)
+ Extension = EK_SignExt;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+ *TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getSExtValue()*Scale;
+ Scale *= IndexScale.getSExtValue();
+
+
+ // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index &&
+ VarIndices[i].Extension == Extension) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale = (int64_t)Scale >> ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, Extension, Scale};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
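+
+// Worked example for the decomposition above, assuming TargetData with 4-byte
+// i32 and 64-bit pointers (value names are illustrative):
+//
+//   %p = getelementptr [10 x i32]* %A, i64 0, i64 %i
+//
+// decomposes into base pointer %A, BaseOffs = 0, and a single VariableGEPIndex
+// { V = %i, Extension = EK_NotExtended, Scale = 4 }, i.e. %p == %A + 4*%i.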
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
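+
+// Small example for the subtraction above: with Dest = { %x*4 } and
+// Src = { %x*12 } (same extension kind), the matching entry is updated in
+// place, leaving Dest = { %x*-8 }; had the scales been equal, the entry would
+// simply have been erased.  (%x is an illustrative name.)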
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+namespace {
+ /// BasicAliasAnalysis - This is the primary alias analysis implementation.
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ BasicAliasAnalysis() : ImmutablePass(ID),
+ // AliasCache rarely has more than 1 or 2 elements,
+ // so start it off fairly small so that clear()
+ // doesn't have to tromp through 64 (the default)
+ // elements on each alias query. This really wants
+ // something like a SmallDenseMap.
+ AliasCache(8) {
+ initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual AliasResult alias(const Location &LocA,
+ const Location &LocB) {
+ assert(AliasCache.empty() && "AliasCache must be cleared after use!");
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
+ LocB.Ptr, LocB.Size, LocB.TBAATag);
+ AliasCache.clear();
+ return Alias;
+ }
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+      // The AliasAnalysis base class has some smarts; let's use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// pointsToConstantMemory - Chase pointers until we find a (constant
+ /// global) or not.
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+
+ /// getModRefBehavior - Return the behavior when calling the given
+ /// call site.
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// getModRefBehavior - Return the behavior when calling the given function.
+ /// For use when the call site is not known.
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ // AliasCache - Track alias queries to guard against recursion.
+ typedef std::pair<Location, Location> LocPair;
+ typedef DenseMap<LocPair, AliasResult> AliasCacheTy;
+ AliasCacheTy AliasCache;
+
+ // Visited - Track instructions visited by pointsToConstantMemory.
+ SmallPtrSet<const Value*, 16> Visited;
+
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
+ // instruction against another.
+ AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2);
+
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
+ // instruction against another.
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ /// aliasSelect - Disambiguate a Select instruction against another value.
+ AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAATag,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAATag);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+ return new BasicAliasAnalysis();
+}
+
+/// pointsToConstantMemory - Returns whether the given pointer value
+/// points to memory that is local to the function, with global constants being
+/// considered local to all functions.
+bool
+BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+
+ unsigned MaxLookup = 8;
+ SmallVector<const Value *, 16> Worklist;
+ Worklist.push_back(Loc.Ptr);
+ do {
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+ if (!Visited.insert(V)) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // An alloca instruction defines local memory.
+ if (OrLocal && isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in
+ // others. GV may even be a declaration, not a definition.
+ if (!GV->isConstant()) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Otherwise be conservative.
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ } while (!Worklist.empty() && --MaxLookup);
+
+ Visited.clear();
+ return Worklist.empty();
+}
+
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ // If the function declares it doesn't access memory, we can't do better.
+ if (F->doesNotAccessMemory())
+ return DoesNotAccessMemory;
+
+ // For intrinsics, we can check the table.
+ if (unsigned iid = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the function declares it only reads memory, go with that.
+ if (F->onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // Otherwise be conservative.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+}
+
+/// getModRefInfo - Check to see if the specified callsite can clobber the
+/// specified memory object. Since we only look at local properties of this
+/// function, we really can't say much about this query. We do, however, use
+/// simple "address taken" analysis on local objects.
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ "AliasAnalysis query involving multiple functions!");
+
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
+
+ // If this is a tail call and Loc.Ptr points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+ // the current function not to the current function, and a tail callee
+ // may reference them.
+ if (isa<AllocaInst>(Object))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
+ return NoModRef;
+
+ // If the pointer is to a locally allocated object that does not escape,
+ // then the call can not mod/ref the pointer unless the call takes the pointer
+ // as an argument, and itself doesn't capture it.
+ if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ isNonEscapingLocalObject(Object)) {
+ bool PassedAsArg = false;
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) &&
+ !CS.paramHasAttr(ArgNo+1, Attribute::ByVal)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
+ PassedAsArg = true;
+ break;
+ }
+ }
+
+ if (!PassedAsArg)
+ return NoModRef;
+ }
+
+ ModRefResult Min = ModRef;
+
+ // Finally, handle specific knowledge of intrinsics.
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != 0)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ uint64_t Len = UnknownSize;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ Value *Src = II->getArgOperand(1);
+      // If the loc can't overlap either the dest or the source, then the call
+      // doesn't mod/ref it at all.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ if (isNoAlias(Location(Src, Len), Loc))
+ return NoModRef;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ } else if (isNoAlias(Location(Src, Len), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ uint64_t Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ if (isNoAlias(Location(Dest, Len), Loc))
+ return NoModRef;
+ }
+ // We know that memset doesn't load anything.
+ Min = Mod;
+ break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (TD) {
+ Value *Op1 = II->getArgOperand(0);
+ uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
+ MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
+ if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(1),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::invariant_end: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(2),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vld1: {
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vst1: {
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ }
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+ return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
+}
+
+/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+/// against another pointer. We know that V1 is a GEP, but we don't know
+/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
+/// UnderlyingV2 is the same for V2.
+///
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
+ int64_t GEP1BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+
+ // If we have two gep instructions with must-alias'ing base pointers, figure
+ // out if the indexes to the GEP tell us anything about the derived pointer.
+ if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
+
+    // If we get a No or May, then return it immediately; no amount of analysis
+    // will improve this situation.
+ if (BaseAlias != MustAlias) return BaseAlias;
+
+ // Otherwise, we have a MustAlias. Since the base pointers alias each other
+ // exactly, see if the computed offset from the common pointer tells us
+ // about the relation of the resulting pointer.
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ int64_t GEP2BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+
+ // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+ // symbolic difference.
+ GEP1BaseOffset -= GEP2BaseOffset;
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+ } else {
+ // Check to see if these two pointers are related by the getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
+
+ // If both accesses are unknown size, we can't do anything useful here.
+ if (V1Size == UnknownSize && V2Size == UnknownSize)
+ return MayAlias;
+
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ V2, V2Size, V2TBAAInfo);
+ if (R != MustAlias)
+      // If V2 may alias the GEP base pointer, conservatively return MayAlias.
+ // If V2 is known not to alias GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+ }
+
+  // In the two-GEP case, if there is no difference in the offsets of the
+  // computed pointers, the resultant pointers are a must alias. This
+  // happens when we have two lexically identical GEPs (for example).
+ //
+  // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+  // must-aliases the GEP's base pointer, the end result is a must alias also.
+ if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+ return MustAlias;
+
+ // If there is a difference between the pointers, but the difference is
+ // less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping.
+ if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return PartialAlias;
+ }
+
+ // If we have a known constant offset, see if this offset is larger than the
+ // access size being queried. If so, and if no variable indices can remove
+ // pieces of this constant, then we know we have a no-alias. For example,
+ // &A[100] != &A.
+
+ // In order to handle cases like &A[100][i] where i is an out of range
+ // subscript, we have to ignore all constant offset pieces that are a multiple
+ // of a scaled index. Do this by removing constant offsets that are a
+  // multiple of any of our variable indices. This allows us to disambiguate
+ // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
+ // provides an offset of 4 bytes (assuming a <= 4 byte access).
+ for (unsigned i = 0, e = GEP1VariableIndices.size();
+ i != e && GEP1BaseOffset;++i)
+ if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
+ GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
+
+ // If our known offset is bigger than the access size, we know we don't have
+ // an alias.
+ if (GEP1BaseOffset) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return NoAlias;
+ }
+
+ // Statically, we can see that the base objects are the same, but the
+ // pointers have dynamic offsets which we can't resolve. And none of our
+ // little tricks above worked.
+ //
+ // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
+ // practical effect of this is protecting TBAA in the case of dynamic
+ // indices into arrays of unions. An alternative way to solve this would
+ // be to have clang emit extra metadata for unions and/or union accesses.
+ // A union-specific solution wouldn't handle the problem for malloc'd
+ // memory however.
+ return PartialAlias;
+}
+
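+// MergeAliasResults - Conservatively combine the results of two subqueries:
+// merging MustAlias with MustAlias yields MustAlias, merging MustAlias with
+// PartialAlias yields PartialAlias, and merging NoAlias with any different
+// result degrades to MayAlias.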
+static AliasAnalysis::AliasResult
+MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
+ // If the results agree, take it.
+ if (A == B)
+ return A;
+ // A mix of PartialAlias and MustAlias is PartialAlias.
+ if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
+ (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
+ return AliasAnalysis::PartialAlias;
+ // Otherwise, we don't know anything.
+ return AliasAnalysis::MayAlias;
+}
+
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
+/// instruction against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for aliases between the values on corresponding arms.
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+ if (SI->getCondition() == SI2->getCondition()) {
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
+ SI2->getTrueValue(), V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
+ SI2->getFalseValue(), V2Size, V2TBAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+ }
+
+ // If both arms of the Select node NoAlias or MustAlias V2, then the result
+ // is NoAlias or MustAlias accordingly. Otherwise, return MayAlias.
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If the values are PHIs in the same block, we can do a more precise
+ // as well as efficient check: just check for aliases between the values
+ // on corresponding edges.
+ if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+ if (PN2->getParent() == PN->getParent()) {
+ AliasResult Alias =
+ aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
+ V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ AliasResult ThisAlias =
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2TBAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+ return Alias;
+ }
+
+ SmallPtrSet<Value*, 4> UniqueSrc;
+ SmallVector<Value*, 4> V1Srcs;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PV1 = PN->getIncomingValue(i);
+ if (isa<PHINode>(PV1))
+ // If any of the sources is itself a PHI, return MayAlias conservatively
+ // to avoid a compile-time explosion. The worst possible case is if both
+ // sides are PHI nodes, in which case this is O(m x n) time where 'm'
+ // and 'n' are the number of PHI sources.
+ return MayAlias;
+ if (UniqueSrc.insert(PV1))
+ V1Srcs.push_back(PV1);
+ }
+
+ AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V1Srcs[0], PNSize, PNTBAAInfo);
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If all sources of the PHI node NoAlias or MustAlias V2, then the result
+ // is NoAlias or MustAlias accordingly. Otherwise, return MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V, PNSize, PNTBAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+
+ return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are.
+ if (V1Size == 0 || V2Size == 0)
+ return NoAlias;
+
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = GetUnderlyingObject(V1, TD);
+ const Value *O2 = GetUnderlyingObject(V2, TD);
+
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Constant pointers can't alias with non-const isIdentifiedObject objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls
+ // in the same function.
+ if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke or load and the other is a
+ // non-escaping local object within the same function, then we know the
+ // object couldn't escape to a point where the call could return it.
+ //
+ // Note that if the pointers are in different functions, there are a
+ // variety of complications. A call with a nocapture argument may still
+ // temporarily store the nocapture argument's value in a temporary memory
+ // location if that memory location doesn't escape. Or it may pass a
+ // nocapture value to other functions as long as they don't capture it.
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ if (TD)
+ if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+ return NoAlias;
+
+ // Check the cache before climbing up use-def chains. This also terminates
+ // otherwise infinitely recursive queries.
+ LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ if (V1 > V2)
+ std::swap(Locs.first, Locs.second);
+ std::pair<AliasCacheTy::iterator, bool> Pair =
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
+ // GEP can't simplify, we don't even look at the PHI cases.
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(O1, O2);
+ }
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+ AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+ AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+ AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ // If both pointers are pointing into the same object and one of the
+ // accesses covers the entire object, then the accesses must overlap
+ // in some way.
+ if (TD && O1 == O2)
+ if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+ return AliasCache[Locs] = PartialAlias;
+
+ AliasResult Result =
+ AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ return AliasCache[Locs] = Result;
+}
diff --git a/contrib/llvm/lib/Analysis/BlockFrequency.cpp b/contrib/llvm/lib/Analysis/BlockFrequency.cpp
new file mode 100644
index 000000000000..4b86d1db1f04
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BlockFrequency.cpp
@@ -0,0 +1,59 @@
+//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequency.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
+ true, true)
+
+char BlockFrequency::ID = 0;
+
+
+BlockFrequency::BlockFrequency() : FunctionPass(ID) {
+ initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
+ BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequency::~BlockFrequency() {
+ delete BFI;
+}
+
+void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool BlockFrequency::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ BFI->doFunction(&F, &BPI);
+ return false;
+}
+
+/// getBlockFreq - Return the block frequency. Never returns 0; the value is
+/// always positive. Note that the entry block frequency is 1024, so the value
+/// itself is meaningless on its own and should only be compared against other
+/// block frequencies. We do this to avoid using floating point.
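+/// For example, if the entry block reports 1024 and a loop body reports 8192,
+/// the loop body is expected to run roughly eight times per function entry;
+/// only such ratios are meaningful.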
+///
+uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
+ return BFI->getBlockFreq(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..e39cd221b5a7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -0,0 +1,363 @@
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+
+char BranchProbabilityInfo::ID = 0;
+
+namespace {
+// Please note that BranchProbabilityAnalysis is not a FunctionPass.
+// It is created by BranchProbabilityInfo (which is a FunctionPass), which
+// provides a clear interface. Thanks to that, all heuristics and other
+// private methods are hidden in the .cpp file.
+class BranchProbabilityAnalysis {
+
+ typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+
+ DenseMap<Edge, uint32_t> *Weights;
+
+ BranchProbabilityInfo *BP;
+
+ LoopInfo *LI;
+
+
+ // Weights are for internal use only. They are used by heuristics to help
+ // estimate edge probabilities. Example:
+ //
+ // Using "Loop Branch Heuristics" we predict weights of edges for the
+ // block BB2.
+ // ...
+ // |
+ // V
+ // BB1<-+
+ // | |
+ // | | (Weight = 128)
+ // V |
+ // BB2--+
+ // |
+ // | (Weight = 4)
+ // V
+ // BB3
+ //
+ // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696..
+ // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303..
+
+ static const uint32_t LBH_TAKEN_WEIGHT = 128;
+ static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+ // Standard weight value. Used when none of the heuristics set weight for
+ // the edge.
+ static const uint32_t NORMAL_WEIGHT = 16;
+
+ // Minimum weight of an edge. Please note, that weight is NEVER 0.
+ static const uint32_t MIN_WEIGHT = 1;
+
+ // Return TRUE if BB leads directly to a Return Instruction.
+ static bool isReturningBlock(BasicBlock *BB) {
+ SmallPtrSet<BasicBlock *, 8> Visited;
+
+ while (true) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ return true;
+
+ if (TI->getNumSuccessors() > 1)
+ break;
+
+ // It is an unreachable block, which we can treat like a return instruction.
+ if (TI->getNumSuccessors() == 0)
+ return true;
+
+ Visited.insert(BB);
+ BB = TI->getSuccessor(0);
+
+ // Stop if cycle is detected.
+ if (Visited.count(BB))
+ return false;
+ }
+
+ return false;
+ }
+
+ // Multiply Edge Weight by two.
+ void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+ uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+ uint32_t MaxWeight = getMaxWeightFor(Src);
+
+ if (Weight * 2 > MaxWeight)
+ BP->setEdgeWeight(Src, Dst, MaxWeight);
+ else
+ BP->setEdgeWeight(Src, Dst, Weight * 2);
+ }
+
+ // Divide Edge Weight by two.
+ void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+ uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+
+ assert(Weight > 0);
+ if (Weight / 2 < MIN_WEIGHT)
+ BP->setEdgeWeight(Src, Dst, MIN_WEIGHT);
+ else
+ BP->setEdgeWeight(Src, Dst, Weight / 2);
+ }
+
+
+ uint32_t getMaxWeightFor(BasicBlock *BB) const {
+ return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+ }
+
+public:
+ BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
+ BranchProbabilityInfo *BP, LoopInfo *LI)
+ : Weights(W), BP(BP), LI(LI) {
+ }
+
+ // Return Heuristics
+ void calcReturnHeuristics(BasicBlock *BB);
+
+ // Pointer Heuristics
+ void calcPointerHeuristics(BasicBlock *BB);
+
+ // Loop Branch Heuristics
+ void calcLoopBranchHeuristics(BasicBlock *BB);
+
+ bool runOnFunction(Function &F);
+};
+} // end anonymous namespace
+
+// Calculate Edge Weights using "Return Heuristics". Predict a successor which
+// leads directly to Return Instruction will not be taken.
+void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
+ if (BB->getTerminator()->getNumSuccessors() == 1)
+ return;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ if (isReturningBlock(Succ)) {
+ decEdgeWeight(BB, Succ);
+ }
+ }
+}
+
+// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
+// between two pointer or pointer and NULL will fail.
+void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+ BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI || !CI->isEquality())
+ return;
+
+ Value *LHS = CI->getOperand(0);
+
+ if (!LHS->getType()->isPointerTy())
+ return;
+
+ assert(CI->getOperand(1)->getType()->isPointerTy());
+
+ BasicBlock *Taken = BI->getSuccessor(0);
+ BasicBlock *NonTaken = BI->getSuccessor(1);
+
+ // p != 0 -> isProb = true
+ // p == 0 -> isProb = false
+ // p != q -> isProb = true
+ // p == q -> isProb = false;
+ bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
+ if (!isProb)
+ std::swap(Taken, NonTaken);
+
+ incEdgeWeight(BB, Taken);
+ decEdgeWeight(BB, NonTaken);
+}
+
+// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
+// as taken, exiting edges as not-taken.
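+// For example, a loop latch with one backedge and one exiting successor gets
+// weight 128 on the edge back to the header and weight 4 on the exiting edge,
+// as in the diagram above.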
+void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
+ uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
+
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ SmallVector<BasicBlock *, 8> BackEdges;
+ SmallVector<BasicBlock *, 8> ExitingEdges;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ Loop *SuccL = LI->getLoopFor(Succ);
+ if (SuccL != L)
+ ExitingEdges.push_back(Succ);
+ else if (Succ == L->getHeader())
+ BackEdges.push_back(Succ);
+ }
+
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
+ if (backWeight < NORMAL_WEIGHT)
+ backWeight = NORMAL_WEIGHT;
+
+ for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
+ EE = BackEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Back = *EI;
+ BP->setEdgeWeight(BB, Back, backWeight);
+ }
+ }
+
+ uint32_t numExitingEdges = ExitingEdges.size();
+ if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
+ uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
+ if (exitWeight < MIN_WEIGHT)
+ exitWeight = MIN_WEIGHT;
+
+ for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
+ EE = ExitingEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Exiting = *EI;
+ BP->setEdgeWeight(BB, Exiting, exitWeight);
+ }
+ }
+}
+
+bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // Only LBH uses the setEdgeWeight method.
+ calcLoopBranchHeuristics(BB);
+
+ // PH and RH use only the incEdgeWeight and decEdgeWeight methods, so as
+ // not to efface the LBH results.
+ calcPointerHeuristics(BB);
+ calcReturnHeuristics(BB);
+ }
+
+ return false;
+}
+
+void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+}
+
+bool BranchProbabilityInfo::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ BranchProbabilityAnalysis BPA(&Weights, this, &LI);
+ return BPA.runOnFunction(F);
+}
+
+uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const {
+ // Hot probability is strictly greater than 4/5 = 80%.
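+ // For example, an edge with weight 81 out of a block sum of 100 is hot
+ // (81 * 5 = 405 > 100 * 4 = 400), while weight 80 is not (400 > 400 fails).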
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ BasicBlock *MaxSucc = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+// Return the edge's weight. If it isn't recorded, return DEFAULT_WEIGHT.
+uint32_t
+BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
+ Edge E(Src, Dst);
+ DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
+
+ if (I != Weights.end())
+ return I->second;
+
+ return DEFAULT_WEIGHT;
+}
+
+void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
+ uint32_t Weight) {
+ Weights[std::make_pair(Src, Dst)] = Weight;
+ DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
+ << Dst->getNameStr() << " weight to " << Weight
+ << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
+}
+
+
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
+
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
+ BasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 000000000000..7bb063fbbbcf
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,165 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFGPrinter.h"
+
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct CFGViewer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFG();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGViewer::ID = 0;
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+
+namespace {
+ struct CFGOnlyViewer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFGOnly();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyViewer::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true)
+
+namespace {
+ struct CFGPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGPrinter() : FunctionPass(ID) {
+ initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGPrinter::ID = 0;
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true)
+
+namespace {
+ struct CFGOnlyPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyPrinter() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F, true);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyPrinter::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true)
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+ ViewGraph(this, "cfg" + getNameStr());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG,
+/// this can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+ ViewGraph(this, "cfg" + getNameStr(), true);
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+ return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+ return new CFGOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 000000000000..b2c27d1dfc4b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,148 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// As its comment mentions, PointerMayBeCaptured can be expensive.
+/// However, it's not easy for BasicAA to cache the result, because
+/// it's an ImmutablePass. To work around this, bound queries at a
+/// fixed number of uses.
+///
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specifies whether
+/// storing the value (or part of it) into memory anywhere automatically
+/// counts as capturing it or not.
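+/// For example, storing the pointer itself to memory, or passing it to a call
+/// that does not mark the argument nocapture, generally counts as a capture
+/// below, while loading through the pointer or comparing a noalias call's
+/// result against null does not.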
+bool llvm::PointerMayBeCaptured(const Value *V,
+ bool ReturnCaptures, bool StoreCaptures) {
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
+ SmallVector<Use*, Threshold> Worklist;
+ SmallSet<Use*, Threshold> Visited;
+ int Count = 0;
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return true;
+
+ Use *U = &UI.getUse();
+ Visited.insert(U);
+ Worklist.push_back(U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments. Note that
+ // calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (CallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+ // The parameter is not marked 'nocapture' - captured.
+ return true;
+ // Only passed via 'nocapture' arguments, or is the called function - not
+ // captured.
+ break;
+ }
+ case Instruction::Load:
+ // Loading from a pointer does not cause it to be captured.
+ break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ break;
+ case Instruction::Ret:
+ if (ReturnCaptures)
+ return true;
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - conservatively assume it may be captured.
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ return true;
+ // Storing to the pointee does not cause the pointer to be captured.
+ break;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ // The original value is not captured via this if the new value isn't.
+ for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ if (Visited.insert(U))
+ Worklist.push_back(U);
+ }
+ break;
+ case Instruction::ICmp:
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
+ if (isNoAliasCall(V->stripPointerCasts()))
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ break;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ return true;
+ default:
+ // Something else - be conservative and say it is captured.
+ return true;
+ }
+ }
+
+ // All uses examined - not captured.
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 000000000000..7fca17eb69f6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,1412 @@
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FEnv.h"
+#include <cerrno>
+#include <cmath>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// TargetData. This always returns a non-null constant, but it may be a
+/// ConstantExpr if unfoldable.
+static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+ const TargetData &TD) {
+
+ // This only handles casts to vectors currently.
+ const VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ if (DestVTy == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+ // vector so the code below can handle it uniformly.
+ if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+ Constant *Ops = C; // don't take the address of C!
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ }
+
+ // If this is a bitcast from constant vector -> vector, fold it.
+ ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ if (CV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If the element types match, VMCore can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = CV->getNumOperands();
+ if (NumDstElt == NumSrcElt)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ const Type *SrcEltTy = CV->getType()->getElementType();
+ const Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+ // First things first: we only want to think about integers here, so if
+ // we have something in FP form, recast it as an integer.
+ if (DstEltTy->isFloatingPointTy()) {
+ // Fold to a vector of integers with the same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ const Type *DestIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, TD);
+ if (!C) return ConstantExpr::getBitCast(C, DestTy);
+
+ // Finally, VMCore can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer; if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ const Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CV = dyn_cast<ConstantVector>(C);
+ if (!CV) // If VMCore wasn't able to fold it, bail out.
+ return C;
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same total bit width, but with different numbers of elements. Do
+ // the conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = TD.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ } else {
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+ }
+
+ return ConstantVector::get(Result);
+}
+
+
+/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
+/// from a global, return the global and the constant. Because of
+/// constantexprs, this function is recursive.
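+/// For example, given "getelementptr ([5 x i32]* @a, i32 0, i32 3)" and a
+/// target where i32 occupies 4 bytes, this returns @a with an offset of 12.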
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+ int64_t &Offset, const TargetData &TD) {
+ // Trivial case, constant is the global.
+ if ((GV = dyn_cast<GlobalValue>(C))) {
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, if this isn't a constant expr, bail out.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return false;
+
+ // Look through ptr->int and ptr->ptr casts.
+ if (CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::BitCast)
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // Cannot compute this if the element type of the pointer is missing size
+ // info.
+ if (!cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return false;
+
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ gep_type_iterator GTI = gep_type_begin(CE);
+ for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
+ i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*i);
+ if (!CI) return false; // Index isn't a simple constant?
+ if (CI->isZero()) continue; // Not adding anything.
+
+ if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
+ // N = N + Offset
+ Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
+ } else {
+ const SequentialType *SQT = cast<SequentialType>(*GTI);
+ Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
+/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the
+/// pointer to copy results into and BytesLeft is the number of bytes left in
+/// the CurPtr buffer. TD is the target data.
+static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
+ unsigned char *CurPtr, unsigned BytesLeft,
+ const TargetData &TD) {
+ assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ "Out of range access");
+
+ // If this element is zero or undefined, we can just return since *CurPtr is
+ // zero initialized.
+ if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ return true;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getBitWidth() > 64 ||
+ (CI->getBitWidth() & 7) != 0)
+ return false;
+
+ uint64_t Val = CI->getZExtValue();
+ unsigned IntBytes = unsigned(CI->getBitWidth()/8);
+
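+ // For example, an i32 constant 0x12345678 read from ByteOffset 0 fills
+ // CurPtr with the bytes 0x78, 0x56, 0x34, 0x12 (least significant byte
+ // first), matching the little-endian layout the callers rely on.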
+ for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
+ CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
+ ++ByteOffset;
+ }
+ return true;
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isDoubleTy()) {
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ if (CFP->getType()->isFloatTy()){
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ return false;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ const StructLayout *SL = TD.getStructLayout(CS->getType());
+ unsigned Index = SL->getElementContainingOffset(ByteOffset);
+ uint64_t CurEltOffset = SL->getElementOffset(Index);
+ ByteOffset -= CurEltOffset;
+
+ while (1) {
+ // If the element access is to the element itself and not to tail padding,
+ // read the bytes from the element.
+ uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+
+ if (ByteOffset < EltSize &&
+ !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
+ BytesLeft, TD))
+ return false;
+
+ ++Index;
+
+ // Check to see if we read from the last struct element, if so we're done.
+ if (Index == CS->getType()->getNumElements())
+ return true;
+
+ // If we read all of the bytes we needed from this element we're done.
+ uint64_t NextEltOffset = SL->getElementOffset(Index);
+
+ if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ return true;
+
+ // Move to the next element of the struct.
+ CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
+ BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ ByteOffset = 0;
+ CurEltOffset = NextEltOffset;
+ }
+ // not reached.
+ }
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType());
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ for (; Index != CA->getType()->getNumElements(); ++Index) {
+ if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+ if (EltSize >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= EltSize;
+ CurPtr += EltSize;
+ }
+ return true;
+ }
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType());
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ for (; Index != CV->getType()->getNumElements(); ++Index) {
+ if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+ if (EltSize >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= EltSize;
+ CurPtr += EltSize;
+ }
+ return true;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr &&
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
+ }
+
+ // Otherwise, unknown initializer type.
+ return false;
+}
+
+static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
+ const TargetData &TD) {
+ const Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
+
+ // If this isn't an integer load we can't fold it directly.
+ if (!IntType) {
+ // If this is a float/double load, we can try folding it as an int32/64 load
+ // and then bitcast the result. This can be useful for union cases. Note
+ // that address spaces don't matter here since we're not going to produce
+ // an actual new load.
+ const Type *MapTy;
+ if (LoadTy->isFloatTy())
+ MapTy = Type::getInt32PtrTy(C->getContext());
+ else if (LoadTy->isDoubleTy())
+ MapTy = Type::getInt64PtrTy(C->getContext());
+ else if (LoadTy->isVectorTy()) {
+ MapTy = IntegerType::get(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy));
+ MapTy = PointerType::getUnqual(MapTy);
+ } else
+ return 0;
+
+ C = FoldBitCast(C, MapTy, TD);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
+ return FoldBitCast(Res, LoadTy, TD);
+ return 0;
+ }
+
+ unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
+ if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+
+ GlobalValue *GVal;
+ int64_t Offset;
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ return 0;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ !GV->getInitializer()->getType()->isSized())
+ return 0;
+
+ // If the load starts before the beginning of the global (a negative offset),
+ // some of the loaded bytes may still be valid, but we don't try to handle
+ // this case.
+ if (Offset < 0) return 0;
+
+ // If we're not accessing anything in this constant, the result is undefined.
+ if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ return UndefValue::get(IntType);
+
+ unsigned char RawBytes[32] = {0};
+ if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes,
+ BytesLoaded, TD))
+ return 0;
+
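+ // Assemble the loaded integer from the raw bytes, most significant byte
+ // first; for example, RawBytes of {0x78, 0x56, 0x34, 0x12} for a 4-byte
+ // load produces the i32 value 0x12345678.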
+ APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[BytesLoaded-1-i];
+ }
+
+ return ConstantInt::get(IntType->getContext(), ResultVal);
+}
+
+/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
+/// produce if it is constant and determinable. If this is not determinable,
+/// return null.
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
+ const TargetData *TD) {
+ // First, try the easy cases:
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+
+ // If the loaded value isn't a constant expr, we can't handle it.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return 0;
+
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+ return V;
+ }
+
+ // Instead of loading a constant C string, use the corresponding integer
+ // value directly if the string is short enough.
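+ // For example, an i32 load of the constant string "abc" folds (on a
+ // little-endian target) to the immediate 0x00636261, with 'a' in the low
+ // byte.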
+ std::string Str;
+ if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.length();
+ const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ // Replace load with immediate integer if the result is an integer or fp
+ // value.
+ if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+ (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
+ APInt StrVal(NumBits, 0);
+ APInt SingleChar(NumBits, 0);
+ if (TD->isLittleEndian()) {
+ for (signed i = StrLen-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < StrLen; i++) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ // Append NULL at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+
+ Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+ if (Ty->isFloatingPointTy())
+ Res = ConstantExpr::getBitCast(Res, Ty);
+ return Res;
+ }
+ }
+
+ // If this load comes from anywhere in a constant global, and if the global
+ // is all undef or zero, we know what it loads.
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+ if (GV->getInitializer()->isNullValue())
+ return Constant::getNullValue(ResTy);
+ if (isa<UndefValue>(GV->getInitializer()))
+ return UndefValue::get(ResTy);
+ }
+ }
+
+ // Try hard to fold loads from bitcasted strange and non-type-safe things. We
+ // currently don't do any of this for big endian systems. It can be
+ // generalized in the future if someone is interested.
+ if (TD && TD->isLittleEndian())
+ return FoldReinterpretLoadFromConstPtr(CE, *TD);
+ return 0;
+}
+
+static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){
+ if (LI->isVolatile()) return 0;
+
+ if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
+ return ConstantFoldLoadFromConstPtr(C, TD);
+
+ return 0;
+}
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together. If target data info is available, it is provided as TD,
+/// otherwise TD is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+ Constant *Op1, const TargetData *TD){
+ // SROA
+
+ // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+ // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+ // bits.
+
+
+ // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+ // constant. This happens frequently when iterating over a global array.
+ if (Opc == Instruction::Sub && TD) {
+ GlobalValue *GV1, *GV2;
+ int64_t Offs1, Offs2;
+
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+ GV1 == GV2) {
+ // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+ return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+ }
+ }
+
+ return 0;
+}
+
+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly cast by the
+/// getelementptr.
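+/// For example, on a target with 64-bit pointers an "i32 4" array index is
+/// rewritten as "i64 4" before the getelementptr is re-created.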
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ if (!TD) return 0;
+ const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+ bool Any = false;
+ SmallVector<Constant*, 32> NewIdxs;
+ for (unsigned i = 1; i != NumOps; ++i) {
+ if ((i == 1 ||
+ !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+ reinterpret_cast<Value *const *>(Ops+1),
+ i-1))) &&
+ Ops[i]->getType() != IntPtrTy) {
+ Any = true;
+ NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+ true,
+ IntPtrTy,
+ true),
+ Ops[i], IntPtrTy));
+ } else
+ NewIdxs.push_back(Ops[i]);
+ }
+ if (!Any) return 0;
+
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ return C;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ Constant *Ptr = Ops[0];
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ return 0;
+
+ const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+
+ // If this is a constant expr gep that is effectively computing an
+ // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+ for (unsigned i = 1; i != NumOps; ++i)
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (NumOps == 2 &&
+ cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((CE == 0 || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+ if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+ Res = ConstantFoldConstantExpression(ResCE, TD);
+ return Res;
+ }
+ }
+ return 0;
+ }
+
+ unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ APInt Offset = APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)Ops+1, NumOps-1));
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+
+ // If this is a GEP of a GEP, fold it all into a single GEP.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+
+ // Do not try to incorporate the sub-GEP if some index is not a constant integer.
+ bool AllConstantInt = true;
+ for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+ if (!isa<ConstantInt>(NestedOps[i])) {
+ AllConstantInt = false;
+ break;
+ }
+ if (!AllConstantInt)
+ break;
+
+ Ptr = cast<Constant>(GEP->getOperand(0));
+ Offset += APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)NestedOps.data(),
+ NestedOps.size()));
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ }
+
+ // If the base value for this address is a literal integer value, fold the
+ // getelementptr to the resulting integer value cast to the pointer type.
+ APInt BasePtr(BitWidth, 0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ if (Ptr->isNullValue() || BasePtr != 0) {
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ const Type *Ty = Ptr->getType();
+ SmallVector<Constant*, 32> NewIdxs;
+ do {
+ if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ if (ATy->isPointerTy()) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (!NewIdxs.empty())
+ break;
+
+ // Only handle pointers to sized types, not pointers to functions.
+ if (!ATy->getElementType()->isSized())
+ return 0;
+ }
+
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ if (ElemSize == 0)
+ // The element size is 0. This may be [0 x Ty]*, so just use a zero
+ // index for this level and proceed to the next level to see if it can
+ // accommodate the offset.
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ else {
+ // The element size is non-zero; divide the offset by the element
+ // size (rounding down) to compute the index at this level.
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ }
+ Ty = ATy->getElementType();
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is at least as safe as the StructLayout API because we
+ // know the offset is within the struct at this point.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return 0;
+
+ // Create a GEP.
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+ assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ C = FoldBitCast(C, ResultTy, *TD);
+
+ return C;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding public APIs
+//===----------------------------------------------------------------------===//
+
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
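+///
+/// A minimal caller-side sketch (identifiers illustrative):
+///   if (Constant *C = ConstantFoldInstruction(I, TD)) {
+///     I->replaceAllUsesWith(C);
+///     I->eraseFromParent();
+///   }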
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+ // Handle PHI nodes quickly here...
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Constant *CommonValue = 0;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is undef then skip it. Note that while we could
+ // skip the value if it is equal to the phi node itself we choose not to
+ // because that would break the rule that constant folding only applies if
+ // all operands are constants.
+ if (isa<UndefValue>(Incoming))
+ continue;
+ // If the incoming value is not a constant, or is a different constant to
+ // the one we saw previously, then give up.
+ Constant *C = dyn_cast<Constant>(Incoming);
+ if (!C || (CommonValue && C != CommonValue))
+ return 0;
+ CommonValue = C;
+ }
+
+ // If we reach here, all incoming values are the same constant or undef.
+ return CommonValue ? CommonValue : UndefValue::get(PN->getType());
+ }
+
+ // Scan the operand list, checking to see if they are all constants; if so,
+ // hand off to ConstantFoldInstOperands.
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (Constant *Op = dyn_cast<Constant>(*i))
+ Ops.push_back(Op);
+ else
+ return 0; // All operands not constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return ConstantFoldLoadInst(LI, TD);
+
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ return ConstantExpr::getInsertValue(
+ cast<Constant>(IVI->getAggregateOperand()),
+ cast<Constant>(IVI->getInsertedValueOperand()),
+ IVI->getIndices());
+
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ return ConstantExpr::getExtractValue(
+ cast<Constant>(EVI->getAggregateOperand()),
+ EVI->getIndices());
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData. If successful, the constant result is
+/// returned; if not, null is returned.
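+///
+/// For example, with TargetData available, the classic "sizeof(i32)" idiom
+///   ptrtoint (i32* getelementptr (i32* null, i32 1) to i64)
+/// folds down to the integer constant i64 4 (an illustrative sketch).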
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+ const TargetData *TD) {
+ SmallVector<Constant*, 8> Ops;
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+ i != e; ++i) {
+ Constant *NewC = cast<Constant>(*i);
+ // Recursively fold the ConstantExpr's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
+ NewC = ConstantFoldConstantExpression(NewCE, TD);
+ Ops.push_back(NewC);
+ }
+
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ TD);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned, if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
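+///
+/// Illustrative caller sketch (identifiers such as I32Ty and TD are assumed
+/// to be in scope):
+///   Constant *Ops[] = { ConstantInt::get(I32Ty, 2),
+///                       ConstantInt::get(I32Ty, 3) };
+///   Constant *Five =
+///     ConstantFoldInstOperands(Instruction::Add, I32Ty, Ops, 2, TD);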
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+ Constant* const* Ops, unsigned NumOps,
+ const TargetData *TD) {
+ // Handle easy binops first.
+ if (Instruction::isBinaryOp(Opcode)) {
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ return C;
+
+ return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+ }
+
+ switch (Opcode) {
+ default: return 0;
+ case Instruction::ICmp:
+ case Instruction::FCmp: assert(0 && "Invalid for compares");
+ case Instruction::Call:
+ if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
+ if (canConstantFoldCallTo(F))
+ return ConstantFoldCall(F, Ops, NumOps - 1);
+ return 0;
+ case Instruction::PtrToInt:
+ // If the input is an inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ Constant *Input = CE->getOperand(0);
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
+ if (TD->getPointerSizeInBits() < InWidth) {
+ Constant *Mask =
+ ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
+ TD->getPointerSizeInBits()));
+ Input = ConstantExpr::getAnd(Input, Mask);
+ }
+ // Do a zext or trunc to get to the dest size.
+ return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::IntToPtr:
+ // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+ // the int size is >= the ptr size. This requires knowing the width of a
+ // pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (TD &&
+ TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+ CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::BitCast:
+ if (TD)
+ return FoldBitCast(Ops[0], DestTy, *TD);
+ return ConstantExpr::getBitCast(Ops[0], DestTy);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD))
+ return C;
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ return C;
+
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
+ }
+}
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
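+///
+/// For instance, with TargetData available, comparing
+///   inttoptr (i64 5 to i8*)
+/// against an i8* null for equality reduces to the i1 constant false
+/// (an illustrative sketch).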
+///
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+ Constant *Ops0, Constant *Ops1,
+ const TargetData *TD) {
+ // fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+ // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+ //
+ // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // around to know if bit truncation is happening.
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (TD && Ops1->isNullValue()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ }
+
+ // Only do this transformation if the integer has the same width as
+ // IntPtrTy; otherwise there is a truncation or extension we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
+ if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+ IntPtrTy, false);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ }
+
+ // Only do this transformation if the integer has the same width as
+ // IntPtrTy; otherwise there is a truncation or extension we aren't modeling.
+ if ((CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
+ CE1->getOperand(0), TD);
+ }
+ }
+
+ // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+ // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+ if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+ CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+ Constant *LHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ Constant *RHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ unsigned OpC =
+ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Constant *Ops[] = { LHS, RHS };
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD);
+ }
+ }
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
+}
+
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
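+///
+/// Sketch of the intended use: given
+///   @G = constant [3 x i32] [i32 10, i32 20, i32 30]
+/// and the expression getelementptr ([3 x i32]* @G, i32 0, i32 1), passing
+/// @G's initializer as C returns the i32 constant 20.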
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+ ConstantExpr *CE) {
+ if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+ return 0; // Do not allow stepping over the value!
+
+ // Loop over all of the operands, tracking down which value we are
+ // addressing...
+ gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ for (++I; I != E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ ConstantInt *CU = cast<ConstantInt>(I.getOperand());
+ assert(CU->getZExtValue() < STy->getNumElements() &&
+ "Struct index out of range!");
+ unsigned El = (unsigned)CU->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ C = CS->getOperand(El);
+ } else if (isa<ConstantAggregateZero>(C)) {
+ C = Constant::getNullValue(STy->getElementType(El));
+ } else if (isa<UndefValue>(C)) {
+ C = UndefValue::get(STy->getElementType(El));
+ } else {
+ return 0;
+ }
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return 0;
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ C = CA->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(ATy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(ATy->getElementType());
+ else
+ return 0;
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) {
+ if (CI->getZExtValue() >= VTy->getNumElements())
+ return 0;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
+ C = CP->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(VTy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(VTy->getElementType());
+ else
+ return 0;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ return true;
+ default:
+ return false;
+ case 0: break;
+ }
+
+ if (!F->hasName()) return false;
+ StringRef Name = F->getName();
+
+ // In these cases, the check of the length is required. We don't want to
+ // return true for a name like "cos\0blah", which strcmp would report as
+ // equal to "cos" but which has length 8.
+ switch (Name[0]) {
+ default: return false;
+ case 'a':
+ return Name == "acos" || Name == "asin" ||
+ Name == "atan" || Name == "atan2";
+ case 'c':
+ return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
+ case 'e':
+ return Name == "exp" || Name == "exp2";
+ case 'f':
+ return Name == "fabs" || Name == "fmod" || Name == "floor";
+ case 'l':
+ return Name == "log" || Name == "log10";
+ case 'p':
+ return Name == "pow";
+ case 's':
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sqrtf";
+ case 't':
+ return Name == "tan" || Name == "tanh";
+ }
+}
+
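+/// ConstantFoldFP - Evaluate NativeFP on V using the host's libm, giving up
+/// (returning null) if the call raises a floating-point exception, and wrap
+/// the result in a ConstantFP of type Ty (float or double).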
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+ const Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+ double V, double W, const Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V, W);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+/// ConstantFoldConvertToInt - Attempt to fold an SSE floating point to
+/// integer conversion of a constant floating point. If roundTowardZero is
+/// false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion cannot be
+/// performed, otherwise returns the Constant value resulting from the
+/// conversion.
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
+ const Type *Ty) {
+ assert(Op && "Called with NULL operand");
+ APFloat Val(Op->getValueAPF());
+
+ // All of these conversion intrinsics form an integer of at most 64 bits.
+ unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ assert(ResultWidth <= 64 &&
+ "Can only constant fold conversions to 64 and 32 bit ints");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+ APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+ : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return 0;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
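+///
+/// Two illustrative folds performed here (a sketch): a call to the libm
+/// function cos with the constant argument 0.0 folds to the ConstantFP 1.0,
+/// and llvm.uadd.with.overflow.i32(4294967295, 1) folds to the constant
+/// struct { i32 0, i1 true }.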
+Constant *
+llvm::ConstantFoldCall(Function *F,
+ Constant *const *Operands, unsigned NumOperands) {
+ if (!F->hasName()) return 0;
+ StringRef Name = F->getName();
+
+ const Type *Ty = F->getReturnType();
+ if (NumOperands == 1) {
+ if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
+
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+ return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+ }
+
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+
+ /// We only fold calls with finite arguments. Folding a NaN or infinity
+ /// argument is likely to abort with an exception anyway, and some host
+ /// libms have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return 0;
+
+ /// Currently APFloat versions of these functions do not exist, so we use
+ /// the host native double versions. Float versions are not called
+ /// directly; for all of these functions (float)(f((double)arg)) == f(arg)
+ /// holds. Long double is not supported yet.
+ double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
+ Op->getValueAPF().convertToDouble();
+ switch (Name[0]) {
+ case 'a':
+ if (Name == "acos")
+ return ConstantFoldFP(acos, V, Ty);
+ else if (Name == "asin")
+ return ConstantFoldFP(asin, V, Ty);
+ else if (Name == "atan")
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if (Name == "ceil")
+ return ConstantFoldFP(ceil, V, Ty);
+ else if (Name == "cos")
+ return ConstantFoldFP(cos, V, Ty);
+ else if (Name == "cosh")
+ return ConstantFoldFP(cosh, V, Ty);
+ else if (Name == "cosf")
+ return ConstantFoldFP(cos, V, Ty);
+ break;
+ case 'e':
+ if (Name == "exp")
+ return ConstantFoldFP(exp, V, Ty);
+
+ if (Name == "exp2") {
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ }
+ break;
+ case 'f':
+ if (Name == "fabs")
+ return ConstantFoldFP(fabs, V, Ty);
+ else if (Name == "floor")
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if (Name == "log" && V > 0)
+ return ConstantFoldFP(log, V, Ty);
+ else if (Name == "log10" && V > 0)
+ return ConstantFoldFP(log10, V, Ty);
+ else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+ (Ty->isFloatTy() || Ty->isDoubleTy())) {
+ if (V >= -0.0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else // Undefined
+ return Constant::getNullValue(Ty);
+ }
+ break;
+ case 's':
+ if (Name == "sin")
+ return ConstantFoldFP(sin, V, Ty);
+ else if (Name == "sinh")
+ return ConstantFoldFP(sinh, V, Ty);
+ else if (Name == "sqrt" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sqrtf" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sinf")
+ return ConstantFoldFP(sin, V, Ty);
+ break;
+ case 't':
+ if (Name == "tan")
+ return ConstantFoldFP(tan, V, Ty);
+ else if (Name == "tanh")
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::cttz:
+ return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status =
+ Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(F->getContext(), Val);
+ }
+ default:
+ return 0;
+ }
+ }
+
+ if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ }
+ }
+
+ if (isa<UndefValue>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return Operands[0];
+ return 0;
+ }
+
+ return 0;
+ }
+
+ if (NumOperands == 2) {
+ if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+ double Op1V = Ty->isFloatTy() ?
+ (double)Op1->getValueAPF().convertToFloat() :
+ Op1->getValueAPF().convertToDouble();
+ if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return 0;
+
+ double Op2V = Ty->isFloatTy() ?
+ (double)Op2->getValueAPF().convertToFloat():
+ Op2->getValueAPF().convertToDouble();
+
+ if (Name == "pow")
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod")
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if (Name == "atan2")
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
+ }
+ return 0;
+ }
+
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ APInt Res;
+ bool Overflow;
+ switch (F->getIntrinsicID()) {
+ default: assert(0 && "Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::smul_with_overflow:
+ Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+ break;
+ }
+ Constant *Ops[] = {
+ ConstantInt::get(F->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
+ }
+ }
+ }
+
+ return 0;
+ }
+ return 0;
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp
new file mode 100644
index 000000000000..ac5eeeb4706a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp
@@ -0,0 +1,839 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+ assert((Tag & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+ : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
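+///
+/// A minimal usage sketch (M is the Module being compiled; the file name and
+/// producer string are illustrative):
+///   DIBuilder DIB(M);
+///   DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp", "my-compiler",
+///                         /*isOptimized=*/false, /*Flags=*/"",
+///                         /*RunTimeVer=*/0);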
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RunTimeVer) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Producer),
+ // Deprecate isMain field.
+ ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+ };
+ TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+
+ // Create a named metadata so that it is easier to find the compile unit
+ // in a module.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->addOperand(TheCU);
+}
+
+/// createFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+ assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ TheCU
+ };
+ return DIFile(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+ };
+ return DIEnumerator(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
+/// type, e.g. 'char'.
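+///
+/// For instance, the C type 'int' might be described (sketch) as
+///   createBasicType("int", 32, 32, dwarf::DW_ATE_signed);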
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Encoding) {
+ // Basic types are encoded in DIBasicType format. Line number, filename,
+ // offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createQualifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+ // Qualified types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ TheCU,
+ MDString::get(VMContext, StringRef()), // Empty name.
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FromTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createPointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits, StringRef Name) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::createReferenceType(DIType RTy) {
+ // References are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
+ TheCU,
+ NULL, // Name
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ RTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo, DIDescriptor Context) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid typedef type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
+ // A friend relationship is encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid type!");
+ assert(FriendTy.Verify() && "Invalid friend type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FriendTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createInheritance - Create debugging information entry to establish an
+/// inheritance relationship between two types.
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
+ uint64_t BaseOffset, unsigned Flags) {
+ // TAG_inheritance is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ BaseTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createMemberType - Create debugging information entry for a member.
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for an Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, StringRef PropertyName,
+ StringRef GetterName, StringRef SetterName,
+ unsigned PropertyAttributes) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File, // Or TheCU ? Ty ?
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ MDString::get(VMContext, PropertyName),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createClassType - Create debugging information entry for a class.
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType DerivedFrom, DIArray Elements,
+ MDNode *VTableHolder, MDNode *TemplateParams) {
+ // TAG_class_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ VTableHolder,
+ TemplateParams
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateTypeParameter - Create debugging information for a template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateValueParameter - Create debugging information for a template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, uint64_t Val,
+ MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateValueParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createStructType - Create debugging information entry for a struct.
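+///
+/// A sketch for 'struct S { int a; };', assuming DIB, Scope, File and IntTy
+/// are already set up:
+///   DIType A = DIB.createMemberType(Scope, "a", File, 1, 32, 32, 0, 0, IntTy);
+///   Value *Mems[] = { A };
+///   DIType S = DIB.createStructType(Scope, "S", File, 1, 32, 32, 0,
+///                                   DIB.getOrCreateArray(Mems), 0);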
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ unsigned Flags, DIArray Elements,
+ unsigned RunTimeLang) {
+ // TAG_structure_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createUnionType - Create debugging information entry for a union.
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+ DIFile File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements, unsigned RunTimeLang) {
+ // TAG_union_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createSubroutineType - Create subroutine type.
+DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+ // TAG_subroutine_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+ File,
+ MDString::get(VMContext, ""),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ParameterTypes,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerationType - Create debugging information entry for an
+/// enumeration.
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits, DIArray Elements) {
+ // TAG_enumeration_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ return DIType(Node);
+}
+
+/// createArrayType - Create debugging information entry for an array.
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_array_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_vector_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createArtificialType - Create a new DIType with the "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+ assert (N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | DIType::FlagArtificial;
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+ NMD->addOperand(T);
+}
+
+/// createUnspecifiedParameter - Create an unspecified parameter type
+/// descriptor for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, Elts));
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType() {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType(DIFile F) {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, DW_TAG_base_type),
+ F.getCompileUnit(),
+ NULL,
+ F
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+ if (Elements.empty()) {
+ Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, Null));
+ }
+ return DIArray(MDNode::get(VMContext, Elements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+ };
+
+ return DISubrange(MDNode::get(VMContext, Elts));
+}
+
+/// createGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ TheCU,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createLocalVariable - Create a new descriptor for the specified local
+/// variable.
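+///
+/// For example (a sketch), a descriptor for the first integer parameter 'x'
+/// of a subprogram SP could be created as
+///   createLocalVariable(dwarf::DW_TAG_arg_variable, SP, "x", File, Line,
+///                       IntTy, /*AlwaysPreserve=*/true, /*Flags=*/0,
+///                       /*ArgNo=*/1);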
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNo, DIType Ty,
+ bool AlwaysPreserve, unsigned Flags,
+ unsigned ArgNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (AlwaysPreserve) {
+ // The optimizer may remove local variables. If there is an interest
+ // in preserving variable info in such a situation then stash it in a
+ // named mdnode.
+ DISubprogram Fn(getDISubprogram(Scope));
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+ FnLocals->addOperand(Node);
+ }
+ return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile F,
+ unsigned LineNo,
+ DIType Ty, ArrayRef<Value *> Addr,
+ unsigned ArgNo) {
+ SmallVector<Value *, 15> Elts;
+ Elts.push_back(GetTagConstant(VMContext, Tag));
+ Elts.push_back(Scope);
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(F);
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
+ Elts.push_back(Ty);
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.append(Addr.begin(), Addr.end());
+
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile File, unsigned LineNo,
+ DIType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned Flags, bool isOptimized,
+ Function *Fn,
+ MDNode *TParams,
+ MDNode *Decl) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParams,
+ Decl
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,
+ bool isLocalToUnit,
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ MDNode *VTableHolder,
+ unsigned Flags,
+ bool isOptimized,
+ Function *Fn,
+ MDNode *TParam) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ VTableHolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParam,
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// createNameSpace - This creates a new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ return DINameSpace(MDNode::get(VMContext, Elts));
+}
+
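+/// createLexicalBlock - Create a descriptor for a lexical block (an explicit
+/// brace scope) nested inside the given parent Scope, starting at the given
+/// line and column.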
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+ unsigned Line, unsigned Col) {
+ // Defeat MDNode uniquing for lexical blocks by using a unique id.
+ static unsigned int unique_id = 0;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ Scope,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+ };
+ return DILexicalBlock(MDNode::get(VMContext, Elts));
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
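+///
+/// Typical use (a sketch, identifiers illustrative): after emitting an alloca
+/// for a local variable, pair it with its DIVariable descriptor:
+///   DIB.insertDeclare(Alloca, Var, /*InsertBefore=*/InsertPt);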
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ return CallInst::Create(DeclareFn, Args, "", InsertBefore);
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+
+ // If this block already has a terminator then insert this intrinsic
+ // before the terminator.
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ return CallInst::Create(DeclareFn, Args, "", T);
+ else
+ return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertBefore);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
+}
+
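The insertDeclare and insertDbgValueIntrinsic entry points above are what a front end calls once it has a DIVariable in hand. Below is a minimal usage sketch, not part of this change: the helper name and its parameters are illustrative, the header paths follow this tree's layout, and the DIVariable is assumed to have been built elsewhere.

    #include "llvm/BasicBlock.h"
    #include "llvm/Analysis/DIBuilder.h"
    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Hypothetical helper: emit both debug intrinsics for one local variable.
    // 'Storage' is the variable's stack slot, 'NewVal' a value assigned to it,
    // 'VarDesc' its DIVariable descriptor, 'BB' the block to insert into.
    static void emitDebugIntrinsics(DIBuilder &DIB, Value *Storage, Value *NewVal,
                                    DIVariable VarDesc, BasicBlock *BB) {
      // Describe where the variable lives for its whole lifetime...
      DIB.insertDeclare(Storage, VarDesc, BB);
      // ...and record one value it takes on; Offset 0 covers the whole variable.
      DIB.insertDbgValueIntrinsic(NewVal, /*Offset=*/0, VarDesc, BB);
    }
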
diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
new file mode 100644
index 000000000000..b23c3514d0bd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
@@ -0,0 +1,224 @@
+//===- DbgInfoPrinter.cpp - Print debug info in a human readable form -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions and their associated
+// debug info:
+//
+// - source/line/col information
+// - original variable name
+// - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+ cl::desc("Print fullpath when printing debug info"),
+ cl::Hidden);
+
+namespace {
+ class PrintDbgInfo : public FunctionPass {
+ raw_ostream &Out;
+ void printVariableDeclaration(const Value *V);
+ public:
+ static char ID; // Pass identification
+ PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
+ initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+ char PrintDbgInfo::ID = 0;
+}
+
+INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+ "Print debug info in human readable form", false, false)
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isGlobalVariable())
+ continue;
+ if (DIGlobalVariable(DIG).getGlobal() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Find the debug info descriptor corresponding to this function.
+static Value *findDbgSubprogramDeclare(Function *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isSubprogram())
+ continue;
+ if (DISubprogram(DIG).getFunction() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+ V = V->stripPointerCasts();
+
+ if (!isa<Instruction>(V) && !isa<Argument>(V))
+ return 0;
+
+ const Function *F = NULL;
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ F = I->getParent()->getParent();
+ else if (const Argument *A = dyn_cast<Argument>(V))
+ F = A->getParent();
+
+ for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+ BI != BE; ++BI)
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DDI->getAddress() == V)
+ return DDI;
+
+ return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo,
+ std::string &File, std::string &Dir) {
+ DICompileUnit Unit;
+ DIType TypeD;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+ Value *DIGV = findDbgGlobalDeclare(GV);
+ if (!DIGV) return false;
+ DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
+ Value *DIF = findDbgSubprogramDeclare(F);
+ if (!DIF) return false;
+ DISubprogram Var(cast<MDNode>(DIF));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else {
+ const DbgDeclareInst *DDI = findDbgDeclare(V);
+ if (!DDI) return false;
+ DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+ StringRef D = Var.getName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ }
+
+ StringRef T = TypeD.getName();
+ if (!T.empty())
+ Type = T;
+ StringRef F = Unit.getFilename();
+ if (!F.empty())
+ File = F;
+ StringRef D = Unit.getDirectory();
+ if (!D.empty())
+ Dir = D;
+ return true;
+}
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+ std::string DisplayName, File, Directory, Type;
+ unsigned LineNo;
+
+ if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+ return;
+
+ Out << "; ";
+ WriteAsOperand(Out, V, false, 0);
+ if (isa<Function>(V))
+ Out << " is function " << DisplayName
+ << " of type " << Type << " declared at ";
+ else
+ Out << " is variable " << DisplayName
+ << " of type " << Type << " declared at ";
+
+ if (PrintDirectory)
+ Out << Directory << "/";
+
+ Out << File << ":" << LineNo << "\n";
+}
+
+bool PrintDbgInfo::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ Out << "function " << F.getName() << "\n\n";
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+ // Skip dead blocks.
+ continue;
+
+ Out << BB->getName();
+ Out << ":";
+
+ Out << "\n";
+
+ for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+ i != e; ++i) {
+
+ printVariableDeclaration(i);
+
+ if (const User *U = dyn_cast<User>(i)) {
+ for (unsigned Op = 0, NumOps = U->getNumOperands(); Op != NumOps; ++Op)
+   printVariableDeclaration(U->getOperand(Op));
+ }
+ }
+ }
+ return false;
+}
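The printer only does something once it is scheduled. A sketch of driving it over a module follows; the driver function is hypothetical, and createDbgInfoPrinterPass() is assumed to be declared in the llvm/Analysis/Passes.h header this file already includes.

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Analysis/Passes.h"

    // Hypothetical driver: run -print-dbginfo over every function in M.
    // PrintDbgInfo writes to errs(), so no stream needs to be wired up here.
    static void printAllDebugInfo(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createDbgInfoPrinterPass());
      PM.run(M);
    }
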
diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp
new file mode 100644
index 000000000000..b42e946f2ffa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp
@@ -0,0 +1,948 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
+DIDescriptor::getStringField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return StringRef();
+
+ if (Elt < DbgNode->getNumOperands())
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
+ return MDS->getString();
+
+ return StringRef();
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getZExtValue();
+
+ return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return DIDescriptor();
+
+ if (Elt < DbgNode->getNumOperands())
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+ if (getVersion() <= llvm::LLVMDebugVersion8)
+ return DbgNode->getNumOperands()-6;
+ return DbgNode->getNumOperands()-7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_base_type;
+}
+
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
+ return true;
+ default:
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
+ }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_class_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_auto_variable:
+ case dwarf::DW_TAG_arg_variable:
+ case dwarf::DW_TAG_return_variable:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isUnspecifiedParameter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
+/// isScope - Return true if the specified tag is one of the
+/// scope-related tags.
+bool DIDescriptor::isScope() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_namespace:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
+unsigned DIArray::getNumElements() const {
+ if (!DbgNode)
+ return 0;
+ return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
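These replaceAllUsesWith overloads are the resolution half of the forward-declaration pattern: a temporary node stands in for a type until its definition is finished, then every user is pointed at the real node and the temporary is deleted, as the code above does. A sketch of that pattern follows, under the assumption that MDNode::getTemporary is the counterpart of the deleteTemporary call used here and that the placeholder operands carry a valid type tag, as the DIType constructor above requires.

    #include "llvm/Metadata.h"
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Sketch only: 'PlaceholderElts' stands in for whatever operand layout the
    // producer uses for a forward-declared composite type.
    static DIType startForwardDecl(LLVMContext &Ctx,
                                   ArrayRef<Value*> PlaceholderElts) {
      // Hand out a temporary node that other descriptors may start referencing.
      return DIType(MDNode::getTemporary(Ctx, PlaceholderElts));
    }

    static void finishForwardDecl(DIType FwdDecl, MDNode *RealDefinition) {
      // Point every user of the placeholder at the finished definition; the
      // temporary node itself is deleted by replaceAllUsesWith() above.
      FwdDecl.replaceAllUsesWith(RealDefinition);
    }
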
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+ if (!DbgNode)
+ return false;
+ StringRef N = getFilename();
+ if (N.empty())
+ return false;
+ // It is possible that the directory and producer strings are empty.
+ return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (!getContext().Verify())
+ return false;
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
+ && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+ && Tag != dwarf::DW_TAG_enumeration_type
+ && getFilename().empty())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType();
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DICompositeType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (getDisplayName().empty())
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ if (!getGlobal() && !getConstant())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ if (!getCompileUnit().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (getName().empty())
+ return false;
+ if (!getCompileUnit().Verify())
+ return false;
+ return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+ unsigned Tag = getTag();
+ if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ DIType BaseType = getTypeDerivedFrom();
+ // If this type is not derived from any type then take a conservative
+ // approach.
+ if (!BaseType.isValid())
+ return getSizeInBits();
+ if (BaseType.isDerivedType())
+ return DIDerivedType(BaseType).getOriginalTypeSize();
+ else
+ return BaseType.getSizeInBits();
+ }
+
+ return getSizeInBits();
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function argument.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+ assert(CurFn && "Invalid function");
+ if (!getContext().isSubprogram())
+ return false;
+ // This variable is not an inlined function argument if its scope
+ // does not describe the current function.
+ return !(DISubprogram(getContext()).describes(CurFn));
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ if (F->getName() == Name)
+ return true;
+ return false;
+}
+
+unsigned DISubprogram::isOptimized() const {
+ assert (DbgNode && "Invalid subprogram descriptor!");
+ if (DbgNode->getNumOperands() == 16)
+ return getUnsignedField(15);
+ return 0;
+}
+
+StringRef DIScope::getFilename() const {
+ if (!DbgNode)
+ return StringRef();
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getFilename();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getFilename();
+ if (isCompileUnit())
+ return DICompileUnit(DbgNode).getFilename();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getFilename();
+ if (isType())
+ return DIType(DbgNode).getFilename();
+ if (isFile())
+ return DIFile(DbgNode).getFilename();
+ assert(0 && "Invalid DIScope!");
+ return StringRef();
+}
+
+StringRef DIScope::getDirectory() const {
+ if (!DbgNode)
+ return StringRef();
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getDirectory();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getDirectory();
+ if (isCompileUnit())
+ return DICompileUnit(DbgNode).getDirectory();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getDirectory();
+ if (isType())
+ return DIType(DbgNode).getDirectory();
+ if (isFile())
+ return DIFile(DbgNode).getDirectory();
+ assert(0 && "Invalid DIScope!");
+ return StringRef();
+}
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor: dump routines for all descriptors.
+//===----------------------------------------------------------------------===//
+
+
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+ OS << "[" << dwarf::TagString(getTag()) << "] ";
+ OS.write_hex((intptr_t) &*DbgNode) << ']';
+}
+
+/// print - Print compile unit.
+void DICompileUnit::print(raw_ostream &OS) const {
+ if (getLanguage())
+ OS << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+
+ OS << " [" << getDirectory() << "/" << getFilename() << "]";
+}
+
+/// print - Print type.
+void DIType::print(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " ["
+ << "line " << getLineNumber() << ", "
+ << getSizeInBits() << " bits, "
+ << getAlignInBits() << " bit alignment, "
+ << getOffsetInBits() << " bit offset"
+ << "] ";
+
+ if (isPrivate())
+ OS << " [private] ";
+ else if (isProtected())
+ OS << " [protected] ";
+
+ if (isForwardDecl())
+ OS << " [fwd] ";
+
+ if (isBasicType())
+ DIBasicType(DbgNode).print(OS);
+ else if (isDerivedType())
+ DIDerivedType(DbgNode).print(OS);
+ else if (isCompositeType())
+ DICompositeType(DbgNode).print(OS);
+ else {
+ OS << "Invalid DIType\n";
+ return;
+ }
+
+ OS << "\n";
+}
+
+/// print - Print basic type.
+void DIBasicType::print(raw_ostream &OS) const {
+ OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+}
+
+/// print - Print derived type.
+void DIDerivedType::print(raw_ostream &OS) const {
+ OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
+}
+
+/// print - Print composite type.
+void DICompositeType::print(raw_ostream &OS) const {
+ DIArray A = getTypeArray();
+ OS << " [" << A.getNumElements() << " elements]";
+}
+
+/// print - Print subprogram.
+void DISubprogram::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ OS << " [local] ";
+
+ if (isDefinition())
+ OS << " [def] ";
+
+ OS << "\n";
+}
+
+/// print - Print global variable.
+void DIGlobalVariable::print(raw_ostream &OS) const {
+ OS << " [";
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ OS << " [local] ";
+
+ if (isDefinition())
+ OS << " [def] ";
+
+ if (isGlobalVariable())
+ DIGlobalVariable(DbgNode).print(OS);
+ OS << "]\n";
+}
+
+/// print - Print variable.
+void DIVariable::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+ getType().print(OS);
+ OS << "\n";
+
+ // FIXME: Dump complex addresses
+}
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print compile unit to dbgs() with a newline.
+void DICompileUnit::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print type to dbgs() with a newline.
+void DIType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print basic type to dbgs() with a newline.
+void DIBasicType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print derived type to dbgs() with a newline.
+void DIDerivedType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print composite type to dbgs() with a newline.
+void DICompositeType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print subprogram to dbgs() with a newline.
+void DISubprogram::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// fixupObjcLikeName - Replace the special characters used in typical
+/// Objective-C names with '.' in the given string.
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+ bool isObjCLike = false;
+ for (size_t i = 0, e = Str.size(); i < e; ++i) {
+ char C = Str[i];
+ if (C == '[')
+ isObjCLike = true;
+
+ if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+ C == '+' || C == '(' || C == ')'))
+ Out.push_back('.');
+ else
+ Out.push_back(C);
+ }
+}
+
+/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+/// suitable to hold function-specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ fixupObjcLikeName(FuncName, Name);
+
+ return M.getNamedMetadata(Name.str());
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+/// to hold function-specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ fixupObjcLikeName(FuncName, Name);
+
+ return M.getOrInsertNamedMetadata(Name.str());
+}
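A usage sketch for these lookups, assuming the declarations live in llvm/Analysis/DebugInfo.h as included above: the per-function list of local-variable descriptors is a named metadata node keyed on the function name, with Objective-C punctuation folded to '.' by fixupObjcLikeName before the lookup. The helper below is hypothetical.

    #include "llvm/Function.h"
    #include "llvm/Module.h"
    #include "llvm/Analysis/DebugInfo.h"

    // Hypothetical: fetch, creating it if needed, the "llvm.dbg.lv.<name>" node
    // that will hold the local-variable descriptors for F.
    static llvm::NamedMDNode *localVariableListFor(llvm::Module &M,
                                                   const llvm::Function &F) {
      return llvm::getOrInsertFnSpecificMDNode(M, F.getName());
    }
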
+
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(Module &M) {
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
+ ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ processDeclare(DDI);
+
+ DebugLoc Loc = BI->getDebugLoc();
+ if (Loc.isUnknown())
+ continue;
+
+ LLVMContext &Ctx = BI->getContext();
+ DIDescriptor Scope(Loc.getScope(Ctx));
+
+ if (Scope.isCompileUnit())
+ addCompileUnit(DICompileUnit(Scope));
+ else if (Scope.isSubprogram())
+ processSubprogram(DISubprogram(Scope));
+ else if (Scope.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(Scope));
+
+ if (MDNode *IA = Loc.getInlinedAt(Ctx))
+ processLocation(DILocation(IA));
+ }
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (addGlobalVariable(DIG)) {
+ addCompileUnit(DIG.getCompileUnit());
+ processType(DIG.getType());
+ }
+ }
+ }
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ processSubprogram(DISubprogram(NMD->getOperand(i)));
+}
+
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (!Loc.Verify()) return;
+ DIDescriptor S(Loc.getScope());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S));
+ processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+
+ addCompileUnit(DT.getCompileUnit());
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT);
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ if (D.isType())
+ processType(DIType(D));
+ else if (D.isSubprogram())
+ processSubprogram(DISubprogram(D));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT);
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context));
+ else
+ return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (!addSubprogram(SP))
+ return;
+ addCompileUnit(SP.getCompileUnit());
+ processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+ MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+ if (!N) return;
+
+ DIDescriptor DV(N);
+ if (!DV.isVariable())
+ return;
+
+ if (!NodesSeen.insert(DV))
+ return;
+
+ addCompileUnit(DIVariable(N).getCompileUnit());
+ processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (!DT.isValid())
+ return false;
+
+ if (!NodesSeen.insert(DT))
+ return false;
+
+ TYs.push_back(DT);
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (!CU.Verify())
+ return false;
+
+ if (!NodesSeen.insert(CU))
+ return false;
+
+ CUs.push_back(CU);
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (!DIDescriptor(DIG).isGlobalVariable())
+ return false;
+
+ if (!NodesSeen.insert(DIG))
+ return false;
+
+ GVs.push_back(DIG);
+ return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (!DIDescriptor(SP).isSubprogram())
+ return false;
+
+ if (!NodesSeen.insert(SP))
+ return false;
+
+ SPs.push_back(SP);
+ return true;
+}
+
+/// getDISubprogram - Find subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+ DIDescriptor D(Scope);
+ if (D.isSubprogram())
+ return DISubprogram(Scope);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+ return DISubprogram();
+}
+
+/// getDICompositeType - Find underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+ if (T.isCompositeType())
+ return DICompositeType(T);
+
+ if (T.isDerivedType())
+ return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+ return DICompositeType();
+}
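getDISubprogram() above is the usual way to climb from a debug location's scope back to the enclosing function descriptor. A sketch of a caller follows; the helper itself is hypothetical and the header paths follow this tree's layout.

    #include "llvm/Instruction.h"
    #include "llvm/Analysis/DebugInfo.h"

    // Sketch: recover the subprogram enclosing an instruction's debug location.
    // Returns a null descriptor when the instruction carries no location.
    static llvm::DISubprogram subprogramFor(const llvm::Instruction &I) {
      llvm::DebugLoc Loc = I.getDebugLoc();
      if (Loc.isUnknown())
        return llvm::DISubprogram();
      return llvm::getDISubprogram(Loc.getScope(I.getContext()));
    }
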
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
new file mode 100644
index 000000000000..cde431459d50
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -0,0 +1,232 @@
+//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
+// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
+// program, with a graph of the dominance/postdominance tree of that
+// function.
+//
+// There are also passes available to directly call dotty ('-view-dom' or
+// '-view-postdom'). By appending '-only', as in '-dot-dom-only', only the
+// names of the basic blocks are printed and their contents are hidden.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+
+using namespace llvm;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+
+ BasicBlock *BB = Node->getBlock();
+
+ if (!BB)
+ return "Post dominance root node";
+
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+};
+
+template<>
+struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(DominatorTree *DT) {
+ return "Dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree*>
+ : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(PostDominatorTree *DT) {
+ return "Post dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+}
+
+namespace {
+struct DomViewer
+ : public DOTGraphTraitsViewer<DominatorTree, false> {
+ static char ID;
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+ initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyViewer
+ : public DOTGraphTraitsViewer<DominatorTree, true> {
+ static char ID;
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+ initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, false> {
+ static char ID;
+ PostDomViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+ initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+ initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+char DomViewer::ID = 0;
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false)
+
+char DomOnlyViewer::ID = 0;
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false)
+
+char PostDomViewer::ID = 0;
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false)
+
+char PostDomOnlyViewer::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false)
+
+namespace {
+struct DomPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, false> {
+ static char ID;
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+ initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, true> {
+ static char ID;
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+ initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
+ static char ID;
+ PostDomPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+ initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+ initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+
+
+char DomPrinter::ID = 0;
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false)
+
+char DomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+char PostDomPrinter::ID = 0;
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false)
+
+char PostDomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+// Create methods available outside of this file, so they can be used from
+// "include/llvm/LinkAllPasses.h". Otherwise the passes would be deleted by
+// link-time optimization.
+
+FunctionPass *llvm::createDomPrinterPass() {
+ return new DomPrinter();
+}
+
+FunctionPass *llvm::createDomOnlyPrinterPass() {
+ return new DomOnlyPrinter();
+}
+
+FunctionPass *llvm::createDomViewerPass() {
+ return new DomViewer();
+}
+
+FunctionPass *llvm::createDomOnlyViewerPass() {
+ return new DomOnlyViewer();
+}
+
+FunctionPass *llvm::createPostDomPrinterPass() {
+ return new PostDomPrinter();
+}
+
+FunctionPass *llvm::createPostDomOnlyPrinterPass() {
+ return new PostDomOnlyPrinter();
+}
+
+FunctionPass *llvm::createPostDomViewerPass() {
+ return new PostDomViewer();
+}
+
+FunctionPass *llvm::createPostDomOnlyViewerPass() {
+ return new PostDomOnlyViewer();
+}
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 000000000000..6de4e1e1d7de
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,137 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+
+namespace {
+ class DFCalculateWorkObject {
+ public:
+ DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+ const DomTreeNode *N,
+ const DomTreeNode *PN)
+ : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+ BasicBlock *currentBB;
+ BasicBlock *parentBB;
+ const DomTreeNode *Node;
+ const DomTreeNode *parentNode;
+ };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+ const DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DomSetType *Result = NULL;
+
+ std::vector<DFCalculateWorkObject> workList;
+ SmallPtrSet<BasicBlock *, 32> visited;
+
+ workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ do {
+ DFCalculateWorkObject *currentW = &workList.back();
+ assert (currentW && "Missing work object.");
+
+ BasicBlock *currentBB = currentW->currentBB;
+ BasicBlock *parentBB = currentW->parentBB;
+ const DomTreeNode *currentNode = currentW->Node;
+ const DomTreeNode *parentNode = currentW->parentNode;
+ assert (currentBB && "Invalid work object. Missing current Basic Block");
+ assert (currentNode && "Invalid work object. Missing current Node");
+ DomSetType &S = Frontiers[currentBB];
+
+ // Visit each block only once.
+ if (visited.count(currentBB) == 0) {
+ visited.insert(currentBB);
+
+ // Loop over CFG successors to calculate DFlocal[currentNode]
+ for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this successor?
+ if (DT[*SI]->getIDom() != currentNode)
+ S.insert(*SI);
+ }
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+ // Loop through and visit the nodes that Node immediately dominates (Node's
+ // children in the IDomTree)
+ bool visitChild = false;
+ for (DomTreeNode::const_iterator NI = currentNode->begin(),
+ NE = currentNode->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ BasicBlock *childBB = IDominee->getBlock();
+ if (visited.count(childBB) == 0) {
+ workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+ IDominee, currentNode));
+ visitChild = true;
+ }
+ }
+
+ // If no unvisited child was pushed (i.e. all children have already been
+ // visited), then pop this block from the workList.
+ if (!visitChild) {
+
+ if (!parentBB) {
+ Result = &S;
+ break;
+ }
+
+ DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+ DomSetType &parentSet = Frontiers[parentBB];
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+ parentSet.insert(*CDFI);
+ }
+ workList.pop_back();
+ }
+
+ } while (!workList.empty());
+
+ return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " DomFrontier for BB ";
+ if (I->first)
+ WriteAsOperand(OS, I->first, false);
+ else
+ OS << " <<exit node>>";
+ OS << " is:\t";
+
+ const std::set<BasicBlock*> &BBs = I->second;
+
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I) {
+ OS << ' ';
+ if (*I)
+ WriteAsOperand(OS, *I, false);
+ else
+ OS << "<<exit node>>";
+ }
+ OS << "\n";
+ }
+}
+
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+
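calculate() fills the Frontiers map that print() walks above; a consumer reads the result through the same iterator interface. A sketch follows, assuming the DominanceFrontier reference comes from getAnalysis<DominanceFrontier>() inside a FunctionPass, which is not shown.

    #include <set>
    #include "llvm/BasicBlock.h"
    #include "llvm/Analysis/DominanceFrontier.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: list each block's dominance frontier, mirroring the iteration in
    // DominanceFrontierBase::print() above.
    static void listFrontiers(const llvm::DominanceFrontier &DF,
                              llvm::raw_ostream &OS) {
      for (llvm::DominanceFrontier::const_iterator I = DF.begin(), E = DF.end();
           I != E; ++I) {
        OS << I->first->getName() << " ->";
        const std::set<llvm::BasicBlock*> &Frontier = I->second;
        for (std::set<llvm::BasicBlock*>::const_iterator BI = Frontier.begin(),
               BE = Frontier.end(); BI != BE; ++BI)
          OS << ' ' << (*BI)->getName();
        OS << '\n';
      }
    }
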
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
new file mode 100644
index 000000000000..2e79eab51ff7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -0,0 +1,340 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraph class and provides the BasicCallGraph
+// default implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BasicCallGraph class definition
+//
+class BasicCallGraph : public ModulePass, public CallGraph {
+ // Root is the root of the call graph, or the external node if a 'main'
+ // function couldn't be found.
+ //
+ CallGraphNode *Root;
+
+ // ExternalCallingNode - This node has edges to all external functions and
+ // those internal functions that have their address taken.
+ CallGraphNode *ExternalCallingNode;
+
+ // CallsExternalNode - This node has edges to it from all functions making
+ // indirect calls or calling an external function.
+ CallGraphNode *CallsExternalNode;
+
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ BasicCallGraph() : ModulePass(ID), Root(0),
+ ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnModule - Compute the call graph for the specified module.
+ virtual bool runOnModule(Module &M) {
+ CallGraph::initialize(M);
+
+ ExternalCallingNode = getOrInsertFunction(0);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
+
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
+
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0) Root = ExternalCallingNode;
+
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual void print(raw_ostream &OS, const Module *) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = getRoot()->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << getRoot() << ">>\n";
+ }
+
+ CallGraph::print(OS, 0);
+ }
+
+ virtual void releaseMemory() {
+ destroy();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it should
+ /// override this to adjust the this pointer as needed for the specified pass
+ /// info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &CallGraph::ID)
+ return (CallGraph*)this;
+ return this;
+ }
+
+ CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+ CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+ // getRoot - Return the root of the call graph, which is either main, or if
+ // main cannot be found, the external node.
+ //
+ CallGraphNode *getRoot() { return Root; }
+ const CallGraphNode *getRoot() const { return Root; }
+
+private:
+ //===---------------------------------------------------------------------
+ // Implementation of CallGraph construction
+ //
+
+ // addToCallGraph - Add a function to the call graph, and link the node to all
+ // of the functions that it calls.
+ //
+ void addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
+
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
+ }
+ }
+
+ // Loop over all of the users of the function, looking for non-call uses.
+ for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
+ // Not a call, or being used as a parameter rather than as the callee.
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+ break;
+ }
+ }
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS && !isa<IntrinsicInst>(II)) {
+ const Function *Callee = CS.getCalledFunction();
+ if (Callee)
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ else
+ Node->addCalledFunction(CS, CallsExternalNode);
+ }
+ }
+ }
+
+ //
+ // destroy - Release memory for the call graph
+ virtual void destroy() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+ CallGraph::destroy();
+ }
+};
+
+} //End anonymous namespace
+
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+ "Basic CallGraph Construction", false, true, true)
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+ Mod = &M;
+}
+
+void CallGraph::destroy() {
+ if (FunctionMap.empty()) return;
+
+ // Reset all nodes' use counts to zero before deleting them to prevent an
+ // assertion from firing.
+#ifndef NDEBUG
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ I->second->allReferencesDropped();
+#endif
+
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
+}
+
+void CallGraph::print(raw_ostream &OS, Module*) const {
+ for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+ I->second->print(OS);
+}
+void CallGraph::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+ assert(CGN->empty() && "Cannot remove function from call "
+ "graph if it references other functions!");
+ Function *F = CGN->getFunction(); // Get the function for the call graph node
+ delete CGN; // Delete the call graph node for this func
+ FunctionMap.erase(F); // Remove the call graph node from the map
+
+ Mod->getFunctionList().remove(F);
+ return F;
+}
+
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+ assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+ assert(!FunctionMap.count(To) &&
+ "Pointing CallGraphNode at a function that already exists");
+ FunctionMapTy::iterator I = FunctionMap.find(From);
+ I->second->F = const_cast<Function*>(To);
+ FunctionMap[To] = I->second;
+ FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+ CallGraphNode *&CGN = FunctionMap[F];
+ if (CGN) return CGN;
+
+ assert((!F || F->getParent() == Mod) && "Function not in current module!");
+ return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(raw_ostream &OS) const {
+ if (Function *F = getFunction())
+ OS << "Call graph node for function: '" << F->getName() << "'";
+ else
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " CS<" << I->first << "> calls ";
+ if (Function *FI = I->second->getFunction())
+ OS << "function '" << FI->getName() <<"'\n";
+ else
+ OS << "external node\n";
+ }
+ OS << '\n';
+}
+
+void CallGraphNode::dump() const { print(dbgs()); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+ for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+ if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
+ CalledFunctions[i] = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ --i; --e;
+ }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+ CallRecord &CR = *I;
+ if (CR.second == Callee && CR.first == 0) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+ CallSite NewCS, CallGraphNode *NewNode){
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
+ return;
+ }
+ }
+}
+
+// Ensure that users of CallGraph.h also link with this file.
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 000000000000..659ffab0c6f0
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,608 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// that are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call graph in SCC order: that is, they process functions bottom-up, except
+// for recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+ static char ID;
+ explicit CGPassManager(int Depth)
+ : ModulePass(ID), PMDataManager(Depth) { }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ bool doInitialization(CallGraph &CG);
+ bool doFinalization(CallGraph &CG);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ // CGPassManager walks SCC and it needs CallGraph.
+ Info.addRequired<CallGraph>();
+ Info.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "CallGraph Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+ }
+
+ Pass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_CallGraphPassManager;
+ }
+
+private:
+ bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall);
+
+ bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall);
+ bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool IsCheckingMode);
+};
+
+} // end anonymous namespace.
+
+char CGPassManager::ID = 0;
+
+
+bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+ PMDataManager *PM = P->getAsPMDataManager();
+
+ if (PM == 0) {
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ if (!CallGraphUpToDate) {
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ {
+ TimeRegion PassTimer(getPassTimer(CGSP));
+ Changed = CGSP->runOnSCC(CurSCC);
+ }
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ FPPassManager *FPP = (FPPassManager*)P;
+
+ // Run pass P on all functions in the current SCC.
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (Function *F = (*I)->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ TimeRegion PassTimer(getPassTimer(FPP));
+ Changed |= FPP->runOnFunction(*F);
+ }
+ }
+
+ // The function pass(es) modified the IR, they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+/// This function returns true if it devirtualized an existing function call,
+/// meaning it turned an indirect call into a direct call. This happens when
+/// a function pass like GVN simplifies away the code that fed the indirect
+/// call. This never happens in checking mode.
+///
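+/// A hedged illustration (the IR below is invented for exposition, not taken
+/// from any test): if a function pass rewrites
+///
+///     %fp = load void ()** @slot
+///     call void %fp()
+///
+/// into the direct call "call void @impl()", the edge for that call site
+/// still points at the calls-external node; the code below retargets it to
+/// @impl's node and reports the devirtualization to the caller.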
+bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ );
+
+ bool MadeChange = false;
+ bool DevirtualizedCall = false;
+
+ // Scan all functions in the SCC.
+ unsigned FunctionNo = 0;
+ for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
+ SCCIdx != E; ++SCCIdx, ++FunctionNo) {
+ CallGraphNode *CGN = *SCCIdx;
+ Function *F = CGN->getFunction();
+ if (F == 0 || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Keep track of the number of direct and indirect calls that were
+ // invalidated and removed.
+ unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (I->first == 0 ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, then the function
+ // pass RAUW'd a call with another value. This can happen when, for
+ // example, a call to a well-known function is constant folded away.
+ !CallSite(I->first)) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If this was an indirect call site, count it.
+ if (I->second->getFunction() == 0)
+ ++NumIndirectRemoved;
+ else
+ ++NumDirectRemoved;
+
+ // Just remove the edge from the set of callees, keeping track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+ CallSites.insert(std::make_pair(I->first, I->second));
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ // Keep track of the number of direct/indirect calls added.
+ unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ if (!CS || isa<IntrinsicInst>(I)) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == 0)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ // Keep track of whether we turned an indirect call into a direct
+ // one.
+ if (ExistingNode->getFunction() == 0) {
+ DevirtualizedCall = true;
+ DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
+ }
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ }
+
+ // Update the edge target in CGN.
+ CGN->replaceCallEdge(CS, CS, CalleeNode);
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ ++NumDirectAdded;
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ ++NumIndirectAdded;
+ }
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+ // We scanned the old callgraph node, removing invalidated call sites and
+ // then added back newly found call sites. One thing that can happen is
+ // that an old indirect call site was deleted and replaced with a new direct
+ // call. In this case, we have devirtualized a call, and CGSCCPM would like
+ // to iteratively optimize the new code. Unfortunately, we don't really
+ // have a great way to detect when this happens. As an approximation, we
+ // just look at whether the number of indirect calls is reduced and the
+ // number of direct calls is increased. There are tons of ways to fool this
+ // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+ // direct call) but this is close enough.
+ if (NumIndirectRemoved > NumIndirectAdded &&
+ NumDirectRemoved < NumDirectAdded)
+ DevirtualizedCall = true;
+
+ // After scanning this function, if we still have entries in CallSites, then
+ // they are dangling pointers. WeakVH should save us from this, so abort if
+ // this happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+ // large SCCs.
+ if ((FunctionNo & 15) == 15)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
+ } else {
+ dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+
+ return DevirtualizedCall;
+}
+
+/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
+/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// any calls and returns it in DevirtualizedCall.
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+ // up-to-date or not. The CGSCC pass manager runs two types of passes:
+ // CallGraphSCC passes and ordinary function passes. Because ordinary
+ // function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list;
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+ #ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (I != CurSCC.begin()) OS << ", ";
+ (*I)->print(OS);
+ }
+ OS.flush();
+ #endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG,
+ CallGraphUpToDate, DevirtualizedCall);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ return Changed;
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool Changed = doInitialization(CG);
+
+ // Walk the callgraph in bottom-up SCC order.
+ scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+ CallGraphSCC CurSCC(&CGI);
+ while (!CGI.isAtEnd()) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ std::vector<CallGraphNode*> &NodeVec = *CGI;
+ CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+ ++CGI;
+
+ // At the top level, we run all the passes in this pass manager on the
+ // functions in this SCC. However, we support iterative compilation in the
+ // case where a function pass devirtualizes a call to a function. For
+ // example, it is very common for a function pass (often GVN or instcombine)
+ // to eliminate the addressing that feeds into a call. With that improved
+ // information, we would like the call to be an inline candidate, infer
+ // mod-ref information etc.
+ //
+ // Because of this, we allow iteration up to a specified iteration count.
+ // This only happens in the case of a devirtualized call, so we only burn
+ // compile time in the case that we're making progress. We also have a hard
+ // iteration count limit in case there is crazy code.
+ unsigned Iteration = 0;
+ bool DevirtualizedCall = false;
+ do {
+ DEBUG(if (Iteration)
+ dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
+ << Iteration << '\n');
+ DevirtualizedCall = false;
+ Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+ } while (Iteration++ < MaxIterations && DevirtualizedCall);
+
+ if (DevirtualizedCall)
+ DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
+
+ if (Iteration > MaxSCCIterations)
+ MaxSCCIterations = Iteration;
+
+ }
+ Changed |= doFinalization(CG);
+ return Changed;
+}
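+
+// A hedged note on typical usage (the command below is illustrative): the
+// per-SCC revisit cap declared above as MaxIterations can be raised from the
+// opt command line, e.g.
+//
+//   opt -O2 -max-cg-scc-iterations=8 input.bc -o output.bc
+//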
+
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG);
+ }
+ }
+ return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG);
+ }
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCC Implementation
+//===----------------------------------------------------------------------===//
+
+/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// Old node has been deleted, and New is to be used in its place.
+void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
+ assert(Old != New && "Should not replace node with self");
+ for (unsigned i = 0; ; ++i) {
+ assert(i != Nodes.size() && "Node not in SCC");
+ if (Nodes[i] != Old) continue;
+ Nodes[i] = New;
+ break;
+ }
+
+ // Update the active scc_iterator so that it doesn't contain dangling
+ // pointers to the old CallGraphNode.
+ scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
+ CGI->ReplaceNode(Old, New);
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCCPass Implementation
+//===----------------------------------------------------------------------===//
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find CGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+ PMS.pop();
+
+ assert(!PMS.empty() && "Unable to handle Call Graph Pass");
+ CGPassManager *CGP;
+
+ if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
+ CGP = (CGPassManager*)PMS.top();
+ else {
+ // Create new Call Graph SCC Pass Manager if it does not exist.
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ CGP = new CGPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(CGP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = CGP;
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(CGP);
+ }
+
+ CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ AU.addPreserved<CallGraph>();
+}
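+
+// A minimal sketch of what the comment above asks of subclasses
+// ("MyCGSCCPass" is a hypothetical pass name, not part of this file):
+//
+//   void MyCGSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addRequired<AliasAnalysis>();        // pass-specific requirements
+//     CallGraphSCCPass::getAnalysisUsage(AU); // keep CallGraph required/preserved
+//   }
+//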
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// PrintCallGraphPass - Print a Module corresponding to a call graph.
+ ///
+ class PrintCallGraphPass : public CallGraphSCCPass {
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+ public:
+ static char ID;
+ PrintCallGraphPass(const std::string &B, raw_ostream &o)
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) {
+ Out << Banner;
+ // The SCC containing the external node has no Function; guard against
+ // dereferencing a null function pointer.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ F->print(Out);
+ return false;
+ }
+ };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintCallGraphPass(Banner, O);
+}
+
diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 000000000000..6535786668bc
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,101 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program. Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+ "Find Used Types", false, true)
+
+// IncorporateType - Incorporate one type and all of its subtypes into the
+// collection of used types.
+//
+void FindUsedTypes::IncorporateType(const Type *Ty) {
+ // If Ty doesn't already exist in the used types set, add it now; otherwise
+ // return.
+ if (!UsedTypes.insert(Ty)) return; // Already contains Ty.
+
+ // Make sure to add any types this type references now.
+ //
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ IncorporateType(*I);
+}
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+ IncorporateType(V->getType());
+
+ // If this is a constant, it could be using other types...
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (!isa<GlobalValue>(C))
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI);
+ }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+ UsedTypes.clear(); // reset if run multiple times...
+
+ // Loop over global variables, incorporating their types
+ for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+ I != E; ++I) {
+ IncorporateType(I->getType());
+ if (I->hasInitializer())
+ IncorporateValue(I->getInitializer());
+ }
+
+ for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+ IncorporateType(MI->getType());
+ const Function &F = *MI;
+
+ // Loop over all of the instructions in the function, adding their return
+ // type as well as the types of their operands.
+ //
+ for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+ II != IE; ++II) {
+ const Instruction &I = *II;
+
+ IncorporateType(I.getType()); // Incorporate the type of the instruction
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI); // Insert inst operand types as well
+ }
+ }
+
+ return false;
+}
+
+// Print the types found in the module. If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+ OS << "Types in use by this module:\n";
+ for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
+ E = UsedTypes.end(); I != E; ++I) {
+ OS << " " << **I << '\n';
+ }
+}
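+
+// A hedged usage sketch: as an analysis pass this is normally exercised from
+// the command line, e.g.
+//
+//   opt -analyze -print-used-types input.bc
+//
+// which runs runOnModule above and then prints via the print method.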
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 000000000000..b226d66cd78a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,609 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SCCIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+ /// FunctionRecord - One instance of this structure is stored for every
+ /// function in the program. Later, the entries for these functions are
+ /// removed if the function is found to call an external function (in which
+ /// case we know nothing about it).
+ struct FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<const GlobalValue*, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<const GlobalValue*, unsigned>::const_iterator I =
+ GlobalInfo.find(GV);
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
+
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
+
+ FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
+ };
+
+ /// GlobalsModRef - The actual analysis pass.
+ class GlobalsModRef : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<const GlobalValue*> NonAddressTakenGlobals;
+
+ /// IndirectGlobals - Globals for which the pointed-to memory is known to
+ /// be 'owned' by the global itself.
+ std::set<const GlobalValue*> IndirectGlobals;
+
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<const Function*, FunctionRecord> FunctionInfo;
+
+ public:
+ static char ID;
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+ AnalyzeGlobals(M); // find non-addr taken globals
+ AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraph>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// getModRefBehavior - Return the most generic mod/ref behavior known for
+ /// the specified function, independent of any particular call site.
+ ModRefBehavior getModRefBehavior(const Function *F) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (const Function* F = CS.getCalledFunction())
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ }
+
+ virtual void deleteValue(Value *V);
+ virtual void copyValue(Value *From, Value *To);
+ virtual void addEscapingUse(Use &U);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function*, FunctionRecord>::iterator I =
+ FunctionInfo.find(F);
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return 0;
+ }
+
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest = 0);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+ };
+}
+
+char GlobalsModRef::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+
+Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
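+
+// A hedged usage sketch: this analysis is typically scheduled in front of the
+// passes that query it, e.g. from the command line
+//
+//   opt -globalsmodref-aa -gvn input.bc -o output.bc
+//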
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValues in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsModRef::AnalyzeGlobals(Module &M) {
+ std::vector<Function*> Readers, Writers;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(I);
+ ++NumNonAddrTakenFunctions;
+ }
+ Readers.clear(); Writers.clear();
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(I);
+
+ for (unsigned i = 0, e = Readers.size(); i != e; ++i)
+ FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
+
+ if (!I->isConstant()) // No need to keep track of writers to constants
+ for (unsigned i = 0, e = Writers.size(); i != e; ++i)
+ FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (I->getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(I))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear(); Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
+ std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy()) return true;
+
+ for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ Readers.push_back(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (V == SI->getOperand(1)) {
+ Writers.push_back(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (isFreeCall(U)) {
+ Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (CI->getArgOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+ if (II->getArgOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+ return true;
+ } else {
+ return true;
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
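+
+// A hedged C-level illustration (the global @G is invented): if every use of
+// a static "int G;" is a plain "G = x;" or "y = G;", each IR use of @G is a
+// direct load or store, so the walk above returns false and the caller
+// records @G as non-address-taken together with its reader/writer functions.
+// A single "p = &G;" stores the pointer itself and makes the walk return true.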
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+ // Keep track of values related to the allocation of the memory, f.e. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value*> AllocRelatedValues;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading from/storing to it. We do *not*
+ // allow storing the loaded pointer somewhere else or passing it to a
+ // function.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV) return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+
+ if (isMalloc(Ptr)) {
+ // Okay, easy case.
+ } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
+ Function *F = CI->getCalledFunction();
+ if (!F || !F->isDeclaration()) return false; // Too hard to analyze.
+ if (F->getName() != "calloc") return false; // Not calloc.
+ } else {
+ return false; // Too hard to analyze.
+ }
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ return true;
+}
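+
+// A hedged illustration of the pattern recognized above (the C names are
+// invented):
+//
+//   static int *Buf;
+//   void init(void) { Buf = malloc(100 * sizeof(int)); }
+//   int  get(int i) { return Buf[i]; }
+//
+// Every store to @Buf stores a fresh allocation and every load of @Buf is
+// only dereferenced, so @Buf ends up in IndirectGlobals and the malloc result
+// in AllocsForIndirectGlobals.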
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;
+ ++I) {
+ std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction()) {
+ // Calls externally - can't say anything useful. Remove any existing
+ // function records (may have been created when scanning globals).
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
+
+ bool KnowNothing = false;
+ unsigned FunctionEffect = 0;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behavior from function attributes.
+ if (F->doesNotAccessMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FunctionEffect |= Ref;
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FR.MayReadAnyGlobal = true;
+ } else {
+ FunctionEffect |= ModRef;
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FunctionEffect |= CalleeFR->FunctionEffect;
+
+ // Incorporate callee's effects on globals into our info.
+ for (std::map<const GlobalValue*, unsigned>::iterator GI =
+ CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
+ GI != E; ++GI)
+ FR.GlobalInfo[GI->first] |= GI->second;
+ FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfo map.
+ if (KnowNothing) {
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
+ E = inst_end(SCC[i]->getFunction());
+ II != E && FunctionEffect != ModRef; ++II)
+ if (isa<LoadInst>(*II)) {
+ FunctionEffect |= Ref;
+ if (cast<LoadInst>(*II).isVolatile())
+ // Volatile loads may have side-effects, so mark them as writing
+ // memory (for example, a flag inside the processor).
+ FunctionEffect |= Mod;
+ } else if (isa<StoreInst>(*II)) {
+ FunctionEffect |= Mod;
+ if (cast<StoreInst>(*II).isVolatile())
+ // Treat volatile stores as reading memory somewhere.
+ FunctionEffect |= Ref;
+ } else if (isMalloc(&cast<Instruction>(*II)) ||
+ isFreeCall(&cast<Instruction>(*II))) {
+ FunctionEffect |= ModRef;
+ }
+
+ if ((FunctionEffect & Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FunctionEffect == 0)
+ ++NumNoMemFunctions;
+ FR.FunctionEffect = FunctionEffect;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfo[SCC[i]->getFunction()] = FR;
+ }
+}
+
+
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasAnalysis::AliasResult
+GlobalsModRef::alias(const Location &LocA,
+ const Location &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals, or if one is and the other isn't, we know these can't alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = 0;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (AllocsForIndirectGlobals.count(UV1))
+ GV1 = AllocsForIndirectGlobals[UV1];
+ if (AllocsForIndirectGlobals.count(UV2))
+ GV2 = AllocsForIndirectGlobals[UV2];
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If either pointer is based on an
+ // indirect global and if they are not both based on the same indirect global,
+ // they cannot alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AliasAnalysis::alias(LocA, LocB);
+}
+
+AliasAnalysis::ModRefResult
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ unsigned Known = ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionRecord *FR = getFunctionInfo(F))
+ Known = FR->getInfoForGlobal(GV);
+
+ if (Known == NoModRef)
+ return NoModRef; // No need to query other mod/ref analyses
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to update the analysis as a result of the client transformation.
+//
+void GlobalsModRef::deleteValue(Value *V) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and remove
+ // any AllocRelatedValues for it.
+ if (IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (std::map<const Value*, const GlobalValue*>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end(); I != E; ) {
+ if (I->second == GV) {
+ AllocsForIndirectGlobals.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise, if this is an allocation related to an indirect global, remove
+ // it.
+ AllocsForIndirectGlobals.erase(V);
+
+ AliasAnalysis::deleteValue(V);
+}
+
+void GlobalsModRef::copyValue(Value *From, Value *To) {
+ AliasAnalysis::copyValue(From, To);
+}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+ // For the purposes of this analysis, it is conservatively correct to treat
+ // a newly escaping value equivalently to a deleted one. We could perhaps
+ // be more precise by processing the new use and attempting to update our
+ // saved analysis results to accommodate it.
+ deleteValue(U);
+
+ AliasAnalysis::addEscapingUse(U);
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 000000000000..0ba2e04c6302
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) {
+ initializeBasicCallGraphPass(Registry);
+ initializeCallGraphAnalysisGroup(Registry);
+ initializeFindUsedTypesPass(Registry);
+ initializeGlobalsModRefPass(Registry);
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeIPA(*unwrap(R));
+}
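+
+// A hedged C-API sketch (client-side code, not part of this file): bindings
+// users would typically do
+//
+//   LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
+//   LLVMInitializeIPA(R);
+//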
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
new file mode 100644
index 000000000000..e5f0a77ab67d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,272 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "iv-users"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char IVUsers::ID = 0;
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE, LoopInfo *LI) {
+ // An addrec is interesting if it's affine or if it has an interesting start.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Keep things simple. Don't touch loop-variant strides unless they're
+ // only used outside the loop and we can simplify them.
+ if (AR->getLoop() == L)
+ return AR->isAffine() ||
+ (!L->contains(I) &&
+ SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
+ // Otherwise recurse to see if the start value is interesting, and that
+ // the step value is not interesting, since we don't yet know how to
+ // do effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE, LI) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
+ }
+
+ // An add is interesting if exactly one of its operands is interesting.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
+ for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+ OI != OE; ++OI)
+ if (isInteresting(*OI, I, L, SE, LI)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
+ }
+
+ // Nothing else is interesting here.
+ return false;
+}
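+
+// A hedged example (the loop and types are invented): in
+//   for (i = 0; i != n; ++i) use(A[i]);
+// the pointer SCEV {@A,+,4}<%loop> is an affine addrec and thus interesting.
+// An add such as ({@A,+,4} + %off) with exactly one interesting operand is
+// also interesting, while ({@A,+,4} + {@B,+,4}) is rejected by the
+// "exactly one" rule above.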
+
+/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUses list and
+/// return true. Otherwise, return false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+ // LSR is not APInt clean; do not touch integers bigger than 64 bits.
+ // Also avoid creating IVs of non-native types. For example, we don't want a
+ // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+ uint64_t Width = SE->getTypeSizeInBits(I->getType());
+ if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
+ return false;
+
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *ISE = SE->getSCEV(I);
+
+ // If we've come to an uninteresting expression, stop the traversal and
+ // call this a user.
+ if (!isInteresting(ISE, I, L, SE, LI))
+ return false;
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!UniqueUsers.insert(User))
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to get
+ // the choices that depend on addressing-mode use right, although we won't
+ // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but we do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
+ DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ // Okay, we found a user that we cannot reduce.
+ IVUses.push_back(new IVStrideUse(this, User, I));
+ IVStrideUse &NewUse = IVUses.back();
+ // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
+ // The regular return value here is discarded; instead of recording
+ // it, we just recompute it when we need it.
+ ISE = TransformForPostIncUse(NormalizeAutodetect,
+ ISE, User, I,
+ NewUse.PostIncLoops,
+ *SE, *DT);
+ DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ }
+ }
+ return true;
+}
+
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
+ return IVUses.back();
+}
+
+IVUsers::IVUsers()
+ : LoopPass(ID) {
+ initializeIVUsersPass(*PassRegistry::getPassRegistry());
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ (void)AddUsersIfInteresting(I);
+
+ return false;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+ E = IVUses.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = " << *getReplacementExpr(*UI);
+ for (PostIncLoopSet::const_iterator
+ I = UI->PostIncLoops.begin(),
+ E = UI->PostIncLoops.end(); I != E; ++I) {
+ OS << " (post-inc with loop ";
+ WriteAsOperand(OS, (*I)->getHeader(), false);
+ OS << ")";
+ }
+ OS << " in ";
+ UI->getUser()->print(OS);
+ OS << '\n';
+ }
+}
+
+void IVUsers::dump() const {
+ print(dbgs());
+}
+
+void IVUsers::releaseMemory() {
+ Processed.clear();
+ IVUses.clear();
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+ return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+ return
+ TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+ IU.getUser(), IU.getOperandValToReplace(),
+ const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+ *SE, *DT);
+}
+
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ return AR;
+ return findAddRecForLoop(AR->getStart(), L);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ return AR;
+ return 0;
+ }
+
+ return 0;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ return AR->getStepRecurrence(*SE);
+ return 0;
+}
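+
+// Illustrative sketch (assumed client code, not part of this file): a pass
+// holding an IVUsers pointer "IU" for the current loop "L" could walk the
+// recorded uses and query each stride roughly like this:
+//
+//   for (IVUsers::const_iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI)
+//     if (const SCEV *Stride = IU->getStride(*UI, L))
+//       dbgs() << "stride: " << *Stride << '\n';
+//
+// "IU" and "L" are invented names, and IVUsers::const_iterator with begin()
+// and end() are assumed to come from the corresponding header; getStride
+// returns null when no add recurrence for L is found, so callers must check.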
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+ PostIncLoops.insert(L);
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->IVUses.erase(this);
+ // this now dangles!
+}
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
new file mode 100644
index 000000000000..efde5984c115
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -0,0 +1,647 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small, return true.
+/// TODO: Perhaps handle calls like memcpy, strcpy, etc.?
+bool llvm::callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ continue; // Debug intrinsics don't count as size.
+
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ // If this call is to the function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+ }
+
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+ // Each argument to a call takes on average one instruction to set up.
+ NumInsts += CS.arg_size();
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ // The result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical instructions, or return instructions). These
+ // extensions are usually no-ops on most targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ continue;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ ++NumInsts;
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // one would be incorrect because all of the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function, so
+ // the indirect branch would jump from the inlined copy of the function into
+ // the original function, which is undefined behavior.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ containsIndirectBr = true;
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+ // We will be able to eliminate all but one of the successors.
+ const TerminatorInst &TI = cast<TerminatorInst>(*U);
+ const unsigned NumSucc = TI.getNumSuccessors();
+ unsigned Instrs = 0;
+ for (unsigned I = 0; I != NumSucc; ++I)
+ Instrs += NumBBInsts[TI.getSuccessor(I)];
+ // We don't know which blocks will be eliminated, so use the average size.
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += InlineConstants::InstrCost;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+ }
+ return Reduction;
+}
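+
+// Worked example (illustrative, not from the original source): if V feeds a
+// conditional branch whose two successors contain 4 and 6 counted
+// instructions, then with InlineConstants::InstrCost == 5 the branch case
+// above adds 5 * (4 + 6) * (2 - 1) / 2 = 25 to Reduction - the cost of the
+// one average-sized successor we expect to delete once V becomes constant.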
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += InlineConstants::InstrCost;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP);
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ // Track pointer through bitcasts.
+ Reduction += CountCodeReductionForAlloca(BCI);
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void CodeMetrics::analyzeFunction(Function *F) {
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (F->callsFunctionThatReturnsTwice())
+ callsSetJmp = true;
+
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB);
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+ Metrics.analyzeFunction(F);
+
+ // A function with exactly one return has it removed during the inlining
+ // process (see InlineFunction), so don't count it.
+ // FIXME: This knowledge should really be encoded outside of FunctionInfo.
+ if (Metrics.NumRets==1)
+ --Metrics.NumInsts;
+
+ // Check out all of the arguments to the function, figuring out how much
+ // code can be eliminated if one of the arguments is a constant.
+ ArgumentWeights.reserve(F->arg_size());
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
+ Metrics.CountCodeReductionForAlloca(I)));
+}
+
+/// NeverInline - Returns true if the function should never be inlined into
+/// any caller.
+bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
+ return (Metrics.callsSetJmp || Metrics.isRecursive ||
+ Metrics.containsIndirectBr);
+}
+
+// getSpecializationBonus - The heuristic used to determine the per-call
+// performance boost for using a specialization of Callee with argument
+// specializedArgNo replaced by a constant.
+int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ if (Callee->mayBeOverridden())
+ return 0;
+
+ int Bonus = 0;
+ // If this function uses the coldcc calling convention, prefer not to
+ // specialize it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus -= InlineConstants::ColdccPenalty;
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ unsigned ArgNo = 0;
+ unsigned i = 0;
+ for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
+ I != E; ++I, ++ArgNo)
+ // Don't walk off the end of SpecializedArgNos once every specialized
+ // argument has been seen.
+ if (i != SpecializedArgNos.size() && ArgNo == SpecializedArgNos[i]) {
+ ++i;
+ Bonus += CountBonusForConstant(I);
+ }
+
+ // Calls usually take a long time, so they make the specialization gain
+ // smaller.
+ Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ return Bonus;
+}
+
+// ConstantFunctionBonus - Figure out how much of a bonus we can get for
+// possibly devirtualizing a function. We'll subtract the size of the function
+// we may wish to inline from the indirect call bonus, providing a limit on
+// growth. Cap the bonus at 0 - we don't want to penalize inlining just
+// because we decided not to give a bonus for devirtualizing.
+int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+
+ // This could just be NULL.
+ if (!C) return 0;
+
+ Function *F = dyn_cast<Function>(C);
+ if (!F) return 0;
+
+ int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
+ return (Bonus > 0) ? 0 : Bonus;
+}
+
+// CountBonusForConstant - Figure out an approximation for how much per-call
+// performance boost we can expect if the specified value is constant.
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+ unsigned Bonus = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (CI->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(CI), C);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (II->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(II), C);
+ }
+ // FIXME: Eliminating conditional branches and switches should
+ // also yield a per-call performance boost.
+ else {
+ // Figure out the bonuses that will accrue due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant)
+ Bonus += CountBonusForConstant(&Inst);
+ }
+ }
+
+ return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ // InlineCost - This value measures how good an inline candidate this call
+ // site is. A lower inline cost makes it more likely for the call to be
+ // inlined. This value may go negative.
+ //
+ int InlineCost = 0;
+
+ // Compute any size reductions we can expect due to arguments being passed into
+ // the function.
+ //
+ unsigned ArgNo = 0;
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI, ++ArgNo) {
+
+ // If an alloca is passed in, inlining this function is likely to allow
+ // significant future optimizations (such as scalar promotion and
+ // scalarization), so encourage the inlining of the function.
+ //
+ if (isa<AllocaInst>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ else if (isa<Constant>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ }
+
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. Measurements show that each argument costs about the same as an
+ // instruction.
+ InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Calls usually take a long time, so they make the inlining gain smaller.
+ InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+
+ return InlineCost;
+}
+
+int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ bool isDirectCall = CS.getCalledFunction() == Callee;
+ Instruction *TheCall = CS.getInstruction();
+ int Bonus = 0;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // make it almost guaranteed to be inlined.
+ //
+ if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
+ Bonus += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or the normal destination of the
+ // invoke, is an unreachable instruction, the function is effectively
+ // noreturn. As such, there is little point in inlining it.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Bonus += InlineConstants::NoreturnPenalty;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+ Bonus += InlineConstants::NoreturnPenalty;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus += InlineConstants::ColdccPenalty;
+
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI)
+ // Compute any constant bonus due to inlining we want to give here.
+ if (isa<Constant>(I))
+ Bonus += CountBonusForConstant(FI, cast<Constant>(I));
+
+ return Bonus;
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ Function *Callee,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+ CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ // If we should never inline this, return a huge cost.
+ if (CalleeFI->NeverInline())
+ return InlineCost::getNever();
+
+ // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
+ // could move this up and avoid computing the FunctionInfo for
+ // things we are going to just return always inline for. This
+ // requires handling setjmp somewhere else, however.
+ if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getAlways();
+
+ if (CalleeFI->Metrics.usesDynamicAlloca) {
+ // Get information about the caller.
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CallerFI.Metrics.NumBlocks == 0) {
+ CallerFI.analyzeFunction(Caller);
+
+ // Recompute the CalleeFI pointer, getting Caller could have invalidated
+ // it.
+ CalleeFI = &CachedFunctionInfo[Callee];
+ }
+
+ // Don't inline a callee with dynamic alloca into a caller without them.
+ // Functions containing dynamic alloca's are inefficient in various ways;
+ // don't create more inefficiency.
+ if (!CallerFI.Metrics.usesDynamicAlloca)
+ return InlineCost::getNever();
+ }
+
+ // InlineCost - This value measures how good an inline candidate this call
+ // site is. A lower inline cost makes it more likely for the call to be
+ // inlined. This value may go negative because bonuses are negative
+ // numbers.
+ //
+ int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+ return llvm::InlineCost::get(InlineCost);
+}
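+
+// Illustrative sketch (assumed caller code, not part of this file): an inliner
+// holding an InlineCostAnalyzer "CA", a never-inline set "NeverInline" and a
+// cost budget "Threshold" might consume the result roughly like this:
+//
+//   InlineCost IC = CA.getInlineCost(CS, NeverInline);
+//   if (IC.isAlways()) return true;    // e.g. always_inline callees
+//   if (IC.isNever())  return false;   // e.g. recursive or setjmp callees
+//   return IC.getValue() <= Threshold; // otherwise compare cost to budget
+//
+// "CA", "NeverInline" and "Threshold" are names invented for the example, and
+// isAlways(), isNever() and getValue() are assumed InlineCost accessors; only
+// the factory functions get(), getAlways() and getNever() appear in this file.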
+
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with argument
+// SpecializedArgNo replaced by a constant.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ // Don't specialize functions which can be redefined at link-time to mean
+ // something else.
+ if (Callee->mayBeOverridden())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ int Cost = 0;
+
+ // Look at the original size of the callee. Each instruction counts as 5.
+ Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+
+ // Offset that with the amount of code that can be constant-folded
+ // away with the given arguments replaced by constants.
+ for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+ ae = SpecializedArgNos.end(); an != ae; ++an)
+ Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+
+ return llvm::InlineCost::get(Cost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee.
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ float Factor = 1.0f;
+ // Single BB functions are often written to be inlined.
+ if (CalleeFI.Metrics.NumBlocks == 1)
+ Factor += 0.5f;
+
+ // Be more aggressive if the function contains a good chunk of vector
+ // instructions (if they make up at least 10% of the instructions).
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
+ Factor += 2.0f;
+ else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
+ Factor += 1.5f;
+ return Factor;
+}
+
+/// growCachedCostInfo - update the cached cost info for Caller after Callee has
+/// been inlined.
+void
+InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
+ CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+
+ // For small functions we prefer to recalculate the cost for better accuracy.
+ if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
+ resetCachedCostInfo(Caller);
+ return;
+ }
+
+ // For large functions, we can save a lot of computation time by skipping
+ // recalculations.
+ if (CallerMetrics.NumCalls > 0)
+ --CallerMetrics.NumCalls;
+
+ if (Callee == 0) return;
+
+ CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+
+ // If we don't have metrics for the callee, don't recalculate them just to
+ // update an approximation in the caller. Instead, just recalculate the
+ // caller info from scratch.
+ if (CalleeMetrics.NumBlocks == 0) {
+ resetCachedCostInfo(Caller);
+ return;
+ }
+
+ // Since CalleeMetrics were already calculated, we know that the CallerMetrics
+ // reference isn't invalidated: both were in the DenseMap.
+ CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
+
+ // FIXME: If any of these three are true for the callee, the callee was
+ // not inlined into the caller, so I think they're redundant here.
+ CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
+ CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
+ CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
+
+ CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
+ CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
+ CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
+ CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
+ CallerMetrics.NumRets += CalleeMetrics.NumRets;
+
+ // analyzeBasicBlock counts each function argument as an inst.
+ if (CallerMetrics.NumInsts >= Callee->arg_size())
+ CallerMetrics.NumInsts -= Callee->arg_size();
+ else
+ CallerMetrics.NumInsts = 0;
+
+ // We are not updating the argument weights. We have already determined that
+ // Caller is a fairly large function, so we accept the loss of precision.
+}
+
+/// clear - empty the cache of inline costs
+void InlineCostAnalyzer::clear() {
+ CachedFunctionInfo.clear();
+}
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
new file mode 100644
index 000000000000..3b385d26ba3c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -0,0 +1,87 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/Instruction.def"
+
+
+namespace {
+ class InstCount : public FunctionPass, public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
+
+ void visitFunction (Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/Instruction.def"
+
+ void visitInstruction(Instruction &I) {
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(0);
+ }
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCount() : FunctionPass(ID) {
+ initializeInstCountPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ };
+}
+
+char InstCount::ID = 0;
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::runOnFunction - This is the main analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+ unsigned StartMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ visit(F);
+ unsigned EndMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ TotalMemInst += EndMemInsts-StartMemInsts;
+ return false;
+}
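+
+// Illustrative usage (assumed, not part of this file): since the counters are
+// LLVM Statistics, they are normally inspected by running the pass with
+// statistics printing enabled, e.g.
+//
+//   opt -instcount -stats -disable-output input.bc
+//
+// which dumps the Num<Opcode>Inst counters and the totals above to stderr on
+// exit. The file name "input.bc" is just a placeholder.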
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 000000000000..8709f6bf9d26
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,2526 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
+// simplified: this is usually true, and assuming it simplifies the logic (if
+// they have not been simplified, the results are still correct, just possibly
+// suboptimal).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Operator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ // Arguments and constants dominate all instructions.
+ return true;
+
+ // If we have a DominatorTree then do a precise test.
+ if (DT)
+ return DT->dominates(I, P);
+
+ // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // then it obviously dominates all phi nodes.
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ !isa<InvokeInst>(I))
+ return true;
+
+ return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExpand, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Check whether the expression has the form "(A op' B) op C".
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (Op0->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op C) op' (B op C)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == B && R == A)) {
+ ++NumExpand;
+ return LHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ // Check whether the expression has the form "A op (B op' C)".
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (Op1->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op B) op' (A op C)".
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == C && R == B)) {
+ ++NumExpand;
+ return RHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract. For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExtract, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+ !Op1 || Op1->getOpcode() != OpcodeToExtract)
+ return 0;
+
+ // The expression has the form "(A op' B) op (C op' D)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+ // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+ Value *DD = A == C ? D : C;
+ // Form "A op' (B op DD)" if it simplifies completely.
+ // Does "B op DD" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ // It does! Return "A op' V" if it simplifies or is already available.
+ // If V equals B then "A op' V" is just the LHS. If V equals DD then
+ // "A op' V" is just the RHS.
+ if (V == B || V == DD) {
+ ++NumFactor;
+ return V == B ? LHS : RHS;
+ }
+ // Otherwise return "A op' V" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+ Value *CC = B == D ? C : D;
+ // Form "(A op CC) op' B" if it simplifies completely.
+ // Does "A op CC" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ // It does! Return "V op' B" if it simplifies or is already available.
+ // If V equals A then "V op' B" is just the LHS. If V equals CC then
+ // "V op' B" is just the RHS.
+ if (V == A || V == CC) {
+ ++NumFactor;
+ return V == A ? LHS : RHS;
+ }
+ // Otherwise return "V op' B" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
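+
+// Concrete example (illustrative, not from the original source): given
+// LHS = "A | B", RHS = "B | A", Opcode = And and OpcodeToExtract = Or, the
+// commutative "A == D" case above matches with DD = B; "B & B" simplifies to
+// B, which equals operand B, so the routine returns the LHS and
+// "(A | B) & (B | A)" folds to "A | B".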
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations. Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+ assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // It does! Return "A op V" if it simplifies or is already available.
+ // If V equals B then "A op V" is just the LHS.
+ if (V == B) return LHS;
+ // Otherwise return "A op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ // It does! Return "V op C" if it simplifies or is already available.
+ // If V equals B then "V op C" is just the RHS.
+ if (V == B) return RHS;
+ // Otherwise return "V op C" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // The remaining transforms require commutativity as well as associativity.
+ if (!Instruction::isCommutative(Opcode))
+ return 0;
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "V op B" if it simplifies or is already available.
+ // If V equals A then "V op B" is just the LHS.
+ if (V == A) return LHS;
+ // Otherwise return "V op B" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "B op V" if it simplifies or is already available.
+ // If V equals C then "B op V" is just the RHS.
+ if (V == C) return RHS;
+ // Otherwise return "B op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
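+
+// Concrete example (illustrative, not from the original source): given
+// LHS = "A & B", RHS = "A" and Opcode = And, the first commutative transform
+// above ("(A op B) op C" ==> "(C op A) op B") tries "C op A", i.e. "A & A",
+// which simplifies to A; since that equals Op0's first operand, the routine
+// returns the LHS, so "(A & B) & A" folds to "A & B" without creating any
+// new instructions.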
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ SelectInst *SI;
+ if (isa<SelectInst>(LHS)) {
+ SI = cast<SelectInst>(LHS);
+ } else {
+ assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+ SI = cast<SelectInst>(RHS);
+ }
+
+ // Evaluate the BinOp on the true and false branches of the select.
+ Value *TV;
+ Value *FV;
+ if (SI == LHS) {
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ } else {
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ }
+
+ // If they simplified to the same value, then return the common value.
+ // If they both failed to simplify then return null.
+ if (TV == FV)
+ return TV;
+
+ // If one branch simplified to undef, return the other one.
+ if (TV && isa<UndefValue>(TV))
+ return FV;
+ if (FV && isa<UndefValue>(FV))
+ return TV;
+
+ // If applying the operation did not change the true and false select values,
+ // then the result of the binop is the select itself.
+ if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+ return SI;
+
+ // If one branch simplified and the other did not, and the simplified
+ // value is equal to the unsimplified one, return the simplified value.
+ // For example, select (cond, X, X & Z) & Z -> X & Z.
+ if ((FV && !TV) || (TV && !FV)) {
+ // Check that the simplified value has the form "X op Y" where "op" is the
+ // same as the original operation.
+ Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+ if (Simplified && Simplified->getOpcode() == Opcode) {
+ // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+ // We already know that "op" is the same as for the simplified value. See
+ // if the operands match too. If so, return the simplified value.
+ Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+ Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+ Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+ if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+ Simplified->getOperand(1) == UnsimplifiedRHS)
+ return Simplified;
+ if (Simplified->isCommutative() &&
+ Simplified->getOperand(1) == UnsimplifiedLHS &&
+ Simplified->getOperand(0) == UnsimplifiedRHS)
+ return Simplified;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value. Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the select is on the LHS.
+ if (!isa<SelectInst>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+ SelectInst *SI = cast<SelectInst>(LHS);
+
+ // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+ // Does "cmp TV, RHS" simplify?
+ if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! Does "cmp FV, RHS" simplify?
+ if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! If they simplified to the same value, then use it as the
+ // result of the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+ Value *Cond = SI->getCondition();
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ TD, DT, MaxRecurse))
+ return V;
+ }
+ }
+
+ return 0;
+}
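+
+// Concrete example (illustrative, not from the original source): for
+// "icmp ult (select %c, i32 1, i32 2), 4" both branch comparisons fold to
+// constants ("icmp ult 1, 4" and "icmp ult 2, 4" are both true), so
+// TCmp == FCmp and the whole comparison simplifies to true regardless of %c.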
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If
+/// so, returns the common value; otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ PHINode *PI;
+ if (isa<PHINode>(LHS)) {
+ PI = cast<PHINode>(LHS);
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+ } else {
+ assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+ PI = cast<PHINode>(RHS);
+ // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(LHS, PI, DT))
+ return 0;
+ }
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = PI == LHS ?
+ SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // than previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so, returns the
+/// common result; otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the phi is on the LHS.
+ if (!isa<PHINode>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+ PHINode *PI = cast<PHINode>(LHS);
+
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+
+ // Evaluate the comparison on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // than previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// SimplifyAddInst - Given operands for an Add, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X + undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X + (Y - X) -> Y
+ // (Y - X) + X -> Y
+ // Eg: X + -X -> 0
+ Value *Y = 0;
+ if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+ match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+ return Y;
+
+ // X + ~X -> -1 since ~X = -X-1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // i1 add -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Add over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A + select(cond, B, C)" means evaluating
+ // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
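+
+// Illustrative sketch (assumed caller code, not part of this file): a pass
+// with optional TargetData "TD" and DominatorTree "DT" pointers could try to
+// fold an add instruction "I" in place along these lines:
+//
+//   if (Value *V = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+//                                  /*isNSW=*/false, /*isNUW=*/false, TD, DT)) {
+//     I->replaceAllUsesWith(V);   // the simplified value replaces the add
+//     I->eraseFromParent();       // and the now-dead add is removed
+//   }
+//
+// A null return means no simplification was found, in which case the
+// instruction is left untouched; "TD", "DT" and "I" are invented names.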
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0))
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return UndefValue::get(Op0->getType());
+
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X - X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // (X*2) - X -> X
+ // (X<<1) - X -> X
+ Value *X = 0;
+ if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+ match(Op0, m_Shl(m_Specific(Op1), m_One())))
+ return Op1;
+
+ // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+ // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+ Value *Y = 0, *Z = Op1;
+ if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+ // See if "V === Y - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "X + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "Y + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+ // For example, X - (X + 1) -> -1
+ X = Op0;
+ if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Z" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+ // For example, X - (X - Y) -> Y.
+ Z = Op0;
+ if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+ // See if "V === Z - X" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V + Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+
+ // Mul distributes over Sub. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // i1 sub -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Threading Sub over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A - select(cond, B, C)" means evaluating
+ // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X * undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X * 0 -> 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X * 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ // (X / Y) * Y -> X if the division is exact.
+ Value *X = 0, *Y = 0;
+ if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
+ (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
+ BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
+ if (Div->isExact())
+ return X;
+ }
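+ // Illustrative IR: "%q = sdiv exact i32 %a, %b" followed by
+ // "%r = mul i32 %q, %b" lets %r simplify back to %a, since the exact flag
+ // guarantees the division left no remainder.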
+
+ // i1 mul -> and.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SDiv;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef / X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 / X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X / 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be division by zero, hence it must be division by one.
+ return Op0;
+
+ // X / X -> 1
+ if (Op0 == Op1)
+ return ConstantInt::get(Op0->getType(), 1);
+
+ // (X * Y) / Y -> X if the multiplication does not overflow.
+ Value *X = 0, *Y = 0;
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+ if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+ BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+ // If the Mul knows it does not overflow, then we are good to go.
+ if ((isSigned && Mul->hasNoSignedWrap()) ||
+ (!isSigned && Mul->hasNoUnsignedWrap()))
+ return X;
+ // If X has the form X = A / Y then X * Y cannot overflow.
+ if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+ if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+ return X;
+ }
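+ // Illustrative IR: "%m = mul nsw i32 %a, %b" followed by
+ // "%q = sdiv i32 %m, %b" simplifies %q to %a, because nsw rules out the
+ // signed overflow that would otherwise make the fold unsound.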
+
+ // (X rem Y) / Y -> 0
+ if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef / X -> undef (the undef could be a signaling NaN).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef % X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 % X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X % 0 -> undef, we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op0->getType());
+
+ // X % 1 -> 0
+ if (match(Op1, m_One()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be remainder by zero, hence it must be remainder by one.
+ return Constant::getNullValue(Op0->getType());
+
+ // X % X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef % X -> undef (the undef could be a signaling NaN).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for a Shl, LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ // 0 shift by X -> 0
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X shift by 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X shift by undef -> undef because it may shift by the bitwidth.
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+ if (CI->getValue().getLimitedValue() >=
+ Op0->getType()->getScalarSizeInBits())
+ return UndefValue::get(Op0->getType());
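+ // Illustrative IR: "shl i32 %x, 33" shifts by at least the 32-bit width,
+ // so its result is undefined and undef is a legal simplification.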
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifyShlInst - Given operands for a Shl, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef << X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X >> A) << A -> X
+ Value *X;
+ if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
+ cast<PossiblyExactOperator>(Op0)->isExact())
+ return X;
+ return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef >>l X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // all ones >>a X -> all ones
+ if (match(Op0, m_AllOnes()))
+ return Op0;
+
+ // undef >>a X -> all ones
+ if (match(Op0, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & 0 = 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X & -1 = X
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ // A & ~A = ~A & A = 0
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | 0 = X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X | -1 = -1
+ if (match(Op1, m_AllOnes()))
+ return Op1;
+
+ // A | ~A = ~A | A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // ~(A & ?) | A = -1
+ if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op1 || B == Op1))
+ return Constant::getAllOnesValue(Op1->getType());
+
+ // A | ~(A & ?) = -1
+ if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op0 || B == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // A ^ undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // A ^ 0 = A
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // A ^ A = 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // A ^ ~A = ~A ^ A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Xor over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+ // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static const Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+/// ExtractEquivalentCondition - Rummage around inside V looking for something
+/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
+/// otherwise return null. Helper function for analyzing max/min idioms.
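+/// Illustrative example: if V is "select (icmp sgt %a, %b), %a, %b" (an smax
+/// idiom), then a query for "%a sgt %b", or for the swapped "%b slt %a",
+/// returns that select's icmp condition.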
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI)
+ return 0;
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+ if (!Cmp)
+ return 0;
+ Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+ if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
+ return Cmp;
+ if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
+ LHS == CmpRHS && RHS == CmpLHS)
+ return Cmp;
+ return 0;
+}
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ const Type *ITy = GetCompareTy(LHS); // The return type.
+ const Type *OpTy = LHS->getType(); // The operand type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Special case logic when the operands have i1 type.
+ if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
+ cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X (for i1, the constant 1 is the all-ones value, i.e. -1)
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ }
+ }
+
+ // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
+ // different addresses, and what's more the address of a stack variable is
+ // never null or equal to the address of a global. Note that generalizing
+ // to the case where LHS is a global variable address or null is pointless,
+ // since if both LHS and RHS are constants then we already constant folded
+ // the compare, and if only one of them is then we moved it to RHS already.
+ if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+ isa<ConstantPointerNull>(RHS)))
+ // We already know that LHS != RHS.
+ return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+ // If we are comparing with zero then try hard since this is a common case.
+ if (match(RHS, m_Zero())) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
+ case ICmpInst::ICMP_UGE:
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return ConstantInt::getAllOnesValue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, TD))
+ return Constant::getNullValue(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, TD))
+ return ConstantInt::getAllOnesValue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getAllOnesValue(ITy);
+ if (LHSKnownNonNegative)
+ return Constant::getNullValue(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getAllOnesValue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return Constant::getNullValue(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return Constant::getNullValue(ITy);
+ if (LHSKnownNonNegative)
+ return ConstantInt::getAllOnesValue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return Constant::getNullValue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return ConstantInt::getAllOnesValue(ITy);
+ break;
+ }
+ }
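+ // Illustrative behaviour: whenever isKnownNonZero proves the LHS cannot be
+ // zero, the equality-style compares against zero fold outright, e.g.
+ // "icmp eq i32 %x, 0" becomes false and "icmp ugt i32 %x, 0" becomes true.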
+
+ // See if we are doing a comparison with a constant integer.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Rule out tautological comparisons (e.g., ult 0 or uge 0).
+ ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(CI->getContext());
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(CI->getContext());
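+ // Illustrative IR: "icmp ult i32 %x, 0" has an empty satisfying range and
+ // folds to false above, while "icmp uge i32 %x, 0" covers every value and
+ // folds to true.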
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ uint32_t Width = CI->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ ConstantInt *CI2;
+ if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'urem x, CI2' produces [0, CI2).
+ Upper = CI2->getValue();
+ } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'srem x, CI2' produces (-|CI2|, |CI2|).
+ Upper = CI2->getValue().abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (!CI2->isZero())
+ Upper = NegOne.udiv(CI2->getValue()) + 1;
+ } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ APInt Val = CI2->getValue().abs();
+ if (!Val.isMinValue()) {
+ Lower = IntMin.sdiv(Val);
+ Upper = IntMax.sdiv(Val) + 1;
+ }
+ } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (CI2->getValue().ult(Width))
+ Upper = NegOne.lshr(CI2->getValue()) + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (CI2->getValue().ult(Width)) {
+ Lower = IntMin.ashr(CI2->getValue());
+ Upper = IntMax.ashr(CI2->getValue()) + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+ // 'or x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
+ } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+ // 'and x, CI2' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ }
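+ // Illustrative IR: for "%x = urem i32 %a, 8" the range computed above is
+ // [0, 8), so the containment check below folds "icmp ult i32 %x, 16" to
+ // true and "icmp ugt i32 %x, 8" to false.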
+ if (Lower != Upper) {
+ ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ const Type *SrcTy = SrcOp->getType();
+ const Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
+ TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), TD, DT,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+ MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+ if (MaxRecurse && (LBO || RBO)) {
+ // Analyze the case when either LHS or RHS is an add instruction.
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+ if (LBO && LBO->getOpcode() == Instruction::Add) {
+ A = LBO->getOperand(0); B = LBO->getOperand(1);
+ NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ }
+ if (RBO && RBO->getOpcode() == Instruction::Add) {
+ C = RBO->getOperand(0); D = RBO->getOperand(1);
+ NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ }
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ Constant::getNullValue(RHS->getType()),
+ TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred,
+ Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoLHSWrapProblem && NoRHSWrapProblem) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return Constant::getAllOnesValue(ITy);
+ }
+ }
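+ // Illustrative IR: with "%r = urem i32 %x, %n", a compare such as
+ // "icmp ult i32 %r, %n" is always true whenever the urem result is defined,
+ // since an unsigned remainder is strictly smaller than its divisor.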
+ if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return Constant::getAllOnesValue(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+ LBO->getOperand(1) == RBO->getOperand(1)) {
+ switch (LBO->getOpcode()) {
+ default: break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (ICmpInst::isSigned(Pred))
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!LBO->isExact() || !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case Instruction::Shl: {
+ bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+ bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && ICmpInst::isSigned(Pred))
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+
+ // Simplify comparisons involving max/min.
+ Value *A, *B;
+ CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+ CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+ // Signed variants on "max(a,b)>=a -> true".
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_SLE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_SGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_SGE:
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ case CmpInst::ICMP_SLT:
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ // Unsigned variants on "max(a,b)>=a -> true".
+ P = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_ULE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_UGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_UGE:
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ case CmpInst::ICMP_ULT:
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ // Variants on "max(x,y) >= min(x,z)".
+ Value *C, *D;
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_SGE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_SLT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_SLE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_SGT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_UGE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_ULT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_ULE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_UGT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
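+ // Illustrative case: when LHS is an smax idiom of (%x, %a) and RHS is an
+ // smin idiom of (%x, %b), "icmp sge LHS, RHS" is always true because
+ // smax(%x, %a) >= %x >= smin(%x, %b).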
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
+ return UndefValue::get(GetCompareTy(LHS));
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // Every value is either unordered with, or at least, negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // Every value is either unordered with, or at most, positive infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result. If not, this returns null.
+Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD, const DominatorTree *) {
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
+ return CB->getZExtValue() ? TrueVal : FalseVal;
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return TrueVal;
+
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return TrueVal;
+ return FalseVal;
+ }
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
+
+ return 0;
+}
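+// Illustrative IR: "select i1 %c, i32 %x, i32 %x" simplifies to %x, and
+// "select i1 undef, i32 7, i32 %x" simplifies to the constant arm, i.e. 7.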
+
+/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
+ const TargetData *TD, const DominatorTree *) {
+ // The type of the GEP pointer operand.
+ const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+
+ // getelementptr P -> P.
+ if (NumOps == 1)
+ return Ops[0];
+
+ if (isa<UndefValue>(Ops[0])) {
+ // Compute the (pointer) type returned by the GEP instruction.
+ const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
+ NumOps-1);
+ const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+ return UndefValue::get(GEPTy);
+ }
+
+ if (NumOps == 2) {
+ // getelementptr P, 0 -> P.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
+ if (C->isZero())
+ return Ops[0];
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TD) {
+ const Type *Ty = PtrTy->getElementType();
+ if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ return Ops[0];
+ }
+ }
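+ // Illustrative IR: "getelementptr i32* %p, i64 0" simplifies to %p, as does
+ // any single-index GEP whose pointee type has zero allocated size.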
+
+ // Check to see if this is constant foldable.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (!isa<Constant>(Ops[i]))
+ return 0;
+
+ return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
+ (Constant *const*)Ops+1, NumOps-1);
+}
+
+/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+ // If all of the PHI's incoming values are the same then replace the PHI node
+ // with the common value.
+ Value *CommonValue = 0;
+ bool HasUndefInput = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PN) continue;
+ if (isa<UndefValue>(Incoming)) {
+ // Remember that we saw an undef value, but otherwise ignore them.
+ HasUndefInput = true;
+ continue;
+ }
+ if (CommonValue && Incoming != CommonValue)
+ return 0; // Not the same, bail out.
+ CommonValue = Incoming;
+ }
+
+ // If CommonValue is null then all of the incoming values were either undef or
+ // equal to the phi node itself.
+ if (!CommonValue)
+ return UndefValue::get(PN->getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
+ if (HasUndefInput)
+ return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+
+ return CommonValue;
+}
+
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Sub:
+ return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Shl:
+ return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::LShr:
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::AShr:
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+ }
+
+ // If the operation is associative, try some generic simplifications.
+ if (Instruction::isAssociative(Opcode))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+ }
+}
+
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+}
+
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
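+///
+/// A typical (illustrative) use by a caller:
+///   if (Value *V = SimplifyInstruction(I, TD, DT))
+///     I->replaceAllUsesWith(V);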
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const DominatorTree *DT) {
+ Value *Result;
+
+ switch (I->getOpcode()) {
+ default:
+ Result = ConstantFoldInstruction(I, TD);
+ break;
+ case Instruction::Add:
+ Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Sub:
+ Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Mul:
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SDiv:
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::UDiv:
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FDiv:
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SRem:
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::URem:
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FRem:
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Shl:
+ Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::LShr:
+ Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::AShr:
+ Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::And:
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Or:
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Xor:
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::ICmp:
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FCmp:
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Select:
+ Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), TD, DT);
+ break;
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+ break;
+ }
+ case Instruction::PHI:
+ Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ break;
+ }
+
+ /// If called on unreachable code, the above logic may report that the
+ /// instruction simplified to itself. Make life easier for users by
+ /// detecting that case here, returning a safe value instead.
+ return Result == I ? UndefValue::get(I->getType()) : Result;
+}
+
+/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+/// delete the From instruction. In addition to a basic RAUW, this does a
+/// recursive simplification of the newly formed instructions. This catches
+/// things where one simplification exposes other opportunities. This only
+/// simplifies and deletes scalar operations, it does not change the CFG.
+///
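+/// A typical (illustrative) call is
+///   ReplaceAndSimplifyAllUses(From, NewVal, TD, DT);
+/// after which 'From' has been erased and any users that became foldable have
+/// been simplified as well.
+///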
+void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+ const TargetData *TD,
+ const DominatorTree *DT) {
+ assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
+
+  // FromHandle/ToHandle - These keep WeakVHs on the from/to values so that we
+  // can tell if either gets deleted out from under us or replaced in a
+  // recursive simplification.
+ WeakVH FromHandle(From);
+ WeakVH ToHandle(To);
+
+ while (!From->use_empty()) {
+ // Update the instruction to use the new value.
+ Use &TheUse = From->use_begin().getUse();
+ Instruction *User = cast<Instruction>(TheUse.getUser());
+ TheUse = To;
+
+ // Check to see if the instruction can be folded due to the operand
+ // replacement. For example changing (or X, Y) into (or X, -1) can replace
+ // the 'or' with -1.
+ Value *SimplifiedVal;
+ {
+ // Sanity check to make sure 'User' doesn't dangle across
+ // SimplifyInstruction.
+ AssertingVH<> UserHandle(User);
+
+ SimplifiedVal = SimplifyInstruction(User, TD, DT);
+ if (SimplifiedVal == 0) continue;
+ }
+
+ // Recursively simplify this user to the new value.
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
+ From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+ To = ToHandle;
+
+ assert(ToHandle && "To value deleted by recursive simplification?");
+
+ // If the recursive simplification ended up revisiting and deleting
+ // 'From' then we're done.
+ if (From == 0)
+ return;
+ }
+
+ // If 'From' has value handles referring to it, do a real RAUW to update them.
+ From->replaceAllUsesWith(To);
+
+ From->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp
new file mode 100644
index 000000000000..ca9cdcaf2464
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Interval.cpp
@@ -0,0 +1,58 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+ // There is a loop in this interval iff one of the predecessors of the header
+ // node lives in the interval.
+ for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+ I != E; ++I)
+ if (contains(*I))
+ return true;
+ return false;
+}
+
+
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
+ << "Interval Contents:\n";
+
+ // Print out all of the basic blocks in the interval...
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+ E = Nodes.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Predecessors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+ E = Predecessors.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Successors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+ E = Successors.end(); I != E; ++I)
+ OS << **I << "\n";
+}
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 000000000000..2e259b147b8b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represents the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true)
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ delete Intervals[i];
+ IntervalMap.clear();
+ Intervals.clear();
+ RootInterval = 0;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+ Intervals.push_back(I);
+
+ // Add mappings for all of the basic blocks in I to the IntervalPartition
+ for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+ It != End; ++It)
+ IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures. After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+ BasicBlock *Header = Int->getHeaderNode();
+ for (Interval::succ_iterator I = Int->Successors.begin(),
+ E = Int->Successors.end(); I != E; ++I)
+ getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// runOnFunction - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+  // Pass false to intervals_begin because we take ownership of its memory
+ function_interval_iterator I = intervals_begin(&F, false);
+ assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+ return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph. This takes an additional boolean parameter to
+// distinguish it from a copy constructor. Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+ : FunctionPass(ID) {
+ assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+  // Pass false to intervals_begin because we take ownership of its memory
+ interval_part_interval_iterator I = intervals_begin(IP, false);
+ assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 000000000000..6e275978276d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,1128 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+#include <stack>
+using namespace llvm;
+
+char LazyValueInfo::ID = 0;
+INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup; it can be made a lot richer in
+/// the future.
+///
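+/// Informally, the lattice is ordered
+///   undefined  <  constant / notconstant / constantrange  <  overdefined
+/// and mergeIn() below never makes a value more precise.
+///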
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This Value has no known value yet.
+ undefined,
+
+ /// constant - This Value has a specific constant value.
+ constant,
+ /// notconstant - This Value is known to not have the specified value.
+ notconstant,
+
+ /// constantrange - The Value falls within this range.
+ constantrange,
+
+ /// overdefined - This value is not known to be constant, and we know that
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
+
+public:
+ LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
+ return Res;
+ }
+
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val;
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Tag = overdefined;
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() == V) &&
+ "Marking constant with different value");
+ assert(isUndefined());
+ Tag = constant;
+ Val = V;
+ return true;
+ }
+
+ /// markNotConstant - Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() != V) &&
+ "Marking constant !constant with same value");
+ assert((!isNotConstant() || getNotConstant() == V) &&
+ "Marking !constant with different value");
+ assert(isUndefined() || isConstant());
+ Tag = notconstant;
+ Val = V;
+ return true;
+ }
+
+ /// markConstantRange - Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+      bool changed = Range != NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
+ return true;
+ }
+
+ /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (isUndefined()) {
+ Tag = RHS.Tag;
+ Val = RHS.Val;
+ Range = RHS.Range;
+ return true;
+ }
+
+ if (isConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getConstant(),
+ RHS.getNotConstant())))
+ if (Res->isOne())
+ return markNotConstant(RHS.getNotConstant());
+
+ return markOverdefined();
+ }
+
+ // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+ // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+ // a function. The correct result is to pick up RHS.
+
+ return markOverdefined();
+ }
+
+ if (isNotConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getNotConstant(),
+ RHS.getConstant())))
+ if (Res->isOne())
+ return false;
+
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ return markOverdefined();
+ }
+
+ assert(isConstantRange() && "New LVILattice type?");
+ if (!RHS.isConstantRange())
+ return markOverdefined();
+
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ return markConstantRange(NewR);
+ }
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
+ LLVM_ATTRIBUTE_USED;
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LVIValueHandle - A callback value handle that updates the cache when
+  /// values are erased.
+ class LazyValueInfoCache;
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value *V) {
+ deleted();
+ }
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LVIValueHandle> {
+ typedef DenseMapInfo<Value*> PointerInfo;
+ static inline LVIValueHandle getEmptyKey() {
+ return LVIValueHandle(PointerInfo::getEmptyKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static inline LVIValueHandle getTombstoneKey() {
+ return LVIValueHandle(PointerInfo::getTombstoneKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static unsigned getHashValue(const LVIValueHandle &Val) {
+ return PointerInfo::getHashValue(Val);
+ }
+ static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ template<>
+ struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
+ typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
+ typedef DenseMapInfo<Value*> BPointerInfo;
+ static inline PairTy getEmptyKey() {
+ return std::make_pair(APointerInfo::getEmptyKey(),
+ BPointerInfo::getEmptyKey());
+ }
+ static inline PairTy getTombstoneKey() {
+ return std::make_pair(APointerInfo::getTombstoneKey(),
+ BPointerInfo::getTombstoneKey());
+ }
+    static unsigned getHashValue(const PairTy &Val) {
+ return APointerInfo::getHashValue(Val.first) ^
+ BPointerInfo::getHashValue(Val.second);
+ }
+ static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
+ return APointerInfo::isEqual(LHS.first, RHS.first) &&
+ BPointerInfo::isEqual(LHS.second, RHS.second);
+ }
+ };
+}
+
+namespace {
+  /// LazyValueInfoCache - This is the cache kept by LazyValueInfo; it
+  /// maintains information across the clients' queries.
+ class LazyValueInfoCache {
+ /// ValueCacheEntryTy - This is all of the cached block information for
+ /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// entries, allowing us to do a lookup with a binary search.
+ typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+ DenseSet<OverDefinedPairTy> OverDefinedCache;
+
+ /// BlockValueStack - This stack holds the state of the value solver
+ /// during a query. It basically emulates the callstack of the naive
+ /// recursive value lookup process.
+ std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ friend struct LVIValueHandle;
+
+ /// OverDefinedCacheUpdater - A helper object that ensures that the
+ /// OverDefinedCache is updated whenever solveBlockValue returns.
+ struct OverDefinedCacheUpdater {
+ LazyValueInfoCache *Parent;
+ Value *Val;
+ BasicBlock *BB;
+ LVILatticeVal &BBLV;
+
+ OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+ LazyValueInfoCache *P)
+ : Parent(P), Val(V), BB(B), BBLV(LV) { }
+
+ bool markResult(bool changed) {
+ if (changed && BBLV.isOverdefined())
+ Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+ return changed;
+ }
+ };
+
+
+
+ LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+ LVILatticeVal &Result);
+ bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+ // These methods process one work item and may add more. A false value
+ // returned means that the work item was not completely processed and must
+ // be revisited after going through the new items.
+ bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB);
+ bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB);
+ bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI, BasicBlock *BB);
+
+ void solve();
+
+ ValueCacheEntryTy &lookup(Value *V) {
+ return ValueCache[LVIValueHandle(V, this)];
+ }
+
+ public:
+ /// getValueInBlock - This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+ /// getValueOnEdge - This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+
+ /// threadEdge - This is the update interface to inform the cache that an
+ /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+ /// NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ /// eraseBlock - This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+ };
+} // end anonymous namespace
+
+void LVIValueHandle::deleted() {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator
+ I = Parent->OverDefinedCache.begin(),
+ E = Parent->OverDefinedCache.end();
+ I != E; ++I) {
+ if (I->second == getValPtr())
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ Parent->OverDefinedCache.erase(*I);
+
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == BB)
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ OverDefinedCache.erase(*I);
+
+ for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
+ I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+void LazyValueInfoCache::solve() {
+ while (!BlockValueStack.empty()) {
+ std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ if (solveBlockValue(e.second, e.first))
+ BlockValueStack.pop();
+ }
+}
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (isa<Constant>(Val))
+ return true;
+
+ LVIValueHandle ValHandle(Val, this);
+ if (!ValueCache.count(ValHandle)) return false;
+ return ValueCache[ValHandle].count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val))
+ return LVILatticeVal::get(VC);
+
+ return lookup(Val)[BB];
+}
+
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+ if (isa<Constant>(Val))
+ return true;
+
+ ValueCacheEntryTy &Cache = lookup(Val);
+ LVILatticeVal &BBLV = Cache[BB];
+
+ // OverDefinedCacheUpdater is a helper object that will update
+ // the OverDefinedCache for us when this method exits. Make sure to
+  // call markResult on it as we exit, passing a bool to indicate whether the
+  // cache needs updating, i.e. whether we have solved a new value or not.
+ OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
+ // If we've already computed this block's value, return it.
+ if (!BBLV.isUndefined()) {
+ DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+
+ // Since we're reusing a cached value here, we don't need to update the
+    // OverDefinedCache. The cache will have been properly updated
+ // whenever the cached value was inserted.
+ ODCacheUpdater.markResult(false);
+ return true;
+ }
+
+ // Otherwise, this is the first time we're seeing this block. Reset the
+ // lattice value to overdefined, so that cycles will terminate and be
+ // conservatively correct.
+ BBLV.markOverdefined();
+
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (BBI == 0 || BBI->getParent() != BB) {
+ return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+ }
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+ BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
+ LVILatticeVal Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
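+/// InstructionDereferencesPointer - Return true if I is known to dereference
+/// memory through the same underlying object as Ptr (via a load, a store, or
+/// a non-volatile memory intrinsic).  The caller uses this to conclude that
+/// Ptr cannot be null in blocks where such an instruction executes.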
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ return L->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(L->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ return S->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(S->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return false;
+
+ // FIXME: check whether it has a valuerange that excludes zero?
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Len || Len->isZero()) return false;
+
+ if (MI->getDestAddressSpace() == 0)
+ if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+ return true;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ if (MTI->getSourceAddressSpace() == 0)
+ if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr)
+ return true;
+ }
+ return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ if (isa<AllocaInst>(Val)) {
+ NotNull = true;
+ } else {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+ if (InstructionDereferencesPointer(BI, Val)) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ } else {
+ Result.markOverdefined();
+ }
+ BBLV = Result;
+ return true;
+ }
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PhiBB = PN->getIncomingBlock(i);
+ Value *PhiVal = PN->getIncomingValue(i);
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+ BBLV = Result;
+ return true;
+}
+
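+// solveBlockValueConstantRange - Evaluate an integer binary operator or cast
+// by applying the matching ConstantRange operation to the range of its first
+// operand (and, for binary operators, its constant-int RHS).  Returns false
+// if the operand's block value still needs to be computed.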
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI,
+ BasicBlock *BB) {
+ // Figure out the range of the LHS. If that fails, bail.
+ if (!hasBlockValue(BBI->getOperand(0), BB)) {
+ BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+ return false;
+ }
+
+ LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ if (!LHSVal.isConstantRange()) {
+ BBLV.markOverdefined();
+ return true;
+ }
+
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+ ConstantRange RHSRange(1);
+ const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+ RHSRange = ConstantRange(RHS->getValue());
+ } else {
+ BBLV.markOverdefined();
+ return true;
+ }
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically. Enhancing that set will allow us to analyze
+ // more definitions.
+ LVILatticeVal Result;
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+ case Instruction::And:
+ Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+ break;
+ case Instruction::Or:
+ Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ BBLV = Result;
+ return true;
+}
+
+/// getEdgeValue - Compute the value of Val on the edge from BBFrom to BBTo,
+/// placing it in Result.  Returns false if the value cannot be computed yet
+/// because block values it depends on have not been solved; callers should
+/// then call solve() and retry.
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val)) {
+ Result = LVILatticeVal::get(VC);
+ return true;
+ }
+
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+    // If this is a conditional branch and only one successor goes to BBTo,
+    // then we may be able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val) {
+ Result = LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+ return true;
+ }
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (ICI && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality()) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ // Figure out the possible values of the query BEFORE this branch.
+ if (!hasBlockValue(Val, BBFrom)) {
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+ }
+
+ LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ if (!InBlock.isConstantRange()) {
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+
+ // Find all potential values that satisfy both the input and output
+ // conditions.
+ ConstantRange PossibleValues =
+ TrueValues.intersectWith(InBlock.getConstantRange());
+
+ Result = LVILatticeVal::getRange(PossibleValues);
+ return true;
+ }
+ }
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+ if (SI->getCondition() == Val) {
+ // We don't know anything in the default case.
+ if (SI->getDefaultDest() == BBTo) {
+ Result.markOverdefined();
+ return true;
+ }
+
+ // We only know something if there is exactly one value that goes from
+ // BBFrom to BBTo.
+ unsigned NumEdges = 0;
+ ConstantInt *EdgeVal = 0;
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ if (SI->getSuccessor(i) != BBTo) continue;
+ if (NumEdges++) break;
+ EdgeVal = SI->getCaseValue(i);
+ }
+ assert(EdgeVal && "Missing successor?");
+ if (NumEdges == 1) {
+ Result = LVILatticeVal::get(EdgeVal);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise see if the value is known in the block.
+ if (hasBlockValue(Val, BBFrom)) {
+ Result = getBlockValue(Val, BBFrom);
+ return true;
+ }
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+ DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ BlockValueStack.push(std::make_pair(BB, V));
+ solve();
+ LVILatticeVal Result = getBlockValue(V, BB);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+ DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+ LVILatticeVal Result;
+ if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ solve();
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ (void)WasFastQuery;
+ assert(WasFastQuery && "More work to do after problem solved?");
+ }
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be possible
+ // to solve now. We do NOT try to proactively update these values. Instead,
+ // we clear their entries from the cache, and allow lazy updating to recompute
+ // them when needed.
+
+  // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ DenseSet<Value*> ClearSet;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == OldSucc)
+ ClearSet.insert(I->second);
+ }
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
+ I != E; ++I) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ DenseSet<OverDefinedPairTy>::iterator OI =
+ OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ if (OI == OverDefinedCache.end()) continue;
+
+ // Remove it from the caches.
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+ assert(CI != Entry.end() && "Couldn't find entry to update?");
+ Entry.erase(CI);
+ OverDefinedCache.erase(OI);
+
+      // If we removed anything, then we potentially need to update the
+      // block's successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+ if (!PImpl)
+ PImpl = new LazyValueInfoCache();
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ if (PImpl)
+ getCache(PImpl).clear();
+
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl);
+ PImpl = 0;
+ }
+}
+
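+/// getConstant - Determine whether the specified value is known to be a
+/// constant at the end of the specified block.  Return null if not.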
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+ LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
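+///
+/// For example (illustrative), if V is known to lie in the range [0, 10) on
+/// the edge and C is the constant 42, an ICMP_EQ query yields False and an
+/// ICMP_NE query yields True.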
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = 0;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+ return ResCI->isZero() ? False : True;
+ return Unknown;
+ }
+
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange TrueValues =
+ ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
+ return True;
+ if (TrueValues.inverse().contains(CR))
+ return False;
+ return Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return True;
+ }
+ return Unknown;
+ }
+
+ return Unknown;
+}
+
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
new file mode 100644
index 000000000000..efb722bb97c4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -0,0 +1,137 @@
+//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallAliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+// Register this pass...
+char LibCallAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+ "LibCall Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
+ return new LibCallAliasAnalysis(LCI);
+}
+
+LibCallAliasAnalysis::~LibCallAliasAnalysis() {
+ delete LCI;
+}
+
+void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+}
+
+
+
+/// AnalyzeLibCallDetails - Given a call to a function with the specified
+/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
+/// vs the specified pointer/size.
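+///
+/// Illustrative reading of the two table kinds handled below: with a 'DoesNot'
+/// table, a definite match of Loc against a listed location lets us clear the
+/// corresponding mod/ref bits; with a 'DoesOnly' table, proving that Loc
+/// matches none of the listed locations lets us return NoModRef.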
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+ ImmutableCallSite CS,
+ const Location &Loc) {
+ // If we have a function, check to see what kind of mod/ref effects it
+ // has. Start by including any info globally known about the function.
+ AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
+ if (MRInfo == NoModRef) return MRInfo;
+
+ // If that didn't tell us that the function is 'readnone', check to see
+  // if we have detailed info and if 'Loc' is any of the locations we know
+ // about.
+ const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
+ if (Details == 0)
+ return MRInfo;
+
+ // If the details array is of the 'DoesNot' kind, we only know something if
+ // the pointer is a match for one of the locations in 'Details'. If we find a
+ // match, we can prove some interactions cannot happen.
+ //
+ if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
+ // Find out if the pointer refers to a known location.
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res != LibCallLocationInfo::Yes) continue;
+
+      // If we find a match against a location that we 'do not' interact with,
+      // merge that information into MRInfo and return it.
+ return ModRefResult(MRInfo & ~Details[i].MRInfo);
+ }
+ return MRInfo;
+ }
+
+ // If the details are of the 'DoesOnly' sort, we know something if the pointer
+ // is a match for one of the locations in 'Details'. Also, if we can prove
+  // that the pointer is *not* one of the locations in 'Details', we know that
+ // the call is NoModRef.
+ assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+ // Find out if the pointer refers to a known location.
+ bool NoneMatch = true;
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res == LibCallLocationInfo::No) continue;
+
+ // If we don't know if this pointer points to the location, then we have to
+ // assume it might alias in some case.
+ if (Res == LibCallLocationInfo::Unknown) {
+ NoneMatch = false;
+ continue;
+ }
+
+ // If we know that this pointer definitely is pointing into the location,
+ // merge in this information.
+ return ModRefResult(MRInfo & Details[i].MRInfo);
+ }
+
+ // If we found that the pointer is guaranteed to not match any of the
+ // locations in our 'DoesOnly' rule, then we know that the pointer must point
+ // to some other location. Since the libcall doesn't mod/ref any other
+ // locations, return NoModRef.
+ if (NoneMatch)
+ return NoModRef;
+
+ // Otherwise, return any other info gained so far.
+ return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult MRInfo = ModRef;
+
+ // If this is a direct call to a function that LCI knows about, get the
+ // information about the runtime function.
+ if (LCI) {
+ if (const Function *F = CS.getCalledFunction()) {
+ if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+ MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
+ if (MRInfo == NoModRef) return NoModRef;
+ }
+ }
+ }
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+ return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 000000000000..81b0f46f3740
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,63 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+/// getMap - The Impl pointer in LibCallInfo is actually a StringMap; this
+/// helper does the cast.
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
+ return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
+}
+
+LibCallInfo::~LibCallInfo() {
+ delete getMap(Impl);
+}
+
+const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
+ // Get location info on the first call.
+ if (NumLocations == 0)
+ NumLocations = getLocationInfo(Locations);
+
+ assert(LocID < NumLocations && "Invalid location ID!");
+ return Locations[LocID];
+}
+
+
+/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// the specified function if we have it. If not, return null.
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
+ StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
+
+ /// If this is the first time we are querying for this info, lazily construct
+ /// the StringMap to index it.
+ if (Map == 0) {
+ Impl = Map = new StringMap<const LibCallFunctionInfo*>();
+
+ const LibCallFunctionInfo *Array = getFunctionInfoArray();
+ if (Array == 0) return 0;
+
+ // We now have the array of entries. Populate the StringMap.
+ for (unsigned i = 0; Array[i].Name; ++i)
+ (*Map)[Array[i].Name] = Array+i;
+ }
+
+ // Look up this function in the string map.
+ return Map->lookup(F->getName());
+}
+
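+// A minimal sketch of the same lazy-initialization idiom used in
+// getFunctionInfo above, using only the StringMap operations already shown
+// in this file (the names and table contents below are made up for
+// illustration):
+//
+//   static StringMap<int> *LazyTable = 0;
+//
+//   static int lookupExample(StringRef Name) {
+//     if (LazyTable == 0) {
+//       LazyTable = new StringMap<int>();
+//       (*LazyTable)["memcpy"] = 1;
+//       (*LazyTable)["memset"] = 2;
+//     }
+//     return LazyTable->lookup(Name);  // 0 when Name is absent
+//   }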
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
new file mode 100644
index 000000000000..89755da85097
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -0,0 +1,655 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, for two reasons. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+ namespace MemRef {
+ static unsigned Read = 1;
+ static unsigned Write = 2;
+ static unsigned Callee = 4;
+ static unsigned Branchee = 8;
+ }
+
+ class Lint : public FunctionPass, public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallSite(CallSite CS);
+ void visitMemoryReference(Instruction &I, Value *Ptr,
+ uint64_t Size, unsigned Align,
+ const Type *Ty, unsigned Flags);
+
+ void visitCallInst(CallInst &I);
+ void visitInvokeInst(InvokeInst &I);
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const;
+
+ public:
+ Module *Mod;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ TargetData *TD;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ static char ID; // Pass identification, replacement for typeid
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {
+ initializeLintPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ }
+ };
+}
+
+char Lint::ID = 0;
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::run - This is the main Analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+ Mod = F.getParent();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ visit(F);
+ dbgs() << MessagesStr.str();
+ Messages.clear();
+ return false;
+}
+
+void Lint::visitFunction(Function &F) {
+ // This isn't undefined behavior, it's just a little unusual, and it's a
+ // fairly common mistake to neglect to name a function.
+ Assert1(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
+
+ // TODO: Check for irreducible control flow.
+}
+
+void Lint::visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ Value *Callee = CS.getCalledValue();
+
+ visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Callee);
+
+ if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
+ Assert1(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
+
+ const FunctionType *FT = F->getFunctionType();
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+ Assert1(FT->isVarArg() ?
+ FT->getNumParams() <= NumActualArgs :
+ FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count", &I);
+
+ Assert1(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type", &I);
+
+ // Check argument types (in case the callee was casted) and attributes.
+ // TODO: Verify that caller and callee attributes are compatible.
+ Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for (; AI != AE; ++AI) {
+ Value *Actual = *AI;
+ if (PI != PE) {
+ Argument *Formal = PI++;
+ Assert1(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type", &I);
+
+ // Check that noalias arguments don't alias other arguments. This is
+ // not fully precise because we don't know the sizes of the dereferenced
+ // memory regions.
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (AI != BI && (*BI)->getType()->isPointerTy()) {
+ AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+ Assert1(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
+
+ // Check that an sret argument points to valid memory.
+ if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+ const Type *Ty =
+ cast<PointerType>(Formal->getType())->getElementType();
+ visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+ TD ? TD->getABITypeAlignment(Ty) : 0,
+ Ty, MemRef::Read | MemRef::Write);
+ }
+ }
+ }
+ }
+
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca", &I);
+ }
+
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // TODO: Check more intrinsics
+
+ case Intrinsic::memcpy: {
+ MemCpyInst *MCI = cast<MemCpyInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Read);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ uint64_t Size = 0;
+ if (const ConstantInt *Len =
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+ /*OffsetOk=*/false)))
+ if (Len->getValue().isIntN(32))
+ Size = Len->getValue().getZExtValue();
+ Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
+ case Intrinsic::memmove: {
+ MemMoveInst *MMI = cast<MemMoveInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Read);
+ break;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MSI = cast<MemSetInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+ MSI->getAlignment(), 0,
+ MemRef::Write);
+ break;
+ }
+
+ case Intrinsic::vastart:
+ Assert1(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
+
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::vacopy:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read);
+ break;
+ case Intrinsic::vaend:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+
+ case Intrinsic::stackrestore:
+ // Stackrestore doesn't read or write memory, but it sets the
+ // stack pointer, which the compiler may read from or write to
+ // at any time, so check it for both readability and writeability.
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+ Function *F = I.getParent()->getParent();
+ Assert1(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute",
+ &I);
+
+ if (Value *V = I.getReturnValue()) {
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Unusual: Returning alloca value", &I);
+ }
+}
+
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
+void Lint::visitMemoryReference(Instruction &I,
+ Value *Ptr, uint64_t Size, unsigned Align,
+ const Type *Ty, unsigned Flags) {
+ // If no memory is being referenced, it doesn't matter if the pointer
+ // is valid.
+ if (Size == 0)
+ return;
+
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
+ Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert1(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
+
+ if (Flags & MemRef::Write) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+ Assert1(!GV->isConstant(),
+ "Undefined behavior: Write to read-only memory", &I);
+ Assert1(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
+ }
+ if (Flags & MemRef::Read) {
+ Assert1(!isa<Function>(UnderlyingObject),
+ "Unusual: Load from function body", &I);
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
+ }
+ if (Flags & MemRef::Callee) {
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
+ }
+ if (Flags & MemRef::Branchee) {
+ Assert1(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
+ }
+
+ if (TD) {
+ if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+
+ if (Align != 0) {
+ unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
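+      // For example, with Align == 8 the low Log2_32(8) == 3 bits of the
+      // address must be zero; if any of those bits is known to be one, the
+      // reference cannot be 8-byte aligned and the check below fires.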
+ Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+ "Undefined behavior: Memory reference address is misaligned", &I);
+ }
+ }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ I.getType(), MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool isZero(Value *V, TargetData *TD) {
+ // Assume undef could be zero.
+ if (isa<UndefValue>(V)) return true;
+
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+ if (isa<ConstantInt>(I.getArraySize()))
+ // This isn't undefined behavior, it's just an obvious pessimization.
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
+
+ // TODO: Check for an unusual size (MSB set?)
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+ visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+ visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Branchee);
+
+ Assert1(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+ // This isn't undefined behavior, it's merely suspicious.
+ Assert1(&I == I.getParent()->begin() ||
+ prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects", &I);
+}
+
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+ SmallPtrSet<Value *, 4> Visited;
+ return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const {
+ // Detect self-referential values.
+ if (!Visited.insert(V))
+ return UndefValue::get(V->getType());
+
+ // TODO: Look through sext or zext cast, when the result is known to
+ // be interpreted as signed or unsigned, respectively.
+ // TODO: Look through eliminable cast pairs.
+ // TODO: Look through calls with unique return values.
+ // TODO: Look through vector insert/extract/shuffle.
+ V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
+ if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+ BasicBlock::iterator BBI = L;
+ BasicBlock *BB = L->getParent();
+ SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ for (;;) {
+ if (!VisitedBlocks.insert(BB)) break;
+ if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, 6, AA))
+ return findValueImpl(U, OffsetOk, Visited);
+ if (BBI != BB->begin()) break;
+ BB = BB->getUniquePredecessor();
+ if (!BB) break;
+ BBI = BB->end();
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (Value *W = PN->hasConstantValue())
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+ if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+ Ex->getIndices()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // Same as above, but for ConstantExpr instead of Instruction.
+ if (Instruction::isCast(CE->getOpcode())) {
+ if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+ CE->getOperand(0)->getType(),
+ CE->getType(),
+ TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ } else if (CE->getOpcode() == Instruction::ExtractValue) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+ }
+
+ // As a last resort, try SimplifyInstruction or constant folding.
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+ return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot lint external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Lint *V = new Lint();
+ FPM.add(V);
+ FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+///
+void llvm::lintModule(const Module &M) {
+ PassManager PM;
+ Lint *V = new Lint();
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+}
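+
+// Minimal usage sketch (the wrapper name below is illustrative, not part of
+// this file): callers that already hold a Function can either run the pass
+// through a pass manager via createLintPass() or call the helper directly.
+//
+//   void lintIfDefined(Function *F) {
+//     if (!F->isDeclaration())
+//       lintFunction(*F);  // warnings go to stderr, as described above
+//   }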
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
new file mode 100644
index 000000000000..c5c676b5265f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -0,0 +1,236 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+ isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
+/// This is closely related to GetUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+ uint64_t &ByteOffset,
+ unsigned MaxLookup = 6) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+ ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
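+
+// For example, given a global @g of type [4 x i32] and the address
+//   getelementptr [4 x i32]* @g, i32 0, i32 2
+// this returns @g and advances ByteOffset by 8 (assuming 4-byte i32, per
+// TargetData::getIndexedOffset).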
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+ unsigned Align, const TargetData *TD) {
+ uint64_t ByteOffset = 0;
+ Value *Base = V;
+ if (TD)
+ Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+ const Type *BaseType = 0;
+ unsigned BaseAlign = 0;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+    // An alloca is safe to load from as long as it is suitably aligned.
+ BaseType = AI->getAllocatedType();
+ BaseAlign = AI->getAlignment();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+ // Global variables are safe to load from but their size cannot be
+ // guaranteed if they are overridden.
+ if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+ BaseType = GV->getType()->getElementType();
+ BaseAlign = GV->getAlignment();
+ }
+ }
+
+ if (BaseType && BaseType->isSized()) {
+ if (TD && BaseAlign == 0)
+ BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+ if (Align <= BaseAlign) {
+ if (!TD)
+ return true; // Loading directly from an alloca or global is OK.
+
+ // Check if the load is within the bounds of the underlying object.
+ const PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+ if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ (Align == 0 || (ByteOffset % Align) == 0))
+ return true;
+ }
+ }
+
+  // Otherwise, be a little bit aggressive: scan the block containing ScanFrom
+  // to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+ }
+ }
+ return false;
+}
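+
+// Usage sketch (illustrative): a transform deciding whether to speculate a
+// load LI ahead of a branch might ask
+//   isSafeToLoadUnconditionally(LI->getPointerOperand(), LI,
+//                               LI->getAlignment(), TD)
+// (TD may be null) and only hoist the load when this returns true.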
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null. ScanFrom could also be left at an earlier
+/// point in the block if the scan stopped at an instruction that may modify
+/// *Ptr; null is returned in that case as well.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA) {
+ if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+  // If we're using alias analysis to disambiguate, get the size of *Ptr.
+ uint64_t AccessSize = 0;
+ if (AA) {
+ const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ AccessSize = AA->getTypeStoreSize(AccessTy);
+ }
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = --ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+    // Restore ScanFrom to its expected value in case the next test succeeds.
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0) return 0;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ return LI;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If this is a store through Ptr, the value is available!
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ return SI->getOperand(0);
+
+ // If Ptr is an alloca and this is a store to a different alloca, ignore
+ // the store. This is a trivial form of alias analysis that is important
+ // for reg2mem'd code.
+ if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+ (isa<AllocaInst>(SI->getOperand(1)) ||
+ isa<GlobalVariable>(SI->getOperand(1))))
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA &&
+ (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+      // Otherwise the store may or may not alias the pointer; bail out.
+ ++ScanFrom;
+ return 0;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+ }
+
+ // Got to the start of the block, we didn't find it, but are done for this
+ // block.
+ return 0;
+}
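+
+// Usage sketch, mirroring the call made from Lint.cpp earlier in this patch:
+// starting just before a load L, scan at most six instructions backwards in
+// its block for a value that is already available there (AA may be null).
+//
+//   BasicBlock::iterator BBI = L;
+//   if (Value *V = FindAvailableLoadedValue(L->getPointerOperand(),
+//                                           L->getParent(), BBI, 6, AA))
+//     ...  // reuse V instead of re-loading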
diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
new file mode 100644
index 000000000000..c1afe8fbd618
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -0,0 +1,358 @@
+//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the (beginning) of an implementation of a loop dependence analysis
+// framework, which is used to detect dependences in memory accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as implementation progresses.
+//
+// TODO: document lingo (pair, subscript, index)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lda"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopDependenceAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumAnswered, "Number of dependence queries answered");
+STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed");
+STATISTIC(NumDependent, "Number of pairs with dependent accesses");
+STATISTIC(NumIndependent, "Number of pairs with independent accesses");
+STATISTIC(NumUnknown, "Number of pairs with unknown accesses");
+
+LoopPass *llvm::createLoopDependenceAnalysisPass() {
+ return new LoopDependenceAnalysis();
+}
+
+INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+char LoopDependenceAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Utility Functions
+//===----------------------------------------------------------------------===//
+
+static inline bool IsMemRefInstr(const Value *V) {
+ const Instruction *I = dyn_cast<const Instruction>(V);
+ return I && (I->mayReadFromMemory() || I->mayWriteToMemory());
+}
+
+static void GetMemRefInstrs(const Loop *L,
+ SmallVectorImpl<Instruction*> &Memrefs) {
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be; ++b)
+ for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end();
+ i != ie; ++i)
+ if (IsMemRefInstr(i))
+ Memrefs.push_back(i);
+}
+
+static bool IsLoadOrStoreInst(Value *I) {
+ return isa<LoadInst>(I) || isa<StoreInst>(I);
+}
+
+static Value *GetPointerOperand(Value *I) {
+ if (LoadInst *i = dyn_cast<LoadInst>(I))
+ return i->getPointerOperand();
+ if (StoreInst *i = dyn_cast<StoreInst>(I))
+ return i->getPointerOperand();
+  llvm_unreachable("Value is not a load or store instruction!");
+ // Never reached.
+ return 0;
+}
+
+static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *aObj = GetUnderlyingObject(A);
+ const Value *bObj = GetUnderlyingObject(B);
+ return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
+ bObj, AA->getTypeStoreSize(bObj->getType()));
+}
+
+static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) {
+ return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L);
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence Testing
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::isDependencePair(const Value *A,
+ const Value *B) const {
+ return IsMemRefInstr(A) &&
+ IsMemRefInstr(B) &&
+ (cast<const Instruction>(A)->mayWriteToMemory() ||
+ cast<const Instruction>(B)->mayWriteToMemory());
+}
+
+bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
+ Value *B,
+ DependencePair *&P) {
+ void *insertPos = 0;
+ FoldingSetNodeID id;
+ id.AddPointer(A);
+ id.AddPointer(B);
+
+ P = Pairs.FindNodeOrInsertPos(id, insertPos);
+ if (P) return true;
+
+ P = new (PairAllocator) DependencePair(id, A, B);
+ Pairs.InsertNode(P, insertPos);
+ return false;
+}
+
+void LoopDependenceAnalysis::getLoops(const SCEV *S,
+ DenseSet<const Loop*>* Loops) const {
+ // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
+ for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
+ if (!SE->isLoopInvariant(S, L))
+ Loops->insert(L);
+}
+
+bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const {
+ DenseSet<const Loop*> loops;
+ getLoops(S, &loops);
+ return loops.empty();
+}
+
+bool LoopDependenceAnalysis::isAffine(const SCEV *S) const {
+ const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S);
+ return isLoopInvariant(S) || (rec && rec->isAffine());
+}
+
+bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const {
+ return isLoopInvariant(A) && isLoopInvariant(B);
+}
+
+bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const {
+ DenseSet<const Loop*> loops;
+ getLoops(A, &loops);
+ getLoops(B, &loops);
+ return loops.size() == 1;
+}
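+
+// Classification example: for accesses A[5] and A[7] both subscript SCEVs are
+// loop invariant, so the pair is ZIV; for A[i] and A[i+2], where i is the
+// induction variable of the single enclosing loop, exactly one loop appears
+// in the subscripts and the pair is SIV; with two or more loops involved the
+// pair is handled as MIV.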
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseZIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!");
+ return A == B ? Dependent : Independent;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseMIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n");
+
+ if (A == B) {
+ DEBUG(dbgs() << " -> [D] same SCEV\n");
+ return Dependent;
+ }
+
+ if (!isAffine(A) || !isAffine(B)) {
+ DEBUG(dbgs() << " -> [?] not affine\n");
+ return Unknown;
+ }
+
+ if (isZIVPair(A, B))
+ return analyseZIV(A, B, S);
+
+ if (isSIVPair(A, B))
+ return analyseSIV(A, B, S);
+
+ return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+ DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+ // We only analyse loads and stores but no possible memory accesses by e.g.
+ // free, call, or invoke instructions.
+ if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+ DEBUG(dbgs() << "--> [?] no load/store\n");
+ return Unknown;
+ }
+
+ Value *aPtr = GetPointerOperand(P->A);
+ Value *bPtr = GetPointerOperand(P->B);
+
+ switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+    // We cannot analyse objects if we do not know about their aliasing.
+ DEBUG(dbgs() << "---> [?] may alias\n");
+ return Unknown;
+
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "---> [I] no alias\n");
+ return Independent;
+
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias, test accesses for dependence.
+ }
+
+ const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+ const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+ if (!aGEP || !bGEP)
+ return Unknown;
+
+ // FIXME: Is filtering coupled subscripts necessary?
+
+ // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+ // trailing zeroes to the smaller GEP, if needed.
+ typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+ GEPOpdPairsTy opds;
+ for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+ aEnd = aGEP->idx_end(),
+ bIdx = bGEP->idx_begin(),
+ bEnd = bGEP->idx_end();
+ aIdx != aEnd && bIdx != bEnd;
+ aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+ const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+ const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+ opds.push_back(std::make_pair(aSCEV, bSCEV));
+ }
+
+ if (!opds.empty() && opds[0].first != opds[0].second) {
+    // We cannot (yet) handle arbitrary GEP pointer offsets; by bailing out
+    // here, the analysis is restricted to pairs whose pointer-offset
+    // subscripts are identical.
+ //
+ // TODO: this could be relaxed by adding the size of the underlying object
+ // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we
+ // know that x is a [100 x i8]*, we could modify the first subscript to be
+ // (i, 200-i) instead of (i, -i).
+ return Unknown;
+ }
+
+ // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+ for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+ i != end; ++i) {
+ Subscript subscript;
+ DependenceResult result = analyseSubscript(i->first, i->second, &subscript);
+ if (result != Dependent) {
+ // We either proved independence or failed to analyse this subscript.
+ // Further subscripts will not improve the situation, so abort early.
+ return result;
+ }
+ P->Subscripts.push_back(subscript);
+ }
+ // We successfully analysed all subscripts but failed to prove independence.
+ return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+ assert(isDependencePair(A, B) && "Values form no dependence pair!");
+ ++NumAnswered;
+
+ DependencePair *p;
+ if (!findOrInsertDependencePair(A, B, p)) {
+ // The pair is not cached, so analyse it.
+ ++NumAnalysed;
+ switch (p->Result = analysePair(p)) {
+ case Dependent: ++NumDependent; break;
+ case Independent: ++NumIndependent; break;
+ case Unknown: ++NumUnknown; break;
+ }
+ }
+ return p->Result != Independent;
+}
+
+//===----------------------------------------------------------------------===//
+// LoopDependenceAnalysis Implementation
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
+ this->L = L;
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+void LoopDependenceAnalysis::releaseMemory() {
+ Pairs.clear();
+ PairAllocator.Reset();
+}
+
+void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+}
+
+static void PrintLoopInfo(raw_ostream &OS,
+ LoopDependenceAnalysis *LDA, const Loop *L) {
+ if (!L->empty()) return; // ignore non-innermost loops
+
+ SmallVector<Instruction*, 8> memrefs;
+ GetMemRefInstrs(L, memrefs);
+
+ OS << "Loop at depth " << L->getLoopDepth() << ", header block: ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ OS << "\n";
+
+ OS << " Load/store instructions: " << memrefs.size() << "\n";
+ for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+ end = memrefs.end(); x != end; ++x)
+ OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n";
+
+ OS << " Pairwise dependence results:\n";
+ for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+ end = memrefs.end(); x != end; ++x)
+ for (SmallVector<Instruction*, 8>::const_iterator y = x + 1;
+ y != end; ++y)
+ if (LDA->isDependencePair(*x, *y))
+ OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin())
+ << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent")
+ << "\n";
+}
+
+void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ // TODO: doc why const_cast is safe
+ PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L);
+}
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 000000000000..05831402f409
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,419 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
+char LoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return !contains(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ return true;
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+
+ // Hoist.
+ I->moveBefore(InsertPt);
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = 0, *Backedge = 0;
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == pred_end(H)) return 0; // dead loop
+ Incoming = *PI++;
+ if (PI != pred_end(H)) return 0; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return 0;
+}
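+
+// The pattern matched above corresponds to IR of the form:
+//   %i = phi i32 [ 0, %preheader ], [ %i.next, %backedge ]
+//   %i.next = add i32 %i, 1
+// where the phi %i is the canonical induction variable that gets returned.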
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed. Note that this means that the backedge
+/// of the loop executes N-1 times. If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+ // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+ // canonical induction variable and V is the trip count of the loop.
+ PHINode *IV = getCanonicalInductionVariable();
+ if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
+
+ bool P0InLoop = contains(IV->getIncomingBlock(0));
+ Value *Inc = IV->getIncomingValue(!P0InLoop);
+ BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->getOperand(0) == Inc) {
+ if (BI->getSuccessor(0) == getHeader()) {
+ if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+ return ICI->getOperand(1);
+ } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+ return ICI->getOperand(1);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
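+
+// For example, a canonical loop whose backedge block ends with
+//   %i.next = add i32 %i, 1
+//   %exitcond = icmp ne i32 %i.next, %n
+//   br i1 %exitcond, label %header, label %exit
+// has trip count %n, which is what this returns.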
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. It will also return 0 if the trip count is very large
+/// (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+ Value* TripCount = this->getTripCount();
+ if (TripCount) {
+ if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+ // Guard against huge trip counts.
+ if (TripCountC->getValue().getActiveBits() <= 32) {
+ return (unsigned)TripCountC->getZExtValue();
+ }
+ }
+ }
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a multiple
+/// of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). It will also
+/// return 1 if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+ Value* TripCount = this->getTripCount();
+ // This will hold the ConstantInt result, if any
+ ConstantInt *Result = NULL;
+ if (TripCount) {
+ // See if the trip count is constant itself
+ Result = dyn_cast<ConstantInt>(TripCount);
+ // if not, see if it is a multiplication
+ if (!Result)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+ switch (BO->getOpcode()) {
+ case BinaryOperator::Mul:
+ Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+ break;
+ case BinaryOperator::Shl:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (CI->getValue().getActiveBits() <= 5)
+ return 1u << CI->getZExtValue();
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Guard against huge trip counts.
+ if (Result && Result->getValue().getActiveBits() <= 32) {
+ return (unsigned)Result->getZExtValue();
+ } else {
+ return 1;
+ }
+}
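+
+// For example, if getTripCount() yields "mul i32 %m, 4" this returns 4, and
+// if it yields "shl i32 %m, 3" it returns 8; a trip count with no such
+// constant factor gives the conservative answer 1.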
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+  // Collect the loop blocks in a set so that membership tests are quick.
+ SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U))
+ UserBB = P->getIncomingBlock(UI);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != BB &&
+ !LoopBBs.count(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader, a single backedge, and all of their
+ // exits have all their predecessors inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+  // Collect the loop blocks in a set so that membership tests are quick.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!LoopBBs.count(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(hasDedicatedExits() &&
+ "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+ // If the block is inside the loop then it is not an exit block.
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ continue;
+
+ pred_iterator PI = pred_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+ // Only insert the exit block into the output ExitBlocks vector if the
+ // current basic block is this exit block's first predecessor. This
+ // ensures that the same exit block is not inserted twice into the
+ // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+ // If a terminator has more than two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from current block
+ // to one exit block.
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In case of multiple edges from current block to exit block, collect
+ // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+ // duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return 0;
+}
+
+void Loop::dump() const {
+ print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update
+ return false;
+}
+
+void LoopInfo::verifyAnalysis() const {
+ // LoopInfo is a FunctionPass, but verifying every loop in the function
+ // each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop
+ // manually during loop pass sequences.
+
+ if (!VerifyLoopInfo) return;
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ (*I)->verifyLoopNest();
+ }
+
+ // TODO: check BBMap consistency.
+}
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTree>();
+}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+ LI.print(OS);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
new file mode 100644
index 000000000000..10e3f297f9c5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,422 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPass : public LoopPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintLoopPass(const std::string &B, raw_ostream &o)
+ : LoopPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) {
+ Out << Banner;
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be;
+ ++b) {
+ (*b)->print(Out);
+ }
+ return false;
+ }
+};
+
+char PrintLoopPass::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+ if (TheDebugProbe) return;
+
+ // Constructed the first time this is called. This guarantees that the
+ // object will be constructed, if -enable-debug-info-probe is set,
+ // before static globals, thus it will be destroyed before them.
+ static ManagedStatic<DebugInfoProbeInfo> DIP;
+ TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisLoop = false;
+ redoThisLoop = false;
+ LI = NULL;
+ CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+ if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+ // Reparent all of the blocks in this loop. Since this loop had a parent,
+ // they are now all in it.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops.
+ LI->changeLoopFor(*I, ParentLoop);
+
+ // Remove the loop from its parent loop.
+ for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+ ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+
+ // Move all subloops into the parent loop.
+ while (!L->empty())
+ ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+ } else {
+ // Reparent all of the blocks in this loop. Since this loop had no parent,
+ // they are no longer in a loop at all.
+
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ // Don't change blocks in subloops.
+ if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+ LI->removeBlock(L->getBlocks()[i]);
+ --i;
+ }
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ LI->removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!L->empty())
+ LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+ }
+
+ delete L;
+
+ // If L is the current loop then skip the rest of the passes and let
+ // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+ // and continue applying other passes on CurrentLoop.
+ if (CurrentLoop == L) {
+ skipThisLoop = true;
+ return;
+ }
+
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L) {
+ LQ.erase(I);
+ break;
+ }
+ }
+}
+
+// Insert loop into the loop nest (LoopInfo) and the loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+ assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+ // Insert into loop nest
+ if (ParentLoop)
+ ParentLoop->addChildLoop(L);
+ else
+ LI->addTopLevelLoop(L);
+
+ insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
+ // Insert L into loop queue
+ if (L == CurrentLoop)
+ redoLoop(L);
+ else if (!L->getParentLoop())
+ // This is top level loop.
+ LQ.push_front(L);
+ else {
+ // Insert L after the parent loop.
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows a LoopPass to change the loop nest for the loop. This
+// utility may send LPPassManager into infinite loops, so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+ assert (CurrentLoop == L && "Can redo only CurrentLoop");
+ redoThisLoop = true;
+}
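+
+// A minimal illustrative sketch (hypothetical pass and helper, guarded out):
+// a LoopPass that restructures its current loop can ask for another round of
+// the pipeline like this; re-queuing unconditionally is what would drive the
+// manager into an infinite loop.
+#if 0
+struct RestructureLoops : public LoopPass {
+ static char ID;
+ RestructureLoops() : LoopPass(ID) {}
+ bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ bool Restructured = restructureLoop(L); // hypothetical transformation
+ if (Restructured)
+ LPM.redoLoop(L); // only valid for the loop currently being processed
+ return Restructured;
+ }
+};
+#endif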
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+ BasicBlock *To, Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->cloneBasicBlockAnalysis(From, To, L);
+ }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ ++BI) {
+ Instruction &I = *BI;
+ deleteSimpleAnalysisValue(&I, L);
+ }
+ }
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisValue(V, L);
+ }
+}
+
+
+// Recursively add a loop and all of its subloops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+ LQ.push_back(L);
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // become part of LPPassManager.
+ Info.addRequired<LoopInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ bool Changed = false;
+ createDebugInfoProbe();
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ // Populate Loop Queue
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+
+ if (LQ.empty()) // No loops, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+ I != E; ++I) {
+ Loop *L = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doInitialization(L, *this);
+ }
+ }
+
+ // Walk Loops
+ while (!LQ.empty()) {
+
+ CurrentLoop = LQ.back();
+ skipThisLoop = false;
+ redoThisLoop = false;
+
+ // Run all passes on the current Loop.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getName());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+ if (TheDebugProbe)
+ TheDebugProbe->initialize(P, F);
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+ TimeRegion PassTimer(getPassTimer(P));
+
+ Changed |= P->runOnLoop(CurrentLoop, *this);
+ }
+ if (TheDebugProbe)
+ TheDebugProbe->finalize(P, F);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName());
+ dumpPreservedSet(P);
+
+ if (!skipThisLoop) {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(LI));
+ CurrentLoop->verifyLoop();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName(),
+ ON_LOOP_MSG);
+
+ if (skipThisLoop)
+ // Do not run other passes on this loop.
+ break;
+ }
+
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisLoop)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+
+ // Pop the loop from queue after running all passes.
+ LQ.pop_back();
+
+ if (redoThisLoop)
+ LQ.push_back(CurrentLoop);
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintLoopPass(Banner, O);
+}
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for an LPPassManager if P
+// does not preserve the higher level analysis info used by other
+// LPPassManager passes. In such a case, pop LPPassManager from the
+// stack. This will force assignPassManager() to create a new
+// LPPassManager as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in the current LPM. Use a new LPPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM;
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+ LPPM = (LPPassManager*)PMS.top();
+ else {
+ // Create new Loop Pass Manager if it does not exist.
+ assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Loop Pass Manager
+ LPPM = new LPPassManager(PMD->getDepth() + 1);
+ LPPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(LPPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = LPPM->getAsPass();
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(LPPM);
+ }
+
+ LPPM->add(this);
+}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 000000000000..2283db0bc482
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,176 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+namespace {
+ struct MemDepPrinter : public FunctionPass {
+ const Function *F;
+
+ typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
+ typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+ typedef SmallSetVector<Dep, 4> DepSet;
+ typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+ DepSetMap Deps;
+
+ static char ID; // Pass identification, replacement for typeid
+ MemDepPrinter() : FunctionPass(ID) {
+ initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ void print(raw_ostream &OS, const Module * = 0) const;
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ virtual void releaseMemory() {
+ Deps.clear();
+ F = 0;
+ }
+ };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+ return new MemDepPrinter();
+}
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+ this->F = &F;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+ // All this code uses non-const interfaces because MemDep is not
+ // const-friendly, though nothing is actually modified.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+ continue;
+
+ MemDepResult Res = MDA.getDependency(Inst);
+ if (!Res.isNonLocal()) {
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Local dep should be unknown, def or clobber!");
+ Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ static_cast<BasicBlock *>(0)));
+ } else if (CallSite CS = cast<Value>(Inst)) {
+ const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+ MDA.getNonLocalCallDependency(CS);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Resolved non-local call dep should be unknown, def or "
+ "clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ } else {
+ SmallVector<NonLocalDepResult, 4> NLDI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(LI);
+ MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
+ LI->getParent(), NLDI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(SI);
+ MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+ AliasAnalysis::Location Loc = AA.getLocation(VI);
+ MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+ } else {
+ llvm_unreachable("Unknown memory instruction!");
+ }
+
+ DepSet &InstDeps = Deps[Inst];
+ for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert(Res.isClobber() != Res.isDef() &&
+ "Resolved non-local pointer dep should be def or clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ }
+ }
+
+ return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+ for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+ const Instruction *Inst = &*I;
+
+ DepSetMap::const_iterator DI = Deps.find(Inst);
+ if (DI == Deps.end())
+ continue;
+
+ const DepSet &InstDeps = DI->second;
+
+ for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+ I != E; ++I) {
+ const Instruction *DepInst = I->first.getPointer();
+ bool isClobber = I->first.getInt();
+ const BasicBlock *DepBB = I->second;
+
+ OS << " ";
+ if (!DepInst)
+ OS << "Unknown";
+ else if (isClobber)
+ OS << "Clobber";
+ else
+ OS << " Def";
+ if (DepBB) {
+ OS << " in block ";
+ WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+ }
+ if (DepInst) {
+ OS << " from: ";
+ if (DepInst == Inst)
+ OS << "<unspecified>";
+ else
+ DepInst->print(OS);
+ }
+ OS << "\n";
+ }
+
+ Inst->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 000000000000..53d430491198
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,213 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value *I) {
+ return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+ if (!CI)
+ return false;
+
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return false;
+ if (Callee->getName() != "malloc" &&
+ Callee->getName() != "_Znwj" && // operator new(unsigned int)
+ Callee->getName() != "_Znwm" && // operator new(unsigned long)
+ Callee->getName() != "_Znaj" && // operator new[](unsigned int)
+ Callee->getName() != "_Znam") // operator new[](unsigned long)
+ return false;
+
+ // Check malloc prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (FTy->getNumParams() != 1)
+ return false;
+ return FTy->getParamType(0)->isIntegerTy(32) ||
+ FTy->getParamType(0)->isIntegerTy(64);
+}
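+
+// For illustration (hypothetical IR, assuming a 64-bit target), the calls
+// accepted by the predicate above look like:
+// %m = call i8* @malloc(i64 %n)
+// %p = call i8* @_Znwm(i64 24) ; operator new(unsigned long)
+// Indirect calls, and calls whose single parameter is not i32 or i64, are
+// rejected.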
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst *llvm::extractMallocCall(Value *I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst *BCI) {
+ if (!BCI)
+ return false;
+
+ return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst *llvm::extractMallocCallFromBitCast(Value *I) {
+ BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt = false) {
+ if (!CI)
+ return NULL;
+
+ // The size of the malloc's result type must be known to determine array size.
+ const Type *T = getMallocAllocatedType(CI);
+ if (!T || !T->isSized() || !TD)
+ return NULL;
+
+ unsigned ElementSize = TD->getTypeAllocSize(T);
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+
+ // If malloc call's arg can be determined to be a multiple of ElementSize,
+ // return the multiple. Otherwise, return NULL.
+ Value *MallocArg = CI->getArgOperand(0);
+ Value *Multiple = NULL;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
+
+ return NULL;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1. Otherwise, return NULL.
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
+ const CallInst *CI = extractMallocCall(I);
+ Value *ArraySize = computeArraySize(CI, TD);
+
+ if (ArraySize &&
+ ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ return CI;
+
+ // CI is a non-array malloc or we can't figure out that it is an array malloc.
+ return NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the call's return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+const PointerType *llvm::getMallocType(const CallInst *CI) {
+ assert(isMalloc(CI) && "getMallocType and not malloc call");
+
+ const PointerType *MallocType = NULL;
+ unsigned NumOfBitCastUses = 0;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+ MallocType = cast<PointerType>(BCI->getDestTy());
+ NumOfBitCastUses++;
+ }
+
+ // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+ if (NumOfBitCastUses == 1)
+ return MallocType;
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (NumOfBitCastUses == 0)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return NULL;
+}
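+
+// For illustration (hypothetical IR): with exactly one bitcast use, as in
+// %m = call i8* @malloc(i64 16)
+// %p = bitcast i8* %m to i32*
+// the function above returns i32*. With no bitcast uses it returns i8* (the
+// call's own type), and with more than one bitcast use it returns NULL.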
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the malloc call's return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+ const PointerType *PT = getMallocType(CI);
+ return PT ? PT->getElementType() : NULL;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple. For non-array mallocs, the multiple is
+/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt) {
+ assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, TD, LookThroughSExt);
+}
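+
+// A minimal illustrative sketch (hypothetical helper, guarded out): a client
+// can combine the utilities above to recover the allocated element type and
+// the array size of an allocation site.
+#if 0
+static void describeMalloc(Value *V, const TargetData *TD) {
+ if (CallInst *CI = extractMallocCall(V)) {
+ const Type *ElemTy = getMallocAllocatedType(CI);
+ Value *NumElems = getMallocArraySize(CI, TD, /*LookThroughSExt=*/true);
+ (void)ElemTy; (void)NumElems; // either may be NULL if undeterminable
+ }
+}
+#endif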
+
+//===----------------------------------------------------------------------===//
+// free Call Utility Functions.
+//
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ return 0;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return 0;
+
+ if (Callee->getName() != "free" &&
+ Callee->getName() != "_ZdlPv" && // operator delete(void*)
+ Callee->getName() != "_ZdaPv") // operator delete[](void*)
+ return 0;
+
+ // Check free prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (!FTy->getReturnType()->isVoidTy())
+ return 0;
+ if (FTy->getNumParams() != 1)
+ return 0;
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+ return 0;
+
+ return CI;
+}
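+
+// For illustration (hypothetical IR): the prototype check above accepts
+// declarations used as
+// call void @free(i8* %p)
+// call void @_ZdlPv(i8* %p) ; operator delete(void*)
+// and rejects any callee whose type is not void(i8*).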
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 000000000000..bba4482f4da5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1469 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on. It builds on
+// alias analysis information, and tries to provide a lazy, caching interface to
+// a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+ "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+ "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+ "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+ "Number of block queries that were completely cached");
+
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+// (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(ID), PredCache(0) {
+ initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+ LocalDeps.clear();
+ NonLocalDeps.clear();
+ NonLocalPointerDeps.clear();
+ ReverseLocalDeps.clear();
+ ReverseNonLocalDeps.clear();
+ ReverseNonLocalPtrDeps.clear();
+ PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (PredCache == 0)
+ PredCache.reset(new PredIteratorCache());
+ return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+ InstIt = ReverseMap.find(Inst);
+ assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+ bool Found = InstIt->second.erase(Val);
+ assert(Found && "Invalid reverse map!"); (void)Found;
+ if (InstIt->second.empty())
+ ReverseMap.erase(InstIt);
+}
+
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+ AliasAnalysis::Location &Loc,
+ AliasAnalysis *AA) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ }
+
+ if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Loc = AA->getLocation(V);
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const CallInst *CI = isFreeCall(Inst)) {
+ // calls to free() deallocate the entire structure
+ Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+ return AliasAnalysis::Mod;
+ }
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Loc = AliasAnalysis::Location(II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ case Intrinsic::invariant_end:
+ Loc = AliasAnalysis::Location(II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ default:
+ break;
+ }
+
+ // Otherwise, just do the coarse-grained thing that always works.
+ if (Inst->mayWriteToMemory())
+ return AliasAnalysis::ModRef;
+ if (Inst->mayReadFromMemory())
+ return AliasAnalysis::Ref;
+ return AliasAnalysis::NoModRef;
+}
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
+ // Walk backwards through the block, looking for dependencies
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = --ScanIt;
+
+ // If this inst is a memory op, get the pointer it accessed
+ AliasAnalysis::Location Loc;
+ AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ if (Loc.Ptr) {
+ // A simple instruction.
+ if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ continue;
+ }
+
+ if (CallSite InstCS = cast<Value>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ // If these two calls do not interfere, look past it.
+ switch (AA->getModRefInfo(CS, InstCS)) {
+ case AliasAnalysis::NoModRef:
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
+ break;
+ default:
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getUnknown();
+}
+
+/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
+/// would fully overlap MemLoc if done as a wider legal integer load.
+///
+/// MemLocBase, MemLocOffset are lazily computed here the first time the
+/// base/offs of memloc is needed.
+static bool
+isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
+ const Value *&MemLocBase,
+ int64_t &MemLocOffs,
+ const LoadInst *LI,
+ const TargetData *TD) {
+ // If we have no target data, we can't do this.
+ if (TD == 0) return false;
+
+ // If we haven't already computed the base/offset of MemLoc, do so now.
+ if (MemLocBase == 0)
+ MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+ LI, *TD);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+ unsigned MemLocSize, const LoadInst *LI,
+ const TargetData &TD) {
+ // We can only extend non-volatile integer loads.
+ if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase) return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs) return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlignment();
+
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (1) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !TD.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs+NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+
+ return 0;
+}
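+
+// A worked example of the search above (illustrative numbers): querying one
+// byte at P+3 against "load i8* P, align 4" gives LIOffs = 0, LoadAlign = 4
+// and MemLocEnd = 4. The candidate width starts at NextPowerOf2(1) = 2
+// bytes, which only covers [0,2), so it is doubled to 4; a 4-byte load
+// covers [0,4), respects the alignment, and (assuming the target has legal
+// 16- and 32-bit integers) 4 is returned.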
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations. If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+
+ const Value *MemLocBase = 0;
+ int64_t MemLocOffset = 0;
+
+ unsigned Limit = BlockScanLimit;
+
+ // Walk backwards through the basic block, looking for dependencies.
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Debug intrinsics don't (and can't) cause dependences.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
+ // If we reach a lifetime begin or end marker, then the query ends here
+ // because the value is undefined.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point (the right approach is to use
+ // GetPointerBaseWithConstantOffset).
+ if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
+ MemLoc))
+ return MemDepResult::getDef(II);
+ continue;
+ }
+ }
+
+ // Values depend on loads if the pointers are must aliased. This means that
+ // a load depends on another must aliased load from the same value.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
+
+ if (isLoad) {
+ if (R == AliasAnalysis::NoAlias) {
+ // If this is an over-aligned integer load (for example,
+ // "load i8* %P, align 4") see if it would obviously overlap with the
+ // queried location if widened to a larger load (e.g. if the queried
+ // location is 1 byte at P+1). If so, return it as a load/load
+ // clobber result, allowing the client to decide to widen the load if
+ // it wants to.
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
+ MemLocOffset, LI, TD))
+ return MemDepResult::getClobber(Inst);
+
+ continue;
+ }
+
+ // Must aliased loads are defs of each other.
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
+ // If we have a partial alias, then return this as a clobber for the
+ // client to handle.
+ if (R == AliasAnalysis::PartialAlias)
+ return MemDepResult::getClobber(Inst);
+#endif
+
+ // Loads that merely may-alias each other do not create a dependence;
+ // keep scanning.
+ continue;
+ }
+
+ // Stores don't depend on other no-aliased accesses.
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+
+ // Stores don't alias loads from read-only memory.
+ if (AA->pointsToConstantMemory(LoadLoc))
+ continue;
+
+ // Stores depend on may/must aliased loads.
+ return MemDepResult::getDef(Inst);
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+ continue;
+
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
+
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // If this is an allocation, and if we know that the accessed pointer is to
+ // the allocation, return Def. This means that there is no dependence and
+ // the access can be optimized based on that. For example, a load could
+ // turn into undef.
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocaInst>(Inst) ||
+ (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+ const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
+
+ if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
+ return MemDepResult::getDef(Inst);
+ continue;
+ }
+
+ // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+ switch (AA->getModRefInfo(Inst, MemLoc)) {
+ case AliasAnalysis::NoModRef:
+ // If the call has no effect on the queried pointer, just ignore it.
+ continue;
+ case AliasAnalysis::Mod:
+ return MemDepResult::getClobber(Inst);
+ case AliasAnalysis::Ref:
+ // If the call is known to never store to the pointer, and if this is a
+ // load query, we can safely ignore it (scan past it).
+ if (isLoad)
+ continue;
+ default:
+ // Otherwise, there is a potential dependence. Return a clobber.
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getUnknown();
+}
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+ Instruction *ScanPos = QueryInst;
+
+ // Check for a cached result
+ MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+ // If the cached entry is non-dirty, just return it. Note that this depends
+ // on MemDepResult's default constructing to 'dirty'.
+ if (!LocalCache.isDirty())
+ return LocalCache;
+
+ // Otherwise, if we have a dirty entry, we know we can start the scan at that
+ // instruction, which may save us some work.
+ if (Instruction *Inst = LocalCache.getInst()) {
+ ScanPos = Inst;
+
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+ }
+
+ BasicBlock *QueryParent = QueryInst->getParent();
+
+ // Do the scan.
+ if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+ LocalCache = MemDepResult::getNonLocal();
+ else
+ LocalCache = MemDepResult::getUnknown();
+ } else {
+ AliasAnalysis::Location MemLoc;
+ AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ if (MemLoc.Ptr) {
+ // If we can do a pointer scan, make it happen.
+ bool isLoad = !(MR & AliasAnalysis::Mod);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
+
+ LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+ QueryParent);
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+ CallSite QueryCS(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ } else
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getUnknown();
+ }
+
+ // Remember the result!
+ if (Instruction *I = LocalCache.getInst())
+ ReverseLocalDeps[I].insert(QueryInst);
+
+ return LocalCache;
+}
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ int Count = -1) {
+ if (Count == -1) Count = Cache.size();
+ if (Count == 0) return;
+
+ for (unsigned i = 1; i != unsigned(Count); ++i)
+ assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across. The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed. Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+ assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ NonLocalDepInfo &Cache = CacheP.first;
+
+ /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
+ /// the cached case, this can happen due to instructions being deleted etc. In
+ /// the uncached case, this starts out as the set of predecessors we care
+ /// about.
+ SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+ if (!Cache.empty()) {
+ // Okay, we have a cache entry. If we know it is not dirty, just return it
+ // with no computation.
+ if (!CacheP.second) {
+ ++NumCacheNonLocal;
+ return Cache;
+ }
+
+ // If we already have a partially computed set of results, scan them to
+ // determine what is dirty, seeding our initial DirtyBlocks worklist.
+ for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+ I != E; ++I)
+ if (I->getResult().isDirty())
+ DirtyBlocks.push_back(I->getBB());
+
+ // Sort the cache so that we can do fast binary search lookups below.
+ std::sort(Cache.begin(), Cache.end());
+
+ ++NumCacheDirtyNonLocal;
+ //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+ // << Cache.size() << " cached: " << *QueryInst;
+ } else {
+ // Seed DirtyBlocks with each of the preds of QueryInst's block.
+ BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ ++NumUncacheNonLocal;
+ }
+
+ // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+ bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+ SmallPtrSet<BasicBlock*, 64> Visited;
+
+ unsigned NumSortedEntries = Cache.size();
+ DEBUG(AssertSorted(Cache));
+
+ // Iterate while we still have blocks to update.
+ while (!DirtyBlocks.empty()) {
+ BasicBlock *DirtyBB = DirtyBlocks.back();
+ DirtyBlocks.pop_back();
+
+ // Already processed this block?
+ if (!Visited.insert(DirtyBB))
+ continue;
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ DEBUG(AssertSorted(Cache, NumSortedEntries));
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+ NonLocalDepEntry(DirtyBB));
+ if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache.begin()+NumSortedEntries &&
+ Entry->getBB() == DirtyBB) {
+ // If we already have an entry, and if it isn't already dirty, the block
+ // is done.
+ if (!Entry->getResult().isDirty())
+ continue;
+
+ // Otherwise, remember this slot so we can update the value.
+ ExistingResult = &*Entry;
+ }
+
+ // If the dirty entry has a pointer, start scanning from it so we don't have
+ // to rescan the entire block.
+ BasicBlock::iterator ScanPos = DirtyBB->end();
+ if (ExistingResult) {
+ if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+ ScanPos = Inst;
+ // We're removing QueryInst's use of Inst.
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+ QueryCS.getInstruction());
+ }
+ }
+
+ // Find out if this block has a local dependency for QueryInst.
+ MemDepResult Dep;
+
+ if (ScanPos != DirtyBB->begin()) {
+ Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
+ } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ Dep = MemDepResult::getNonLocal();
+ } else {
+ Dep = MemDepResult::getUnknown();
+ }
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the association!
+ if (!Dep.isNonLocal()) {
+ // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+ // update this when we remove instructions.
+ if (Instruction *Inst = Dep.getInst())
+ ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ } else {
+
+ // If the block *is* completely transparent to the value, we need to
+ // check the predecessors of this block. Add them to our worklist.
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ }
+ }
+
+ return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+ BasicBlock *FromBB,
+ SmallVectorImpl<NonLocalDepResult> &Result) {
+ assert(Loc.Ptr->getType()->isPointerTy() &&
+ "Can't get pointer deps of a non-pointer!");
+ Result.clear();
+
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
+
+ // This is the set of blocks we've inspected, and the pointer we consider in
+ // each block. Because of critical edges, we currently bail out if querying
+ // a block with multiple different pointers. This can happen during PHI
+ // translation.
+ DenseMap<BasicBlock*, Value*> Visited;
+ if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ Result, Visited, true))
+ return;
+ Result.clear();
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+}
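As a rough usage sketch (not part of this patch), a client pass holding a MemoryDependenceAnalysis reference could query the non-local dependencies of a load whose local query already came back non-local; `MD` and `LI` are assumed names for that reference and the load:

    SmallVector<NonLocalDepResult, 8> Deps;
    AliasAnalysis::Location Loc(LI->getPointerOperand());
    MD.getNonLocalPointerDependency(Loc, /*isLoad=*/true, LI->getParent(), Deps);
    for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
      const MemDepResult &Res = Deps[i].getResult();
      if (Res.isDef() || Res.isClobber())        // defining or clobbering access
        dbgs() << "dep in " << Deps[i].getBB()->getName()
               << ": " << *Res.getInst() << '\n';
    }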
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available). If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *BB,
+ NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+ NonLocalDepEntry(BB));
+ if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+ ExistingResult = &*Entry;
+
+ // If we have a cached entry, and it is non-dirty, use it as the value for
+ // this dependency.
+ if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+ ++NumCacheNonLocalPtr;
+ return ExistingResult->getResult();
+ }
+
+ // Otherwise, we have to scan for the value. If we have a dirty cache
+ // entry, start scanning from its position, otherwise we scan from the end
+ // of the block.
+ BasicBlock::iterator ScanPos = BB->end();
+ if (ExistingResult && ExistingResult->getResult().getInst()) {
+ assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+ "Instruction invalidated?");
+ ++NumCacheDirtyNonLocalPtr;
+ ScanPos = ExistingResult->getResult().getInst();
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ } else {
+ ++NumUncacheNonLocalPtr;
+ }
+
+ // Scan the block for the dependency.
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the reverse association because we just added it
+ // to Cache!
+ if (Dep.isNonLocal() || Dep.isUnknown())
+ return Dep;
+
+ // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+ // update MemDep when we remove instructions.
+ Instruction *Inst = Dep.getInst();
+ assert(Inst && "Didn't depend on anything?");
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+ return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+ // One new entry; just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true). In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason. This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+ const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepResult> &Result,
+ DenseMap<BasicBlock*, Value*> &Visited,
+ bool SkipFirstBlock) {
+
+ // Look up the cached info for Pointer.
+ ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+ // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+ // CacheKey, this value will be inserted as the associated value. Otherwise,
+ // it'll be ignored, and we'll have to check to see if the cached size and
+ // tbaa tag are consistent with the current query.
+ NonLocalPointerInfo InitialNLPI;
+ InitialNLPI.Size = Loc.Size;
+ InitialNLPI.TBAATag = Loc.TBAATag;
+
+ // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+ // already have one.
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+ NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+ // If we already have a cache entry for this CacheKey, we may need to do some
+ // work to reconcile the cache entry and the current query.
+ if (!Pair.second) {
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(Pointer,
+ Loc.getWithNewSize(CacheInfo->Size),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+
+ // If the query's TBAATag is inconsistent with the cached one,
+ // conservatively throw out the cached data and restart the query with
+ // no tag if needed.
+ if (CacheInfo->TBAATag != Loc.TBAATag) {
+ if (CacheInfo->TBAATag) {
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->TBAATag = 0;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ }
+ if (Loc.TBAATag)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+ }
+
+ NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
+
+ // If we have valid cached information for exactly the block we are
+ // investigating, just return it with no recomputation.
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ // If we have a fully cached result for this query, we can just return the
+ // cached results and populate the visited set. However, we have to verify
+ // that we don't already have conflicting results for these blocks. Check
+ // to ensure that if a block in the results set is in the visited set that
+ // it was for the same pointer query.
+ if (!Visited.empty()) {
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+ if (VI == Visited.end() || VI->second == Pointer.getAddr())
+ continue;
+
+ // We have a pointer mismatch in a block. Just return clobber, saying
+ // that something was clobbered in this result. We could also do a
+ // non-fully cached query, but there is little point in doing this.
+ return true;
+ }
+ }
+
+ Value *Addr = Pointer.getAddr();
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ Visited.insert(std::make_pair(I->getBB(), Addr));
+ if (!I->getResult().isNonLocal())
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+ }
+ ++NumCacheCompleteNonLocalPtr;
+ return false;
+ }
+
+ // Otherwise, this is either a new block, a block with an invalid cache
+ // pointer, or one that we're about to invalidate by putting more info into
+ // it than its valid cache info can hold. If the cache is empty, the result
+ // we compute will be complete and can serve as valid cache info; otherwise
+ // it can't.
+ if (Cache->empty())
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(StartBB);
+
+ // PredList used inside loop.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
+ // Keep track of the entries that we know are sorted. Previously cached
+ // entries will all be sorted. The entries we add are only sorted on demand
+ // (we don't insert every element into its sorted position). We know that we
+ // won't get any reuse from currently inserted values, because we don't
+ // revisit blocks after we insert info for them.
+ unsigned NumSortedEntries = Cache->size();
+ DEBUG(AssertSorted(*Cache));
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ // Skip the first block if we have it.
+ if (!SkipFirstBlock) {
+ // Analyze the dependency of *Pointer in FromBB. See if we have already
+ // been here.
+ assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+ // Get the dependency info for Pointer in BB. If we have cached
+ // information, we will use it, otherwise we compute it.
+ DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ NumSortedEntries);
+
+ // If we got a Def or Clobber, add this to the list of results.
+ if (!Dep.isNonLocal()) {
+ Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+ continue;
+ }
+ }
+
+ // If 'Pointer' is an instruction defined in this block, then we need to do
+ // phi translation to change it into a value live in the predecessor block.
+ // If not, we just add the predecessors to the worklist and scan them with
+ // the same Pointer.
+ if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+ SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ // Verify that we haven't looked at this block yet.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+ if (InsertRes.second) {
+ // First time we've looked at *PI.
+ NewBlocks.push_back(*PI);
+ continue;
+ }
+
+ // If we have seen this block before, but it was with a different
+ // pointer then we have a phi translation failure and we have to treat
+ // this as a clobber.
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
+ goto PredTranslationFailure;
+ }
+ }
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
+ continue;
+ }
+
+ // We do need to do phi translation, if we know ahead of time we can't phi
+ // translate this value, don't even try.
+ if (!Pointer.IsPotentiallyPHITranslatable())
+ goto PredTranslationFailure;
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ Cache = 0;
+
+ PredList.clear();
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ BasicBlock *Pred = *PI;
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
+ // Get the PHI translated pointer in this predecessor. This can fail if
+ // not translatable, in which case the getAddr() returns null.
+ PHITransAddr &PredPointer = PredList.back().second;
+ PredPointer.PHITranslateValue(BB, Pred, 0);
+
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
+
+ if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtrVal)
+ continue;
+
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < PredList.size(); i++)
+ Visited.erase(PredList[i].first);
+
+ goto PredTranslationFailure;
+ }
+ }
+
+ // Actually process results here; this needs to be a separate loop to avoid
+ // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+ // any results for. (getNonLocalPointerDepFromBB will modify our
+ // data structures in ways the code after the PredTranslationFailure label
+ // doesn't expect.)
+ for (unsigned i = 0; i < PredList.size(); i++) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
+ // If PHI translation was unable to find an available pointer in this
+ // predecessor, then we have to assume that the pointer is clobbered in
+ // that predecessor. We can still do PRE of the load, which would insert
+ // a computation of the pointer in this predecessor.
+ if (PredPtrVal == 0)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume it is unknown, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
+ // Add the entry to the Result list.
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
+ Result.push_back(Entry);
+
+ // Since we had a phi translation failure, the cache for CacheKey won't
+ // include all of the entries that we need to immediately satisfy future
+ // queries. Mark this in NonLocalPointerDeps by setting the
+ // BBSkipFirstBlockPair pointer to null. This forces future reuse of the
+ // cached value to do more work, but ensures the phi translation failure
+ // is not missed.
+ NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+ NLPI.Pair = BBSkipFirstBlockPair();
+ continue;
+ }
+ }
+
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+
+ PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our data structures while processing the current block.
+
+ if (Cache == 0) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+ }
+
+ // Since we failed phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ // If *nothing* works, mark the pointer as unknown.
+ //
+ // If this is the magic first block, return this as a clobber of the whole
+ // incoming value. Since we can't phi translate to one of the predecessors,
+ // we have to bail out.
+ if (SkipFirstBlock)
+ return true;
+
+ for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+ assert(I != Cache->rend() && "Didn't find current block??");
+ if (I->getBB() != BB)
+ continue;
+
+ assert(I->getResult().isNonLocal() &&
+ "Should only be here with transparent block");
+ I->setResult(MemDepResult::getUnknown());
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+ Pointer.getAddr()));
+ break;
+ }
+ }
+
+ // Okay, we're done now. If we added new values to the cache, re-sort it.
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ DEBUG(AssertSorted(*Cache));
+ return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+ CachedNonLocalPointerInfo::iterator It =
+ NonLocalPointerDeps.find(P);
+ if (It == NonLocalPointerDeps.end()) return;
+
+ // Remove all of the entries in the BB->val map. This involves removing
+ // instructions from the reverse map.
+ NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
+
+ for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+ Instruction *Target = PInfo[i].getResult().getInst();
+ if (Target == 0) continue; // Ignore non-local dep results.
+ assert(Target->getParent() == PInfo[i].getBB());
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+ }
+
+ // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+ NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep. This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with ptr. This can make Ptr available
+/// in more places than the cached info necessarily reflects.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+ // If Ptr isn't really a pointer, just ignore it.
+ if (!Ptr->getType()->isPointerTy()) return;
+ // Flush store info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+ // Flush load info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
+
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache->clear();
+}
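A hedged sketch of the client protocol these two hooks support; `MD`, `OldV`, `NewV`, `TI`, and `SuccNum` are assumed names rather than anything from this patch:

    // After proving OldV and NewV equivalent and rewriting uses, drop the
    // possibly over-conservative cached pointer info for the surviving value.
    OldV->replaceAllUsesWith(NewV);
    MD.invalidateCachedPointerInfo(NewV);

    // After a CFG edit such as splitting a critical edge, the cached
    // predecessor lists are stale and must be cleared.
    SplitCriticalEdge(TI, SuccNum, this);
    MD.invalidateCachedPredecessors();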
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+ // Walk through the Non-local dependencies, removing this one as the value
+ // for any cached queries.
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
+ if (NLDI != NonLocalDeps.end()) {
+ NonLocalDepInfo &BlockMap = NLDI->second.first;
+ for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
+ DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
+ NonLocalDeps.erase(NLDI);
+ }
+
+ // If we have a cached local dependence query for this instruction, remove it.
+ //
+ LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
+ if (LocalDepEntry != LocalDeps.end()) {
+ // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst())
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
+
+ // Remove this local dependency info.
+ LocalDeps.erase(LocalDepEntry);
+ }
+
+ // If we have any cached pointer dependencies on this instruction, remove
+ // them. If the instruction has non-pointer type, then it can't be a pointer
+ // base.
+
+ // Remove it from both the load info and the store info. The instruction
+ // can't be in either of these maps if it is non-pointer.
+ if (RemInst->getType()->isPointerTy()) {
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
+ }
+
+ // Loop over all of the things that depend on the instruction we're removing.
+ //
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
+
+ // If we find RemInst as a clobber or Def in any of the maps for other values,
+ // we need to replace its entry with a dirty version of the instruction after
+ // it. If RemInst is a terminator, we use a null dirty value.
+ //
+ // Using a dirty version of the instruction after RemInst saves having to scan
+ // the entire block to get to this point.
+ MemDepResult NewDirtyVal;
+ if (!RemInst->isTerminator())
+ NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+
+ ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
+ // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ "Nothing can locally depend on a terminator");
+
+ for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
+ E = ReverseDeps.end(); I != E; ++I) {
+ Instruction *InstDependingOnRemInst = *I;
+ assert(InstDependingOnRemInst != RemInst &&
+ "Already removed our local dep info");
+
+ LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
+
+ // Make sure to remember that new things depend on NewDirtyVal's instruction.
+ assert(NewDirtyVal.getInst() && "There is no way something else can have "
+ "a local dep on this if it is a terminator!");
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ InstDependingOnRemInst));
+ }
+
+ ReverseLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating the
+ // 'ReverseDeps' reference.
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseNonLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
+ for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
+ I != E; ++I) {
+ assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+
+ PerInstNLInfo &INLD = NonLocalDeps[*I];
+ // The information is now dirty!
+ INLD.second = true;
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+ DE = INLD.first.end(); DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NextI = NewDirtyVal.getInst())
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ }
+ }
+
+ ReverseNonLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
+ // value in the NonLocalPointerDeps info.
+ ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
+ ReverseNonLocalPtrDeps.find(RemInst);
+ if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
+ SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
+ SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
+ E = Set.end(); I != E; ++I) {
+ ValueIsLoadPair P = *I;
+ assert(P.getPointer() != RemInst &&
+ "Already removed NonLocalPointerDeps info for RemInst");
+
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
+
+ // The cache is not valid for any specific block anymore.
+ NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+
+ // Update any entries for RemInst to use the instruction after it.
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
+ DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
+ ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
+ }
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
+ }
+
+ ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
+
+ while (!ReversePtrDepsToAdd.empty()) {
+ ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
+ .insert(ReversePtrDepsToAdd.back().second);
+ ReversePtrDepsToAdd.pop_back();
+ }
+ }
+
+
+ assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
+ AA->deleteValue(RemInst);
+ DEBUG(verifyRemoved(RemInst));
+}
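A minimal sketch of the expected call order when a transformation deletes an instruction (with `MD` and `DeadInst` as assumed names): memdep is updated first, so the reverse maps can redirect dependents to a dirty entry for the following instruction, and only then is the IR erased.

    MD.removeInstruction(DeadInst);   // patch up local/non-local/pointer caches
    DeadInst->eraseFromParent();      // now it is safe to delete the instruction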
+/// verifyRemoved - Verify that the specified instruction does not occur
+/// in our internal data structures.
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+ for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+ E = LocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ assert(I->second.getInst() != D &&
+ "Inst occurs in data structures");
+ }
+
+ for (CachedNonLocalPointerInfo::const_iterator I = NonLocalPointerDeps.begin(),
+ E = NonLocalPointerDeps.end(); I != E; ++I) {
+ assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+ const NonLocalDepInfo &Val = I->second.NonLocalDeps;
+ for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+ II != E; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
+ }
+
+ for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+ E = NonLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ const PerInstNLInfo &INLD = I->second;
+ for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+ EE = INLD.first.end(); II != EE; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+ E = ReverseLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+ E = ReverseNonLocalDeps.end();
+ I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseNonLocalPtrDepTy::const_iterator
+ I = ReverseNonLocalPtrDeps.begin(),
+ E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in rev NLPD map");
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
+ E = I->second.end(); II != E; ++II)
+ assert(*II != ValueIsLoadPair(D, false) &&
+ *II != ValueIsLoadPair(D, true) &&
+ "Inst occurs in ReverseNonLocalPtrDeps map");
+ }
+
+}
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 000000000000..e7e999cebeb9
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,87 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints it in a
+// (sufficiently prepared) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+namespace {
+ class ModuleDebugInfoPrinter : public ModulePass {
+ DebugInfoFinder Finder;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const;
+ };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true)
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+ return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+ Finder.processModule(M);
+ return false;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
+ E = Finder.compile_unit_end(); I != E; ++I) {
+ O << "Compile Unit: ";
+ DICompileUnit(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
+ E = Finder.subprogram_end(); I != E; ++I) {
+ O << "Subprogram: ";
+ DISubprogram(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
+ E = Finder.global_variable_end(); I != E; ++I) {
+ O << "GlobalVariable: ";
+ DIGlobalVariable(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.type_begin(),
+ E = Finder.type_end(); I != E; ++I) {
+ O << "Type: ";
+ DIType(*I).print(O);
+ O << '\n';
+ }
+}
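A hedged sketch of driving this printer programmatically; per the registration above, `opt -analyze -module-debuginfo` produces the same output from the command line. The explicit print() call stands in for what opt's -analyze driver does, and `M` is an assumed Module:

    ModulePass *P = createModuleDebugInfoPrinterPass();
    PassManager PM;
    PM.add(P);               // the pass manager takes ownership of P
    PM.run(M);               // runOnModule() collects the debug info
    P->print(outs(), &M);    // emit it, roughly what `opt -analyze` does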
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
new file mode 100644
index 000000000000..101c2d5b0285
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply returns "I don't know" for all queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+namespace {
+ /// NoAA - This class implements the -no-aa pass, which always returns "I
+ /// don't know" for alias queries. NoAA is unlike other alias analysis
+ /// implementations, in that it does not chain to a previous analysis. As
+ /// such it doesn't follow many of the rules that other alias analyses must.
+ ///
+ struct NoAA : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ NoAA() : ImmutablePass(ID) {
+ initializeNoAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+ virtual void initializePass() {
+ // Note: NoAA does not call InitializeAliasAnalysis because it's
+ // special and does not support chaining.
+ TD = getAnalysisIfAvailable<TargetData>();
+ }
+
+ virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+ return MayAlias;
+ }
+
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
+ }
+
+ virtual bool pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return false;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ return ModRef;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return ModRef;
+ }
+
+ virtual void deleteValue(Value *V) {}
+ virtual void copyValue(Value *From, Value *To) {}
+ virtual void addEscapingUse(Use &U) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, true)
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
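Illustrative only: with -no-aa selected, every query collapses to its most conservative answer; `AA`, `A`, and `B` are assumed to come from the enclosing pass.

    AliasAnalysis::AliasResult R =
        AA.alias(AliasAnalysis::Location(A), AliasAnalysis::Location(B));
    assert(R == AliasAnalysis::MayAlias && "NoAA never proves independence");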
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 000000000000..70dcd0df242d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,442 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) ||
+ isa<GetElementPtrInst>(Inst))
+ return true;
+
+ if (isa<CastInst>(Inst) &&
+ Inst->isSafeToSpeculativelyExecute())
+ return true;
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1)))
+ return true;
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ return false;
+}
+
+void PHITransAddr::dump() const {
+ if (Addr == 0) {
+ dbgs() << "PHITransAddr: null\n";
+ return;
+ }
+ dbgs() << "PHITransAddr: " << *Addr << "\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+
+
+static bool VerifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ // If this is a non-instruction value, there is nothing to do.
+ Instruction *I = dyn_cast<Instruction>(Expr);
+ if (I == 0) return true;
+
+ // If it's an instruction, it is either in InstInputs or its operands
+ // recursively are.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return true;
+ }
+
+ // If it isn't in the InstInputs list it is a subexpr incorporated into the
+ // address. Sanity check that it is phi translatable.
+ if (!CanPHITrans(I)) {
+ errs() << "Non phi translatable instruction found in PHITransAddr:\n";
+ errs() << *I << '\n';
+ llvm_unreachable("Either something is missing from InstInputs or "
+ "CanPHITrans is wrong.");
+ return false;
+ }
+
+ // Validate the operands of the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!VerifySubExpr(I->getOperand(i), InstInputs))
+ return false;
+
+ return true;
+}
+
+/// Verify - Check internal consistency of this data structure. If the
+/// structure is valid, it returns true. If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+ if (Addr == 0) return true;
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
+ if (!VerifySubExpr(Addr, Tmp))
+ return false;
+
+ if (!Tmp.empty()) {
+ errs() << "PHITransAddr contains extra instructions:\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ llvm_unreachable("This is unexpected.");
+ return false;
+ }
+
+ // a-ok.
+ return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it. This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(Addr);
+ return Inst == 0 || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return;
+
+ // If the instruction is in the InstInputs list, remove it.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return;
+ }
+
+ assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+ // Otherwise, it must have instruction inputs itself. Zap them recursively.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+ RemoveInstInputs(Op, InstInputs);
+ }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ // If this is a non-instruction value, it can't require PHI translation.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst == 0) return V;
+
+ // Determine whether 'Inst' is an input to our PHI translatable expression.
+ bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+ // Handle input instructions if needed.
+ if (isInput) {
+ if (Inst->getParent() != CurBB) {
+ // If it is an input defined in a different block, then it remains an
+ // input.
+ return Inst;
+ }
+
+ // If 'Inst' is defined in this block and is an input that needs to be phi
+ // translated, we need to incorporate the value into the expression or fail.
+
+ // In either case, the instruction itself isn't an input any longer.
+ InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+ // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+ // If this is a non-phi value, and it is analyzable, we can incorporate it
+ // into the expression by making all instruction operands be inputs.
+ if (!CanPHITrans(Inst))
+ return 0;
+
+ // All instruction operands are now inputs (and of course, they may also be
+ // defined in this block, so they may need to be phi translated themselves).
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+ InstInputs.push_back(Op);
+ }
+
+ // Ok, it must be an intermediate result (either because it started that way
+ // or because we just incorporated it into the expression). See if its
+ // operands need to be phi translated, and if so, reconstruct it.
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+ if (PHIIn == 0) return 0;
+ if (PHIIn == Cast->getOperand(0))
+ return Cast;
+
+ // Find an available version of this cast.
+
+ // Constants are trivial to find.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+ C, Cast->getType()));
+
+ // Otherwise we have to see if a casted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+ UI != E; ++UI) {
+ if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+ if (CastI->getOpcode() == Cast->getOpcode() &&
+ CastI->getType() == Cast->getType() &&
+ (!DT || DT->dominates(CastI->getParent(), PredBB)))
+ return CastI;
+ }
+ return 0;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ bool AnyChanged = false;
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
+ if (GEPOp == 0) return 0;
+
+ AnyChanged |= GEPOp != GEP->getOperand(i);
+ GEPOps.push_back(GEPOp);
+ }
+
+ if (!AnyChanged)
+ return GEP;
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) {
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ RemoveInstInputs(GEPOps[i], InstInputs);
+
+ return AddAsInput(V);
+ }
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+ UI != E; ++UI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
+ bool Mismatch = false;
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ if (GEPI->getOperand(i) != GEPOps[i]) {
+ Mismatch = true;
+ break;
+ }
+ if (!Mismatch)
+ return GEPI;
+ }
+ }
+ return 0;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
+ if (LHS == 0) return 0;
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+
+ // If the old 'LHS' was an input, add the new 'LHS' as an input.
+ if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+ RemoveInstInputs(BOp, InstInputs);
+ AddAsInput(LHS);
+ }
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ // If we simplified the operands, the LHS is no longer an input, but Res
+ // is.
+ RemoveInstInputs(LHS, InstInputs);
+ return AddAsInput(Res);
+ }
+
+ // If we didn't modify the add, just return it.
+ if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+ return Inst;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+ UI != E; ++UI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+ if (BO->getOpcode() == Instruction::Add &&
+ BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
+ return BO;
+ }
+
+ return 0;
+ }
+
+ // Otherwise, we failed.
+ return 0;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to Pred, updating our state to reflect any needed changes. If the
+/// dominator tree DT is non-null, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ assert(Verify() && "Invalid PHITransAddr!");
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+ assert(Verify() && "Invalid PHITransAddr!");
+
+ if (DT) {
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = 0;
+ }
+
+ return Addr == 0;
+}
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list. This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ unsigned NISize = NewInsts.size();
+
+ // Attempt to PHI translate with insertion.
+ Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+ // If successful, return the new value.
+ if (Addr) return Addr;
+
+ // If not, destroy any intermediate instructions inserted.
+ while (NewInsts.size() != NISize)
+ NewInsts.pop_back_val()->eraseFromParent();
+ return 0;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new instance of it.
+ PHITransAddr Tmp(InVal, TD);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ return Tmp.getAddr();
+
+ // If we don't have an available version of this value, it must be an
+ // instruction.
+ Instruction *Inst = cast<Instruction>(InVal);
+
+ // Handle cast of PHI translatable value.
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ // Otherwise insert a cast at the end of PredBB.
+ CastInst *New = CastInst::Create(Cast->getOpcode(),
+ OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result =
+ GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return 0;
+}
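A minimal sketch of using PHITransAddr the way MemoryDependenceAnalysis does above; `Ptr`, `BB`, `Pred`, `TD`, and `DT` are assumed to exist in the caller.

    PHITransAddr Addr(Ptr, TD);
    if (Addr.NeedsPHITranslationFromBlock(BB) &&
        Addr.IsPotentiallyPHITranslatable() &&
        !Addr.PHITranslateValue(BB, Pred, DT)) {   // returns true on failure
      Value *PtrInPred = Addr.getAddr();           // Ptr's value along Pred->BB
      (void)PtrInPred;
    }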
diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 000000000000..7c584daef734
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,522 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96]. For a CFG, backedges are removed and replaced by phony
+// edges to obtain a DAG and, with it, unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner. As described in
+// [Ball96], edges can be numbered such that, given a path number, the
+// corresponding path is recovered by following the CFG and updating the path
+// number at each edge.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
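A small worked example of this numbering (added for illustration; the diamond CFG and the NumPaths/Val quantities are the standard Ball-Larus ones, not identifiers from this file): for entry -> {A, B} -> exit,

    NumPaths(exit)  = 1
    NumPaths(A)     = NumPaths(B) = 1
    NumPaths(entry) = NumPaths(A) + NumPaths(B) = 2
    Val(entry->A)   = 0,   Val(entry->B) = NumPaths(A) = 1,   all other edges 0

so the path entry->A->exit sums its edge increments to 0 and entry->B->exit sums to 1, giving each of the two paths a unique number in [0, NumPaths(entry)).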
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+#include <stack>
+#include <string>
+#include <utility>
+#include <sstream>
+
+using namespace llvm;
+
+// Are we enabling early termination?
+static cl::opt<bool> ProcessEarlyTermination(
+ "path-profile-early-termination", cl::Hidden,
+ cl::desc("In path profiling, insert extra instrumentation to account for "
+ "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+ return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+ return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+ _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+ return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+ _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+ return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+ return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+ return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+ return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+ return(_succEdges.end());
+}
+
+// Returns the number of successor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+ return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+ _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+ removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+ _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+ removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented. If BasicBlock
+// is null then returns "<null>". If BasicBlock has no name, then
+// "<unnamed>" is returned. Intended for use with debug output.
+std::string BallLarusNode::getName() {
+ std::stringstream name;
+
+ if(getBlock() != NULL) {
+ if(getBlock()->hasName()) {
+ std::string tempName(getBlock()->getName());
+ name << tempName.c_str() << " (" << _uid << ")";
+ } else
+ name << "<unnamed> (" << _uid << ")";
+ } else
+ name << "<null> (" << _uid << ")";
+
+ return name.str();
+}
+
+// Removes an edge from an edgeVector. Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+ // TODO: Avoid linear scan by using a set instead
+ for(BLEdgeIterator i = v.begin(),
+ end = v.end();
+ i != end;
+ ++i) {
+ if((*i) == e) {
+ v.erase(i);
+ break;
+ }
+ }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+ return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+ return(_target);
+}
+
+// Gets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+ return _edgeType;
+}
+
+// Sets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+ _edgeType = type;
+}
+
+// Returns the weight of this edge. Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+ return(_weight);
+}
+
+// Sets the weight of the edge. Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+ _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+ return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+ _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+ return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+ _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+ return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+ _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+ return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+ BLBlockNodeMap inDag;
+ std::stack<BallLarusNode*> dfsStack;
+
+ _root = addNode(&(_function.getEntryBlock()));
+ _exit = addNode(NULL);
+
+ // start search from root
+ dfsStack.push(getRoot());
+
+ // dfs to add each bb into the dag
+ while(dfsStack.size())
+ buildNode(inDag, dfsStack);
+
+ // put in the final edge
+ addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+ ++edge)
+ delete (*edge);
+
+ for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+ ++node)
+ delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+ BallLarusNode* node;
+ std::queue<BallLarusNode*> bfsQueue;
+ bfsQueue.push(getExit());
+
+ while(bfsQueue.size() > 0) {
+ node = bfsQueue.front();
+
+ DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+ bfsQueue.pop();
+ unsigned prevPathNumber = node->getNumberPaths();
+ calculatePathNumbersFrom(node);
+
+ // Check for DAG splitting
+ if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+ // Add new phony edge from the split-node to the DAG's exit
+ BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+ exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+ // Counters to handle the possibility of a multi-graph
+ BasicBlock* oldTarget = 0;
+ unsigned duplicateNumber = 0;
+
+ // Iterate through each successor edge, adding phony edges
+ for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+ if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+ // is this edge a duplicate?
+ if( oldTarget != (*succ)->getTarget()->getBlock() )
+ duplicateNumber = 0;
+
+ // create the new phony edge: root -> succ
+ BallLarusEdge* rootEdge =
+ addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+ rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+ rootEdge->setRealEdge(*succ);
+
+ // split on this edge and reference its exit/root phony edges
+ (*succ)->setType(BallLarusEdge::SPLITEDGE);
+ (*succ)->setPhonyRoot(rootEdge);
+ (*succ)->setPhonyExit(exitEdge);
+ (*succ)->setWeight(0);
+ }
+ }
+
+ calculatePathNumbersFrom(node);
+ }
+
+ DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+ << node->getNumberPaths() << ".\n");
+
+ if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+ DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+ for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+ pred != end; pred++) {
+ if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+ (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ BallLarusNode* nextNode = (*pred)->getSource();
+ // not yet visited?
+ if(nextNode->getNumberPaths() == 0)
+ bfsQueue.push(nextNode);
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+ return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+ return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+ return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+ return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+ for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+ (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its immediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+ BallLarusNode* currentNode = dfsStack.top();
+ BasicBlock* currentBlock = currentNode->getBlock();
+
+ if(currentNode->getColor() != BallLarusNode::WHITE) {
+ // we have already visited this node
+ dfsStack.pop();
+ currentNode->setColor(BallLarusNode::BLACK);
+ } else {
+ // are there any external procedure calls?
+ if( ProcessEarlyTermination ) {
+ for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+ bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+ bbCurrent++ ) {
+ Instruction& instr = *bbCurrent;
+ if( instr.getOpcode() == Instruction::Call ) {
+ BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+ callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+ break;
+ }
+ }
+ }
+
+ TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+ || isa<UnwindInst>(terminator))
+ addEdge(currentNode, getExit(),0);
+
+ currentNode->setColor(BallLarusNode::GRAY);
+ inDag[currentBlock] = currentNode;
+
+ BasicBlock* oldSuccessor = 0;
+ unsigned duplicateNumber = 0;
+
+ // iterate through this node's successors
+ for(succ_iterator successor = succ_begin(currentBlock),
+ succEnd = succ_end(currentBlock); successor != succEnd;
+ oldSuccessor = *successor, ++successor ) {
+ BasicBlock* succBB = *successor;
+
+ // is this edge a duplicate?
+ if (oldSuccessor == succBB)
+ duplicateNumber++;
+ else
+ duplicateNumber = 0;
+
+ buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+ }
+ }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+ dfsStack, BallLarusNode* currentNode,
+ BasicBlock* succBB, unsigned duplicateCount) {
+ BallLarusNode* succNode = inDag[succBB];
+
+ if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+ // visited node and forward edge
+ addEdge(currentNode, succNode, duplicateCount);
+ } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+ // visited node and back edge
+ DEBUG(dbgs() << "Backedge detected.\n");
+ addBackedge(currentNode, succNode, duplicateCount);
+ } else {
+ BallLarusNode* childNode;
+ // not visited node and forward edge
+ if(succNode) // an unvisited node that is child of a gray node
+ childNode = succNode;
+ else { // an unvisited node that is a child of an unvisited node
+ childNode = addNode(succBB);
+ inDag[succBB] = childNode;
+ }
+ addEdge(currentNode, childNode, duplicateCount);
+ dfsStack.push(childNode);
+ }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+ if(node == getExit())
+ // The Exit node must be the base case
+ node->setNumberPaths(1);
+ else {
+ unsigned sumPaths = 0;
+ BallLarusNode* succNode;
+
+ for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; succ++) {
+ if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+ (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ (*succ)->setWeight(sumPaths);
+ succNode = (*succ)->getTarget();
+
+ if( !succNode->getNumberPaths() )
+ return;
+ sumPaths += succNode->getNumberPaths();
+ }
+
+ node->setNumberPaths(sumPaths);
+ }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call delete on each
+// pointer created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+ return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call delete on each
+// pointer created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor. Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+ BallLarusNode* newNode = createNode(BB);
+ _nodes.push_back(newNode);
+ return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+ _edges.push_back(newEdge);
+ source->addSuccEdge(newEdge);
+ target->addPredEdge(newEdge);
+ return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
+ childEdge->setType(BallLarusEdge::BACKEDGE);
+
+ childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
+ childEdge->setPhonyExit(addEdge(source, getExit(),0));
+
+ childEdge->getPhonyRoot()->setRealEdge(childEdge);
+ childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
+
+ childEdge->getPhonyExit()->setRealEdge(childEdge);
+ childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
+ _backEdges.push_back(childEdge);
+}
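
To make the numbering scheme above concrete, here is a minimal standalone sketch (independent of the classes in this file) that applies the same edge-increment rule as calculatePathNumbersFrom to a hand-built diamond DAG, then decodes a path number back into a node sequence using the "largest weight <= remaining value" rule that getNextEdge in PathProfileInfo.cpp below applies. The node numbering and adjacency list are invented purely for illustration.

// Standalone sketch of Ball-Larus path numbering on a tiny, hand-built DAG.
#include <cstdio>
#include <vector>

struct Edge { int target; unsigned weight; };

int main() {
  // Diamond DAG: 0 -> {1, 2}, 1 -> 3, 2 -> 3; node 3 is the exit.
  std::vector<std::vector<Edge>> succ = {
    {{1, 0}, {2, 0}},  // node 0 (root)
    {{3, 0}},          // node 1
    {{3, 0}},          // node 2
    {}                 // node 3 (exit)
  };
  std::vector<unsigned> numPaths(succ.size(), 0);

  // Visit nodes in reverse topological order (exit first). As in
  // calculatePathNumbersFrom, an edge's weight is the number of paths through
  // the successors already processed before it, and a node's path count is
  // the sum over all of its successors.
  const int order[] = {3, 2, 1, 0};
  for (int n : order) {
    if (succ[n].empty()) { numPaths[n] = 1; continue; }
    unsigned sum = 0;
    for (Edge &e : succ[n]) {
      e.weight = sum;
      sum += numPaths[e.target];
    }
    numPaths[n] = sum;
  }
  std::printf("paths from root: %u\n", numPaths[0]);  // 2 for the diamond

  // Decode path number 1 back into nodes: at each node take the edge with the
  // largest weight that is still <= the remaining value, then subtract it.
  unsigned remaining = 1;
  int node = 0;
  std::printf("path 1: %d", node);
  while (!succ[node].empty()) {
    const Edge *best = 0;
    for (const Edge &e : succ[node])
      if (e.weight <= remaining && (!best || best->weight < e.weight))
        best = &e;
    remaining -= best->weight;
    node = best->target;
    std::printf(" -> %d", node);
  }
  std::printf("\n");  // prints: path 1: 0 -> 2 -> 3
  return 0;
}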
diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 000000000000..b361d3f4fa94
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,434 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+ class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+ public:
+ PathProfileLoaderPass() : ModulePass(ID) { }
+ ~PathProfileLoaderPass();
+
+ // this pass doesn't change anything (only loads information)
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // the full name of the loader pass
+ virtual const char* getPassName() const {
+ return "Path Profiling Information Loader";
+ }
+
+ // required since this pass implements multiple inheritance
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ // entry point to run the pass
+ bool runOnModule(Module &M);
+
+ // pass identification
+ static char ID;
+
+ private:
+ // make a reference table to refer to functions by number
+ void buildFunctionRefs(Module &M);
+
+ // process argument info of a program from the input file
+ void handleArgumentInfo();
+
+ // process path number information from the input file
+ void handlePathInfo();
+
+ // array of references to the functions in the module
+ std::vector<Function*> _functions;
+
+ // path profile file handle
+ FILE* _file;
+
+ // path profile file name
+ std::string _filename;
+ };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+ NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+ "path-profile-loader",
+ "Load path profile information from file",
+ false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+ return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+ unsigned duplicateNumber)
+ : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+ double countStdDev, PathProfileInfo* ppi)
+ : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+ return 100 * double(_count) /
+ double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+ unsigned int pathNumber) {
+ BallLarusEdge* best = 0;
+
+ for( BLEdgeIterator next = node->succBegin(),
+ end = node->succEnd(); next != end; next++ ) {
+ if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+ (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+ (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+ (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+ best = *next;
+ }
+
+ return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+ increment -= next->getWeight();
+
+ if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+ next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getTarget() != _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getSource()->getBlock(),
+ next->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+ next->getTarget() == _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getSource() == _ppi->_currentDag->getRoot() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+ increment -= next->getWeight();
+
+ // add block to the block list if it is a real edge
+ if( next->getType() == BallLarusEdge::NORMAL)
+ pbv->push_back (currentNode->getBlock());
+ // make the back edge the last edge since we are at the end
+ else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+ pbv->push_back (currentNode->getBlock());
+ pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+ }
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+ BallLarusNode* root = _ppi->_currentDag->getRoot();
+ BallLarusEdge* edge = getNextEdge(root, _number);
+
+ if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+ return edge->getTarget()->getBlock();
+
+ return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+ if (_currentDag)
+ delete _currentDag;
+}
+
+// set the function for which paths are currently being processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+ // Make sure it exists
+ if (!F) return;
+
+ if (_currentDag)
+ delete _currentDag;
+
+ _currentFunction = F;
+ _currentDag = new BallLarusDag(*F);
+ _currentDag->init();
+ _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+ return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+ return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+ return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+ return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+ return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+ return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+ return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+ for( FunctionPathIterator funcNext = _functionPaths.begin(),
+ funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+ for( ProfilePathIterator pathNext = funcNext->second.begin(),
+ pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+ delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+ // get the filename and setup the module's function references
+ _filename = PathProfileInfoFilename;
+ buildFunctionRefs (M);
+
+ if (!(_file = fopen(_filename.c_str(), "rb"))) {
+ errs () << "error: input '" << _filename << "' file does not exist.\n";
+ return false;
+ }
+
+ ProfilingType profType;
+
+ while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+ switch (profType) {
+ case ArgumentInfo:
+ handleArgumentInfo ();
+ break;
+ case PathInfo:
+ handlePathInfo ();
+ break;
+ default:
+ errs () << "error: bad path profiling file syntax, " << profType << "\n";
+ fclose (_file);
+ return false;
+ }
+ }
+
+ fclose (_file);
+
+ return true;
+}
+
+// create a reference table for looking up functions by the numbers used in
+// the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+ _functions.push_back(0); // make the 0 index a null pointer
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+ _functions.push_back(F);
+ }
+}
+
+// handle command-line argument info in the profile output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+ // get the argument list's length
+ unsigned savedArgsLength;
+ if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+ errs() << "warning: argument info header/data mismatch\n";
+ return;
+ }
+
+ // allocate a buffer, and get the arguments
+ char* args = new char[savedArgsLength+1];
+ if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+ errs() << "warning: argument info header/data mismatch\n";
+
+ args[savedArgsLength] = '\0';
+ argList = std::string(args);
+ delete [] args; // cleanup dynamic string
+
+ // skip the padding so the next record starts on a 4-byte boundary
+ if (savedArgsLength & 3)
+ fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+ // get the number of functions in this profile
+ unsigned functionCount;
+ if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+ errs() << "warning: path info header/data mismatch\n";
+ return;
+ }
+
+ // gather path information for each function
+ for (unsigned i = 0; i < functionCount; i++) {
+ PathProfileHeader pathHeader;
+ if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+ errs() << "warning: bad header for path function info\n";
+ break;
+ }
+
+ Function* f = _functions[pathHeader.fnNumber];
+
+ // dynamically allocate a table to store path numbers
+ PathProfileTableEntry* pathTable =
+ new PathProfileTableEntry[pathHeader.numEntries];
+
+ if( fread(pathTable, sizeof(PathProfileTableEntry),
+ pathHeader.numEntries, _file) != pathHeader.numEntries) {
+ delete [] pathTable;
+ errs() << "warning: path function info header/data mismatch\n";
+ return;
+ }
+
+ // Build a new path for the current function
+ unsigned int totalPaths = 0;
+ for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+ totalPaths += pathTable[j].pathCounter;
+ _functionPaths[f][pathTable[j].pathNumber]
+ = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+ 0, this);
+ }
+
+ _functionPathCounts[f] = totalPaths;
+
+ delete [] pathTable;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NoProfile PathProfileInfo implementation
+//
+
+namespace {
+ struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoPathProfileInfo() : ImmutablePass(ID) {
+ initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoPathProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+ "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
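
As a usage sketch (not part of this change), a client pass could consume the loaded profile roughly as follows. The pass name PrintPathProfile is invented and the registration/initialization boilerplate is omitted; the PathProfileInfo calls themselves mirror the ones PathProfileVerifier makes in the next file.

#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/PathProfileInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  // Hypothetical read-only client of the path profile loader.
  struct PrintPathProfile : public ModulePass {
    static char ID;
    PrintPathProfile() : ModulePass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      AU.addRequired<PathProfileInfo>();
    }

    virtual bool runOnModule(Module &M) {
      PathProfileInfo &PPI = getAnalysis<PathProfileInfo>();
      for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;
        PPI.setCurrentFunction(F);
        errs() << F->getName() << ": " << PPI.pathsRun() << "/"
               << PPI.getPotentialPathCount() << " paths executed\n";
        for (ProfilePathIterator I = PPI.pathBegin(), PE = PPI.pathEnd();
             I != PE; ++I)
          errs() << "  path #" << I->second->getNumber() << " ran "
                 << I->second->getCount() << " times\n";
      }
      return false;  // read-only, nothing modified
    }
  };
}

char PrintPathProfile::ID = 0;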
diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 000000000000..0ae734e259db
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,207 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from current path profile
+// information
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+ class PathProfileVerifier : public ModulePass {
+ private:
+ bool runOnModule(Module &M);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfileVerifier() : ModulePass(ID) {
+ initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+
+ virtual const char *getPassName() const {
+ return "Path Profiler Verifier";
+ }
+
+ // The verifier requires the path profile and edge profile.
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+ };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+ cl::init("edgefrompath.llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Edge profile file generated by -path-profile-verifier"),
+ cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+ "Compare the path profile derived edge profile against the "
+ "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+ return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<PathProfileInfo>();
+ AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to accumulate the total
+// edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+ PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+ // set up a data structure that maps path edges to indices into an
+ // array of edge counters
+ NestedBlockToIndexMap arrayMap;
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ arrayMap[0][F->begin()][0] = i++;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ unsigned duplicate = 0;
+ BasicBlock* prev = 0;
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+ prev = TI->getSuccessor(s), ++s) {
+ if (prev == TI->getSuccessor(s))
+ duplicate++;
+ else duplicate = 0;
+
+ arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+ }
+ }
+ }
+
+ std::vector<unsigned> edgeArray(i);
+
+ // iterate through each path and increment the edge counters as needed
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ pathProfileInfo.setCurrentFunction(F);
+
+ DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+ << pathProfileInfo.pathsRun()
+ << "/" << pathProfileInfo.getPotentialPathCount()
+ << " potential paths\n");
+
+ for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+ endPath = pathProfileInfo.pathEnd();
+ nextPath != endPath; nextPath++ ) {
+ ProfilePath* currentPath = nextPath->second;
+
+ ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+ DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+ << currentPath->getCount() << "\n");
+ // setup the entry edge (normally path profiling doesn't care about this)
+ if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+ edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+ += currentPath->getCount();
+
+ for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+ endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+ if (nextEdge != pev->begin())
+ DEBUG(dbgs() << " :: ");
+
+ BasicBlock* source = nextEdge->getSource();
+ BasicBlock* target = nextEdge->getTarget();
+ unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+ DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
+ << "}--> " << target->getNameStr());
+
+ // Ensure all the referenced edges exist
+ // TODO: make this a separate function
+ if( !arrayMap.count(source) ) {
+ errs() << " error [" << F->getNameStr() << "()]: source '"
+ << source->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source].count(target) ) {
+ errs() << " error [" << F->getNameStr() << "()]: target '"
+ << target->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source][target].count(duplicateNumber) ) {
+ errs() << " error [" << F->getNameStr() << "()]: edge "
+ << source->getNameStr() << " -> " << target->getNameStr()
+ << " duplicate number " << duplicateNumber
+ << " does not exist in the array map.\n";
+ } else {
+ edgeArray[arrayMap[source][target][duplicateNumber]]
+ += currentPath->getCount();
+ }
+ }
+
+ DEBUG(errs() << "\n");
+
+ delete pev;
+ }
+ }
+
+ std::string errorInfo;
+ std::string filename = EdgeProfileFilename;
+
+ // Open a handle to the file
+ FILE* edgeFile = fopen(filename.c_str(),"wb");
+
+ if (!edgeFile) {
+ errs() << "error: unable to open file '" << filename << "' for output.\n";
+ return false;
+ }
+
+ errs() << "Generating edge profile '" << filename << "' ...\n";
+
+ // write argument info
+ unsigned type = ArgumentInfo;
+ unsigned num = pathProfileInfo.argList.size();
+ int zeros = 0;
+
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+ fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
+ if (num&3)
+ fwrite(&zeros, 1, 4-(num&3), edgeFile);
+
+ type = EdgeInfo;
+ num = edgeArray.size();
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+
+ // write each edge to the file
+ for( std::vector<unsigned>::iterator s = edgeArray.begin(),
+ e = edgeArray.end(); s != e; s++)
+ fwrite(&*s, sizeof (unsigned), 1, edgeFile);
+
+ fclose (edgeFile);
+
+ return true;
+}
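
For reference, both the loader above and this verifier use the same on-disk framing: a 4-byte ProfilingType tag, a 4-byte count or length, the payload, then zero padding up to the next 4-byte boundary. The standalone sketch below writes one argument record in that framing; the tag value kArgumentInfo and the output filename are stand-ins for illustration, and the real constants live in ProfileInfoTypes.h.

#include <cstdio>
#include <string>

enum { kArgumentInfo = 2 };  // stand-in tag value, not the real enum constant

// Write one record: tag, payload length, payload, zero padding to 4 bytes.
static void writeArgumentRecord(FILE *f, const std::string &args) {
  unsigned type = kArgumentInfo;
  unsigned num = (unsigned)args.size();
  int zeros = 0;
  std::fwrite(&type, sizeof(unsigned), 1, f);
  std::fwrite(&num, sizeof(unsigned), 1, f);
  std::fwrite(args.c_str(), 1, num, f);
  if (num & 3)  // pad so the next record starts on a 4-byte boundary
    std::fwrite(&zeros, 1, 4 - (num & 3), f);
}

int main() {
  FILE *f = std::fopen("example.llvmprof.out", "wb");
  if (!f) return 1;
  writeArgumentRecord(f, "a.out -O2");
  std::fclose(f);
  return 0;
}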
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
new file mode 100644
index 000000000000..6ed27297923f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,51 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DominatorInternals.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true)
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+PostDominatorTree::~PostDominatorTree() {
+ delete DT;
+}
+
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+
+FunctionPass* llvm::createPostDomTree() {
+ return new PostDominatorTree();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 000000000000..b594e2ba5506
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,426 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+ "profile-estimator-loop-weight", cl::init(10),
+ cl::value_desc("loop-weight"),
+ cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+ class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
+ double ExecCount;
+ LoopInfo *LI;
+ std::set<BasicBlock*> BBToVisit;
+ std::map<Loop*,double> LoopExitWeights;
+ std::map<Edge,double> MinimalWeight;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileEstimatorPass(const double execcount = 0)
+ : FunctionPass(ID), ExecCount(execcount) {
+ initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
+ if (execcount == 0) ExecCount = LoopWeight;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information estimator";
+ }
+
+ /// run - Estimate the profile information from the specified file.
+ virtual bool runOnFunction(Function &F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual void recurseBasicBlock(BasicBlock *BB);
+
+ void inline printEdgeWeight(Edge);
+ };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+
+namespace llvm {
+ char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
+
+ FunctionPass *createProfileEstimatorPass() {
+ return new ProfileEstimatorPass();
+ }
+
+ /// createProfileEstimatorPass - This function returns a Pass that estimates
+ /// profiling information using the given loop execution count.
+ Pass *createProfileEstimatorPass(const unsigned execcount) {
+ return new ProfileEstimatorPass(execcount);
+ }
+}
+
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+ DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+ DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
+ << format("%20.20g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must be equal to the block weight, which must
+// in turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+ // Break the recursion if this BasicBlock was already visited.
+ if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+ // Read the LoopInfo for this block.
+ bool BBisHeader = LI->isLoopHeader(BB);
+ Loop* BBLoop = LI->getLoopFor(BB);
+
+ // To get the block weight, read all incoming edges.
+ double BBWeight = 0;
+ std::set<BasicBlock*> ProcessedPreds;
+ for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ // If this block was not considered already, add weight.
+ Edge edge = getEdge(*bbi,BB);
+ double w = getEdgeWeight(edge);
+ if (ProcessedPreds.insert(*bbi).second) {
+ BBWeight += ignoreMissing(w);
+ }
+ // If this block is a loop header and the predecessor is contained in this
+ // loop, the edge is a backedge; continue and do not check whether the
+ // value is valid.
+ if (BBisHeader && BBLoop->contains(*bbi)) {
+ printEdgeError(edge, "but is backedge, continuing");
+ continue;
+ }
+ // If the edge's value is missing (and this is not a loop header and not a
+ // backedge), return; this block is currently not estimatable.
+ if (w == MissingValue) {
+ printEdgeError(edge, "returning");
+ return;
+ }
+ }
+ if (getExecutionCount(BB) != MissingValue) {
+ BBWeight = getExecutionCount(BB);
+ }
+
+ // Fetch all necessary information for current block.
+ SmallVector<Edge, 8> ExitEdges;
+ SmallVector<Edge, 8> Edges;
+ if (BBLoop) {
+ BBLoop->getExitEdges(ExitEdges);
+ }
+
+ // If this is a loop header, consider the following:
+ // Exactly the flow that is entering this block, must exit this block too. So
+ // do the following:
+ // *) get all the exit edges, read the flow that is already leaving this
+ // loop, remember the edges that do not have any flow on them right now.
+ // (The edges that already have flow on them are most likely exiting edges of
+ // other loops; do not touch those flows because the previously calculated
+ // loop headers would not be exact anymore.)
+ // *) In case there is not a single exiting edge left, create one at the loop
+ // latch to prevent the flow from building up in the loop.
+ // *) Take the flow that is not leaving the loop already and distribute it on
+ // the remaining exiting edges.
+ // (This ensures that all flow that enters the loop also leaves it.)
+ // *) Increase the flow into the loop by increasing the weight of this block.
+ // There is at least one incoming backedge that will bring us this flow later
+ // on. (So that the flow condition in this node is valid again.)
+ if (BBisHeader) {
+ double incoming = BBWeight;
+ // Subtract the flow leaving the loop.
+ std::set<Edge> ProcessedExits;
+ for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+ ee = ExitEdges.end(); ei != ee; ++ei) {
+ if (ProcessedExits.insert(*ei).second) {
+ double w = getEdgeWeight(*ei);
+ if (w == MissingValue) {
+ Edges.push_back(*ei);
+ // Check if there is a necessary minimal weight, if yes, subtract it
+ // from weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ incoming -= MinimalWeight[*ei];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ } else {
+ incoming -= w;
+ }
+ }
+ }
+ // If no exit edges, create one:
+ if (Edges.size() == 0) {
+ BasicBlock *Latch = BBLoop->getLoopLatch();
+ if (Latch) {
+ Edge edge = getEdge(Latch,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ edge = getEdge(Latch, BB);
+ EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+ printEdgeWeight(edge);
+ }
+ }
+
+ // Distribute the remaining weight to the exiting edges. To prevent fractions
+ // from building up and provoking precision problems, the weight to be
+ // distributed is split and rounded; the last edge gets a somewhat bigger
+ // value, but we are close enough for an estimation.
+ double fraction = floor(incoming/Edges.size());
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ double w = 0;
+ if (ei != (ee-1)) {
+ w = fraction;
+ incoming -= fraction;
+ } else {
+ w = incoming;
+ }
+ EdgeInformation[BB->getParent()][*ei] += w;
+ // Read necessary minimal weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+
+ // Add a minimal weight to the paths to all exit edges; this is used to
+ // ensure that enough flow reaches these edges.
+ Path p;
+ const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+ while (Dest != BB) {
+ const BasicBlock *Parent = p.find(Dest)->second;
+ Edge e = getEdge(Parent, Dest);
+ if (MinimalWeight.find(e) == MinimalWeight.end()) {
+ MinimalWeight[e] = 0;
+ }
+ MinimalWeight[e] += w;
+ DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+ Dest = Parent;
+ }
+ }
+ // Increase flow into the loop.
+ BBWeight *= (ExecCount+1);
+ }
+
+ BlockInformation[BB->getParent()][BB] = BBWeight;
+ // Up until now we considered only the loop exiting edges, now we have a
+ // definite block weight and must distribute this onto the outgoing edges.
+ // Since there may already be flow attached to some of the edges, read this
+ // flow first and remember the edges that still have no flow attached.
+ Edges.clear();
+ std::set<BasicBlock*> ProcessedSuccs;
+
+ succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // Also check for (BB,0) edges that may already contain some flow. (But only
+ // in case there are no successors.)
+ if (bbi == bbe) {
+ Edge edge = getEdge(BB,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ }
+ for ( ; bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ Edge edge = getEdge(BB,*bbi);
+ double w = getEdgeWeight(edge);
+ if (w != MissingValue) {
+ BBWeight -= getEdgeWeight(edge);
+ } else {
+ Edges.push_back(edge);
+ // If a minimal weight is necessary, reserve it by subtracting it from the
+ // block weight; it is re-added later on.
+ if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+ BBWeight -= MinimalWeight[edge];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+ }
+ }
+ }
+ }
+
+ double fraction = floor(BBWeight/Edges.size());
+ // Finally we know what flow is still not leaving the block, distribute this
+ // flow onto the empty edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ if (ei != (ee-1)) {
+ EdgeInformation[BB->getParent()][*ei] += fraction;
+ BBWeight -= fraction;
+ } else {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight;
+ }
+ // Re-add the minimal necessary weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+ }
+
+ // This block is visited, mark this before the recursion.
+ BBToVisit.erase(BB);
+
+ // Recurse into successors.
+ for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Fetch LoopInfo and clear ProfileInfo for this function.
+ LI = &getAnalysis<LoopInfo>();
+ FunctionInformation.erase(&F);
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ BBToVisit.clear();
+
+ // Mark all blocks as to visit.
+ for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+ BBToVisit.insert(bi);
+
+ // Clear Minimal Edges.
+ MinimalWeight.clear();
+
+ DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+
+ // Since the entry block is the first one and has no predecessors, the edge
+ // (0,entry) is inserted with the starting block weight (2^32).
+ BasicBlock *entry = &F.getEntryBlock();
+ BlockInformation[&F][entry] = pow(2.0, 32.0);
+ Edge edge = getEdge(0,entry);
+ EdgeInformation[&F][edge] = BlockInformation[&F][entry];
+ printEdgeWeight(edge);
+
+ // Since recurseBasicBlock() may return with a block which was not fully
+ // estimated, call recurseBasicBlock() until everything is calculated.
+ bool cleanup = false;
+ recurseBasicBlock(entry);
+ while (BBToVisit.size() > 0 && !cleanup) {
+ // Remember number of open blocks, this is later used to check if progress
+ // was made.
+ unsigned size = BBToVisit.size();
+
+ // Try to calculate all blocks in turn.
+ for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+ be = BBToVisit.end(); bi != be; ++bi) {
+ recurseBasicBlock(*bi);
+ // If at least one block was finished, break because iterator may be
+ // invalid.
+ if (BBToVisit.size() < size) break;
+ }
+
+ // If there was not a single block resolved, make some assumptions.
+ if (BBToVisit.size() == size) {
+ bool found = false;
+ for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
+ (BBI != BBE) && (!found); ++BBI) {
+ BasicBlock *BB = *BBI;
+ // Try each predecessor to see if its edge can be assumed.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ (bbi != bbe) && (!found); ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ // Check that edge from predecessor is still free.
+ if (w == MissingValue) {
+ // Check if there is a cycle from this block to the predecessor.
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+ if (Dest != *bbi) {
+ // If there is no cycle, just set the edge weight to 0.
+ EdgeInformation[&F][e] = 0;
+ DEBUG(dbgs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ found = true;
+ }
+ }
+ }
+ }
+ if (!found) {
+ cleanup = true;
+ DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
+ }
+ }
+ }
+ // In case there was no safe way to assume edges, as a last measure
+ // set _everything_ to zero.
+ if (cleanup) {
+ FunctionInformation[&F] = 0;
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ const BasicBlock *BB = &(*FI);
+ BlockInformation[&F][BB] = 0;
+ const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
+ if (predi == prede) {
+ Edge e = getEdge(0,BB);
+ setEdgeWeight(e,0);
+ }
+ for (;predi != prede; ++predi) {
+ Edge e = getEdge(*predi,BB);
+ setEdgeWeight(e,0);
+ }
+ succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+ if (succi == succe) {
+ Edge e = getEdge(BB,0);
+ setEdgeWeight(e,0);
+ }
+ for (;succi != succe; ++succi) {
+ Edge e = getEdge(*succi,BB);
+ setEdgeWeight(e,0);
+ }
+ }
+ }
+
+ return false;
+}
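
The rounding scheme used by both distribution loops above can be isolated in a few lines. distribute below is a hypothetical helper written for illustration, mirroring the floor-and-remainder logic of recurseBasicBlock: every outgoing edge but the last gets floor(weight / edges), and the last edge absorbs the remainder so the flow condition still holds exactly.

#include <cmath>
#include <cstdio>
#include <vector>

// Split blockWeight across numEdges outgoing edges: every edge but the last
// gets floor(blockWeight / numEdges); the last edge absorbs the remainder so
// the flow condition (in-flow == block weight == out-flow) is preserved.
static std::vector<double> distribute(double blockWeight, unsigned numEdges) {
  std::vector<double> out(numEdges, 0.0);
  double fraction = std::floor(blockWeight / numEdges);
  double remaining = blockWeight;
  for (unsigned i = 0; i < numEdges; ++i) {
    if (i + 1 != numEdges) {
      out[i] = fraction;
      remaining -= fraction;
    } else {
      out[i] = remaining;  // last edge takes whatever is left
    }
  }
  return out;
}

int main() {
  std::vector<double> w = distribute(10.0, 3);  // yields 3, 3, 4
  double sum = 0;
  for (unsigned i = 0; i < w.size(); ++i) {
    std::printf("%g ", w[i]);
    sum += w[i];
  }
  std::printf("| sum = %g\n", sum);  // equals the original block weight
  return 0;
}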
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 000000000000..173de2c02791
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,1105 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-info"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+#include <queue>
+#include <limits>
+using namespace llvm;
+
+namespace llvm {
+ template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
+
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+ MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+ if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
+
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
+
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(BB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ double Count = MissingValue;
+
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // Are there zero predecessors of this block?
+ if (PI == PE) {
+ Edge e = getEdge(0, BB);
+ Count = getEdgeWeight(e);
+ } else {
+ // Otherwise, if there are predecessors, the execution count of this block is
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ Count = 0;
+ for (; PI != PE; ++PI) {
+ const BasicBlock *P = *PI;
+ if (ProcessedPreds.insert(P).second) {
+ double w = getEdgeWeight(getEdge(P, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ // If the predecessors did not suffice to get block weight, try successors.
+ if (Count == MissingValue) {
+
+ succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+ // Are there zero successors of this block?
+ if (SI == SE) {
+ Edge e = getEdge(BB,0);
+ Count = getEdgeWeight(e);
+ } else {
+ std::set<const BasicBlock*> ProcessedSuccs;
+ Count = 0;
+ for (; SI != SE; ++SI)
+ if (ProcessedSuccs.insert(*SI).second) {
+ double w = getEdgeWeight(getEdge(BB, *SI));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineBasicBlock *MBB) {
+ std::map<const MachineFunction*, BlockCounts>::iterator J =
+ BlockInformation.find(MBB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(MBB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
+ std::map<const Function*, double>::iterator J =
+ FunctionInformation.find(F);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ // isDeclaration() is checked here and not at the start of the function to
+ // allow functions without a body to still have an execution count.
+ if (F->isDeclaration()) return MissingValue;
+
+ double Count = getExecutionCount(&F->getEntryBlock());
+ if (Count != MissingValue) FunctionInformation[F] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineFunction *MF) {
+ std::map<const MachineFunction*, double>::iterator J =
+ FunctionInformation.find(MF);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ double Count = getExecutionCount(&MF->front());
+ if (Count != MissingValue) FunctionInformation[MF] = Count;
+ return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ setExecutionCount(const BasicBlock *BB, double w) {
+ DEBUG(dbgs() << "Creating Block " << BB->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ setExecutionCount(const MachineBasicBlock *MBB, double w) {
+ DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+ double oldw = getEdgeWeight(e);
+ assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+ DEBUG(dbgs() << "Adding to Edge " << e
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ addExecutionCount(const BasicBlock *BB, double w) {
+ double oldw = getExecutionCount(BB);
+ assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+ DEBUG(dbgs() << "Adding to Block " << BB->getName()
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J == BlockInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
+ J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting" << e << "\n");
+ J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceEdge(const Edge &oldedge, const Edge &newedge) {
+ double w;
+ if ((w = getEdgeWeight(newedge)) == MissingValue) {
+ w = getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
+ } else {
+ w += getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
+ }
+ setEdgeWeight(newedge,w);
+ removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+ GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+ Path &P, unsigned Mode) {
+ const BasicBlock *BB = 0;
+ bool hasFoundPath = false;
+
+ std::queue<const BasicBlock *> BFS;
+ BFS.push(Src);
+
+ while(BFS.size() && !hasFoundPath) {
+ BB = BFS.front();
+ BFS.pop();
+
+ succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ if (Succ == End) {
+ P[0] = BB;
+ if (Mode & GetPathToExit) {
+ hasFoundPath = true;
+ BB = 0;
+ }
+ }
+ for(;Succ != End; ++Succ) {
+ if (P.find(*Succ) != P.end()) continue;
+ Edge e = getEdge(BB,*Succ);
+ if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+ P[*Succ] = BB;
+ BFS.push(*Succ);
+ if ((Mode & GetPathToDest) && *Succ == Dest) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ }
+ }
+
+ return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ divertFlow(const Edge &oldedge, const Edge &newedge) {
+ DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
+
+ // First check if the old edge was taken, if not, just delete it...
+ if (getEdgeWeight(oldedge) == 0) {
+ removeEdge(oldedge);
+ return;
+ }
+
+ Path P;
+ P[newedge.first] = 0;
+ P[newedge.second] = newedge.first;
+ const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
+
+ double w = getEdgeWeight (oldedge);
+ DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
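+ // Walk the path found by GetPath backwards to newedge.first, adding the
+ // diverted weight to every edge (and to the blocks in between) on the way.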
+ do {
+ const BasicBlock *Parent = P.find(BB)->second;
+ Edge e = getEdge(Parent,BB);
+ double oldw = getEdgeWeight(e);
+ double oldc = getExecutionCount(e.first);
+ setEdgeWeight(e, w+oldw);
+ if (Parent != oldedge.first) {
+ setExecutionCount(e.first, w+oldc);
+ }
+ BB = Parent;
+ } while (BB != newedge.first);
+ removeEdge(oldedge);
+}
+
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+ DEBUG(dbgs() << "Replacing " << RmBB->getName()
+ << " with " << DestBB->getName() << "\n");
+ const Function *F = DestBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ Edge e, newedge;
+ bool erasededge = false;
+ EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ while(I != E) {
+ e = (I++)->first;
+ bool foundedge = false; bool eraseedge = false;
+ if (e.first == RmBB) {
+ if (e.second == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
+ }
+ if (e.second == RmBB) {
+ if (e.first == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
+ }
+ if (foundedge) {
+ replaceEdge(e, newedge);
+ }
+ if (eraseedge) {
+ if (erasededge) {
+ Edge newedge = getEdge(DestBB, DestBB);
+ replaceEdge(e, newedge);
+ } else {
+ removeEdge(e);
+ erasededge = true;
+ }
+ }
+ }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it is possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
+ const Function *F = FirstBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ // Generate edges and read current weight.
+ Edge e = getEdge(FirstBB, SecondBB);
+ Edge n1 = getEdge(FirstBB, NewBB);
+ Edge n2 = getEdge(NewBB, SecondBB);
+ EdgeWeights &ECs = J->second;
+ double w = ECs[e];
+
+ int succ_count = 0;
+ if (!MergeIdenticalEdges) {
+ // First count the edges from FirstBB to SecondBB; if there is more than
+ // one, only slice out a proportional part for NewBB.
+ for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+ BBI != BBE; ++BBI) {
+ if (*BBI == SecondBB) succ_count++;
+ }
+ // When the NewBB is completely new, increment the count by one so that
+ // the counts are properly distributed.
+ if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+ } else {
+ // When the edges are merged anyway, then redirect all flow.
+ succ_count = 1;
+ }
+
+ // We know now how many edges there are from FirstBB to SecondBB, reroute a
+ // proportional part of the edge weight over NewBB.
+ double neww = floor(w / succ_count);
+ ECs[n1] += neww;
+ ECs[n2] += neww;
+ BlockInformation[F][NewBB] += neww;
+ if (succ_count == 1) {
+ ECs.erase(e);
+ } else {
+ ECs[e] -= neww;
+ }
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+ const BasicBlock* New) {
+ const Function *F = Old->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+ std::set<Edge> Edges;
+ for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+ ewi != ewe; ++ewi) {
+ Edge old = ewi->first;
+ if (old.first == Old) {
+ Edges.insert(old);
+ }
+ }
+ for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+ EI != EE; ++EI) {
+ Edge newedge = getEdge(New, EI->second);
+ replaceEdge(*EI, newedge);
+ }
+
+ double w = getExecutionCount(Old);
+ setEdgeWeight(getEdge(Old, New), w);
+ setExecutionCount(New, w);
+}
+
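+// This splitBlock overload handles the case where only the listed
+// predecessors of BB are redirected over NewBB: the weight of each (Pred,BB)
+// edge is moved to (Pred,NewBB), and the sum of the moved weight becomes both
+// the weight of the new (NewBB,BB) edge and the execution count of NewBB.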
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+ const BasicBlock* NewBB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds) {
+ const Function *F = BB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+ << " to " << NewBB->getName() << "\n");
+
+ // Collect weight that was redirected over NewBB.
+ double newweight = 0;
+
+ std::set<const BasicBlock *> ProcessedPreds;
+ // For all requested predecessors.
+ for (unsigned pred = 0; pred < NumPreds; ++pred) {
+ const BasicBlock * Pred = Preds[pred];
+ if (ProcessedPreds.insert(Pred).second) {
+ // Create edges and read old weight.
+ Edge oldedge = getEdge(Pred, BB);
+ Edge newedge = getEdge(Pred, NewBB);
+
+ // Remember how much weight was redirected.
+ newweight += getEdgeWeight(oldedge);
+
+ replaceEdge(oldedge,newedge);
+ }
+ }
+
+ Edge newedge = getEdge(NewBB,BB);
+ setEdgeWeight(newedge, newweight);
+ setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+ const Function *New) {
+ DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
+ << New->getName() << "\n");
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(Old);
+ if(J != EdgeInformation.end()) {
+ EdgeInformation[New] = J->second;
+ }
+ EdgeInformation.erase(Old);
+ BlockInformation.erase(Old);
+ FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+ ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+ if (w == ProfileInfo::MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ return 0;
+ } else {
+ return w;
+ }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+ CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+ bool assumeEmptySelf) {
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+ // Collect weights of all incoming and outgoing edges; remember edges that
+ // have no value.
+ double incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ Edge e = getEdge(0,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ Edge e = getEdge(*bbi,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ }
+
+ double outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ Edge e = getEdge(BB,0);
+ if (getEdgeWeight(e) == MissingValue) {
+ double w = getExecutionCount(BB);
+ if (w != MissingValue) {
+ setEdgeWeight(e,w);
+ removed = e;
+ }
+ }
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ Edge e = getEdge(BB,*sbbi);
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ }
+
+ // if exactly one edge weight was missing, calculate it and remove it from
+ // spanning tree
+ if (uncalculated == 0 ) {
+ return true;
+ } else
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+ removed = edgetocalc;
+ return true;
+ } else
+ if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
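+ // A missing self-edge is seen twice, once as an incoming and once as an
+ // outgoing edge. When it is the only unknown and the remaining flow already
+ // balances, estimate its weight as ten times the incoming flow.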
+ setEdgeWeight(edgetocalc, incount * 10);
+ removed = edgetocalc;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
+ double w = PI->getEdgeWeight(e);
+ if (w != ProfileInfo::MissingValue) {
+ calcw += w;
+ } else {
+ misscount.insert(e);
+ }
+}
+
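+// EstimateMissingEdges - If all incoming edge weights of BB are known, spread
+// the incoming flow evenly over the outgoing edges that are still missing
+// (e.g. a block entered 8 times with two unknown outgoing edges gets 4 on
+// each of them). Missing incoming edges are only resolved, to zero, when no
+// flow leaves the block at all.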
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+ double inWeight = 0;
+ std::set<Edge> inMissing;
+ std::set<const BasicBlock*> ProcessedPreds;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi == bbe) {
+ readEdge(this,getEdge(0,BB),inWeight,inMissing);
+ }
+ for( ; bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+ }
+ }
+
+ double outWeight = 0;
+ std::set<Edge> outMissing;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi == sbbe)
+ readEdge(this,getEdge(BB,0),outWeight,outMissing);
+ for ( ; sbbi != sbbe; ++sbbi ) {
+ if (ProcessedSuccs.insert(*sbbi).second) {
+ readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+ }
+ }
+
+ double share;
+ std::set<Edge>::iterator ei,ee;
+ if (inMissing.size() == 0 && outMissing.size() > 0) {
+ ei = outMissing.begin();
+ ee = outMissing.end();
+ share = inWeight/outMissing.size();
+ setExecutionCount(BB,inWeight);
+ } else
+ if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+ ei = inMissing.begin();
+ ee = inMissing.end();
+ share = 0;
+ setExecutionCount(BB,0);
+ } else
+ if (inMissing.size() == 0 && outMissing.size() == 0) {
+ setExecutionCount(BB,outWeight);
+ return true;
+ } else {
+ return false;
+ }
+ for ( ; ei != ee; ++ei ) {
+ setEdgeWeight(*ei,share);
+ }
+ return true;
+}
+
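+// repair - Try to make the profile of F consistent again after the CFG has
+// changed. A series of increasingly speculative heuristics is applied to the
+// unresolved blocks (flow conservation per block, proportional estimates,
+// paths to blocks with known counts, self-loops, backedges, return paths and
+// finally speculative zero weights) until everything is resolved; if no
+// heuristic makes progress the function asserts.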
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+// for (Function::const_iterator FI = F->begin(), FE = F->end();
+// FI != FE; ++FI) {
+// const BasicBlock* BB = &(*FI);
+// {
+// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// {
+// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// }
+// return;
+// }
+ // The set of BasicBlocks that are still unvisited.
+ std::set<const BasicBlock*> Unvisited;
+
+ // The set of return edges (Edges with no successors).
+ std::set<Edge> ReturnEdges;
+ double ReturnWeight = 0;
+
+ // First iterate over the whole function and collect:
+ // 1) The blocks in this function in the Unvisited set.
+ // 2) The return edges in the ReturnEdges set.
+ // 3) The flow that is leaving the function already via return edges.
+
+ // Data structure for searching the function.
+ std::queue<const BasicBlock *> BFS;
+ const BasicBlock *BB = &(F->getEntryBlock());
+ BFS.push(BB);
+ Unvisited.insert(BB);
+
+ while (BFS.size()) {
+ BB = BFS.front(); BFS.pop();
+ succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ if (NBB == End) {
+ Edge e = getEdge(BB,0);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ // If the return edge has no value, try to read value from block.
+ double bw = getExecutionCount(BB);
+ if (bw != MissingValue) {
+ setEdgeWeight(e,bw);
+ ReturnWeight += bw;
+ } else {
+ // If both return edge and block provide no value, collect edge.
+ ReturnEdges.insert(e);
+ }
+ } else {
+ // If the return edge has a proper value, collect it.
+ ReturnWeight += w;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (Unvisited.insert(*NBB).second) {
+ BFS.push(*NBB);
+ }
+ }
+ }
+
+ while (Unvisited.size() > 0) {
+ unsigned oldUnvisitedCount = Unvisited.size();
+ bool FoundPath = false;
+
+ // If there is only one edge left, calculate it.
+ if (ReturnEdges.size() == 1) {
+ ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+ Edge e = *ReturnEdges.begin();
+ setEdgeWeight(e,ReturnWeight);
+ setExecutionCount(e.first,ReturnWeight);
+
+ Unvisited.erase(e.first);
+ ReturnEdges.erase(e);
+ continue;
+ }
+
+ // Calculate all blocks where only one edge is missing; this may also
+ // resolve further return edges.
+ std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ Edge e;
+ if(CalculateMissingEdge(BB,e,true)) {
+ if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+ setExecutionCount(BB,getExecutionCount(BB));
+ }
+ Unvisited.erase(BB);
+ if (e.first != 0 && e.second == 0) {
+ ReturnEdges.erase(e);
+ ReturnWeight += getEdgeWeight(e);
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Estimate edge weights by dividing the flow proportionally.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ bool AllEdgesHaveSameReturn = true;
+ // Check each successor; these must all end up in the same or an empty
+ // return block, otherwise it is dangerous to do an estimation on them.
+ for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ Succ != End; ++Succ) {
+ Path P;
+ GetPath(*Succ, 0, P, GetPathToExit);
+ if (Dest && Dest != P[0]) {
+ AllEdgesHaveSameReturn = false;
+ }
+ Dest = P[0];
+ }
+ if (AllEdgesHaveSameReturn) {
+ if(EstimateMissingEdges(BB)) {
+ Unvisited.erase(BB);
+ break;
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Check if there is a path to a block that has a known value and redirect
+ // flow accordingly.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ // Fetch path.
+ const BasicBlock *BB = *FI; ++FI;
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+ // Calculate incoming flow.
+ double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ invalid++;
+ } else {
+ // If the path found from BB already contains this predecessor, the edge
+ // is a backedge; do not count it as missing.
+ if (P.find(*NBB) == P.end())
+ inmissing++;
+ }
+ incount++;
+ }
+ }
+ if (inmissing == incount) continue;
+ if (invalid == 0) continue;
+
+ // Subtract (already) outgoing flow.
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw -= ew;
+ }
+ }
+ }
+ if (iw < 0) continue;
+
+ // Check the receiving end of the path if it can handle the flow.
+ double ow = getExecutionCount(Dest);
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ ow -= ew;
+ }
+ }
+ }
+ if (ow < 0) continue;
+
+ // Determine how much flow shall be used.
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+ if (ew != MissingValue) {
+ ew = ew<ow?ew:ow;
+ ew = ew<iw?ew:iw;
+ } else {
+ if (inmissing == 0)
+ ew = iw;
+ }
+
+ // Create flow.
+ if (ew != MissingValue) {
+ do {
+ Edge e = getEdge(P[Dest],Dest);
+ if (getEdgeWeight(e) == MissingValue) {
+ setEdgeWeight(e,ew);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Calculate a block with self loop.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ bool SelfEdgeFound = false;
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (*NBB == BB) {
+ SelfEdgeFound = true;
+ break;
+ }
+ }
+ if (SelfEdgeFound) {
+ Edge e = getEdge(BB,BB);
+ if (getEdgeWeight(e) == MissingValue) {
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
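+ // Heuristic: give the unknown self-loop ten times the weight that flows
+ // into the block.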
+ setEdgeWeight(e,iw * 10);
+ FoundPath = true;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ // Determine backedges, set them to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ Path P;
+ bool BackEdgeFound = false;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
+ if (Dest == *NBB) {
+ BackEdgeFound = true;
+ break;
+ }
+ }
+ if (BackEdgeFound) {
+ Edge e = getEdge(Dest,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Channel flow to return block.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
+ Dest = P[0];
+ if (!Dest) continue;
+
+ if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
+ // Calculate incoming flow.
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,iw);
+ FoundPath = true;
+ } else {
+ assert(0 && "Edge should not have value already!");
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Speculatively set edges to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Edge e = getEdge(*NBB,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ break;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ errs() << "{";
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ dbgs() << BB->getName();
+ if (FI != FE)
+ dbgs() << ",";
+ }
+ errs() << "}";
+
+ errs() << "ASSERT: could not repair function";
+ assert(0 && "could not repair function");
+ }
+
+ EdgeWeights J = EdgeInformation[F];
+ for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
+ Edge e = EI->first;
+
+ bool SuccFound = false;
+ if (e.first != 0) {
+ succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
+ if (NBB == End) {
+ if (0 == e.second) {
+ SuccFound = true;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (*NBB == e.second) {
+ SuccFound = true;
+ break;
+ }
+ }
+ if (!SuccFound) {
+ removeEdge(e);
+ }
+ }
+ }
+}
+
+raw_ostream& operator<<(raw_ostream &O, const Function *F) {
+ return O << F->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
+ return O << MF->getFunction()->getName() << "(MF)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
+ return O << BB->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
+ return O << MBB->getBasicBlock()->getName() << "(MB)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// NoProfile ProfileInfo implementation
+//
+
+namespace {
+ struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoProfileInfo() : ImmutablePass(ID) {
+ initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+ "No Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 000000000000..eaa38dad16a1
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,157 @@
+//===- ProfileInfoLoader.cpp - Load profile information from disk ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileInfoLoader class is used to load and represent profiling
+// information read in from the dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+// ByteSwap - Byteswap 'Var' if 'Really' is true.
+//
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+ if (!Really) return Var;
+ return ((Var & (255U<< 0U)) << 24U) |
+ ((Var & (255U<< 8U)) << 8U) |
+ ((Var & (255U<<16U)) >> 8U) |
+ ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ if (A == ProfileInfoLoader::Uncounted) return B;
+ if (B == ProfileInfoLoader::Uncounted) return A;
+ return A + B;
+}
+
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ std::vector<unsigned> &Data) {
+ // Read the number of entries...
+ unsigned NumEntries;
+ if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
+
+ // Read the counts...
+ std::vector<unsigned> TempSpace(NumEntries);
+
+ // Read in the block of data...
+ if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+
+ // Make sure we have enough space... The space is initialised to -1 to
+ // facilitate the loading of missing values for OptimalEdgeProfiling.
+ if (Data.size() < NumEntries)
+ Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
+
+ // Accumulate the data we just read into the data.
+ if (!ShouldByteSwap) {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(TempSpace[i], Data[i]);
+ }
+ } else {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+ }
+ }
+}
+
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
+// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
+// program if the file is invalid or broken.
+//
+ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
+ const std::string &Filename,
+ Module &TheModule) :
+ Filename(Filename),
+ M(TheModule), Warned(false) {
+ FILE *F = fopen(Filename.c_str(), "rb");
+ if (F == 0) {
+ errs() << ToolName << ": Error opening '" << Filename << "': ";
+ perror(0);
+ exit(1);
+ }
+
+ // Keep reading packets until we run out of them.
+ unsigned PacketType;
+ while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+ // If the low eight bits of the packet are zero, we must be dealing with an
+ // endianness mismatch. Byteswap all words read from the profiling
+ // information.
+ bool ShouldByteSwap = (char)PacketType == 0;
+ PacketType = ByteSwap(PacketType, ShouldByteSwap);
+
+ switch (PacketType) {
+ case ArgumentInfo: {
+ unsigned ArgLength;
+ if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
+
+ // Read in the arguments...
+ std::vector<char> Chars(ArgLength+4);
+
+ if (ArgLength)
+ if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
+ break;
+ }
+
+ case FunctionInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
+ break;
+
+ case BlockInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
+ break;
+
+ case EdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+ break;
+
+ case OptEdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
+ break;
+
+ case BBTraceInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
+ break;
+
+ default:
+ errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ exit(1);
+ }
+ }
+
+ fclose(F);
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
new file mode 100644
index 000000000000..098079bcffc4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -0,0 +1,267 @@
+//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// loads the information from a profile dump file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-loader"
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -profile-loader"));
+
+namespace {
+ class LoaderPass : public ModulePass, public ProfileInfo {
+ std::string Filename;
+ std::set<Edge> SpanningTree;
+ std::set<const BasicBlock*> BBisUnvisited;
+ unsigned ReadCount;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit LoaderPass(const std::string &filename = "")
+ : ModulePass(ID), Filename(filename) {
+ initializeLoaderPassPass(*PassRegistry::getPassRegistry());
+ if (filename.empty()) Filename = ProfileInfoFilename;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information loader";
+ }
+
+ // recurseBasicBlock() - Calculates the edge weights for as many basic
+ // blocks as possible.
+ virtual void recurseBasicBlock(const BasicBlock *BB);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
+ virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ /// run - Load the profile information from the specified file.
+ virtual bool runOnModule(Module &M);
+ };
+} // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+ "Load profile information from llvmprof.out", false, true, false)
+
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+ return new LoaderPass(Filename);
+}
+
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+ unsigned &uncalc, double &count) {
+ double w;
+ if ((w = getEdgeWeight(edge)) == MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ } else {
+ count+=w;
+ }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // break recursion if already visited
+ if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+ BBisUnvisited.erase(BB);
+ if (!BB) return;
+
+ for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+
+ Edge tocalc;
+ if (CalculateMissingEdge(BB, tocalc)) {
+ SpanningTree.erase(tocalc);
+ }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+ std::vector<unsigned> &ECs) {
+ if (ReadCount < ECs.size()) {
+ double weight = ECs[ReadCount++];
+ if (weight != ProfileInfoLoader::Uncounted) {
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is safe because we know
+ // that everything that is representable in unsigned is also representable
+ // in double.
+ EdgeInformation[getFunction(e)][e] += (double)weight;
+
+ DEBUG(dbgs() << "--Read Edge Counter for " << e
+ << " (# "<< (ReadCount-1) << "): "
+ << (unsigned)getEdgeWeight(e) << "\n");
+ } else {
+ // This happens only if reading optimal profiling information, not when
+ // reading regular profiling information.
+ SpanningTree.insert(e);
+ }
+ }
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+ ProfileInfoLoader PIL("profile-loader", Filename, M);
+
+ EdgeInformation.clear();
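+ // The raw counters are consumed in a fixed order that has to match the
+ // instrumented program: per function the (0,entry) edge first, then one
+ // counter for every terminator successor of each block.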
+ std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ Counters = PIL.getRawOptimalEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ readEdge(getEdge(BB,0), Counters);
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
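+ // Edges left uncounted by optimal edge profiling form a spanning tree;
+ // recover their weights from the counted edges by repeatedly applying flow
+ // conservation at the blocks they touch.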
+ while (SpanningTree.size() > 0) {
+
+ unsigned size = SpanningTree.size();
+
+ BBisUnvisited.clear();
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ BBisUnvisited.insert(ei->first);
+ BBisUnvisited.insert(ei->second);
+ }
+ while (BBisUnvisited.size() > 0) {
+ recurseBasicBlock(*BBisUnvisited.begin());
+ }
+
+ if (SpanningTree.size() == size) {
+ DEBUG(dbgs()<<"{");
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ DEBUG(dbgs()<< *ei <<",");
+ }
+ assert(0 && "No edge calculated!");
+ }
+
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ BlockInformation.clear();
+ Counters = PIL.getRawBlockCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is safe because we know
+ // that everything that is representable in unsigned is also
+ // representable in double.
+ BlockInformation[F][BB] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ FunctionInformation.clear();
+ Counters = PIL.getRawFunctionCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is safe because we know
+ // that everything that is representable in unsigned is also
+ // representable in double.
+ FunctionInformation[F] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 000000000000..a01751849c51
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,382 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to verify profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool,false>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+ cl::desc("Disable assertions"));
+
+namespace llvm {
+ template<class FType, class BType>
+ class ProfileVerifierPassT : public FunctionPass {
+
+ struct DetailedBlockInfo {
+ const BType *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
+ };
+
+ ProfileInfoT<FType, BType> *PI;
+ std::set<const BType*> BBisVisited;
+ std::set<const FType*> FisVisited;
+ bool DisableAssertions;
+
+ // When debugging is enabled, the verifier prints a whole slew of debug
+ // information, otherwise it is just the assert. These are all the helper
+ // functions.
+ bool PrintedDebugTree;
+ std::set<const BType*> BBisPrinted;
+ void debugEntry(DetailedBlockInfo*);
+ void printDebugInfo(const BType *BB);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ explicit ProfileVerifierPassT () : FunctionPass(ID) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ DisableAssertions = ProfileVerifierDisableAssertions;
+ }
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
+ DisableAssertions(da) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfoT<FType, BType> >();
+ }
+
+ const char *getPassName() const {
+ return "Profiling information verifier";
+ }
+
+ /// run - Verify the profile information.
+ bool runOnFunction(FType &F);
+ void recurseBasicBlock(const BType*);
+
+ bool exitReachable(const FType*);
+ double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
+ void CheckValue(bool, const char*, DetailedBlockInfo*);
+ };
+
+ typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BType*> ProcessedPreds;
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated in-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
+ }
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BType*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated out-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ dbgs() << "Block " << BB->getNameStr() << " in "
+ << BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",BBWeight) << ","
+ << "inWeight=" << format("%20.20g",inWeight) << ","
+ << "inCount=" << inCount << ","
+ << "outWeight=" << format("%20.20g",outWeight) << ","
+ << "outCount=" << outCount << "\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+ dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+ << DI->BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
+ << "inWeight=" << format("%20.20g",DI->inWeight) << ","
+ << "inCount=" << DI->inCount << ","
+ << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+ << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
+ }
+
+ // This compares A and B for equality.
+ static bool Equals(double A, double B) {
+ return A == B;
+ }
+
+ // This checks if the function "exit" is reachable from a given function
+ // via calls; this is necessary to check if a profile is valid despite the
+ // counts not fitting exactly.
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
+ if (!F) return false;
+
+ if (FisVisited.count(F)) return false;
+
+ FType *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
+
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ exits |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ exits = true;
+ }
+ if (exits) break;
+ }
+ }
+ return exits;
+ }
+
+ #define ASSERTMESSAGE(M) \
+ { dbgs() << "ASSERT:" << (M) << "\n"; \
+ if (!DisableAssertions) assert(0 && (M)); }
+
+ template<class FType, class BType>
+ double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has missing value");
+ return 0;
+ } else {
+ if (EdgeWeight < 0) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has negative value");
+ }
+ return EdgeWeight;
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
+ const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ dbgs() << "Block " << DI->BB->getNameStr() << " in Function "
+ << DI->BB->getParent()->getNameStr() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
+ }
+
+ // This calculates the Information for a block and then recurses into the
+ // successors.
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+ // Use a data structure to store all the information, this can then be handed
+ // to debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = 0;
+ DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BType*> ProcessedPreds;
+ const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+ DI.inCount++;
+ }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
+
+ // Read successors.
+ std::set<const BType*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // If there is a (0,BB) edge, consider it too. (This is done not only when
+ // there are no successors, but every time; not every function contains
+ // return blocks with no successors (think loop latch as return block)).
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfoT<FType, BType>::MissingValue) {
+ DI.outWeight += w;
+ DI.outCount++;
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
+
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+ "BasicBlock has missing value", &DI);
+ CheckValue(DI.BBWeight < 0,
+ "BasicBlock has negative value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F && (F->getName() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
+ }
+ }
+ }
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ isExitReachable = true;
+ }
+ if (isExitReachable) break;
+ }
+ }
+ }
+
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ }
+
+
+ // Mark this block as visited, then recurse into the successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+ PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+ if (!PI)
+ ASSERTMESSAGE("No ProfileInfo available");
+
+ // Prepare global variables.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BType *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+ ASSERTMESSAGE("Function count and entry block count do not match");
+
+ return false;
+ }
+
+ template<class FType, class BType>
+ char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 000000000000..52753cbe85af
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,851 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
+ cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(Region::PrintNone, "none", "print no details"),
+ clEnumValN(Region::PrintBB, "bb",
+ "print regions in detail with block_iterator"),
+ clEnumValN(Region::PrintRN, "rn",
+ "print regions in detail with element_iterator"),
+ clEnumValEnd));
+//===----------------------------------------------------------------------===//
+/// Region Implementation
+Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
+ DominatorTree *dt, Region *Parent)
+ : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
+
+Region::~Region() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator it = BBNodeMap.begin(),
+ ie = BBNodeMap.end(); it != ie; ++it)
+ delete it->second;
+
+ // Only clean the cache for this Region. Caches of child Regions will be
+ // cleaned when the child Regions are deleted.
+ BBNodeMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+void Region::replaceEntry(BasicBlock *BB) {
+ entry.setPointer(BB);
+}
+
+void Region::replaceExit(BasicBlock *BB) {
+ assert(exit && "No exit to replace!");
+ exit = BB;
+}
+
+bool Region::contains(const BasicBlock *B) const {
+ BasicBlock *BB = const_cast<BasicBlock*>(B);
+
+ assert(DT->getNode(BB) && "BB not part of the dominance tree");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // Toplevel region.
+ if (!exit)
+ return true;
+
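+ // BB is in the region if the entry dominates it, except when BB is also
+ // dominated by the exit and the exit is dominated by the entry; in that
+ // case BB lies behind the region's single exit.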
+ return (DT->dominates(entry, BB)
+ && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
+}
+
+bool Region::contains(const Loop *L) const {
+ // BBs that are not part of any loop are elements of the Loop
+ // described by the NULL pointer. This loop is not part of any region,
+ // except if the region describes the whole function.
+ if (L == 0)
+ return getExit() == 0;
+
+ if (!contains(L->getHeader()))
+ return false;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
+ BE = ExitingBlocks.end(); BI != BE; ++BI)
+ if (!contains(*BI))
+ return false;
+
+ return true;
+}
+
+Loop *Region::outermostLoopInRegion(Loop *L) const {
+ if (!contains(L))
+ return 0;
+
+ while (L && contains(L->getParentLoop())) {
+ L = L->getParentLoop();
+ }
+
+ return L;
+}
+
+Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
+ assert(LI && BB && "LI and BB cannot be null!");
+ Loop *L = LI->getLoopFor(BB);
+ return outermostLoopInRegion(L);
+}
+
+BasicBlock *Region::getEnteringBlock() const {
+ BasicBlock *entry = getEntry();
+ BasicBlock *Pred;
+ BasicBlock *enteringBlock = 0;
+
+ for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (DT->getNode(Pred) && !contains(Pred)) {
+ if (enteringBlock)
+ return 0;
+
+ enteringBlock = Pred;
+ }
+ }
+
+ return enteringBlock;
+}
+
+BasicBlock *Region::getExitingBlock() const {
+ BasicBlock *exit = getExit();
+ BasicBlock *Pred;
+ BasicBlock *exitingBlock = 0;
+
+ if (!exit)
+ return 0;
+
+ for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (contains(Pred)) {
+ if (exitingBlock)
+ return 0;
+
+ exitingBlock = Pred;
+ }
+ }
+
+ return exitingBlock;
+}
+
+bool Region::isSimple() const {
+ return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
+}
+
+std::string Region::getNameStr() const {
+ std::string exitName;
+ std::string entryName;
+
+ if (getEntry()->getName().empty()) {
+ raw_string_ostream OS(entryName);
+
+ WriteAsOperand(OS, getEntry(), false);
+ entryName = OS.str();
+ } else
+ entryName = getEntry()->getNameStr();
+
+ if (getExit()) {
+ if (getExit()->getName().empty()) {
+ raw_string_ostream OS(exitName);
+
+ WriteAsOperand(OS, getExit(), false);
+ exitName = OS.str();
+ } else
+ exitName = getExit()->getNameStr();
+ } else
+ exitName = "<Function Return>";
+
+ return entryName + " => " + exitName;
+}
+
+void Region::verifyBBInRegion(BasicBlock *BB) const {
+ if (!contains(BB))
+ llvm_unreachable("Broken region found!");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (!contains(*SI) && exit != *SI)
+ llvm_unreachable("Broken region found!");
+
+ if (entry != BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
+ if (!contains(*SI))
+ llvm_unreachable("Broken region found!");
+}
+
+void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
+ BasicBlock *exit = getExit();
+
+ visited->insert(BB);
+
+ verifyBBInRegion(BB);
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (*SI != exit && visited->find(*SI) == visited->end())
+ verifyWalk(*SI, visited);
+}
+
+void Region::verifyRegion() const {
+ // Only do verification when user wants to, otherwise this expensive
+ // check will be invoked by PassManager.
+ if (!VerifyRegionInfo) return;
+
+ std::set<BasicBlock*> visited;
+ verifyWalk(getEntry(), &visited);
+}
+
+void Region::verifyRegionNest() const {
+ for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->verifyRegionNest();
+
+ verifyRegion();
+}
+
+Region::block_iterator Region::block_begin() {
+ return GraphTraits<FlatIt<Region*> >::nodes_begin(this);
+}
+
+Region::block_iterator Region::block_end() {
+ return GraphTraits<FlatIt<Region*> >::nodes_end(this);
+}
+
+Region::const_block_iterator Region::block_begin() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_begin(this);
+}
+
+Region::const_block_iterator Region::block_end() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_end(this);
+}
+
+Region::element_iterator Region::element_begin() {
+ return GraphTraits<Region*>::nodes_begin(this);
+}
+
+Region::element_iterator Region::element_end() {
+ return GraphTraits<Region*>::nodes_end(this);
+}
+
+Region::const_element_iterator Region::element_begin() const {
+ return GraphTraits<const Region*>::nodes_begin(this);
+}
+
+Region::const_element_iterator Region::element_end() const {
+ return GraphTraits<const Region*>::nodes_end(this);
+}
+
+Region* Region::getSubRegionNode(BasicBlock *BB) const {
+ Region *R = RI->getRegionFor(BB);
+
+ if (!R || R == this)
+ return 0;
+
+ // If we pass the BB out of this region, that means our code is broken.
+ assert(contains(R) && "BB not in current region!");
+
+ while (contains(R->getParent()) && R->getParent() != this)
+ R = R->getParent();
+
+ if (R->getEntry() != BB)
+ return 0;
+
+ return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+
+ BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+ if (at != BBNodeMap.end())
+ return at->second;
+
+ RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+ BBNodeMap.insert(std::make_pair(BB, NewNode));
+ return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+ if (Region* Child = getSubRegionNode(BB))
+ return Child->getNode();
+
+ return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ (*I)->parent = To;
+ To->children.push_back(*I);
+ }
+ children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
+ assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ assert(std::find(begin(), end(), SubRegion) == children.end()
+ && "Subregion already exists!");
+
+ SubRegion->parent = this;
+ children.push_back(SubRegion);
+
+ if (!moveChildren)
+ return;
+
+ assert(SubRegion->children.size() == 0
+ && "SubRegions that contain children are not supported");
+
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSubRegion()) {
+ BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+
+ if (SubRegion->contains(BB))
+ RI->setRegionFor(BB, SubRegion);
+ }
+
+ std::vector<Region*> Keep;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (SubRegion->contains(*I) && *I != SubRegion) {
+ SubRegion->children.push_back(*I);
+ (*I)->parent = SubRegion;
+ } else
+ Keep.push_back(*I);
+
+ children.clear();
+ children.insert(children.begin(), Keep.begin(), Keep.end());
+}
+
+
+Region *Region::removeSubRegion(Region *Child) {
+ assert(Child->parent == this && "Child is not a child of this region!");
+ Child->parent = 0;
+ RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+  assert(I != children.end() && "Region does not exist. Unable to remove.");
+ children.erase(children.begin()+(I-begin()));
+ return Child;
+}
+
+unsigned Region::getDepth() const {
+ unsigned Depth = 0;
+
+ for (Region *R = parent; R != 0; R = R->parent)
+ ++Depth;
+
+ return Depth;
+}
+
+Region *Region::getExpandedRegion() const {
+ unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
+
+ if (NumSuccessors == 0)
+ return NULL;
+
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(getEntry(), *PI))
+ return NULL;
+
+ Region *R = RI->getRegionFor(exit);
+
+ if (R->getEntry() != exit) {
+ if (exit->getTerminator()->getNumSuccessors() == 1)
+ return new Region(getEntry(), *succ_begin(exit), RI, DT);
+ else
+ return NULL;
+ }
+
+ while (R->getParent() && R->getParent()->getEntry() == exit)
+ R = R->getParent();
+
+ if (!DT->dominates(getEntry(), R->getExit()))
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(R->getExit(), *PI))
+ return NULL;
+
+ return new Region(getEntry(), R->getExit(), RI, DT);
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
+ enum PrintStyle Style) const {
+ if (print_tree)
+ OS.indent(level*2) << "[" << level << "] " << getNameStr();
+ else
+ OS.indent(level*2) << getNameStr();
+
+ OS << "\n";
+
+
+ if (Style != PrintNone) {
+ OS.indent(level*2) << "{\n";
+ OS.indent(level*2 + 2);
+
+ if (Style == PrintBB) {
+ for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ","
+ } else if (Style == PrintRN) {
+ for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+        OS << **I << ", "; // TODO: remove the last ","
+ }
+
+ OS << "\n";
+ }
+
+ if (print_tree)
+ for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->print(OS, print_tree, level+1, Style);
+
+ if (Style != PrintNone)
+ OS.indent(level*2) << "} \n";
+}
+
+void Region::dump() const {
+ print(dbgs(), true, getDepth(), printStyle.getValue());
+}
+
+void Region::clearNodeCache() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator I = BBNodeMap.begin(),
+ IE = BBNodeMap.end(); I != IE; ++I)
+ delete I->second;
+
+ BBNodeMap.clear();
+ for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+ BasicBlock *exit) const {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+ return false;
+ }
+ return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+ typedef DominanceFrontier::DomSetType DST;
+
+ DST *entrySuccs = &DF->find(entry)->second;
+
+ // Exit is the header of a loop that contains the entry. In this case,
+ // the dominance frontier must only contain the exit.
+ if (!DT->dominates(entry, exit)) {
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI)
+ if (*SI != exit && *SI != entry)
+ return false;
+
+ return true;
+ }
+
+ DST *exitSuccs = &DF->find(exit)->second;
+
+ // Do not allow edges leaving the region.
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI) {
+ if (*SI == exit || *SI == entry)
+ continue;
+ if (exitSuccs->find(*SI) == exitSuccs->end())
+ return false;
+ if (!isCommonDomFrontier(*SI, entry, exit))
+ return false;
+ }
+
+ // Do not allow edges pointing into the region.
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
+ SI != SE; ++SI)
+ if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ return false;
+
+
+ return true;
+}
+
+void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
+ BBtoBBMap *ShortCut) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ BBtoBBMap::iterator e = ShortCut->find(exit);
+
+ if (e == ShortCut->end())
+ // No further region at exit available.
+ (*ShortCut)[entry] = exit;
+ else {
+    // We found a region e that starts at exit. Therefore (entry, e->second)
+    // is also a region that is larger than (entry, exit). Insert the
+    // larger one.
+ BasicBlock *BB = e->second;
+ (*ShortCut)[entry] = BB;
+ }
+}
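
The chaining above is easier to see in isolation. The following standalone sketch models the ShortCut map with plain std::string keys instead of BasicBlock pointers; the block names and the three-block chain are invented for illustration and are not part of the LLVM sources.

    #include <iostream>
    #include <map>
    #include <string>

    typedef std::map<std::string, std::string> BBtoBBMap;

    // Mirrors RegionInfo::insertShortCut: if a region already starts at
    // 'exit', record the larger region (entry, shortcut-of-exit) instead.
    static void insertShortCut(const std::string &entry,
                               const std::string &exit, BBtoBBMap &ShortCut) {
      BBtoBBMap::iterator e = ShortCut.find(exit);
      if (e == ShortCut.end())
        ShortCut[entry] = exit;       // No further region at exit.
      else
        ShortCut[entry] = e->second;  // Chain to the larger region's exit.
    }

    int main() {
      BBtoBBMap ShortCut;
      // Suppose regions (C, D) and then (B, C) were found, in that order.
      insertShortCut("C", "D", ShortCut);
      insertShortCut("B", "C", ShortCut);
      // B now shortcuts directly to D, so a later scan starting above B can
      // skip the whole linear chain B -> C -> D in one step.
      std::cout << "B -> " << ShortCut["B"] << "\n";  // prints "B -> D"
      return 0;
    }
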
+
+DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
+ BBtoBBMap *ShortCut) const {
+ BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
+
+ if (e == ShortCut->end())
+ return N->getIDom();
+
+ return PDT->getNode(e->second)->getIDom();
+}
+
+bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ unsigned num_successors = succ_end(entry) - succ_begin(entry);
+
+ if (num_successors <= 1 && exit == *(succ_begin(entry)))
+ return true;
+
+ return false;
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple()) ++numSimpleRegions;
+}
+
+Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ if (isTrivialRegion(entry, exit))
+ return 0;
+
+ Region *region = new Region(entry, exit, this, DT);
+ BBtoRegion.insert(std::make_pair(entry, region));
+
+ #ifdef XDEBUG
+ region->verifyRegion();
+ #else
+ DEBUG(region->verifyRegion());
+ #endif
+
+ updateStatistics(region);
+ return region;
+}
+
+void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
+ assert(entry);
+
+ DomTreeNode *N = PDT->getNode(entry);
+
+ if (!N)
+ return;
+
+ Region *lastRegion= 0;
+ BasicBlock *lastExit = entry;
+
+ // As only a BasicBlock that postdominates entry can finish a region, walk the
+ // post dominance tree upwards.
+ while ((N = getNextPostDom(N, ShortCut))) {
+ BasicBlock *exit = N->getBlock();
+
+ if (!exit)
+ break;
+
+ if (isRegion(entry, exit)) {
+ Region *newRegion = createRegion(entry, exit);
+
+ if (lastRegion)
+ newRegion->addSubRegion(lastRegion);
+
+ lastRegion = newRegion;
+ lastExit = exit;
+ }
+
+ // This can never be a region, so stop the search.
+ if (!DT->dominates(entry, exit))
+ break;
+ }
+
+ // Tried to create regions from entry to lastExit. Next time take a
+ // shortcut from entry to lastExit.
+ if (lastExit != entry)
+ insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+ BasicBlock *entry = &(F.getEntryBlock());
+ DomTreeNode *N = DT->getNode(entry);
+
+ // Iterate over the dominance tree in post order to start with the small
+ // regions from the bottom of the dominance tree. If the small regions are
+ // detected first, detection of bigger regions is faster, as we can jump
+ // over the small regions.
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+ ++FI) {
+ findRegionsWithEntry(FI->getBlock(), ShortCut);
+ }
+}
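
The post-order strategy of scanForRegions above can be illustrated without the LLVM dominator-tree machinery. The sketch below walks a hand-rolled tree in post order, so children (the small, inner regions) are handled before their ancestors; the Node struct and the block names are invented stand-ins, not real DomTreeNodes.

    #include <iostream>
    #include <string>
    #include <vector>

    struct Node {
      std::string Name;
      std::vector<Node*> Children;
    };

    static void postOrder(Node *N) {
      for (size_t i = 0; i < N->Children.size(); ++i)
        postOrder(N->Children[i]);
      // Children (the "small regions at the bottom") are visited first.
      std::cout << N->Name << " ";
    }

    int main() {
      Node Body, Latch, Header, Entry;
      Body.Name = "body";     Latch.Name = "latch";
      Header.Name = "header"; Entry.Name = "entry";
      Header.Children.push_back(&Body);
      Header.Children.push_back(&Latch);
      Entry.Children.push_back(&Header);
      postOrder(&Entry);   // prints: body latch header entry
      std::cout << "\n";
      return 0;
    }
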
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+ while (region->parent)
+ region = region->getParent();
+
+ return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+ BasicBlock *BB = N->getBlock();
+
+ // Passed region exit
+ while (BB == region->getExit())
+ region = region->getParent();
+
+ BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+ // This basic block is a start block of a region. It is already in the
+ // BBtoRegion relation. Only the child basic blocks have to be updated.
+ if (it != BBtoRegion.end()) {
+    Region *newRegion = it->second;
+ region->addSubRegion(getTopMostParent(newRegion));
+ region = newRegion;
+ } else {
+ BBtoRegion[BB] = region;
+ }
+
+ for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+ buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+ BBtoRegion.clear();
+ if (TopLevelRegion)
+ delete TopLevelRegion;
+ TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+ releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+  // ShortCut is a map in which, for every BB, the exit of the largest region
+  // starting at BB is stored. These regions can be treated as single BBs.
+  // This improves performance on linear CFGs.
+ BBtoBBMap ShortCut;
+
+ scanForRegions(F, &ShortCut);
+ BasicBlock *BB = &F.getEntryBlock();
+ buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+ releaseMemory();
+
+ DT = &getAnalysis<DominatorTree>();
+ PDT = &getAnalysis<PostDominatorTree>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ updateStatistics(TopLevelRegion);
+
+ Calculate(F);
+
+ return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "Region tree:\n";
+ TopLevelRegion->print(OS, true, 0, printStyle.getValue());
+ OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+ // Only do verification when user wants to, otherwise this expensive check
+ // will be invoked by PMDataManager::verifyPreservedAnalysis when
+  // a region pass (marked PreservedAll) finishes.
+ if (!VerifyRegionInfo) return;
+
+ TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+ BBtoRegionMap::const_iterator I=
+ BBtoRegion.find(BB);
+ return I != BBtoRegion.end() ? I->second : 0;
+}
+
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+ BBtoRegion[BB] = R;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+ return getRegionFor(BB);
+}
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+ BasicBlock *Exit = NULL;
+
+ while (true) {
+ // Get largest region that starts at BB.
+ Region *R = getRegionFor(BB);
+ while (R && R->getParent() && R->getParent()->getEntry() == BB)
+ R = R->getParent();
+
+ // Get the single exit of BB.
+ if (R && R->getEntry() == BB)
+ Exit = R->getExit();
+ else if (++succ_begin(BB) == succ_end(BB))
+ Exit = *succ_begin(BB);
+ else // No single exit exists.
+ return Exit;
+
+ // Get largest region that starts at Exit.
+ Region *ExitR = getRegionFor(Exit);
+ while (ExitR && ExitR->getParent()
+ && ExitR->getParent()->getEntry() == Exit)
+ ExitR = ExitR->getParent();
+
+ for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+ ++PI)
+ if (!R->contains(*PI) && !ExitR->contains(*PI))
+ break;
+
+ // This stops infinite cycles.
+ if (DT->dominates(Exit, BB))
+ break;
+
+ BB = Exit;
+ }
+
+ return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+ assert (A && B && "One of the Regions is NULL");
+
+ if (A->contains(B)) return A;
+
+ while (!B->contains(A))
+ B = B->getParent();
+
+ return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+ Region* ret = Regions.back();
+ Regions.pop_back();
+
+ for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+ E = Regions.end(); I != E; ++I)
+ ret = getCommonRegion(ret, *I);
+
+ return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+ Region* ret = getRegionFor(BBs.back());
+ BBs.pop_back();
+
+ for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+ E = BBs.end(); I != E; ++I)
+ ret = getCommonRegion(ret, getRegionFor(*I));
+
+ return ret;
+}
+
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+ Region *R = getRegionFor(OldBB);
+
+ setRegionFor(NewBB, R);
+
+ while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+ R->replaceEntry(NewBB);
+ R = R->getParent();
+ }
+
+ setRegionFor(OldBB, R);
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, so that they can be used
+// from "include/llvm/LinkAllPasses.h". Otherwise the pass would be removed
+// by link time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfo();
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
new file mode 100644
index 000000000000..80eda79d8a42
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// Most of this code has been copied from LoopPass.cpp.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisRegion = false;
+ redoThisRegion = false;
+ RI = NULL;
+ CurrentRegion = NULL;
+}
+
+// Recurse through all subregions and push all regions into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+ RQ.push_back(R);
+ for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+ addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<RegionInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+ RI = &getAnalysis<RegionInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+ if (RQ.empty()) // No regions, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+ I != E; ++I) {
+ Region *R = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *RP = (RegionPass *)getContainedPass(Index);
+ Changed |= RP->doInitialization(R, *this);
+ }
+ }
+
+ // Walk Regions
+ while (!RQ.empty()) {
+
+ CurrentRegion = RQ.back();
+ skipThisRegion = false;
+ redoThisRegion = false;
+
+ // Run all passes on the current Region.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+ TimeRegion PassTimer(getPassTimer(P));
+ Changed |= P->runOnRegion(CurrentRegion, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+
+ if (!skipThisRegion) {
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr(),
+ ON_REGION_MSG);
+
+ if (skipThisRegion)
+ // Do not run other passes on this region.
+ break;
+ }
+
+ // If the region was deleted, release all the region passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisRegion)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_REGION_MSG);
+ }
+
+ // Pop the region from queue after running all passes.
+ RQ.pop_back();
+
+ if (redoThisRegion)
+ RQ.push_back(CurrentRegion);
+
+ // Free all region nodes created in region passes.
+ RI->clearNodeCache();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+  // Print the region tree after all passes have run.
+ DEBUG(
+ dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump();
+ dbgs() << "\n";
+ );
+
+ return Changed;
+}
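
The worklist discipline of the loop above (take the region at the back of the deque, run the passes, pop it, and optionally re-queue it) is summarized by this toy sketch. Plain ints stand in for Region pointers and the request to redo region 2 is hard-coded; none of this is real pass-manager code.

    #include <deque>
    #include <iostream>

    int main() {
      std::deque<int> RQ;
      RQ.push_back(1); RQ.push_back(2); RQ.push_back(3);

      bool redoneTwo = false;
      while (!RQ.empty()) {
        int Current = RQ.back();
        bool redoThisRegion = (Current == 2 && !redoneTwo);
        if (redoThisRegion) redoneTwo = true;

        std::cout << "running passes on region " << Current << "\n";

        RQ.pop_back();            // Pop after all passes ran on Current...
        if (redoThisRegion)
          RQ.push_back(Current);  // ...unless a pass asked to redo it.
      }
      return 0;                   // processes 3, 2, 2 (redone), then 1
    }
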
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Region Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+ PrintRegionPass(const std::string &B, raw_ostream &o)
+ : RegionPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+ Out << Banner;
+ for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+ I != E; ++I)
+ (*I)->getEntry()->print(Out);
+
+ return false;
+ }
+};
+
+char PrintRegionPass::ID = 0;
+} //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// does not preserve higher level analysis info used by other
+// RGPassManager passes. In such a case, pop the RGPassManager from the
+// stack. This will force assignPassManager() to create a new
+// RGPassManager as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by the RGPassManager, then do not
+  // insert this pass into the current RGPassManager. Use a new RGPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+ RGPassManager *RGPM;
+
+ // Create new Region Pass Manager if it does not exist.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+ RGPM = (RGPassManager*)PMS.top();
+ else {
+
+ assert (!PMS.empty() && "Unable to create Region Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Region Pass Manager
+ RGPM = new RGPassManager(PMD->getDepth() + 1);
+ RGPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(RGPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ TPM->schedulePass(RGPM);
+
+ // [4] Push new manager into PMS
+ PMS.push(RGPM);
+ }
+
+ RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintRegionPass(Banner, O);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 000000000000..a1730b0a3ca1
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,220 @@
+//===- RegionPrinter.cpp - Print region tree pass -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegions - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(RegionInfo *DT) {
+ return "Region Graph";
+ }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+ G->getTopLevelRegion());
+ }
+
+ std::string getEdgeAttributes(RegionNode *srcNode,
+ GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) {
+
+ RegionNode *destNode = *CI;
+
+ if (srcNode->isSubRegion() || destNode->isSubRegion())
+ return "";
+
+ // In case of a backedge, do not use it to define the layout of the nodes.
+ BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+ BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+ Region *R = RI->getRegionFor(destBB);
+
+ while (R && R->getParent())
+ if (R->getParent()->getEntry() == destBB)
+ R = R->getParent();
+ else
+ break;
+
+ if (R->getEntry() == destBB && R->contains(srcBB))
+ return "constraint=false";
+
+ return "";
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R->isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ printRegionCluster(*RI, GW, depth + 1);
+
+ RegionInfo *RI = R->getRegionInfo();
+
+ for (Region::const_block_iterator BI = R->block_begin(),
+ BE = R->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>();
+ if (RI->getRegionFor(BB) == R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
+ << ";\n";
+ }
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo* RI,
+ GraphWriter<RegionInfo*> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
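
To make the output of printRegionCluster above more concrete, here is a small standalone program that emits the same kind of nested "subgraph cluster_..." text for an invented two-level region nest; the ids, the fixed filled style, and the colors are illustrative only.

    #include <iostream>

    static void indent(std::ostream &O, unsigned depth) {
      for (unsigned i = 0; i < 2 * depth; ++i)
        O << ' ';
    }

    // Emit one cluster, optionally containing a single child cluster.
    static void emitCluster(std::ostream &O, unsigned id, unsigned depth,
                            bool hasChild) {
      indent(O, depth);     O << "subgraph cluster_R" << id << " {\n";
      indent(O, depth + 1); O << "label = \"\";\n";
      indent(O, depth + 1); O << "style = filled;\n";
      indent(O, depth + 1); O << "color = " << (depth * 2 % 12 + 1) << "\n";
      if (hasChild)
        emitCluster(O, id + 1, depth + 1, false);
      indent(O, depth);     O << "}\n";
    }

    int main() {
      // One region containing one subregion, e.g. a loop nested in a function.
      emitCluster(std::cout, 0, 0, true);
      return 0;
    }
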
+
+namespace {
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfo, false> {
+ static char ID;
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+ initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionViewer::ID = 0;
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfo, true> {
+ static char ID;
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+ initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyViewer::ID = 0;
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ static char ID;
+ RegionPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+} //end anonymous namespace
+
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true)
+
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true)
+
+namespace {
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ static char ID;
+ RegionOnlyPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+}
+
+char RegionOnlyPrinter::ID = 0;
+INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file "
+ "(with no function bodies)",
+ true, true)
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+FunctionPass* llvm::createRegionPrinterPass() {
+ return new RegionPrinter();
+}
+
+FunctionPass* llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 000000000000..025718e09feb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,6432 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library. First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
+//
+// One important aspect of the SCEV objects is that they are never cyclic, even
+// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
+// the PHI node is one of the idioms that we can represent (e.g., a polynomial
+// recurrence) then we represent it directly as a recurrence node, otherwise we
+// represent it as a SCEVUnknown node.
+//
+// In addition to being able to represent expressions of various types, we also
+// have folders that are used to build the *canonical* representation for a
+// particular expression. These folders are capable of using a variety of
+// rewrite rules to simplify the expressions.
+//
+// Once the folders are defined, we can implement the more interesting
+// higher-level code, such as the code that recognizes PHI nodes of various
+// types, computes the execution count of a loop, etc.
+//
+// TODO: We should use these routines and value representations to implement
+// dependence analysis!
+//
+//===----------------------------------------------------------------------===//
+//
+// There are several good references for the techniques used in this analysis.
+//
+// Chains of recurrences -- a method to expedite the evaluation
+// of closed-form functions
+// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
+//
+// On computational properties of chains of recurrences
+// Eugene V. Zima
+//
+// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
+// Robert A. van Engelen
+//
+// Efficient Symbolic Analysis for Optimizing Compilers
+// Robert A. van Engelen
+//
+// Using the chains of recurrences algebra for data dependence testing and
+// induction variable substitution
+// MS Thesis, Johnie Birch
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalar-evolution"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumArrayLenItCounts,
+ "Number of trip counts computed with array length");
+STATISTIC(NumTripCountsComputed,
+ "Number of loops with predictable loop counts");
+STATISTIC(NumTripCountsNotComputed,
+ "Number of loops without predictable loop counts");
+STATISTIC(NumBruteForceTripCountsComputed,
+ "Number of loops with trip counts computed by force");
+
+static cl::opt<unsigned>
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant "
+ "derived loop"),
+ cl::init(100));
+
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolution::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+
+void SCEV::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void SCEV::print(raw_ostream &OS) const {
+ switch (getSCEVType()) {
+ case scConstant:
+ WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+ return;
+ case scTruncate: {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+ const SCEV *Op = Trunc->getOperand();
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+ << *Trunc->getType() << ")";
+ return;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+ const SCEV *Op = ZExt->getOperand();
+ OS << "(zext " << *Op->getType() << " " << *Op << " to "
+ << *ZExt->getType() << ")";
+ return;
+ }
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+ const SCEV *Op = SExt->getOperand();
+ OS << "(sext " << *Op->getType() << " " << *Op << " to "
+ << *SExt->getType() << ")";
+ return;
+ }
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+ OS << "{" << *AR->getOperand(0);
+ for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+ OS << ",+," << *AR->getOperand(i);
+ OS << "}<";
+ if (AR->getNoWrapFlags(FlagNUW))
+ OS << "nuw><";
+ if (AR->getNoWrapFlags(FlagNSW))
+ OS << "nsw><";
+ if (AR->getNoWrapFlags(FlagNW) &&
+ !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
+ OS << "nw><";
+ WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+ OS << ">";
+ return;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+ const char *OpStr = 0;
+ switch (NAry->getSCEVType()) {
+ case scAddExpr: OpStr = " + "; break;
+ case scMulExpr: OpStr = " * "; break;
+ case scUMaxExpr: OpStr = " umax "; break;
+ case scSMaxExpr: OpStr = " smax "; break;
+ }
+ OS << "(";
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ OS << **I;
+ if (llvm::next(I) != E)
+ OS << OpStr;
+ }
+ OS << ")";
+ return;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+ OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+ return;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(this);
+ const Type *AllocTy;
+ if (U->isSizeOf(AllocTy)) {
+ OS << "sizeof(" << *AllocTy << ")";
+ return;
+ }
+ if (U->isAlignOf(AllocTy)) {
+ OS << "alignof(" << *AllocTy << ")";
+ return;
+ }
+
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo)) {
+ OS << "offsetof(" << *CTy << ", ";
+ WriteAsOperand(OS, FieldNo, false);
+ OS << ")";
+ return;
+ }
+
+ // Otherwise just print it normally.
+ WriteAsOperand(OS, U->getValue(), false);
+ return;
+ }
+ case scCouldNotCompute:
+ OS << "***COULDNOTCOMPUTE***";
+ return;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+const Type *SCEV::getType() const {
+ switch (getSCEVType()) {
+ case scConstant:
+ return cast<SCEVConstant>(this)->getType();
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return cast<SCEVCastExpr>(this)->getType();
+ case scAddRecExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ return cast<SCEVNAryExpr>(this)->getType();
+ case scAddExpr:
+ return cast<SCEVAddExpr>(this)->getType();
+ case scUDivExpr:
+ return cast<SCEVUDivExpr>(this)->getType();
+ case scUnknown:
+ return cast<SCEVUnknown>(this)->getType();
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return 0;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+}
+
+bool SCEV::isZero() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isZero();
+ return false;
+}
+
+bool SCEV::isOne() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isOne();
+ return false;
+}
+
+bool SCEV::isAllOnesValue() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isAllOnesValue();
+ return false;
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() :
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+ return S->getSCEVType() == scCouldNotCompute;
+}
+
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scConstant);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
+}
+
+const SCEV *
+ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
+ const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ return getConstant(ConstantInt::get(ITy, V, isSigned));
+}
+
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
+ unsigned SCEVTy, const SCEV *op, const Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate non-integer value!");
+}
+
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot zero extend non-integer value!");
+}
+
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot sign extend non-integer value!");
+}
+
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
+bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getNumOperands() == 2)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+ if (CI->isOne()) {
+ AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType();
+ return true;
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked() &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ AllocTy = STy->getElementType(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getOperand(1)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ // Ignore vector types here so that ScalarEvolutionExpander doesn't
+ // emit getelementptrs that index into vectors.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ CTy = Ty;
+ FieldNo = CE->getOperand(2);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+ /// than the complexity of the RHS. This comparator is used to canonicalize
+ /// expressions.
+ class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+ public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+
+    // Return true if LHS is less than RHS, false if LHS is at least RHS.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (LType) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
+ }
+
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ return 0;
+ }
+
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
+
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ default:
+ break;
+ }
+
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+ }
+ };
+}
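
The reason compare() returns a three-way result while operator() returns a bool can be seen with a much smaller comparator. The sketch below sorts a stand-in Expr struct first by a "kind" and then by a value, reusing one three-way compare() for both jobs; the struct and the sample data are invented and have nothing to do with real SCEVs.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct Expr { int Kind; int Value; };

    struct ComplexityCompare {
      // Negative / zero / positive, like SCEVComplexityCompare::compare.
      int compare(const Expr &L, const Expr &R) const {
        if (L.Kind != R.Kind)
          return L.Kind - R.Kind;  // Primarily sort by kind ("getSCEVType").
        return L.Value - R.Value;  // Tie-break so the order is deterministic.
      }
      bool operator()(const Expr &L, const Expr &R) const {
        return compare(L, R) < 0;  // The binary predicate stable_sort wants.
      }
    };

    int main() {
      Expr A = {2, 7}, B = {1, 9}, C = {2, 3};
      std::vector<Expr> Ops;
      Ops.push_back(A); Ops.push_back(B); Ops.push_back(C);
      std::stable_sort(Ops.begin(), Ops.end(), ComplexityCompare());
      for (size_t i = 0; i < Ops.size(); ++i)
        std::cout << "(" << Ops[i].Kind << "," << Ops[i].Value << ") ";
      std::cout << "\n";  // prints: (1,9) (2,3) (2,7)
      return 0;
    }
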
+
+/// GroupByComplexity - Given a list of SCEV objects, order them by their
+/// complexity, and group objects of the same complexity together by value.
+/// When this routine is finished, we know that any duplicates in the vector are
+/// consecutive and that complexity is monotonically increasing.
+///
+/// Note that we take special precautions to ensure that we get deterministic
+/// results from this routine. In other words, we don't want the results of
+/// this to depend on where the addresses of various SCEV objects happened to
+/// land in memory.
+///
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
+ LoopInfo *LI) {
+ if (Ops.size() < 2) return; // Noop
+ if (Ops.size() == 2) {
+ // This is the common case, which also happens to be trivially simple.
+ // Special case it.
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
+ return;
+ }
+
+ // Do the rough sort by complexity.
+ std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+
+ // Now that we are sorted by complexity, group elements of the same
+ // complexity. Note that this is, at worst, N^2, but the vector is likely to
+ // be extremely short in practice. Note that we take this approach because we
+ // do not want to depend on the addresses of the objects we are grouping.
+ for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
+ const SCEV *S = Ops[i];
+ unsigned Complexity = S->getSCEVType();
+
+ // If there are any objects of the same complexity and same value as this
+ // one, group them.
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
+ if (Ops[j] == S) { // Found a duplicate.
+ // Move it to immediately after i'th element.
+ std::swap(Ops[i+1], Ops[j]);
+ ++i; // no need to rescan it.
+ if (i == e-2) return; // Done!
+ }
+ }
+ }
+}
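
The grouping loop above is subtle enough to deserve a tiny model. In the sketch below, (complexity, identity) pairs stand in for SCEV pointers that are already sorted by complexity, and the same swap-and-advance logic moves the duplicate 'a' next to the first one without ever looking at object addresses; the data is invented.

    #include <iostream>
    #include <utility>
    #include <vector>

    typedef std::pair<int, char> Op;  // (complexity, identity) stand-in

    // Mirrors the grouping loop in GroupByComplexity; assumes Ops.size() >= 3,
    // just as GroupByComplexity handles the smaller cases before this loop.
    static void groupDuplicates(std::vector<Op> &Ops) {
      unsigned e = Ops.size();
      for (unsigned i = 0; i != e - 2; ++i) {
        Op S = Ops[i];
        int Complexity = S.first;
        for (unsigned j = i + 1; j != e && Ops[j].first == Complexity; ++j) {
          if (Ops[j] == S) {                // Found a duplicate.
            std::swap(Ops[i + 1], Ops[j]);  // Move it right after element i.
            ++i;
            if (i == e - 2) return;
          }
        }
      }
    }

    int main() {
      std::vector<Op> Ops;
      Ops.push_back(Op(1, 'a'));  // already sorted by complexity;
      Ops.push_back(Op(1, 'b'));  // 'a' occurs twice, but not adjacently
      Ops.push_back(Op(1, 'a'));
      Ops.push_back(Op(2, 'c'));

      groupDuplicates(Ops);

      for (unsigned i = 0; i != Ops.size(); ++i)
        std::cout << Ops[i].second << ' ';
      std::cout << '\n';          // prints: a a b c
      return 0;
    }
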
+
+
+
+//===----------------------------------------------------------------------===//
+// Simple SCEV method implementations
+//===----------------------------------------------------------------------===//
+
+/// BinomialCoefficient - Compute BC(It, K). The result has width W.
+/// Assume, K > 0.
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+ // Suppose, W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must assure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+  // but this formula requires less than W + K bits. Also, the first formula
+  // requires a division step, whereas this formula only requires multiplies
+  // and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying for calculating K! / 2^T to avoid overflow.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
+ APInt OddFactorial(W, 1);
+ unsigned T = 1;
+ for (unsigned i = 3; i <= K; ++i) {
+ APInt Mult(W, i);
+ unsigned TwoFactors = Mult.countTrailingZeros();
+ T += TwoFactors;
+ Mult = Mult.lshr(TwoFactors);
+ OddFactorial *= Mult;
+ }
+
+ // We need at least W + T bits for the multiplication step
+ unsigned CalculationBits = W + T;
+
+ // Calculate 2^T, at width T+W.
+ APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+
+ // Calculate the multiplicative inverse of K! / 2^T;
+ // this multiplication factor will perform the exact division by
+ // K! / 2^T.
+ APInt Mod = APInt::getSignedMinValue(W+1);
+ APInt MultiplyFactor = OddFactorial.zext(W+1);
+ MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
+ MultiplyFactor = MultiplyFactor.trunc(W);
+
+ // Calculate the product, at width T+W
+ const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ for (unsigned i = 1; i != K; ++i) {
+ const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
+ Dividend = SE.getMulExpr(Dividend,
+ SE.getTruncateOrZeroExtend(S, CalculationTy));
+ }
+
+ // Divide by 2^T
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+
+ // Truncate the result, and divide by K! / 2^T.
+
+ return SE.getMulExpr(SE.getConstant(MultiplyFactor),
+ SE.getTruncateOrZeroExtend(DivResult, ResultTy));
+}
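
As a numeric sanity check of the "exact division by an odd number via its multiplicative inverse" trick described above, the following standalone program computes BC(10, 3) = 120 at W = 32 bits. The choice of It = 10 and K = 3, and the little Newton-iteration inverse, are illustrative only.

    #include <iostream>
    #include <stdint.h>

    // Inverse of an odd number modulo 2^32, via Newton iteration.
    static uint32_t inverseMod2_32(uint32_t a) {
      uint32_t x = a;                    // correct to 3 bits for any odd a
      for (int i = 0; i < 5; ++i)
        x *= 2 - a * x;                  // each step doubles the good bits
      return x;
    }

    int main() {
      const uint32_t It = 10, K = 3;

      // K! = 6 = 2^T * OddFactorial with T = 1 and OddFactorial = 3.
      const uint32_t T = 1, OddFactorial = 3;

      // It * (It - 1) * ... * (It - K + 1), computed modulo 2^32.
      uint32_t Dividend = 1;
      for (uint32_t i = 0; i != K; ++i)
        Dividend *= It - i;              // 10 * 9 * 8 = 720

      uint32_t DivResult = Dividend >> T;  // exact division by 2^T
      uint32_t BC = DivResult * inverseMod2_32(OddFactorial);

      std::cout << "BC(10, 3) = " << BC << "\n";  // prints BC(10, 3) = 120
      return 0;
    }
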
+
+/// evaluateAtIteration - Return the value of this chain of recurrences at
+/// the specified iteration number. We can evaluate this recurrence by
+/// multiplying each element in the chain by the binomial coefficient
+/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
+///
+/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
+///
+/// where BC(It, k) stands for binomial coefficient.
+///
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ // The computation is correct in the face of overflow provided that the
+ // multiplication is performed _after_ the evaluation of the binomial
+ // coefficient.
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
+ if (isa<SCEVCouldNotCompute>(Coeff))
+ return Coeff;
+
+ Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
+ }
+ return Result;
+}
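+
+// As an illustration (arbitrary example values): evaluating {0,+,3,+,2} at
+// iteration It yields 0 + 3*BC(It,1) + 2*BC(It,2) = 3*It + It*(It-1)
+// = It*It + 2*It, which matches summing the first It step values 3, 5, 7, ...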
+
+//===----------------------------------------------------------------------===//
+// SCEV Expression folder implementations
+//===----------------------------------------------------------------------===//
+
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
+ "This is not a truncating conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // trunc(trunc(x)) --> trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
+ return getTruncateExpr(ST->getOperand(), Ty);
+
+ // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getTruncateOrSignExtend(SS->getOperand(), Ty);
+
+ // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+
+ // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getAddExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getMulExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // If the input value is a chrec scev, truncate the chrec's operands.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ }
+
+ // As a special case, fold trunc(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
+ // The cast wasn't folded; create an explicit cast node. We can reuse
+ // the existing insert position since if we get here, we won't have
+ // made any changes which would invalidate it.
+ SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
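+
+// A rough usage sketch (X is a hypothetical i8 SCEV): truncating
+// (zext i8 X to i32) to i16 hits the zext case above and becomes
+// getTruncateOrZeroExtend(X, i16), i.e. (zext i8 X to i16), so no explicit
+// SCEVTruncateExpr node is created for it.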
+
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // zext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // zext(trunc(x)) --> zext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all zero bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getUnsignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+ CR.zextOrTrunc(NewBits)))
+ return getTruncateOrZeroExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can zero extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNUW))
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+        // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no unsigned overflow.
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *Add = getAddExpr(Start, ZMul);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ Add = getAddExpr(Start, SMul);
+ OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+        // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
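+
+// As a concrete illustration of the add-recurrence case (the unsigned-char
+// loop mentioned in the chrec comment above): for {0,+,1}<i8> with a max
+// backedge-taken count of 99, Start + Step*99 = 99 still fits in i8, so the
+// zero extension to i32 folds to {0,+,1}<i32> and NUW is cached on the
+// original recurrence instead of wrapping it in a SCEVZeroExtendExpr.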
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return 0;
+}
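+
+// A worked instance (hypothetical numbers): for an i8 step whose signed range
+// is [1, 3], this returns Pred = ICMP_SLT and the constant -128 - 3, which
+// wraps to 125 = 127 - 3 + 1 in i8. If the recurrence is always SLT 125
+// before the increment, adding at most 3 cannot step past the signed maximum.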
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we can
+// prove NSW for AR, then we can just as easily prove NSW for its preincrement
+// or postincrement sibling. This allows normalizing a sign-extended AddRec as
+// such: {sext(Step + Start),+,Step} => {Step + sext(Start),+,Step}. As a
+// result, the expression "Step + sext(PreIncAR)" is congruent with
+// "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+ const Type *Ty,
+ ScalarEvolution *SE) {
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+ return 0;
+
+ // This is a postinc AR. Check for overflow on the preinc recurrence using the
+  // same three conditions that getSignExtendExpr checks.
+
+ // 1. NSW flags on the step increment.
+ const SCEV *PreStart = SA->getOperand(1);
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+ SE->getSignExtendExpr(Step, WideTy));
+ if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+ // Cache knowledge of PreAR NSW.
+ if (PreAR)
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+ // FIXME: this optimization needs a unit test
+ DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+ const Type *Ty,
+ ScalarEvolution *SE) {
+ const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+ if (!PreStart)
+ return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+ return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+ SE->getSignExtendExpr(PreStart, Ty));
+}
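+
+// A hedged illustration (hypothetical i8 values): for AR = {%n + 1,+,1} whose
+// pre-increment sibling {%n,+,1} is already known NSW, getPreStartForSignExtend
+// returns %n, so the start is normalized to (sext i8 1 to Ty) + (sext i8 %n
+// to Ty) rather than sext(%n + 1). This is what keeps the pre- and
+// post-increment forms of the same recurrence congruent after widening.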
+
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // sext(sext(x)) --> sext(x)
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getSignExtendExpr(SS->getOperand(), Ty);
+
+ // sext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // If the input value is provably positive, build a zext instead.
+ if (isKnownNonNegative(Op))
+ return getZeroExtendExpr(Op, Ty);
+
+ // sext(trunc(x)) --> sext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all sign bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getSignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+ CR.sextOrTrunc(NewBits)))
+ return getTruncateOrSignExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can sign extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, SCEV::FlagNSW);
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+        // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no signed overflow.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *Add = getAddExpr(Start, SMul);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
+ Add = getAddExpr(Start, UMul);
+ OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+      // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+ isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+ OverflowLimit)))) {
+ // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
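+
+// As with the zero-extend case, a short illustration: for the signed-char
+// loop mentioned above, sign-extending {0,+,1}<i8> to i32 with a max
+// backedge-taken count of 99 proves Start + Step*99 = 99 cannot overflow i8,
+// so the result is the addrec {0,+,1}<i32> with NSW cached on the original
+// recurrence, rather than a SCEVSignExtendExpr wrapping it.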
+
+/// getAnyExtendExpr - Return a SCEV for the given operand extended with
+/// unspecified bits out to the given type.
+///
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Sign-extend negative constants.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ if (SC->getValue()->getValue().isNegative())
+ return getSignExtendExpr(Op, Ty);
+
+ // Peel off a truncate cast.
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
+ const SCEV *NewOp = T->getOperand();
+ if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
+ return getAnyExtendExpr(NewOp, Ty);
+ return getTruncateOrNoop(NewOp, Ty);
+ }
+
+ // Next try a zext cast. If the cast is folded, use it.
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
+ if (!isa<SCEVZeroExtendExpr>(ZExt))
+ return ZExt;
+
+ // Next try a sext cast. If the cast is folded, use it.
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
+ if (!isa<SCEVSignExtendExpr>(SExt))
+ return SExt;
+
+ // Force the cast to be folded into the operands of an addrec.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Ops;
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ Ops.push_back(getAnyExtendExpr(*I, Ty));
+ return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
+ }
+
+ // As a special case, fold anyext(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
+ // If the expression is obviously signed, use the sext cast value.
+ if (isa<SCEVSMaxExpr>(Op))
+ return SExt;
+
+ // Absent any other information, use the zext cast value.
+ return ZExt;
+}
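+
+// Two small illustrations (hypothetical operands): extending the i8 constant
+// -1 to i32 takes the sign-extend path and yields i32 -1, while extending
+// (trunc i32 X to i8) back to i32 peels the truncate and simply returns X.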
+
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, and update the
+/// given map. This is a helper function for getAddExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVector<const SCEV *, 8> &NewOps,
+ APInt &AccumulatedConstant,
+ const SCEV *const *Ops, size_t NumOperands,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands. They are sorted, with constants first.
+ unsigned i = 0;
+ while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ ++i;
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ }
+
+ // Next comes everything else. We're especially interested in multiplies
+ // here, but they're in the middle, so just visit the rest with one loop.
+ for (; i != NumOperands; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Add->op_begin(), Add->getNumOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Key, NewScale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], Scale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
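+
+// A small worked instance (hypothetical operands): for the operand list
+// (5, %x, (2 * %x)) at scale 1, the constant 5 goes to AccumulatedConstant,
+// %x is inserted into the map with scale 1, and (2 * %x) bumps that entry to
+// (%x, 3) and marks the result interesting; getAddExpr can then rebuild the
+// whole sum as 5 + (3 * %x).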
+
+namespace {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+}
+
+/// getAddExpr - Get a canonical add expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty add!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVAddExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ Ops[0] = getConstant(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ if (Ops.size() == 2) return Ops[0];
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero being added, strip it off.
+ if (LHSC->getValue()->isZero()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Okay, check to see if the same value occurs in the operand list more than
+  // once. If so, merge them together into a multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
+ const Type *Ty = Ops[0]->getType();
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+ if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
+ return Mul;
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
+ }
+ if (FoundMatch)
+ return getAddExpr(Ops, Flags);
+
+ // Check for truncates. If all the operands are truncated from the same
+ // type, see if factoring out the truncate would permit the result to be
+  // folded. e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // if the contents of the resulting outer trunc fold to something simple.
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+ const Type *DstType = Trunc->getType();
+ const Type *SrcType = Trunc->getOperand()->getType();
+ SmallVector<const SCEV *, 8> LargeOps;
+ bool Ok = true;
+ // Check all the operands to see if they can be represented in the
+ // source type of the truncate.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+ SmallVector<const SCEV *, 8> LargeMulOps;
+ for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+ if (const SCEVTruncateExpr *T =
+ dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeMulOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C =
+ dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok)
+ LargeOps.push_back(getMulExpr(LargeMulOps));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok) {
+ // Evaluate the expression in the larger type.
+ const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ // If it folds to something simple, use it. Otherwise, don't.
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+ return getTruncateExpr(Fold, DstType);
+ }
+ }
+
+ // Skip past any other cast SCEVs.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+ ++Idx;
+
+ // If there are add operands they would be next.
+ if (Idx < Ops.size()) {
+ bool DeletedAdd = false;
+ while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ // If we have an add, expand the add operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Add->op_begin(), Add->op_end());
+ DeletedAdd = true;
+ }
+
+ // If we deleted at least one add, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedAdd)
+ return getAddExpr(Ops);
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops.data(), Ops.size(),
+ APInt(BitWidth, 1), *this)) {
+      // Some interesting folding opportunity is present, so it's worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+ E = NewOps.end(); I != E; ++I)
+ MulOpLists[M.find(*I)->second].push_back(*I);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
+ I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+ if (I->first != 0)
+ Ops.push_back(getMulExpr(getConstant(I->first),
+ getAddExpr(I->second)));
+ if (Ops.empty())
+ return getConstant(Ty, 0);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
+ // If we are adding something to a multiply expression, make sure the
+ // something is not already an operand of the multiply. If so, merge it into
+ // the multiply.
+ for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+ const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+ for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+ const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
+ for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+ if (MulOpSCEV == Ops[AddOp]) {
+ // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ // If the multiply has more than two operands, we must get the
+ // Y*Z term.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul = getMulExpr(MulOps);
+ }
+ const SCEV *One = getConstant(Ty, 1);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ if (Ops.size() == 2) return OuterMul;
+ if (AddOp < Idx) {
+ Ops.erase(Ops.begin()+AddOp);
+ Ops.erase(Ops.begin()+Idx-1);
+ } else {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+AddOp-1);
+ }
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+
+ // Check this multiply against other multiplies being added together.
+ for (unsigned OtherMulIdx = Idx+1;
+ OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+ ++OtherMulIdx) {
+ const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+ // If MulOp occurs in OtherMul, we can fold the two multiplies
+ // together.
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+ OMulOp != e; ++OMulOp)
+ if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+ // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul1 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ if (OtherMul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
+ InnerMul2 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ if (Ops.size() == 2) return OuterMul;
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherMulIdx-1);
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+ }
+ }
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this add and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
+ LIOps.push_back(AddRec->getStart());
+
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ AddRecOps[0] = getAddExpr(LIOps);
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer add and the inner addrec are guaranteed to have no overflow.
+ // Always propagate NW.
+ Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+      // Otherwise, add the folded AddRec to the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getAddExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // added together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ // Step size has changed, so we cannot guarantee no self-wraparound.
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+ return getAddExpr(Ops);
+ }
+
+    // Otherwise couldn't fold anything into this recurrence. Move on to the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an add expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVAddExpr *S =
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
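+
+// Two brief illustrations (hypothetical operands): getAddExpr on (X, Y, Y)
+// merges the repeated operand into X + (2 * Y); and adding a value Z that is
+// invariant in loop L to {1,+,2}<L> folds it into the recurrence start,
+// giving {1+Z,+,2}<L>.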
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty mul!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVMulExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (Ops.size() == 2)
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ if (Add->getNumOperands() == 2 &&
+ isa<SCEVConstant>(Add->getOperand(0)))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
+
+ ++Idx;
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ LHSC->getValue()->getValue() *
+ RHSC->getValue()->getValue());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ // If we have a multiply of zero, it will always be zero.
+ return Ops[0];
+ } else if (Ops[0]->isAllOnesValue()) {
+ // If we have a mul by -1 of an add, try distributing the -1 among the
+ // add operands.
+ if (Ops.size() == 2) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+ SmallVector<const SCEV *, 4> NewOps;
+ bool AnyFolded = false;
+ for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
+ E = Add->op_end(); I != E; ++I) {
+ const SCEV *Mul = getMulExpr(Ops[0], *I);
+ if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+ NewOps.push_back(Mul);
+ }
+ if (AnyFolded)
+ return getAddExpr(NewOps);
+ }
+ else if (const SCEVAddRecExpr *
+ AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ // Negation preserves a recurrence's no self-wrap property.
+ SmallVector<const SCEV *, 4> Operands;
+ for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
+ E = AddRec->op_end(); I != E; ++I) {
+ Operands.push_back(getMulExpr(Ops[0], *I));
+ }
+ return getAddRecExpr(Operands, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ }
+ }
+ }
+
+ if (Ops.size() == 1)
+ return Ops[0];
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // If there are mul operands inline them all into this expression.
+ if (Idx < Ops.size()) {
+ bool DeletedMul = false;
+ while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+      // If we have a mul, expand the mul operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Mul->op_begin(), Mul->op_end());
+ DeletedMul = true;
+ }
+
+ // If we deleted at least one mul, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedMul)
+ return getMulExpr(Ops);
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this mul and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.reserve(AddRec->getNumOperands());
+ const SCEV *Scale = getMulExpr(LIOps);
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer mul and the inner addrec are guaranteed to have no overflow.
+ //
+      // No-self-wrap cannot be guaranteed after changing the step size, but
+      // it will be inferred if either NUW or NSW is true.
+ Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, multiply the folded AddRec by the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // multiplied together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
+ // {A*C,+,F*D + G*B + B*D}<L>
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+ const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
+ getMulExpr(G, B),
+ getMulExpr(B, D));
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
+ F->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ return getMulExpr(Ops);
+ }
+
+    // Otherwise couldn't fold anything into this recurrence. Move on to the
+ // next one.
+ }
+
+  // Okay, it looks like we really DO need a mul expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
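+
+// A brief illustration (hypothetical operands): getMulExpr on (2, (3 + %x))
+// hits the C1*(C2+V) case and distributes to getAddExpr(2*3, 2*%x),
+// i.e. (6 + (2 * %x)).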
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVUDivExpr operand types don't match!");
+
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (RHSC->getValue()->equalsInt(1))
+      return LHS;                               // X udiv 1 --> X
+ // If the denominator is zero, the result of the udiv is undefined. Don't
+ // try to analyze it, because the resolution chosen here may differ from
+ // the resolution chosen in other parts of the compiler.
+ if (!RHSC->getValue()->isZero()) {
+      // Determine if the division can be folded into the operands of
+      // its left-hand operand.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ const Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getValue()->getValue().isPowerOf2())
+ ++MaxShiftAmt;
+ const IntegerType *ExtTy =
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
+ if (!Step->getValue()->getValue()
+ .urem(RHSC->getValue()->getValue()) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+ Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+ return getAddRecExpr(Operands, AR->getLoop(),
+ SCEV::FlagNW);
+ }
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
+ M->op_end());
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) ||
+ getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
+ }
+ }
+
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+ RHSCV)));
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUDivExpr);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
+ LHS, RHS);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
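+
+// For illustration: a divisor of 1 returns the dividend unchanged
+// (X udiv 1 --> X), and two constants fold directly, e.g. 6 /u 2 becomes the
+// constant 3; anything that cannot be simplified is uniqued as a
+// SCEVUDivExpr node.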
+
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
+ const Loop *L,
+ SCEV::NoWrapFlags Flags) {
+ SmallVector<const SCEV *, 4> Operands;
+ Operands.push_back(Start);
+ if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+ if (StepChrec->getLoop() == L) {
+ Operands.append(StepChrec->op_begin(), StepChrec->op_end());
+ return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
+ }
+
+ Operands.push_back(Step);
+ return getAddRecExpr(Operands, L, Flags);
+}
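+
+// A short illustration (hypothetical operands): if Step is itself the addrec
+// {1,+,1}<L> on the same loop, getAddRecExpr(Start, Step, L) flattens the
+// nesting into the single recurrence {Start,+,1,+,1}<L>.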
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
+ "SCEVAddRecExpr operand types don't match!");
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ assert(isLoopInvariant(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not loop-invariant!");
+#endif
+
+ if (Operands.back()->isZero()) {
+ Operands.pop_back();
+ return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
+ }
+
+  // It's tempting to want to call getMaxBackedgeTakenCount here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+ E = Operands.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+  // Canonicalize nested AddRecs by nesting them in order of loop depth.
+ if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+ const Loop *NestedLoop = NestedAR->getLoop();
+ if (L->contains(NestedLoop) ?
+ (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+ (!NestedLoop->contains(L) &&
+ DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
+ Operands[0] = NestedAR->getStart();
+ // AddRecs require their operands be loop-invariant with respect to their
+ // loops. Don't perform this transformation if it would break this
+ // requirement.
+ bool AllInvariant = true;
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ if (!isLoopInvariant(Operands[i], L)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Create a recurrence for the outer loop with the same step size.
+ //
+ // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
+ // inner recurrence has the same property.
+ SCEV::NoWrapFlags OuterFlags =
+ maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
+
+ NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
+ AllInvariant = true;
+ for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
+ if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Ok, both add recurrences are valid after the transformation.
+ //
+ // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
+ // the outer recurrence has the same property.
+ SCEV::NoWrapFlags InnerFlags =
+ maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
+ return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
+ }
+ }
+ // Reset Operands to its original state.
+ Operands[0] = NestedAR;
+ }
+ }
+
+ // Okay, it looks like we really DO need an addrec expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ ID.AddPointer(Operands[i]);
+ ID.AddPointer(L);
+ void *IP = 0;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+ std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+ S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
+ O, Operands.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getSMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty smax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVSMaxExpr operand types don't match!");
+#endif
+
+  // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
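+  // e.g. smax(2, 7, %x) becomes smax(7, %x).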
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::smax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+ // If we have an smax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first SMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is an SMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedSMax = false;
+ while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(SMax->op_begin(), SMax->op_end());
+ DeletedSMax = true;
+ }
+
+ if (DeletedSMax)
+ return getSMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X smax Y smax Y --> X smax Y
+    // X smax Y --> X, if X is always greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+ // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getUMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty umax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVUMaxExpr operand types don't match!");
+#endif
+
+  // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::umax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
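+    // e.g. umax(0, %x) is just %x, since 0 is the unsigned minimum.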
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+ // If we have an umax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first UMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is a UMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedUMax = false;
+ while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(UMax->op_begin(), UMax->op_end());
+ DeletedUMax = true;
+ }
+
+ if (DeletedUMax)
+ return getUMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X umax Y umax Y --> X umax Y
+    // X umax Y --> X, if X is always greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+ // Okay, it looks like we really DO need a umax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
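+  // Since ~z == -1 - z, complementing reverses the signed order, so the smax
+  // of the complements, complemented again, yields the smin of the originals.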
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getTypeAllocSize(AllocTy));
+
+ Constant *C = ConstantExpr::getSizeOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+ Constant *C = ConstantExpr::getAlignOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+ unsigned FieldNo) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
+ Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+ Constant *FieldNo) {
+ Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+ // Don't attempt to do anything other than create a SCEVUnknown object
+ // here. createSCEV only calls getUnknown after checking for all other
+ // interesting possibilities, and any other code that calls getUnknown
+ // is doing so in order to hide a value from SCEV canonicalization.
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUnknown);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This includes integer types and pointer types;
+/// pointer types are analyzable even without target-specific information,
+/// in which case a conservative pointer width is assumed.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+ // Integers and pointers are always SCEVable.
+ return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ // If we have a TargetData, use it!
+ if (TD)
+ return TD->getTypeSizeInBits(Ty);
+
+ // Integer types have fixed sizes.
+ if (Ty->isIntegerTy())
+ return Ty->getPrimitiveSizeInBits();
+
+  // The only other supported type is pointer. Without TargetData, conservatively
+ // assume pointers are 64-bit.
+ assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
+ return 64;
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ if (Ty->isIntegerTy())
+ return Ty;
+
+  // The only other supported type is pointer.
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+ if (TD) return TD->getIntPtrType(getContext());
+
+ // Without TargetData, conservatively assume pointers are 64-bit.
+ return Type::getInt64Ty(getContext());
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return &CouldNotCompute;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+ if (I != ValueExprMap.end()) return I->second;
+ const SCEV *S = createSCEV(V);
+
+ // The process of creating a SCEV for V may have caused other SCEVs
+ // to have been created, so it's necessary to insert the new entry
+ // from scratch, rather than trying to remember the insert position
+ // above.
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ return S;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ return getMulExpr(V,
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+ return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags) {
+ assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
+
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getConstant(LHS->getType(), 0);
+
+ // X - Y --> X + -Y
+ return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+ const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+         "Cannot truncate or sign extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or zero extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrZeroExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or sign extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrSignExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended,
+/// it is extended with unspecified bits. The conversion must not be
+/// narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or any extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrAnyExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getAnyExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. The conversion must not be widening.
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or noop with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+ "getTruncateOrNoop cannot extend!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getTruncateExpr(V, Ty);
+}
+
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getUMinFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umin operation
+/// with them.
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMinExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+ // A pointer operand may evaluate to a nonpointer expression, such as null.
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ return getPointerBase(Cast->getOperand());
+ }
+ else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ const SCEV *PtrOp = 0;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ if ((*I)->getType()->isPointerTy()) {
+ // Cannot find the base of an expression with multiple pointer operands.
+ if (PtrOp)
+ return V;
+ PtrOp = *I;
+ }
+ }
+ if (!PtrOp)
+ return V;
+ return getPointerBase(PtrOp);
+ }
+ return V;
+}
+
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+}
+
+/// ForgetSymbolicName - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the ValueExprMap if they reference SymName. This is used during PHI
+/// resolution.
+void
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(PN, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(PN);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (Old != SymName && !hasOperand(Old, SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+    // structure, it's a PHI that's in the process of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(Old) ||
+ (I != PN && Old == SymName)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
+/// a loop header, making it a potential recurrence, or it doesn't.
+///
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const Loop *L = LI->getLoopFor(PN->getParent()))
+ if (L->getHeader() == PN->getParent()) {
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = 0, *StartValueV = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = 0;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = 0;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
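+          // e.g. for %i.next = add %i, 1, BEValue is the symbolic %i plus 1;
+          // stripping the symbolic %i leaves an accumulator of 1, giving the
+          // addrec {start,+,1}.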
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ } else if (const GEPOperator *GEP =
+ dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer.
+ if (GEP->isInBounds())
+ Flags = setFlags(Flags, SCEV::FlagNW);
+ }
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+ Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(BEValue)) {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+        // Because the other in-value of i (0) fits the evolution of BEValue,
+ // i really is an addrec evolution.
+ if (AddRec->getLoop() == L && AddRec->isAffine()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+
+ // If StartVal = j.start - j.stride, we can use StartVal as the
+ // initial step of the addrec evolution.
+ if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+ AddRec->getOperand(1))) {
+ // FIXME: For constant StartVal, we should be able to infer
+ // no-wrap flags.
+ const SCEV *PHISCEV =
+ getAddRecExpr(StartVal, AddRec->getOperand(1), L,
+ SCEV::FlagAnyWrap);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ }
+ }
+ }
+
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (LI->replacementPreservesLCSSAForm(PN, V))
+ return getSCEV(V);
+
+ // If it's not a loop phi, we can't handle it yet.
+ return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
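+/// For example, a GEP indexing into an array of i32 scales the sign-extended
+/// index by the element size (4 bytes on typical targets) and adds it to the
+/// base pointer.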
+///
+const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+
+ // Don't blindly transfer the inbounds flag from the GEP instruction to the
+ // Add expression, because the Instruction may be guarded by control flow
+ // and the no-overflow bits may not be valid for the expression in any
+ // context.
+ bool isInBounds = GEP->isInBounds();
+
+ const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+ return getUnknown(GEP);
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+ E = GEP->op_end();
+ I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+ } else {
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *IndexS = getSCEV(Index);
+ // Getelementptr indices are signed.
+ IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
+ isInBounds ? SCEV::FlagNSW :
+ SCEV::FlagAnyWrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Get the SCEV for the GEP base.
+ const SCEV *BaseS = getSCEV(Base);
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseS, TotalOffset,
+ isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countTrailingZeros();
+
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
+
+ if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+    // The result is the sum of all operands' results.
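+    // e.g. factors divisible by 4 and by 8 give a product divisible by 32
+    // (2 + 3 = 5 trailing zeros), capped at the bit width.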
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
+ for (unsigned i = 1, e = M->getNumOperands();
+ SumOpRes != BitWidth && i != e; ++i)
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
+ BitWidth);
+ return SumOpRes;
+ }
+
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
+ return 0;
+}
+
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+ if (I != UnsignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum unsigned value will have those
+ // known zeros as well.
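+  // e.g. with two known trailing zero bits, the range maximum is the largest
+  // multiple of 4 representable in the bit width.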
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getUnsignedRange(Add->getOperand(i)));
+ return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+ return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+ return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+ return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UDiv->getLHS());
+ ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+ return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(ZExt->getOperand());
+ return setUnsignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SExt->getOperand());
+ return setUnsignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Trunc->getOperand());
+ return setUnsignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no unsigned wrap, the value will never be less than its
+ // initial value.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+ if (!C->getValue()->isZero())
+ ConservativeResult =
+ ConservativeResult.intersectWith(
+ ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setUnsignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+ EndRange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+ EndRange.getUnsignedMax());
+ if (Min.isMinValue() && Max.isMaxValue())
+ return setUnsignedRange(AddRec, ConservativeResult);
+ return setUnsignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setUnsignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ if (Ones == ~Zeros + 1)
+ return setUnsignedRange(U, ConservativeResult);
+ return setUnsignedRange(U,
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
+ }
+
+ return setUnsignedRange(S, ConservativeResult);
+}
+
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+ if (I != SignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum signed value will have those
+ // known zeros as well.
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getSignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getSignedRange(Add->getOperand(i)));
+ return setSignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getSignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getSignedRange(Mul->getOperand(i)));
+ return setSignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getSignedRange(SMax->getOperand(i)));
+ return setSignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getSignedRange(UMax->getOperand(i)));
+ return setSignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getSignedRange(UDiv->getLHS());
+ ConstantRange Y = getSignedRange(UDiv->getRHS());
+ return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(ZExt->getOperand());
+ return setSignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(SExt->getOperand());
+ return setSignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getSignedRange(Trunc->getOperand());
+ return setSignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no signed wrap, and all the operands have the same sign or
+ // zero, the value won't ever change sign.
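+    // For instance, an <nsw> addrec whose operands are all known non-negative
+    // is constrained to [0, signed max].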
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+ bool AllNonNeg = true;
+ bool AllNonPos = true;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+ if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+ }
+ if (AllNonNeg)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt(BitWidth, 0),
+ APInt::getSignedMinValue(BitWidth)));
+ else if (AllNonPos)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt(BitWidth, 1)));
+ }
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setSignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+ EndRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+ EndRange.getSignedMax());
+ if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+ return setSignedRange(AddRec, ConservativeResult);
+ return setSignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setSignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ if (!U->getValue()->getType()->isIntegerTy() && !TD)
+ return setSignedRange(U, ConservativeResult);
+ unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+ if (NS == 1)
+ return setSignedRange(U, ConservativeResult);
+ return setSignedRange(U, ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ }
+
+ return setSignedRange(S, ConservativeResult);
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+
+ // Don't attempt to analyze instructions in blocks that aren't
+ // reachable. Such instructions don't matter, and they aren't required
+ // to obey basic rules for definitions dominating uses which this
+ // analysis depends on.
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return getUnknown(V);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ else if (isa<ConstantPointerNull>(V))
+ return getConstant(V->getType(), 0);
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
+ else
+ return getUnknown(V);
+
+ Operator *U = cast<Operator>(V);
+ switch (Opcode) {
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
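+    // e.g. for (((a + b) + c) + d) this gathers {d, c, b, a} and makes a
+    // single getAddExpr call.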
+ SmallVector<const SCEV *, 4> AddOps;
+ AddOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+ unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ break;
+ U = cast<Operator>(Op);
+ const SCEV *Op1 = getSCEV(U->getOperand(1));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(Op1));
+ else
+ AddOps.push_back(Op1);
+ }
+ AddOps.push_back(getSCEV(U->getOperand(0)));
+ return getAddExpr(AddOps);
+ }
+ case Instruction::Mul: {
+ // See the Add code above.
+ SmallVector<const SCEV *, 4> MulOps;
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0);
+ Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+ Op = U->getOperand(0)) {
+ U = cast<Operator>(Op);
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ MulOps.push_back(getSCEV(U->getOperand(0)));
+ return getMulExpr(MulOps);
+ }
+ case Instruction::UDiv:
+ return getUDivExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Sub:
+ return getMinusSCEV(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::And:
+ // For an expression like x&255 that merely masks off the high bits,
+ // use zext(trunc(x)) as the SCEV expression.
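+    // e.g. for an i32 %x, %x & 255 becomes zext(trunc(%x to i8)) back to i32.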
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (CI->isNullValue())
+ return getSCEV(U->getOperand(1));
+ if (CI->isAllOnesValue())
+ return getSCEV(U->getOperand(0));
+ const APInt &A = CI->getValue();
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+      // Use ComputeMaskedBits to recover the bits that ShrinkDemandedConstant
+      // knew about and reconstruct the low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+
+ APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+ if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
+ return
+ getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+ IntegerType::get(getContext(), BitWidth - LZ)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Or:
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+ OldAR->getNoWrapFlags());
+ }
+ return S;
+ }
+ }
+ break;
+ case Instruction::Xor:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // If the RHS of the xor is a signbit, then this is just an add.
+ // Instcombine turns add of signbit into xor as a strength reduction step.
+ if (CI->getValue().isSignBit())
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+
+ // If the RHS of xor is -1, then this is a not operation.
+ if (CI->isAllOnesValue())
+ return getNotSCEV(getSCEV(U->getOperand(0)));
+
+ // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+ // This is a variant of the check for xor with -1, and it handles
+ // the case where instcombine has trimmed non-demanded bits out
+ // of an xor with -1.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+ if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO->getOpcode() == Instruction::And &&
+ LCI->getValue() == CI->getValue())
+ if (const SCEVZeroExtendExpr *Z =
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ const Type *UTy = U->getType();
+ const SCEV *Z0 = Z->getOperand();
+ const Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = CI->getValue().trunc(Z0TySize);
+ if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ // Turn shift left of a constant amount into a multiply.
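+    // e.g. x << 3 becomes x * 8.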
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::LShr:
+    // Turn logical shift right of a constant into an unsigned divide.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::AShr:
+ // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
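+    // e.g. on i32, (x << 24) >>s 24 becomes sext(trunc(x to i8)) back to i32.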
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+ if (L->getOpcode() == Instruction::Shl &&
+ L->getOperand(1) == U->getOperand(1)) {
+ uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
+
+ uint64_t Amt = BitWidth - CI->getZExtValue();
+ if (Amt == BitWidth)
+ return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ return
+ getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+ IntegerType::get(getContext(),
+ Amt)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Trunc:
+ return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::ZExt:
+ return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::SExt:
+ return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::BitCast:
+ // BitCasts are no-op casts so we just eliminate the cast.
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+ return getSCEV(U->getOperand(0));
+ break;
+
+ // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+ // lead to pointer expressions which cannot safely be expanded to GEPs,
+ // because ScalarEvolution doesn't respect the GEP aliasing rules when
+ // simplifying integer expressions.
+
+ case Instruction::GetElementPtr:
+ return createNodeForGEP(cast<GEPOperator>(U));
+
+ case Instruction::PHI:
+ return createNodeForPHI(cast<PHINode>(U));
+
+ case Instruction::Select:
+ // This could be a smax or umax that was lowered earlier.
+ // Try to recover it.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ default: // We cannot analyze this expression.
+ break;
+ }
+
+ return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Iteration Count Computation Code
+//
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
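+///
+/// For example, if the header is branched to 10 times in total (once on
+/// entry and 9 times from within the loop), the trip count is 10 and the
+/// backedge-taken count returned here is 9.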
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Max;
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+ // Initially insert a CouldNotCompute for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BackedgeTakenInfo Result = getCouldNotCompute();
+ BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
+ if (Computed.Exact != getCouldNotCompute()) {
+ assert(isLoopInvariant(Computed.Exact, L) &&
+ isLoopInvariant(Computed.Max, L) &&
+ "Computed backedge-taken count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+
+ // Update the value in the map.
+ Result = Computed;
+ } else {
+ if (Computed.Max != getCouldNotCompute())
+ // Update the value in the map.
+ Result = Computed;
+ if (isa<PHINode>(L->getHeader()->begin()))
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
+ if (Computed.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, or it's a PHI that's in the process of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+ // count information isn't going to change anything. In the latter
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
+
+ // Re-lookup the insert position, since the call to
+ // ComputeBackedgeTakenCount above could result in a
+ // recursive call to getBackedgeTakenInfo (on a different
+ // loop), which would invalidate the iterator computed
+ // earlier.
+ return BackedgeTakenCounts.find(L)->second = Result;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may affect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+ // Drop any stored trip count value.
+ BackedgeTakenCounts.erase(L);
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ forgetLoop(*I);
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may affect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Examine all exits and pick the most conservative values.
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ bool CouldNotComputeBECount = false;
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BackedgeTakenInfo NewBTI =
+ ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+ if (NewBTI.Exact == getCouldNotCompute()) {
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldNotComputeBECount = true;
+ BECount = getCouldNotCompute();
+ } else if (!CouldNotComputeBECount) {
+ if (BECount == getCouldNotCompute())
+ BECount = NewBTI.Exact;
+ else
+ BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
+ }
+ if (MaxBECount == getCouldNotCompute())
+ MaxBECount = NewBTI.Max;
+ else if (NewBTI.Max != getCouldNotCompute())
+ MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+}
+
+/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
+/// of the specified loop will execute if it exits via the specified block.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
+ BasicBlock *ExitingBlock) {
+
+ // Okay, we've chosen an exiting block. See what condition causes us to
+ // exit at this block.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (ExitBr == 0) return getCouldNotCompute();
+ assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+ // At this point, we know we have a conditional branch that determines whether
+ // the loop is exited. However, we don't know if the branch is executed each
+ // time through the loop. If not, then the execution count of the branch will
+ // not be equal to the trip count of the loop.
+ //
+ // Currently we check for this by checking to see if the Exit branch goes to
+ // the loop header. If so, we know it will always execute the same number of
+ // times as the loop. We also handle the case where the exit block *is* the
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
+ if (ExitBr->getSuccessor(0) != L->getHeader() &&
+ ExitBr->getSuccessor(1) != L->getHeader() &&
+ ExitBr->getParent() != L->getHeader()) {
+ // The simple checks failed, try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return getCouldNotCompute();
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return getCouldNotCompute();
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return getCouldNotCompute();
+ }
+
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
+}
+
+/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+ // Check if the controlling expression for this loop is an And or Or.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(TBB)) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
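+ // (For example, if one condition gives an exact count of 3 backedges and
+ // the other gives 7, the combined exact count is umin(3, 7) = 3.)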
+ if (BTI0.Exact == getCouldNotCompute() ||
+ BTI1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max == getCouldNotCompute())
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == getCouldNotCompute())
+ MaxBECount = BTI0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ } else {
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(FBB)) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ if (BTI0.Exact == getCouldNotCompute() ||
+ BTI1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max == getCouldNotCompute())
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == getCouldNotCompute())
+ MaxBECount = BTI0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ } else {
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getConstant(CI->getType(), 0);
+ }
+
+ // If it's not an integer or pointer comparison then compute it the hard way.
+ return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+
+ // If the condition was exit on true, convert the condition to exit on false
+ ICmpInst::Predicate Cond;
+ if (!L->contains(FBB))
+ Cond = ExitCond->getPredicate();
+ else
+ Cond = ExitCond->getInversePredicate();
+
+ // Handle common loops like: for (X = "string"; *X; ++X)
+ if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+ if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+ BackedgeTakenInfo ItCnt =
+ ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
+ }
+
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+ // If there is a loop-invariant, force it into the RHS.
+ std::swap(LHS, RHS);
+ Cond = ICmpInst::getSwappedPredicate(Cond);
+ }
+
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange(
+ ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+ }
+
+ switch (Cond) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_ULT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ default:
+#if 0
+ dbgs() << "ComputeBackedgeTakenCount ";
+ if (ExitCond->getOperand(0)->getType()->isUnsigned())
+ dbgs() << "[unsigned] ";
+ dbgs() << *LHS << " "
+ << Instruction::getOpcodeName(Instruction::ICmp)
+ << " " << *RHS << "\n";
+#endif
+ break;
+ }
+ return
+ ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+ ScalarEvolution &SE) {
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
+ assert(isa<SCEVConstant>(Val) &&
+ "Evaluation of SCEV at constant didn't fold correctly?");
+ return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// GetAddressedElementFromGlobal - Given a global variable with an initializer
+/// and a GEP expression (missing the pointer index) indexing into it, return
+/// the addressed element of the initializer or null if the index expression is
+/// invalid.
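+///
+/// For example, indexing a constant [3 x i32] initializer holding 10, 20, 30
+/// with the single index 1 yields the i32 constant 20.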
+static Constant *
+GetAddressedElementFromGlobal(GlobalVariable *GV,
+ const std::vector<ConstantInt*> &Indices) {
+ Constant *Init = GV->getInitializer();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ uint64_t Idx = Indices[i]->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ assert(Idx < CS->getNumOperands() && "Bad struct index!");
+ Init = cast<Constant>(CS->getOperand(Idx));
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ if (Idx >= CA->getNumOperands()) return 0; // Bogus program
+ Init = cast<Constant>(CA->getOperand(Idx));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ assert(Idx < STy->getNumElements() && "Bad struct index!");
+ Init = Constant::getNullValue(STy->getElementType(Idx));
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
+ if (Idx >= ATy->getNumElements()) return 0; // Bogus program
+ Init = Constant::getNullValue(ATy->getElementType());
+ } else {
+ llvm_unreachable("Unknown constant aggregate type!");
+ }
+ return 0;
+ } else {
+ return 0; // Unknown initializer type
+ }
+ }
+ return Init;
+}
+
+/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+ if (LI->isVolatile()) return getCouldNotCompute();
+
+ // Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+ if (!GEP) return getCouldNotCompute();
+
+ // Make sure that it is really a constant global we are gepping, with an
+ // initializer, and make sure the first IDX is really 0.
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
+ !cast<Constant>(GEP->getOperand(1))->isNullValue())
+ return getCouldNotCompute();
+
+ // Okay, we allow one non-constant index into the GEP instruction.
+ Value *VarIdx = 0;
+ std::vector<ConstantInt*> Indexes;
+ unsigned VarIdxNum = 0;
+ for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ Indexes.push_back(CI);
+ } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+ if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
+ VarIdx = GEP->getOperand(i);
+ VarIdxNum = i-2;
+ Indexes.push_back(0);
+ }
+
+ // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+ // Check to see if X is a loop variant variable value now.
+ const SCEV *Idx = getSCEV(VarIdx);
+ Idx = getSCEVAtScope(Idx, L);
+
+ // We can only recognize very limited forms of loop index expressions, in
+ // particular, only affine AddRec's like {C1,+,C2}.
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+ if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+ return getCouldNotCompute();
+
+ unsigned MaxSteps = MaxBruteForceIterations;
+ for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+ // Form the GEP offset.
+ Indexes[VarIdxNum] = Val;
+
+ Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ if (Result == 0) break; // Cannot compute!
+
+ // Evaluate the condition for this iteration.
+ Result = ConstantExpr::getICmp(predicate, Result, RHS);
+ if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
+ if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+ dbgs() << "\n***\n*** Computed loop count " << *ItCst
+ << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+ << "***\n";
+#endif
+ ++NumArrayLenItCounts;
+ return getConstant(ItCst); // Found terminating iteration!
+ }
+ }
+ return getCouldNotCompute();
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ return true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ return canConstantFoldCallTo(F);
+ return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ // If this is not an instruction, or if this is an instruction outside of the
+ // loop, it can't be derived from a loop PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !L->contains(I)) return 0;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (L->getHeader() == I->getParent())
+ return PN;
+ else
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return 0;
+ }
+
+ // If we won't be able to constant fold this expression even if the operands
+ // are constants, return early.
+ if (!CanConstantFold(I)) return 0;
+
+ // Otherwise, we can evaluate this instruction if all of its operands are
+ // constant or derived from a PHI node themselves.
+ PHINode *PHI = 0;
+ for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+ if (!isa<Constant>(I->getOperand(Op))) {
+ PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+ if (P == 0) return 0; // Not evolving from PHI
+ if (PHI == 0)
+ PHI = P;
+ else if (PHI != P)
+ return 0; // Evolving from multiple different PHIs.
+ }
+
+ // This is an expression evolving from a constant PHI!
+ return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+ const TargetData *TD) {
+ if (isa<PHINode>(V)) return PHIVal;
+ if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = cast<Instruction>(V);
+
+ std::vector<Constant*> Operands(I->getNumOperands());
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
+ if (Operands[i] == 0) return 0;
+ }
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
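+///
+/// For example, a header PHI that starts at 1 and is multiplied by 3 on each
+/// backedge folds to 1 * 3^4 = 81 when the backedge-taken count is 4.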
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+ const APInt &BEs,
+ const Loop *L) {
+ DenseMap<PHINode*, Constant*>::const_iterator I =
+ ConstantEvolutionLoopExitValue.find(PN);
+ if (I != ConstantEvolutionLoopExitValue.end())
+ return I->second;
+
+ if (BEs.ugt(MaxBruteForceIterations))
+ return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+
+ Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+ // Since the loop is canonicalized, the PHI node must have two entries. One
+ // entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0)
+ return RetVal = 0; // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return RetVal = 0; // Not derived from same PHI.
+
+ // Execute the loop symbolically to determine the exit value.
+ if (BEs.getActiveBits() >= 32)
+ return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+ unsigned NumIterations = BEs.getZExtValue(); // must be in range
+ unsigned IterationNum = 0;
+ for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+ if (IterationNum == NumIterations)
+ return RetVal = PHIVal; // Got exit value!
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ if (NextPHI == PHIVal)
+ return RetVal = NextPHI; // Stopped evolving!
+ if (NextPHI == 0)
+ return 0; // Couldn't evaluate!
+ PHIVal = NextPHI;
+ }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until the exit
+/// condition gets a value of ExitWhen (true or false). If we cannot
+/// evaluate the trip count of the loop, return getCouldNotCompute().
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
+ PHINode *PN = getConstantEvolvingPHI(Cond, L);
+ if (PN == 0) return getCouldNotCompute();
+
+ // If the loop is canonicalized, the PHI will have exactly two entries.
+ // That's the only form we support here.
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+ // One entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return getCouldNotCompute(); // Not derived from same PHI.
+
+ // Okay, we found a PHI node that defines the trip count of this loop. Execute
+ // the loop symbolically to determine when the condition gets a value of
+ // "ExitWhen".
+ unsigned IterationNum = 0;
+ unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ for (Constant *PHIVal = StartCST;
+ IterationNum != MaxIterations; ++IterationNum) {
+ ConstantInt *CondVal =
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+
+ // Couldn't symbolically evaluate.
+ if (!CondVal) return getCouldNotCompute();
+
+ if (CondVal->getValue() == uint64_t(ExitWhen)) {
+ ++NumBruteForceTripCountsComputed;
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+ }
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ if (NextPHI == 0 || NextPHI == PHIVal)
+ return getCouldNotCompute();// Couldn't evaluate or not making progress...
+ PHIVal = NextPHI;
+ }
+
+ // Too many iterations were needed to evaluate.
+ return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program. The L value specifies the loop
+/// nest at which to evaluate the expression: null means the top level,
+/// outside all loops, and a non-null loop means the scope immediately
+/// inside that loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
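+///
+/// For example, an addrec {0,+,2} defined in a loop whose backedge-taken
+/// count is the constant 9 evaluates to 2*9 = 18 at the scope of the
+/// parent loop.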
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // Check to see if we've folded this expression at this loop before.
+ std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+ std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? Pair.first->second : V;
+
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ ValuesAtScopes[V][L] = C;
+ return C;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+ if (isa<SCEVConstant>(V)) return V;
+
+ // If this instruction is evolved from a constant-evolving PHI, compute the
+ // exit value from the loop without using SCEVs.
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+ if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+ const Loop *LI = (*this->LI)[I->getParent()];
+ if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ if (const SCEVConstant *BTCC =
+ dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV = getConstantEvolutionLoopExitValue(PN,
+ BTCC->getValue()->getValue(),
+ LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // Okay, this is an expression that we cannot symbolically evaluate
+ // into a SCEV. Check to see if it's possible to symbolically evaluate
+ // the arguments into constants, and if so, try to constant propagate the
+ // result. This is particularly useful for computing loop exit values.
+ if (CanConstantFold(I)) {
+ SmallVector<Constant *, 4> Operands;
+ bool MadeImprovement = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *Op = I->getOperand(i);
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ // If any operand is non-constant and not SCEVable (neither integer
+ // nor pointer), don't even try to analyze it with SCEV techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ const SCEV *OrigV = getSCEV(Op);
+ const SCEV *OpV = getSCEVAtScope(OrigV, L);
+ MadeImprovement |= OrigV != OpV;
+
+ Constant *C = 0;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
+ C = SC->getValue();
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
+ C = dyn_cast<Constant>(SU->getValue());
+ if (!C) return V;
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ }
+
+ // Check to see if getSCEVAtScope actually made an improvement.
+ if (MadeImprovement) {
+ Constant *C = 0;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+ Operands[0], Operands[1], TD);
+ else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+ if (!C) return V;
+ return getSCEV(C);
+ }
+ }
+ }
+
+ // This is some other type of SCEVUnknown, just return it.
+ return V;
+ }
+
+ if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ if (OpAtScope != Comm->getOperand(i)) {
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+ Comm->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+
+ for (++i; i != e; ++i) {
+ OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ NewOps.push_back(OpAtScope);
+ }
+ if (isa<SCEVAddExpr>(Comm))
+ return getAddExpr(NewOps);
+ if (isa<SCEVMulExpr>(Comm))
+ return getMulExpr(NewOps);
+ if (isa<SCEVSMaxExpr>(Comm))
+ return getSMaxExpr(NewOps);
+ if (isa<SCEVUMaxExpr>(Comm))
+ return getUMaxExpr(NewOps);
+ llvm_unreachable("Unknown commutative SCEV type!");
+ }
+ }
+ // If we got here, all operands are loop invariant.
+ return Comm;
+ }
+
+ if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
+ if (LHS == Div->getLHS() && RHS == Div->getRHS())
+ return Div; // must be loop invariant
+ return getUDivExpr(LHS, RHS);
+ }
+
+ // If this is a loop recurrence for a loop that does not contain L, then we
+ // are dealing with the final value computed by the loop.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ // First, attempt to evaluate each operand.
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+ if (OpAtScope == AddRec->getOperand(i))
+ continue;
+
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+ AddRec->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+ const SCEV *FoldedRec =
+ getAddRecExpr(NewOps, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+ // The addrec may be folded to a nonrecurrence, for example, if the
+ // induction variable is multiplied by zero after constant folding. Go
+ // ahead and return the folded value.
+ if (!AddRec)
+ return FoldedRec;
+ break;
+ }
+
+ // If the scope is outside the addrec's loop, evaluate it by using the
+ // loop exit value of the addrec.
+ if (!AddRec->getLoop()->contains(L)) {
+ // To evaluate this recurrence, we need to know how many times the AddRec
+ // loop iterates. Compute this now.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
+
+ // Then, evaluate the AddRec.
+ return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+ }
+
+ return AddRec;
+ }
+
+ if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getZeroExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getSignExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getTruncateExpr(Op, Cast->getType());
+ }
+
+ llvm_unreachable("Unknown SCEV type!");
+ return 0;
+}
+
+/// getSCEVAtScope - This is a convenience function which does
+/// getSCEVAtScope(getSCEV(V), L).
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+ return getSCEVAtScope(getSCEV(V), L);
+}
+
+/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
+/// following equation:
+///
+/// A * X = B (mod N)
+///
+/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
+/// A and B isn't important.
+///
+/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
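+///
+/// As a worked illustration of the steps below, take BW = 8, A = 4, B = 12:
+/// D = gcd(A, N) = 2^2 = 4, B is divisible by D, the inverse of A/D = 1
+/// modulo N/D = 64 is 1, so the minimum root is (1 * B/D) mod 64 = 3;
+/// indeed 4 * 3 = 12 (mod 256).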
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+ ScalarEvolution &SE) {
+ uint32_t BW = A.getBitWidth();
+ assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(A != 0 && "A must be non-zero.");
+
+ // 1. D = gcd(A, N)
+ //
+ // The gcd of A and N may have only one prime factor: 2. The number of
+ // trailing zeros in A is its multiplicity
+ uint32_t Mult2 = A.countTrailingZeros();
+ // D = 2^Mult2
+
+ // 2. Check if B is divisible by D.
+ //
+ // B is divisible by D if and only if the multiplicity of prime factor 2 for B
+ // is not less than the multiplicity of this prime factor for D.
+ if (B.countTrailingZeros() < Mult2)
+ return SE.getCouldNotCompute();
+
+ // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
+ // modulo (N / D).
+ //
+ // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
+ // bit width during computations.
+ APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
+ APInt Mod(BW + 1, 0);
+ Mod.setBit(BW - Mult2); // Mod = N / D
+ APInt I = AD.multiplicativeInverse(Mod);
+
+ // 4. Compute the minimum unsigned root of the equation:
+ // I * (B / D) mod (N / D)
+ APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
+
+ // The result is guaranteed to be less than 2^BW so we may truncate it to BW
+ // bits.
+ return SE.getConstant(Result.trunc(BW));
+}
+
+/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
+/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
+/// might be the same) or two SCEVCouldNotCompute objects.
+///
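+/// The chrec's value at iteration x is L + M*x + N*x*(x-1)/2, i.e. the
+/// polynomial (N/2)*x^2 + (M - N/2)*x + L, which is where the A, B and C
+/// coefficients computed below come from.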
+static std::pair<const SCEV *,const SCEV *>
+SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
+ const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
+ const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
+ const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+
+ // We currently can only solve this if the coefficients are constants.
+ if (!LC || !MC || !NC) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
+ const APInt &L = LC->getValue()->getValue();
+ const APInt &M = MC->getValue()->getValue();
+ const APInt &N = NC->getValue()->getValue();
+ APInt Two(BitWidth, 2);
+ APInt Four(BitWidth, 4);
+
+ {
+ using namespace APIntOps;
+ const APInt& C = L;
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ // The B coefficient is M-N/2
+ APInt B(M);
+ B -= sdiv(N,Two);
+
+ // The A coefficient is N/2
+ APInt A(N.sdiv(Two));
+
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm(B);
+ SqrtTerm *= B;
+ SqrtTerm -= Four * (A * C);
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal(SqrtTerm.sqrt());
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB(-B);
+ APInt TwoA( A << 1 );
+ if (TwoA.isMinValue()) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(SE.getConstant(Solution1),
+ SE.getConstant(Solution2));
+ } // end APIntOps namespace
+}
+
+/// HowFarToZero - Return the number of times a backedge comparing the specified
+/// value to zero will execute. If not computable, return CouldNotCompute.
+///
+/// This is only used for loops with a "x != y" exit test. The exit condition is
+/// now expressed as a single expression, V = x-y. So the exit test is
+/// effectively V != 0. We know and take advantage of the fact that this
+/// expression is only used in a comparison-against-zero context.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ // If the value is a constant
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ // If the value is already zero, the branch will execute zero times.
+ if (C->getValue()->isZero()) return C;
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(AddRec, *this);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1 && R2) {
+#if 0
+ dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
+ << " sol#2: " << *R2 << "\n";
+#endif
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+ R1->getValue(),
+ R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
+ if (Val->isZero())
+ return R1; // We found a quadratic root!
+ }
+ }
+ return getCouldNotCompute();
+ }
+
+ // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine())
+ return getCouldNotCompute();
+
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
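+ //
+ // For example, the affine addrec {10,+,-2} reaches zero after N = 5
+ // backedge-taken iterations, and {-8,+,2} after N = 4.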
+
+ // Get the initial value for the loop.
+ const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ // For now we handle only constant steps.
+ //
+ // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
+ // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
+ // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
+ // We have not yet seen any such cases.
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+ if (StepC == 0)
+ return getCouldNotCompute();
+
+ // For positive steps (counting up until unsigned overflow):
+ // N = -Start/Step (as unsigned)
+ // For negative steps (counting down to zero):
+ // N = Start/-Step
+ // First compute the unsigned distance from zero in the direction of Step.
+ bool CountDown = StepC->getValue()->getValue().isNegative();
+ const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+
+ // Handle unitary steps, which cannot wrap around.
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+ // N = Distance (as unsigned)
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
+ return Distance;
+
+ // If the recurrence is known not to wraparound, unsigned divide computes the
+ // back edge count. We know that the value will either become zero (and thus
+ // the loop terminates), that the loop will terminate through some other exit
+ // condition first, or that the loop has undefined behavior. This means
+ // we can't "miss" the exit value, even with nonunit stride.
+ //
+ // FIXME: Prove that loops always exhibit *acceptable* undefined
+ // behavior. Loops must exhibit defined behavior until a wrapped value is
+ // actually used. So the trip count computed by udiv could be smaller than the
+ // number of well-defined iterations.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+ // FIXME: We really want an "isexact" bit for udiv.
+ return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+ -StartC->getValue()->getValue(),
+ *this);
+ return getCouldNotCompute();
+}
+
+/// HowFarToNonZero - Return the number of times a backedge checking the
+/// specified value for nonzero will execute. If not computable, return
+/// CouldNotCompute
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ // Loops that look like: while (X == 0) are very strange indeed. We don't
+ // handle them yet except for the trivial case. This could be expanded in the
+ // future as needed.
+
+ // If the value is a constant, check to see if it is known to be non-zero
+ // already. If so, the backedge will execute zero times.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ if (!C->getValue()->isNullValue())
+ return getConstant(C->getType(), 0);
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ // We could implement others, but I really doubt anyone writes loops like
+ // this, and if they did, they would already be constant folded.
+ return getCouldNotCompute();
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, together with that successor, or
+/// a pair of null pointers if no such block is found.
+///
+std::pair<BasicBlock *, BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+ // If the block has a unique predecessor, then there is no path from the
+ // predecessor to the block that does not go through the direct edge
+ // from the predecessor to the block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ return std::make_pair(Pred, BB);
+
+ // A loop's header is defined to be a block that dominates the loop.
+ // If the header has a unique predecessor outside the loop, it must be
+ // a block that has exactly one successor that can reach the loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ return std::make_pair(L->getLoopPredecessor(), L->getHeader());
+
+ return std::pair<BasicBlock *, BasicBlock *>();
+}
+
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal, however for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
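+/// For example, a constant operand is moved to the right-hand side, and a
+/// comparison such as "X <=u 5" is rewritten as "X <u 6".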
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS, const SCEV *&RHS) {
+ bool Changed = false;
+
+ // Canonicalize a constant to the right side.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ // Check for both operands constant.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (ConstantExpr::getICmp(Pred,
+ LHSC->getValue(),
+ RHSC->getValue())->isNullValue())
+ goto trivially_false;
+ else
+ goto trivially_true;
+ }
+ // Otherwise swap the operands to put the constant on the right.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+
+ // If we're comparing an addrec with a value which is loop-invariant in the
+ // addrec's loop, put the addrec on the left. Also make a dominance check,
+ // as both operands could be addrecs loop-invariant in each other's loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
+ const Loop *L = AR->getLoop();
+ if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+ }
+
+ // If there's a constant operand, canonicalize comparisons with boundary
+ // cases, and canonicalize *-or-equal comparisons to regular comparisons.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SLT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_false;
+ break;
+ }
+ }
+
+ // Check for obvious equality.
+ if (HasSameValue(LHS, RHS)) {
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ goto trivially_true;
+ if (ICmpInst::isFalseWhenEqual(Pred))
+ goto trivially_false;
+ }
+
+ // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
+ // adding or subtracting 1 from one of the operands.
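+  // For example, "x s<= y" becomes "x s< y+1" when y is known not to be
+  // SINT_MAX, or "x-1 s< y" when x is known not to be SINT_MIN.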
+ switch (Pred) {
+ case ICmpInst::ICMP_SLE:
+ if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // TODO: More simplifications are possible here.
+
+ return Changed;
+
+trivially_true:
+ // Return 0 == 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_EQ;
+ return true;
+
+trivially_false:
+ // Return 0 != 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_NE;
+ return true;
+}
+
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Canonicalize the inputs first.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+
+ // If LHS or RHS is an addrec, check to see if the condition is true in
+ // every iteration of the loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, AR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+ return true;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getStart()) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+ return true;
+
+ // Otherwise see what can be done with known constant ranges.
+ return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ // This code is split out from isKnownPredicate because it is called from
+ // within isLoopEntryGuardedByCond.
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ break;
+ case ICmpInst::ICMP_SGT:
+ Pred = ICmpInst::ICMP_SLT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ Pred = ICmpInst::ICMP_SLE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ Pred = ICmpInst::ICMP_ULE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LoopContinuePredicate ||
+ LoopContinuePredicate->isUnconditional())
+ return false;
+
+ return isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+}
+
+/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
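+/// For example, a loop of the form
+/// "if (n > 0) { for (i = 0; i < n; ++i) ... }" is entry-guarded by "n > 0",
+/// letting the trip count computation use n directly instead of smax(n, 0).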
+bool
+ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return false;
+
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ for (std::pair<BasicBlock *, BasicBlock *>
+ Pair(L->getLoopPredecessor(), L->getHeader());
+ Pair.first;
+ Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
+
+ BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
+ if (!LoopEntryPredicate ||
+ LoopEntryPredicate->isUnconditional())
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ return true;
+ }
+
+ return false;
+}
+
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
+ bool Inverse) {
+ // Recursively handle And and Or conditions.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
+ if (BO->getOpcode() == Instruction::And) {
+ if (!Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ } else if (BO->getOpcode() == Instruction::Or) {
+ if (Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ }
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
+ if (!ICI) return false;
+
+ // Bail if the ICmp's operands' types are wider than the needed type
+ // before attempting to call getSCEV on them. This avoids infinite
+ // recursion, since the analysis of widening casts can require loop
+ // exit condition information for overflow checking, which would
+ // lead back here.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(ICI->getOperand(0)->getType()))
+ return false;
+
+ // Now that we found a conditional branch that dominates the loop, check to
+ // see if it is the comparison we are looking for.
+ ICmpInst::Predicate FoundPred;
+ if (Inverse)
+ FoundPred = ICI->getInversePredicate();
+ else
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ // Balance the types. The case where FoundLHS' type is wider than
+ // LHS' type is checked for above.
+ if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
+
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ if (SimplifyICmpOperands(Pred, LHS, RHS))
+ if (LHS == RHS)
+ return CmpInst::isTrueWhenEqual(Pred);
+ if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
+ if (FoundLHS == FoundRHS)
+ return CmpInst::isFalseWhenEqual(Pred);
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ }
+
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
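+///
+/// For example, a known "FoundLHS u< FoundRHS" implies "LHS u< RHS" whenever
+/// LHS u<= FoundLHS and RHS u>= FoundRHS are known from the constant ranges.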
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// getBECount - Subtract the end and start values and divide by the step,
+/// rounding up, to get the number of times the backedge is executed. Return
+/// CouldNotCompute if an intermediate computation overflows.
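+///
+/// For example, with Start = 0, End = 10, and Step = 3, the result is
+/// (10 - 0 + (3 - 1)) /u 3 = 4, matching the four values 0, 3, 6, and 9
+/// for which {0,+,3} is still below the end value.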
+const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap) {
+ assert(!isKnownNegative(Step) &&
+ "This code doesn't handle negative strides yet!");
+
+ const Type *Ty = Start->getType();
+
+ // When Start == End, we have an exact BECount == 0. Short-circuit this case
+ // here because SCEV may not be able to determine that the unsigned division
+ // after rounding is zero.
+ if (Start == End)
+ return getConstant(Ty, 0);
+
+ const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
+ const SCEV *Diff = getMinusSCEV(End, Start);
+ const SCEV *RoundUp = getAddExpr(Step, NegOne);
+
+ // Add an adjustment to the difference between End and Start so that
+ // the division will effectively round up.
+ const SCEV *Add = getAddExpr(Diff, RoundUp);
+
+ if (!NoWrap) {
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getContext(),
+ getTypeSizeInBits(Ty) + 1);
+ const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
+ const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return getCouldNotCompute();
+ }
+
+ return getUDivExpr(Add, Step);
+}
+
+/// HowManyLessThans - Return the number of times a backedge containing the
+/// specified less-than comparison will execute. If not computable, return
+/// CouldNotCompute.
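+///
+/// For example, the number of iterations for which {0,+,2} u< 7 holds is
+/// (7 - 0 + (2 - 1)) /u 2 = 4, corresponding to the values 0, 2, 4, and 6.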
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool isSigned) {
+ // Only handle: "ADDREC < LoopInvariant".
+ if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // Check to see if we have a flag which makes analysis easy.
+ bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
+ AddRec->getNoWrapFlags(SCEV::FlagNUW);
+
+ if (AddRec->isAffine()) {
+ unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ if (Step->isZero())
+ return getCouldNotCompute();
+ if (Step->isOne()) {
+ // With unit stride, the iteration never steps past the limit value.
+ } else if (isKnownPositive(Step)) {
+ // Test whether a positive iteration can step past the limit
+ // value and past the maximum value for its type in a single step.
+ // Note that it's not sufficient to check NoWrap here, because even
+ // though the value after a wrap is undefined, it's not undefined
+ // behavior, so if wrap does occur, the loop could either terminate or
+ // loop infinitely, but in either case, the loop is guaranteed to
+ // iterate at least until the iteration where the wrapping occurs.
+ const SCEV *One = getConstant(Step->getType(), 1);
+ if (isSigned) {
+ APInt Max = APInt::getSignedMaxValue(BitWidth);
+ if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+ .slt(getSignedRange(RHS).getSignedMax()))
+ return getCouldNotCompute();
+ } else {
+ APInt Max = APInt::getMaxValue(BitWidth);
+ if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
+ .ult(getUnsignedRange(RHS).getUnsignedMax()))
+ return getCouldNotCompute();
+ }
+ } else
+ // TODO: Handle negative strides here and below.
+ return getCouldNotCompute();
+
+ // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
+ // m. So, we count the number of iterations in which {n,+,s} < m is true.
+ // Note that we cannot simply return max(m-n,0)/s because it's not safe to
+ // treat m-n as signed nor unsigned due to overflow possibility.
+
+ // First, we get the value of the LHS in the first iteration: n
+ const SCEV *Start = AddRec->getOperand(0);
+
+ // Determine the minimum constant start value.
+ const SCEV *MinStart = getConstant(isSigned ?
+ getSignedRange(Start).getSignedMin() :
+ getUnsignedRange(Start).getUnsignedMin());
+
+ // If we know that the condition is true in order to enter the loop,
+ // then we know that it will run exactly (m-n)/s times. Otherwise, we
+ // only know that it will execute (max(m,n)-n)/s times. In both cases,
+ // the division must round up.
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
+ getMinusSCEV(Start, Step), RHS))
+ End = isSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ // Determine the maximum constant end value.
+ const SCEV *MaxEnd = getConstant(isSigned ?
+ getSignedRange(End).getSignedMax() :
+ getUnsignedRange(End).getUnsignedMax());
+
+ // If MaxEnd is within a step of the maximum integer value in its type,
+ // adjust it down to the minimum value which would produce the same effect.
+ // This allows the subsequent ceiling division of (N+(step-1))/step to
+ // compute the correct value.
+ const SCEV *StepMinusOne = getMinusSCEV(Step,
+ getConstant(Step->getType(), 1));
+ MaxEnd = isSigned ?
+ getSMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
+ StepMinusOne)) :
+ getUMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
+ StepMinusOne));
+
+ // Finally, we subtract these two values and divide, rounding up, to get
+ // the number of times the backedge is executed.
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
+
+ // The maximum backedge count is similar, except using the minimum start
+ // value and the maximum end value.
+ // If we already have an exact constant BECount, use it instead.
+ const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
+ : getBECount(MinStart, MaxEnd, Step, NoWrap);
+
+    // If the stride is nonconstant and NoWrap == true, then
+    // getBECount(MinStart, MaxEnd) may not be computable. That would leave
+    // an exact BECount with an invalid MaxBECount, so fall back to the exact
+    // count rather than lose optimization opportunities.
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+
+ return getCouldNotCompute();
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not in
+/// the range, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ Operands[0] = SE.getConstant(SC->getType(), 0);
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
+ getNoWrapFlags(FlagNW));
+ if (const SCEVAddRecExpr *ShiftedAddRec =
+ dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getValue()->getValue()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<SCEVConstant>(getOperand(i)))
+ return SE.getCouldNotCompute();
+
+
+ // Okay at this point we know that all elements of the chrec are constants and
+ // that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getConstant(getType(), 0);
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+ // We know that zero is in the range. If A is positive then we know that
+ // the upper value of the range must be the first possible exit value.
+ // If A is negative then the lower of the range is the last possible loop
+ // value. Also note that we already checked for a full range.
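+    // For example, for {0,+,3} and the range [0, 10): A = 3, End = 9, and
+    // ExitVal = (9 + 3) /u 3 = 4; iteration 4 produces 12, which is outside
+    // the range, while iteration 3 produces 9, which is still inside it.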
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
+ assert(Range.contains(
+ EvaluateConstantChrecAtConstant(this,
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
+ "Linear scev computation is off in a bad way!");
+ return SE.getConstant(ExitValue);
+ } else if (isQuadratic()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
+ // quadratic equation to solve it. To do this, we must frame our problem in
+ // terms of figuring out when zero is crossed, instead of when
+ // Range.getUpper() is crossed.
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
+ NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
+ // getNoWrapFlags(FlagNW)
+ FlagAnyWrap);
+
+ // Next, solve the constructed addrec
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
+ R1->getValue(), R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // Make sure the root is not off by one. The returned iteration should
+ // not be in the range, but the previous one should be. When solving
+ // for "X*X < 5", for example, we should not return a root of 2.
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
+ R1->getValue(),
+ SE);
+ if (Range.contains(R1Val->getValue())) {
+ // The next iteration must be out of the range...
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (!Range.contains(R1Val->getValue()))
+ return SE.getConstant(NextVal);
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+
+ // If R1 was not in the range, then it is a good return value. Make
+ // sure that R1-1 WAS in the range though, just in case.
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (Range.contains(R1Val->getValue()))
+ return R1;
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+ }
+ }
+
+ return SE.getCouldNotCompute();
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SCEVCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void ScalarEvolution::SCEVCallbackVH::deleted() {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+ if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(getValPtr());
+ // this now dangles!
+}
+
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+
+ // Forget all the expressions associated with users of the old value,
+ // so that future queries will recompute the expressions using the new
+ // value.
+ Value *Old = getValPtr();
+ SmallVector<User *, 16> Worklist;
+ SmallPtrSet<User *, 8> Visited;
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
+ // Deleting the Old value will cause this to dangle. Postpone
+ // that until everything else is done.
+ if (U == Old)
+ continue;
+ if (!Visited.insert(U))
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(U);
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ }
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+ : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+// ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution()
+ : FunctionPass(ID), FirstUnknown(0) {
+ initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolution::runOnFunction(Function &F) {
+ this->F = &F;
+ LI = &getAnalysis<LoopInfo>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+ return false;
+}
+
+void ScalarEvolution::releaseMemory() {
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
+ U->~SCEVUnknown();
+ FirstUnknown = 0;
+
+ ValueExprMap.clear();
+ BackedgeTakenCounts.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
+ UniqueSCEVs.clear();
+ SCEVAllocator.Reset();
+}
+
+void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<DominatorTree>();
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+ return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+ const Loop *L) {
+ // Print all inner loops first
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ PrintLoopInfo(OS, SE, *I);
+
+ OS << "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ OS << "<multiple exits> ";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable backedge-taken count. ";
+ }
+
+ OS << "\n"
+ "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable max backedge-taken count. ";
+ }
+
+ OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+ // ScalarEvolution's implementation of the print method is to print
+ // out SCEV values of all instructions that are interesting. Doing
+ // this potentially causes it to create new SCEV objects though,
+ // which technically conflicts with the const qualifier. This isn't
+ // observable from outside the class though, so casting away the
+ // const isn't dangerous.
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ OS << "Classifying expressions for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
+ OS << *I << '\n';
+ OS << " --> ";
+ const SCEV *SV = SE.getSCEV(&*I);
+ SV->print(OS);
+
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ if (!SE.isLoopInvariant(ExitValue, L)) {
+ OS << "<<Unknown>>";
+ } else {
+ OS << *ExitValue;
+ }
+ }
+
+ OS << "\n";
+ }
+
+ OS << "Determining loop execution counts for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ PrintLoopInfo(OS, &SE, *I);
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+ std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
+ std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, LoopVariant));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ LoopDisposition D = computeLoopDisposition(S, L);
+ return LoopDispositions[S][L] = D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return LoopInvariant;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+ // If L is the addrec's loop, it's computable.
+ if (AR->getLoop() == L)
+ return LoopComputable;
+
+ // Add recurrences are never invariant in the function-body (null loop).
+ if (!L)
+ return LoopVariant;
+
+ // This recurrence is variant w.r.t. L if L contains AR's loop.
+ if (L->contains(AR->getLoop()))
+ return LoopVariant;
+
+ // This recurrence is invariant w.r.t. L if AR's loop contains L.
+ if (AR->getLoop()->contains(L))
+ return LoopInvariant;
+
+ // This recurrence is variant w.r.t. L if any of its operands
+ // are variant.
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ if (!isLoopInvariant(*I, L))
+ return LoopVariant;
+
+ // Otherwise it's loop-invariant.
+ return LoopInvariant;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool HasVarying = false;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ LoopDisposition D = getLoopDisposition(*I, L);
+ if (D == LoopVariant)
+ return LoopVariant;
+ if (D == LoopComputable)
+ HasVarying = true;
+ }
+ return HasVarying ? LoopComputable : LoopInvariant;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+ if (LD == LoopVariant)
+ return LoopVariant;
+ LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+ if (RD == LoopVariant)
+ return LoopVariant;
+ return (LD == LoopInvariant && RD == LoopInvariant) ?
+ LoopInvariant : LoopComputable;
+ }
+ case scUnknown:
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+ return LoopInvariant;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return LoopVariant;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return LoopVariant;
+}
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopComputable;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
+ std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
+ Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BlockDisposition D = computeBlockDisposition(S, BB);
+ return BlockDispositions[S][BB] = D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return ProperlyDominatesBlock;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+ case scAddRecExpr: {
+ // This uses a "dominates" query instead of "properly dominates" query
+ // to test for proper dominance too, because the instruction which
+ // produces the addrec's value is a PHI, and a PHI effectively properly
+ // dominates its entire containing block.
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+ if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ return DoesNotDominateBlock;
+ }
+ // FALL THROUGH into SCEVNAryExpr handling.
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool Proper = true;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ BlockDisposition D = getBlockDisposition(*I, BB);
+ if (D == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ if (D == DominatesBlock)
+ Proper = false;
+ }
+ return Proper ? ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ BlockDisposition LD = getBlockDisposition(LHS, BB);
+ if (LD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ BlockDisposition RD = getBlockDisposition(RHS, BB);
+ if (RD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+ ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUnknown:
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+ if (I->getParent() == BB)
+ return DominatesBlock;
+ if (DT->properlyDominates(I->getParent(), BB))
+ return ProperlyDominatesBlock;
+ return DoesNotDominateBlock;
+ }
+ return ProperlyDominatesBlock;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return DoesNotDominateBlock;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return DoesNotDominateBlock;
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return false;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
+ const SCEV *CastOp = Cast->getOperand();
+ return Op == CastOp || hasOperand(CastOp, Op);
+ }
+ case scAddRecExpr:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ const SCEV *NAryOp = *I;
+ if (NAryOp == Op || hasOperand(NAryOp, Op))
+ return true;
+ }
+ return false;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ return LHS == Op || hasOperand(LHS, Op) ||
+ RHS == Op || hasOperand(RHS, Op);
+ }
+ case scUnknown:
+ return false;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return false;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return false;
+}
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 000000000000..e9edb3e083de
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,173 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
+// simple alias analysis in terms of ScalarEvolution queries.
+//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependencies between different iterations.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses ScalarEvolution to answer queries.
+ class ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
+ ScalarEvolution *SE;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+ initializeScalarEvolutionAliasAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+ Value *GetBaseValue(const SCEV *S);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+  // No identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (LocA.Size == 0 || LocB.Size == 0)
+ return NoAlias;
+
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt ASizeInt(BitWidth, LocA.Size);
+ APInt BSizeInt(BitWidth, LocB.Size);
+
+ // Compute the difference between the two pointers.
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+    // Test whether the difference is known to be great enough that memory
+    // regions of the given sizes don't overlap. This assumes that ASizeInt
+    // and BSizeInt are non-zero, which is special-cased above.
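+    // For example, if BA = B - A is known to lie in [16, 100] and both
+    // accesses are 8 bytes wide, then [A, A+8) and [B, B+8) cannot overlap.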
+ if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+    // Test whether the difference is known to be great enough that memory
+    // regions of the given sizes don't overlap. This assumes that ASizeInt
+    // and BSizeInt are non-zero, which is special-cased above.
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ return NoAlias;
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetBaseValue(AS);
+ Value *BO = GetBaseValue(BS);
+ if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+ if (alias(Location(AO ? AO : LocA.Ptr,
+ AO ? +UnknownSize : LocA.Size,
+ AO ? 0 : LocA.TBAATag),
+ Location(BO ? BO : LocB.Ptr,
+ BO ? +UnknownSize : LocB.Size,
+ BO ? 0 : LocB.TBAATag)) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AliasAnalysis::alias(LocA, LocB);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 000000000000..befe6d2599d6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,1403 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // Check to see if there is already a cast!
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, fix it.
+ if (BasicBlock::iterator(CI) != IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point. Clear its operand
+ // so that it doesn't hold anything live.
+ Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
+ NewCI->takeName(CI);
+ CI->replaceAllUsesWith(NewCI);
+ CI->setOperand(0, UndefValue::get(V->getType()));
+ rememberInstruction(NewCI);
+ return NewCI;
+ }
+ rememberInstruction(CI);
+ return CI;
+ }
+ }
+
+ // Create a new cast.
+ Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
+ rememberInstruction(I);
+ return I;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast && V->getType() == Ty)
+ return V;
+
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
+ }
+
+ // Cast the instruction immediately after the instruction.
+ Instruction *I = cast<Instruction>(V);
+ BasicBlock::iterator IP = I; ++IP;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
+ return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
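+/// For example, if an identical "add %a, %b" already appears among the few
+/// instructions immediately before the insertion point, it is reused rather
+/// than emitting a second copy.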
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // If we haven't found this binop, insert it.
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS,
+                                                          "tmp"));
+ BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ rememberInstruction(BO);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
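+///
+/// For example, with Factor = 4, a constant S = 12 becomes 3 with no
+/// remainder, while S = 14 becomes 3 and adds a remainder of 2.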
+static bool FactorOutConstant(const SCEV *&S,
+ const SCEV *&Remainder,
+ const SCEV *Factor,
+ ScalarEvolution &SE,
+ const TargetData *TD) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(),
+ C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(
+ FC->getValue()->getValue())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ if (TD) {
+ // With TargetData, the size is known. Check if there is a constant
+ // operand which is a multiple of the given factor. If so, we can
+ // factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ } else {
+ // Without TargetData, check if Factor can be factored out of any of the
+ // Mul's operands. If so, we can just remove it.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *SOp = M->getOperand(i);
+ const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
+ if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+ Remainder->isZero()) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[i] = SOp;
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+ return false;
+ // FIXME: can use A->getNoWrapFlags(FlagNW)
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
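+/// As a rough illustration, given a base pointer of type %struct.S* where
+/// %struct.S = type { i32, i32 } (an 8-byte element with field 1 at offset 4)
+/// and an offset expression of the form 8*i + 4, the expansion can produce a
+/// GEP along the lines of
+///   getelementptr %struct.S* %base, i64 %i, i32 1
+/// instead of casting the pointer to an integer, adding, and casting back.
+///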
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ const PointerType *PTy,
+ const Type *Ty,
+ Value *V) {
+ const Type *ElTy = PTy->getElementType();
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEV *Op = Ops[i];
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ if (SE.TD) {
+ // With TargetData, field offsets are known. See if a constant offset
+ // falls within any of the struct fields.
+ if (Ops.empty()) break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *SE.TD->getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ } else {
+ // Without TargetData, just check for an offsetof expression of the
+ // appropriate struct type.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
+ GepIndices.push_back(FieldNo);
+ ElTy =
+ STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
+ Ops[i] = SE.getConstant(Ty, 0);
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ break;
+ }
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ break;
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return GEP;
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = false;
+ for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+ E = GepIndices.end(); I != E; ++I)
+ if (!L->isLoopInvariant(*I)) {
+ AnyIndexNotLoopInvariant = true;
+ break;
+ }
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(Casted,
+ GepIndices.begin(),
+ GepIndices.end(),
+ "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return expand(SE.getAddExpr(Ops));
+}
+
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later one.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+ RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
+ return 0;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return 0;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = 0;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+ I != E; ++I)
+ L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result =
+ PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+ getRelevantLoop(D->getRHS()),
+ *SE.DT);
+ return RelevantLoops[D] = Result;
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+ return 0;
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the front, so they can be used as GEP bases.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (isNonConstantNegative(LHS.second)) {
+ if (!isNonConstantNegative(RHS.second))
+ return false;
+ } else if (isNonConstantNegative(RHS.second))
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is a SCEVUnknown of a non-instruction value, peek
+ // through it to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is instructions,
+ // use a SCEVUnknown to avoid re-analyzing them.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (isNonConstantNegative(Op)) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ ++I;
+ }
+ }
+
+ return Prod;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getValue()->getValue();
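+ // Since x /u 2^k == x >>u k, a udiv by a power-of-two constant can be
+ // emitted as a logical shift right (e.g. a udiv by 8 becomes an lshr by 3).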
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()));
+ }
+
+ Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS);
+}
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
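+/// For example (taking an initial Rest of 0 for illustration), a Base of
+/// {(4 + %p),+,8}<L> is transformed into Base = %p with the constant and the
+/// recurrence folded into Rest, giving Rest = 4 + {0,+,8}<L>.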
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ const Type *ExpandTy,
+ const Type *IntTy) {
+ assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
+ // Reuse a previously-inserted PHI, if present.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(Normalized->getType())) &&
+ SE.getSCEV(PN) == Normalized)
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *IncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ // Determine if this is a well-behaved chain of instructions leading
+ // back to the PHI. It probably will be, if we're scanning an inner
+ // loop already visited by LSR for example, but it wouldn't have
+ // to be.
+ do {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
+ IncV = 0;
+ break;
+ }
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+ IncV = 0;
+ break;
+ }
+ }
+ if (!IncV)
+ break;
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ break;
+ if (IncV->mayHaveSideEffects()) {
+ IncV = 0;
+ break;
+ }
+ } while (IncV != PN);
+
+ if (IncV) {
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+ // Remember the increment.
+ IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+ rememberInstruction(IncV);
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
+ return PN;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Expand code for the start value.
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getHeader()->begin());
+
+ // StartV must be hoisted into L's preheader to dominate the new phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
+ // Expand code for the step value. Insert instructions right before the
+ // terminator corresponding to the back-edge. Do this before creating the PHI
+ // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+ // negative, insert a sub instead of an add for the increment (unless it's a
+ // constant, because subtracts of constants are canonicalized to adds).
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ bool isPointer = ExpandTy->isPointerTy();
+ bool isNegative = !isPointer && isNonConstantNegative(Step);
+ if (isNegative)
+ Step = SE.getNegativeSCEV(Step);
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
+ rememberInstruction(PN);
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI. If IVIncInsertLoop is
+ // non-null and equal to the addrec's loop, insert the instructions
+ // at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos);
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (isPointer) {
+ const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = isNegative ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ const Type *STy = S->getType();
+ const Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized =
+ cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
+ Loops, SE, *SE.DT));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = 0;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ // FIXME: Normalized->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = 0;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
+ Normalized->getLoop(),
+ // FIXME: Normalized
+ // ->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ const Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeFor(PostLoopScale, IntTy));
+ rememberInstruction(Result);
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ const SCEV *const OffsetArray[1] = { PostLoopOffset };
+ Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(Result,
+ expandCodeFor(PostLoopOffset, IntTy));
+ rememberInstruction(Result);
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = 0;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+ // its type is more narrow.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) >
+ SE.getTypeSizeInBits(Ty)) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ // FIXME: S->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BasicBlock::iterator NewInsertPt =
+ llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
+ ++NewInsertPt;
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+ NewInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ // FIXME: can use S->getNoWrapFlags()
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ }
+ }
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+ SE.getUnknown(expand(Rest))));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ Header->begin());
+ rememberInstruction(CanonicalIV);
+
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateTrunc(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateZExt(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateSExt(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
+ Instruction *I) {
+ BasicBlock::iterator IP = I;
+ while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ Builder.SetInsertPoint(IP->getParent(), IP);
+ return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = Builder.GetInsertPoint();
+ for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+ L = L->getParentLoop())
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = L->getHeader()->getFirstNonPHI();
+ while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+ InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+ break;
+ }
+
+ // Check to see if we already expanded this here.
+ std::map<std::pair<const SCEV *, Instruction *>,
+ AssertingVH<Value> >::iterator I =
+ InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+
+ // Expand the expression into instructions.
+ Value *V = visit(S);
+
+ // Remember the expanded value for this SCEV at this location.
+ if (PostIncLoops.empty())
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
+ InsertedValues.insert(I);
+
+ // If we just claimed an existing instruction and that instruction had
+ // been the insert point, adjust the insert point forward so that
+ // subsequently inserted code will be dominated.
+ if (Builder.GetInsertPoint() == I) {
+ BasicBlock::iterator It = cast<Instruction>(I);
+ do { ++It; } while (isInsertedInstruction(It) ||
+ isa<DbgInfoIntrinsic>(It));
+ Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+ }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+ // If we acquired more instructions since the old insert point was saved,
+ // advance past them.
+ while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
+
+ Builder.SetInsertPoint(BB, I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+PHINode *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ const Type *Ty) {
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
+ // Conservatively use FlagAnyWrap for now.
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+
+ // Emit code for it.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return V;
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
new file mode 100644
index 000000000000..60e630aaab88
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -0,0 +1,184 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
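+///
+/// For example, a compare against the IV in a loop exit block dominated by
+/// the latch can use the post-incremented value directly, rather than keeping
+/// the pre-incremented value live out of the loop.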
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User)) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
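+/// TransformForPostIncUse - Transform the given expression according to the
+/// given Kind (see ScalarEvolutionNormalization.h). For example, normalizing
+/// a post-increment use of {0,+,1}<L> yields {-1,+,1}<L>; Denormalize undoes
+/// this by re-applying the step via getPostIncExpr.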
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+
+ if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+ const SCEV *O = X->getOperand();
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (O != N)
+ switch (S->getSCEVType()) {
+ case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+ case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+ case scTruncate: return SE.getTruncateExpr(N, S->getType());
+ default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = L->getHeader()->begin();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
+ Operands.push_back(N);
+ }
+ // Conservatively use AnyWrap until/unless we need FlagNW.
+ const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected transform name!");
+ case NormalizeAutodetect:
+ if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+ break;
+ }
+ return Result;
+ }
+
+ if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+ SmallVector<const SCEV *, 8> Operands;
+ bool Changed = false;
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ Changed |= N != O;
+ Operands.push_back(N);
+ }
+ // If any operand actually changed, return a transformed result.
+ if (Changed)
+ switch (S->getSCEVType()) {
+ case scAddExpr: return SE.getAddExpr(Operands);
+ case scMulExpr: return SE.getMulExpr(Operands);
+ case scSMaxExpr: return SE.getSMaxExpr(Operands);
+ case scUMaxExpr: return SE.getUMaxExpr(Operands);
+ default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEV *LO = X->getLHS();
+ const SCEV *RO = X->getRHS();
+ const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
+ Loops, SE, DT);
+ const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (LO != LN || RO != RN)
+ return SE.getUDivExpr(LN, RN);
+ return S;
+ }
+
+ llvm_unreachable("Unexpected SCEV kind!");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 000000000000..d8c207b4bd49
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,347 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
+ if (V == UndefVal)
+ OS << "undefined";
+ else if (V == OverdefinedVal)
+ OS << "overdefined";
+ else if (V == UntrackedVal)
+ OS << "untracked";
+ else
+ OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ LatticeVal LV;
+ if (LatticeFunc->IsUntrackedValue(V))
+ return LatticeFunc->getUntrackedVal();
+ else if (Constant *C = dyn_cast<Constant>(V))
+ LV = LatticeFunc->ComputeConstant(C);
+ else if (Argument *A = dyn_cast<Argument>(V))
+ LV = LatticeFunc->ComputeArgument(A);
+ else if (!isa<Instruction>(V))
+ // All other non-instructions are overdefined.
+ LV = LatticeFunc->getOverdefinedVal();
+ else
+ // All instructions are underdefined by default.
+ LV = LatticeFunc->getUndefVal();
+
+ // If this value is untracked, don't add it to the map.
+ if (LV == LatticeFunc->getUntrackedVal())
+ return LV;
+ return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+ if (I != ValueState.end() && I->second == V)
+ return; // No change.
+
+ // An update. Visit uses of I.
+ ValueState[&Inst] = V;
+ InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+/// work list if it is not already executable...
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ if (BBExecutable.count(Dest)) {
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVectorImpl<bool> &Succs,
+ bool AggressiveUndef) {
+ Succs.resize(TI.getNumSuccessors());
+ if (TI.getNumSuccessors() == 0) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue;
+ if (AggressiveUndef)
+ BCValue = getOrInitValueState(BI->getCondition());
+ else
+ BCValue = getLatticeState(BI->getCondition());
+
+ if (BCValue == LatticeFunc->getOverdefinedVal() ||
+ BCValue == LatticeFunc->getUntrackedVal()) {
+ // Overdefined condition variables can branch either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (BCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // Non-constant values can go either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way
+ Succs[C->isNullValue()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions' successors are always executable.
+ // TODO: Could ask the lattice function if the value can throw.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (isa<IndirectBrInst>(TI)) {
+ Succs.assign(Succs.size(), true);
+ return;
+ }
+
+ SwitchInst &SI = cast<SwitchInst>(TI);
+ LatticeVal SCValue;
+ if (AggressiveUndef)
+ SCValue = getOrInitValueState(SI.getCondition());
+ else
+ SCValue = getLatticeState(SI.getCondition());
+
+ if (SCValue == LatticeFunc->getOverdefinedVal() ||
+ SCValue == LatticeFunc->getUntrackedVal()) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (SCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+ bool AggressiveUndef) {
+ SmallVector<bool, 16> SuccFeasible;
+ TerminatorInst *TI = From->getTerminator();
+ getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (TI->getSuccessor(i) == To && SuccFeasible[i])
+ return true;
+
+ return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible, true);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
+ LatticeVal PNIV = getOrInitValueState(&PN);
+ LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+ // If this value is already overdefined (common) just return.
+ if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ UpdateState(PN, Overdefined);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+ // transfer function to give us the merge of the incoming values.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+ continue;
+
+ // Merge in this value.
+ LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+ if (OpVal != PNIV)
+ PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+ if (PNIV == Overdefined)
+ break; // Rest of input values don't matter.
+ }
+
+ // Update the PHI with the computed value, which is the merge of the inputs.
+ UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+ // PHIs are handled by the propagation logic; they are never passed into the
+ // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ return visitPHINode(*PN);
+
+ // Otherwise, ask the transfer function what the result is. If this is
+ // something that we care about, remember it.
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(I, IV);
+
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+ visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+ MarkBlockExecutable(&F.getEntryBlock());
+
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty()) {
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Instruction *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
+
+ // "I" got into the work list because it made a transition. See if any
+ // users are both live and in need of updating.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (BBExecutable.count(U->getParent())) // Inst is executable?
+ visitInst(*U);
+ }
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ visitInst(*I);
+ }
+ }
+}
+
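+// A minimal driver sketch (illustrative only; "MyLattice" is a hypothetical
+// lattice-function subclass, and the constructor shown is an assumption about
+// the header's interface):
+//   MyLattice LF;
+//   SparseSolver Solver(&LF);
+//   Solver.Solve(F);          // run the worklists above to a fixed point
+//   Solver.Print(F, dbgs());  // dump the lattice state for F
+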
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
+ OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ OS << "INFEASIBLE: ";
+ OS << "\t";
+ if (BB->hasName())
+ OS << BB->getNameStr() << ":\n";
+ else
+ OS << "; anon bb\n";
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ LatticeFunc->PrintValue(getLatticeState(I), OS);
+ OS << *I << "\n";
+ }
+
+ OS << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
new file mode 100644
index 000000000000..68a39cd581f4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -0,0 +1,51 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single entry, multiple exit, region of code that is often hot. Trace-based
+// optimizations treat traces almost like they are a large, strange basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+ return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+ return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ O << "; ";
+ WriteAsOperand(O, *i, true, getModule());
+ O << "\n";
+ }
+ O << "; Trace parent function: \n" << *F;
+}
+
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 000000000000..0faf1398ec76
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,300 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
+//
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+//
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
+//
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which, if equal to 1,
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
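+// As an illustrative sketch using the nodes above (%p, %q and the loads are
+// hypothetical), accesses are tagged by attaching the metadata to the
+// instruction:
+//   %i = load i32* %p, !tbaa !1
+//   %f = load float* %q, !tbaa !2
+// Since "int" and "float" are siblings under the same root, the two loads are
+// reported as NoAlias.
+//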
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+
+namespace {
+ /// TBAANode - This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAANode {
+ const MDNode *Node;
+
+ public:
+ TBAANode() : Node(0) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+ };
+}
+
+namespace {
+ /// TypeBasedAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses TBAA metadata to answer queries.
+ class TypeBasedAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+ initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
+ "Type-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
+ return new TypeBasedAliasAnalysis();
+}
+
+void
+TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+ const MDNode *B) const {
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A); ; ) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B); ; ) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
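+// Worked example (illustrative), using the nodes from the file header: for
+// A = !1 ("int") and B = !3 ("const float"), neither climb reaches the other
+// node, but both climbs stop at the shared root !0, so Aliases returns false
+// and the query below reports NoAlias.
+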
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ if (!EnableTBAA)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.TBAATag;
+ if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ const MDNode *BM = LocB.TBAATag;
+ if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
+}
+
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ const MDNode *M = Loc.TBAATag;
+ if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ if (TBAANode(M).TypeIsImmutable())
+ return true;
+
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefBehavior(CS);
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (TBAANode(M).TypeIsImmutable())
+ Min = OnlyReadsMemory;
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 000000000000..455c91077dfb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,1798 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0). For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+ return TD ? TD->getPointerSizeInBits() : 0;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// optimize based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
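+///
+/// As an illustrative sketch (the value and names here are hypothetical): for
+/// a value V defined as "and i8 %x, 15", a caller might do
+///   APInt KnownZero(8, 0), KnownOne(8, 0);
+///   ComputeMaskedBits(V, APInt::getAllOnesValue(8), KnownZero, KnownOne, TD);
+/// and find the top four bits set in KnownZero, since the constant mask forces
+/// them to zero regardless of %x.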
+void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = Mask.getBitWidth();
+ assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+ && "Not integer or pointer type!");
+ assert((!TD ||
+ TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, Mask, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ }
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
+ KnownOne.clearAllBits();
+ KnownZero = Mask;
+ return;
+ }
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ KnownZero.setAllBits(); KnownOne.setAllBits();
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
+ TD, Depth);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ }
+ return;
+ }
+ // The address of an aligned GlobalValue has trailing zeros.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ unsigned Align = GV->getAlignment();
+ if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+ const Type *ObjectType = GV->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GV->isDeclaration() && !GV->mayBeOverridden())
+ Align = TD->getPrefTypeAlignment(ObjectType);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ } else {
+ ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ }
+ return;
+ }
+
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // Get alignment information off byval arguments if specified in the IR.
+ if (A->hasByValAttr())
+ if (unsigned Align = A->getParamAlignment())
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ return;
+ }
+
+ // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+
+ if (Depth == MaxDepth || Mask == 0)
+ return; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return;
+
+ APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::And: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownZero);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ }
+ case Instruction::Or: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownOne);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ }
+ case Instruction::Xor: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case Instruction::Mul: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case Instruction::UDiv: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(I->getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case Instruction::Select:
+ ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return; // Can't work with floating point.
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ // We can't handle these if we don't know the pointer size.
+ if (!TD) return;
+ // FALL THROUGH and handle them the same as zext/trunc.
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
+ // Note that we handle pointer operands here because of inttoptr/ptrtoint
+ // which fall through here.
+ if (SrcTy->isPointerTy())
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
+ else
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+
+ APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
+ KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ KnownZero = KnownZero.zextOrTrunc(BitWidth);
+ KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::BitCast: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ // TODO: For now, not handling conversions like:
+ // (bitcast i64 %x to <2 x i32>)
+ !I->getType()->isVectorTy()) {
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+ return;
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt MaskIn = Mask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
+ KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::Shl:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ APInt Mask2(Mask.lshr(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
+ return;
+ }
+ break;
+ case Instruction::LShr:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ // high bits known zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ return;
+ }
+ break;
+ case Instruction::AShr:
+ // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
+ KnownZero |= HighBits;
+ else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
+ KnownOne |= HighBits;
+ return;
+ }
+ break;
+ case Instruction::Sub: {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case Instruction::Add: {
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (I->getOpcode() == Instruction::Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
+ OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoSignedWrap()) {
+ if (I->getOpcode() == Instruction::Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+
+ return;
+ }
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (Mask.isNegative() && KnownZero.isNonNegative()) {
+ APInt Mask2 = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
+
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
+ TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ break;
+ }
+
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ unsigned Align = AI->getAlignment();
+ if (Align == 0 && TD)
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ // Analyze all of the subscripts of this getelementptr instruction
+ // to determine if we can prove known low zero bits.
+ APInt LocalMask = APInt::getAllOnesValue(BitWidth);
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+ gep_type_iterator GTI = gep_type_begin(I);
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Index = I->getOperand(i);
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ // Handle struct member offset arithmetic.
+ if (!TD) return;
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ uint64_t Offset = SL->getElementOffset(Idx);
+ TrailZ = std::min(TrailZ,
+ CountTrailingZeros_64(Offset));
+ } else {
+ // Handle array index arithmetic.
+ const Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) return;
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
+ uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ LocalMask = APInt::getAllOnesValue(GEPOpiBits);
+ LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+ ComputeMaskedBits(Index, LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ TrailZ = std::min(TrailZ,
+ unsigned(CountTrailingZeros_64(TypeSize) +
+ LocalKnownZero.countTrailingOnes()));
+ }
+ }
+
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *P = cast<PHINode>(I);
+ // Handle the case of a simple two-predecessor recurrence PHI.
+ // There's a lot more that could theoretically be done here, but
+ // this is sufficient to catch some interesting cases.
+ if (P->getNumIncomingValues() == 2) {
+ for (unsigned i = 0; i != 2; ++i) {
+ Value *L = P->getIncomingValue(i);
+ Value *R = P->getIncomingValue(!i);
+ Operator *LU = dyn_cast<Operator>(L);
+ if (!LU)
+ continue;
+ unsigned Opcode = LU->getOpcode();
+ // Check for operations that have the property that if
+ // both their operands have low zero bits, the result
+ // will have low zero bits.
+ if (Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub ||
+ Opcode == Instruction::And ||
+ Opcode == Instruction::Or ||
+ Opcode == Instruction::Mul) {
+ Value *LL = LU->getOperand(0);
+ Value *LR = LU->getOperand(1);
+ // Find a recurrence.
+ if (LL == I)
+ L = LR;
+ else if (LR == I)
+ L = LL;
+ else
+ break;
+ // Ok, we have a PHI of the form L op= R. Check for low
+ // zero bits.
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ Mask2 = APInt::getLowBitsSet(BitWidth,
+ KnownZero2.countTrailingOnes());
+
+ // We need to take the minimum number of known bits
+ APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+ ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+
+ KnownZero = Mask &
+ APInt::getLowBitsSet(BitWidth,
+ std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+ break;
+ }
+ }
+ }
+
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ return;
+
+ // Otherwise take the unions of the known bit sets of the operands,
+ // taking conservative care to avoid excessive recursion.
+ if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ // Skip if every incoming value refers back to this PHI.
+ if (P->hasConstantValue() == P)
+ break;
+
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ // Skip direct self references.
+ if (P->getIncomingValue(i) == P) continue;
+
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ // Recurse, but cap the recursion to one level, because we don't
+ // want to waste time spinning around in loops.
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
+ KnownZero2, KnownOne2, TD, MaxDepth-1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ // If all bits have been ruled out, there's no need to check
+ // more operands.
+ if (!KnownZero && !KnownOne)
+ break;
+ }
+ }
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ break;
+ }
+ }
+ break;
+ }
+}
+
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one. Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+ if (!BitWidth) {
+ KnownZero = false;
+ KnownOne = false;
+ return;
+ }
+ APInt ZeroBits(BitWidth, 0);
+ APInt OneBits(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
+ Depth);
+ KnownOne = OneBits[BitWidth - 1];
+ KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
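+///
+/// For example (illustrative), a value defined as "shl i32 1, %n" is
+/// recognized as a power of two, as is a select whose two arms are each known
+/// powers of two.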
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+
+ // 1 << X is clearly a power of two if the one is not shifted off the end. If
+ // it is shifted off the end then the result is undefined.
+ if (match(V, m_Shl(m_One(), m_Value())))
+ return true;
+
+ // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+ // bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignBit(), m_Value())))
+ return true;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
+ isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+
+ // An exact divide or right shift can only shift off zero bits, so the result
+ // is a power of two only if the first operand is a power of two and not
+ // copying a sign bit (sdiv int_min, 2).
+ if (match(V, m_LShr(m_Value(), m_Value())) ||
+ match(V, m_UDiv(m_Value(), m_Value()))) {
+ PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V);
+ if (PEO->isExact())
+ return isPowerOfTwo(PEO->getOperand(0), TD, Depth);
+ }
+
+ return false;
+}
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined. For vectors return true if every element is known to be
+/// non-zero when defined. Supports values with integer or pointer type and
+/// vectors of integers.
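+///
+/// For example (illustrative), a value defined as "or i32 %a, 1" is known to
+/// be non-zero, because one operand of the 'or' is a non-zero constant.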
+bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+ // TODO: Handle vectors
+ return false;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+
+ // X | Y != 0 if X != 0 or Y != 0.
+ Value *X = 0, *Y = 0;
+ if (match(V, m_Or(m_Value(X), m_Value(Y))))
+ return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+ // ext X != 0 if X != 0.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ // shl nuw can't remove any non-zero bits.
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->hasNoUnsignedWrap())
+ return isKnownNonZero(X, TD, Depth);
+
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+ if (KnownOne[0])
+ return true;
+ }
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+ // shr exact can only shift out zero bits.
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+
+ bool XKnownNonNegative, XKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ if (XKnownNegative)
+ return true;
+ }
+ // div exact can only produce a zero if the dividend is zero.
+ else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+ }
+ // X + Y.
+ else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ bool YKnownNonNegative, YKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnownNonNegative && YKnownNonNegative)
+ if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ return true;
+
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (BitWidth && XKnownNegative && YKnownNegative) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+ return true;
+ if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+ return true;
+ }
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
+ else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ return true;
+ }
+
+ if (!BitWidth) return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
+ TD, Depth);
+ return KnownOne != 0;
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
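+///
+/// For example (illustrative), for a value V defined as "and i8 %x, 12",
+/// MaskedValueIsZero(V, APInt(8, 3), TD) returns true, because the low two
+/// bits of V are known to be zero.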
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+ const TargetData *TD, unsigned Depth) {
+ APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "ashr X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+///
+/// 'V' must have an integer, integer-vector, or pointer type.
+///
+unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
+ unsigned Depth) {
+ assert((TD || V->getType()->isIntOrIntVectorTy()) &&
+ "ComputeNumSignBits requires a TargetData object to operate "
+ "on non-integer values!");
+ const Type *Ty = V->getType();
+ unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
+ Ty->getScalarSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ // Note that ConstantInt is handled by the general ComputeMaskedBits case
+ // below.
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+
+ case Instruction::AShr:
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ // ashr X, C -> adds C sign bits.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ // vector ashr X, <C, C, C, C> -> adds C sign bits
+ if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ Tmp += CI->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ }
+ return Tmp;
+ case Instruction::Shl:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (C->getZExtValue() >= TyBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case Instruction::Select:
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(U);
+ // Don't analyze large in-degree PHIs.
+ if (PN->getNumIncomingValues() > 4) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(Tmp,
+ ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ }
+ return Tmp;
+ }
+
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-TyBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// ComputeMultiple - This function computes the integer Multiple such that
+/// V == Base * Multiple. If successful, it returns true and stores the result
+/// in Multiple; otherwise it returns false. It looks through SExt instructions
+/// only if LookThroughSExt is true.
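+///
+/// For example (illustrative), for a value V defined as "shl i32 %x, 3" and
+/// Base == 8, this returns true with Multiple set to %x, since V == 8 * %x.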
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
+
+ const Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt:
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ case Instruction::ZExt:
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
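+      // (e.g. Op0 << 3 becomes Op0 * 8: API gets only bit 3 set, i.e. the
+      // constant 8).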
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ APInt API(Op1Int.getBitWidth(), 0);
+ API.setBit(BitToSet);
+ Op1 = ConstantInt::get(V->getContext(), API);
+ }
+
+ Value *Mul0 = NULL;
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ Value *Mul1 = NULL;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegZero();
+
+ if (Depth == 6)
+    return false;  // Limit search depth; conservatively assume it may be -0.0.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (I == 0) return false;
+
+ // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (I->getOpcode() == Instruction::FAdd &&
+ isa<ConstantFP>(I->getOperand(1)) &&
+ cast<ConstantFP>(I->getOperand(1))->isNullValue())
+ return true;
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // sqrt(-0.0) = -0.0, no other negative results are possible.
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+ }
+ }
+
+ return false;
+}
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+  // Handle 'null' ConstantAggregateZero etc.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
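+      // For example, i32 0xF0F0F0F0 halves into 0xF0F0/0xF0F0 and then into
+      // 0xF0/0xF0, so its splat byte is 0xF0, while i16 0x1234 is rejected at
+      // the first split (0x12 != 0x34).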
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2 = Val2.trunc(Val.getBitWidth()/2);
+ Val = Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(V->getContext(), Val);
+ }
+ }
+
+ // A ConstantArray is splatable if all its members are equal and also
+ // splatable.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ if (CA->getNumOperands() == 0)
+ return 0;
+
+ Value *Val = isBytewiseValue(CA->getOperand(0));
+ if (!Val)
+ return 0;
+
+ for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+ if (CA->getOperand(I-1) != CA->getOperand(I))
+ return 0;
+
+ return Val;
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
+
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far, new insertvalue instructions
+// build on that.
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ Instruction *InsertBefore) {
+ const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+ if (STy) {
+ // Save the original To argument so we can modify it
+ Value *OrigTo = To;
+ // General case, the type indexed by Idxs is a struct
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // Process each struct element recursively
+ Idxs.push_back(i);
+ Value *PrevTo = To;
+ To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+ InsertBefore);
+ Idxs.pop_back();
+ if (!To) {
+ // Couldn't find any inserted value for this index? Cleanup
+ while (PrevTo != OrigTo) {
+ InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+ PrevTo = Del->getAggregateOperand();
+ Del->eraseFromParent();
+ }
+ // Stop processing elements
+ break;
+ }
+ }
+ // If we successfully found a value for each of our subaggregates
+ if (To)
+ return To;
+ }
+  // Base case, the type indexed by Idxs is not a struct, or not all of
+ // the struct's elements had a value that was inserted directly. In the latter
+ // case, perhaps we can't determine each of the subelements individually, but
+ // we might be able to find the complete struct somewhere.
+
+ // Find the value that is at that particular spot
+ Value *V = FindInsertedValue(From, Idxs);
+
+ if (!V)
+ return NULL;
+
+  // Insert the value into the new (sub) aggregate
+ return llvm::InsertValueInst::Create(To, V,
+ ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+ "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (ie, inserted into From by an
+// insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ assert(InsertBefore && "Must have someplace to insert!");
+ const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ idx_range);
+ Value *To = UndefValue::get(IndexedType);
+ SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
+ unsigned IdxSkip = Idxs.size();
+
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ // Nothing to index? Just return V then (this is useful at the end of our
+ // recursion)
+ if (idx_range.empty())
+ return V;
+ // We have indices, so V should have an indexable type
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
+ && "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
+ && "Invalid indices for type?");
+ const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+ idx_range));
+ else if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+ idx_range));
+ else if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+ // Recursively process this constant
+ return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
+ InsertBefore);
+ } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // Loop the indices for the insertvalue instruction in parallel with the
+ // requested indices
+ const unsigned *req_idx = idx_range.begin();
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i, ++req_idx) {
+ if (req_idx == idx_range.end()) {
+ if (InsertBefore)
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V,
+ ArrayRef<unsigned>(idx_range.begin(),
+ req_idx),
+ InsertBefore);
+ else
+ // We can't handle this without inserting insertvalues
+ return 0;
+ }
+
+      // This insertvalue inserts something other than what we are looking for.
+      // See if the (aggregate) value it was inserted into has the value we
+      // are looking for instead.
+ if (*req_idx != *i)
+ return FindInsertedValue(I->getAggregateOperand(), idx_range,
+ InsertBefore);
+ }
+ // If we end up here, the indices of the insertvalue match with those
+ // requested (though possibly only partially). Now we recursively look at
+ // the inserted value, passing any remaining indices.
+ return FindInsertedValue(I->getInsertedValueOperand(),
+ ArrayRef<unsigned>(req_idx, idx_range.end()),
+ InsertBefore);
+ } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+ // something else, we can extract from that something else directly instead.
+ // However, we will need to chain I's indices with the requested indices.
+
+ // Calculate the number of indices required
+ unsigned size = I->getNumIndices() + idx_range.size();
+ // Allocate some space to put the new indices in
+ SmallVector<unsigned, 5> Idxs;
+ Idxs.reserve(size);
+ // Add indices from the extract value instruction
+ Idxs.append(I->idx_begin(), I->idx_end());
+
+ // Add requested indices
+ Idxs.append(idx_range.begin(), idx_range.end());
+
+ assert(Idxs.size() == size
+ && "Number of indices added not correct?");
+
+ return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
+ }
+ // Otherwise, we don't know (such as, extracting from a function return value
+ // or load instruction)
+ return 0;
+}
+
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset. Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData &TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+    // Handle struct and array indices, which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
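+  // (For example, with 32-bit pointers an accumulated Offset of 0x80000000 is
+  // folded back to the negative value INT32_MIN by the shift pair below.)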
+ unsigned PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// GetConstantStringInfo - This function extracts the null-terminated C string
+/// pointed to by V, starting at the given Offset. If successful, it returns
+/// true and returns the string in Str. If unsuccessful, it returns false.
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+ uint64_t Offset,
+ bool StopAtNul) {
+ // If V is NULL then return false;
+ if (V == NULL) return false;
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
+
+  // If the value is neither a GEP instruction nor a constant expression with
+  // a GEP, then return false because a ConstantArray can't be reached any
+  // other way.
+ const User *GEP = 0;
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast)
+ return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return false;
+ GEP = CE;
+ }
+
+ if (GEP) {
+ // Make sure the GEP has exactly three arguments.
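+    // (i.e. the pointer operand plus two indices, as in the common
+    //  "getelementptr [N x i8]* @str, i32 0, i32 Idx" pattern).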
+ if (GEP->getNumOperands() != 3)
+ return false;
+
+    // Make sure the pointer being indexed points to an array of i8.
+ const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (FirstIdx == 0 || !FirstIdx->isZero())
+ return false;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return false;
+ return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
+ StopAtNul);
+ }
+
+  // The GEP, whether a constant expression or an instruction, must reference a
+  // global variable that is a constant and is initialized. The referenced
+  // constant initializer is the array that we'll use for optimization.
+ const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+ const Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case
+ if (isa<ConstantAggregateZero>(GlobalInit)) {
+ // This is a degenerate case. The initializer is constant zero so the
+ // length of the string must be zero.
+ Str.clear();
+ return true;
+ }
+
+  // Must be a ConstantArray
+ const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ if (Offset > NumElts)
+ return false;
+
+ // Traverse the constant array from 'Offset' which is the place the GEP refers
+ // to in the array.
+ Str.reserve(NumElts-Offset);
+ for (unsigned i = Offset; i != NumElts; ++i) {
+ const Constant *Elt = Array->getOperand(i);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return false;
+ if (StopAtNul && CI->isZero())
+ return true; // we found end of string, success!
+ Str += (char)CI->getZExtValue();
+ }
+
+ // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+ return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+ // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+  // If the value is neither a GEP instruction nor a constant expression with
+  // a GEP, then return unknown.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return 0;
+ GEP = CE;
+ } else {
+ return 0;
+ }
+
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return 0;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+ if (!Idx->isZero())
+ return 0;
+ } else
+ return 0;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return 0;
+
+  // The GEP, whether a constant expression or an instruction, must reference a
+  // global variable that is a constant and is initialized. The referenced
+  // constant initializer is the array that we'll use for optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ GV->mayBeOverridden())
+ return 0;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case, which is a degenerate case. The
+ // initializer is constant zero so the length of the string must be zero.
+ if (isa<ConstantAggregateZero>(GlobalInit))
+ return 1; // Len = 0 offset by 1.
+
+  // Must be a ConstantArray
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return 0;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ // Traverse the constant array from StartIdx (derived above) which is
+ // the place the GEP refers to in the array.
+ for (unsigned i = StartIdx; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return 0;
+ if (CI->isZero())
+ return i-StartIdx+1; // We found end of string, success!
+ }
+
+ return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+ // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+ // an empty string as a length.
+ return Len == ~0ULL ? 1 : Len;
+}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ // See if InstructionSimplify knows any relevant tricks.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Acquire a DominatorTree and use it.
+ if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+ if (!II) return false;
+
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Archive/Archive.cpp b/contrib/llvm/lib/Archive/Archive.cpp
new file mode 100644
index 000000000000..1eab27d3eba3
--- /dev/null
+++ b/contrib/llvm/lib/Archive/Archive.cpp
@@ -0,0 +1,261 @@
+//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the Archive and ArchiveMember
+// classes that are common to both reading and writing archives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
+#include <memory>
+#include <cstring>
+using namespace llvm;
+
+// getMemberSize - compute the actual physical size of the file member as seen
+// on disk. This isn't the size of the member's payload; use getSize() for that.
+unsigned
+ArchiveMember::getMemberSize() const {
+  // Basically it's the file size plus the header size
+ unsigned result = info.fileSize + sizeof(ArchiveMemberHeader);
+
+ // If it has a long filename, include the name length
+ if (hasLongFilename())
+ result += path.str().length() + 1;
+
+  // If it's now an odd length, include the padding byte
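+  // (members are kept on even boundaries, so e.g. a 73-byte member occupies
+  // 74 bytes on disk and the next header starts at an even offset).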
+ if (result % 2 != 0 )
+ result++;
+
+ return result;
+}
+
+// This default constructor is only used by the ilist when it creates its
+// sentinel node. We give it specific static values to make it stand out a bit.
+ArchiveMember::ArchiveMember()
+ : parent(0), path("--invalid--"), flags(0), data(0)
+{
+ info.user = sys::Process::GetCurrentUserId();
+ info.group = sys::Process::GetCurrentGroupId();
+ info.mode = 0777;
+ info.fileSize = 0;
+ info.modTime = sys::TimeValue::now();
+}
+
+// This is the constructor that the Archive class uses when it is building or
+// reading an archive. It just defaults a few things and ensures the parent is
+// set for the iplist. The Archive class fills in the ArchiveMember's data.
+// This is required because correctly setting the data may depend on other
+// things in the Archive.
+ArchiveMember::ArchiveMember(Archive* PAR)
+ : parent(PAR), path(), flags(0), data(0)
+{
+}
+
+// This method allows an ArchiveMember to be replaced with the data for a
+// different file, presumably as an update to the member. It also makes sure
+// the flags are reset correctly.
+bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
+ bool Exists;
+ if (sys::fs::exists(newFile.str(), Exists) || !Exists) {
+ if (ErrMsg)
+ *ErrMsg = "Can not replace an archive member with a non-existent file";
+ return true;
+ }
+
+ data = 0;
+ path = newFile;
+
+ // SVR4 symbol tables have an empty name
+ if (path.str() == ARFILE_SVR4_SYMTAB_NAME)
+ flags |= SVR4SymbolTableFlag;
+ else
+ flags &= ~SVR4SymbolTableFlag;
+
+ // BSD4.4 symbol tables have a special name
+ if (path.str() == ARFILE_BSD4_SYMTAB_NAME)
+ flags |= BSD4SymbolTableFlag;
+ else
+ flags &= ~BSD4SymbolTableFlag;
+
+ // LLVM symbol tables have a very specific name
+ if (path.str() == ARFILE_LLVM_SYMTAB_NAME)
+ flags |= LLVMSymbolTableFlag;
+ else
+ flags &= ~LLVMSymbolTableFlag;
+
+ // String table name
+ if (path.str() == ARFILE_STRTAB_NAME)
+ flags |= StringTableFlag;
+ else
+ flags &= ~StringTableFlag;
+
+ // If it has a slash then it has a path
+ bool hasSlash = path.str().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= HasPathFlag;
+ else
+ flags &= ~HasPathFlag;
+
+  // If it has a slash or it's over 15 chars, it uses the long filename format
+ if (hasSlash || path.str().length() > 15)
+ flags |= HasLongFilenameFlag;
+ else
+ flags &= ~HasLongFilenameFlag;
+
+ // Get the signature and status info
+ const char* signature = (const char*) data;
+ SmallString<4> magic;
+ if (!signature) {
+ sys::fs::get_magic(path.str(), magic.capacity(), magic);
+ signature = magic.c_str();
+ const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
+ if (FSinfo)
+ info = *FSinfo;
+ else
+ return true;
+ }
+
+ // Determine what kind of file it is.
+ switch (sys::IdentifyFileType(signature,4)) {
+ case sys::Bitcode_FileType:
+ flags |= BitcodeFlag;
+ break;
+ default:
+ flags &= ~BitcodeFlag;
+ break;
+ }
+ return false;
+}
+
+// Archive constructor - this is the only constructor that gets used for the
+// Archive class. Everything else (default,copy) is deprecated. This just
+// initializes and maps the file into memory, if requested.
+Archive::Archive(const sys::Path& filename, LLVMContext& C)
+ : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(),
+ symTabSize(0), firstFileOffset(0), modules(), foreignST(0), Context(C) {
+}
+
+bool
+Archive::mapToMemory(std::string* ErrMsg) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ mapfile = File.take();
+ base = mapfile->getBufferStart();
+ return false;
+}
+
+void Archive::cleanUpMemory() {
+ // Shutdown the file mapping
+ delete mapfile;
+ mapfile = 0;
+ base = 0;
+
+ // Forget the entire symbol table
+ symTab.clear();
+ symTabSize = 0;
+
+ firstFileOffset = 0;
+
+ // Free the foreign symbol table member
+ if (foreignST) {
+ delete foreignST;
+ foreignST = 0;
+ }
+
+ // Delete any Modules and ArchiveMember's we've allocated as a result of
+ // symbol table searches.
+ for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) {
+ delete I->second.first;
+ delete I->second.second;
+ }
+}
+
+// Archive destructor - just clean up memory
+Archive::~Archive() {
+ cleanUpMemory();
+}
+
+
+
+static void getSymbols(Module *M, std::vector<std::string>& symbols) {
+  // Loop over global variables
+  for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+       GI != GE; ++GI)
+ if (!GI->isDeclaration() && !GI->hasLocalLinkage())
+ if (!GI->getName().empty())
+ symbols.push_back(GI->getName());
+
+ // Loop over functions
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ if (!FI->isDeclaration() && !FI->hasLocalLinkage())
+ if (!FI->getName().empty())
+ symbols.push_back(FI->getName());
+
+ // Loop over aliases
+ for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ if (AI->hasName())
+ symbols.push_back(AI->getName());
+ }
+}
+
+// Get just the externally visible defined symbols from the bitcode
+bool llvm::GetBitcodeSymbols(const sys::Path& fName,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) {
+ if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": "
+ + ec.message();
+ return true;
+ }
+
+ Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+ if (!M)
+ return true;
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module.
+ delete M;
+ return true;
+}
+
+Module*
+llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
+ const std::string& ModuleID,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ // Get the module.
+ OwningPtr<MemoryBuffer> Buffer(
+ MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str()));
+
+ Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+ if (!M)
+ return 0;
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module. Note that it's the caller's responsibility to delete
+ // the Module.
+ return M;
+}
diff --git a/contrib/llvm/lib/Archive/ArchiveInternals.h b/contrib/llvm/lib/Archive/ArchiveInternals.h
new file mode 100644
index 000000000000..55684f7023d2
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveInternals.h
@@ -0,0 +1,89 @@
+//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal implementation header for LLVM Archive files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <cstring>
+
+#define ARFILE_MAGIC "!<arch>\n" ///< magic string
+#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1) ///< length of magic string
+#define ARFILE_SVR4_SYMTAB_NAME "/ " ///< SVR4 symtab entry name
+#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name
+#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name
+#define ARFILE_STRTAB_NAME "// " ///< Name of string table
+#define ARFILE_PAD "\n" ///< inter-file align padding
+#define ARFILE_MEMBER_MAGIC "`\n" ///< fmag field magic #
+
+namespace llvm {
+
+ class LLVMContext;
+
+ /// The ArchiveMemberHeader structure is used internally for bitcode
+ /// archives.
+ /// The header precedes each file member in the archive. This structure is
+ /// defined using character arrays for direct and correct interpretation
+  /// regardless of the endianness of the machine that produced it.
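+  /// The layout is the standard 60-byte ar(1) member header: 16+12+6+6+8+10+2
+  /// ASCII bytes with no padding between fields.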
+ /// @brief Archive File Member Header
+ class ArchiveMemberHeader {
+ /// @name Data
+ /// @{
+ public:
+ char name[16]; ///< Name of the file member.
+ char date[12]; ///< File date, decimal seconds since Epoch
+ char uid[6]; ///< user id in ASCII decimal
+ char gid[6]; ///< group id in ASCII decimal
+ char mode[8]; ///< file mode in ASCII octal
+ char size[10]; ///< file size in ASCII decimal
+    char fmag[2];  ///< Always contains ARFILE_MEMBER_MAGIC
+
+ /// @}
+ /// @name Methods
+ /// @{
+ public:
+ void init() {
+ memset(name,' ',16);
+ memset(date,' ',12);
+ memset(uid,' ',6);
+ memset(gid,' ',6);
+ memset(mode,' ',8);
+ memset(size,' ',10);
+ fmag[0] = '`';
+ fmag[1] = '\n';
+ }
+
+ bool checkSignature() {
+ return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2);
+ }
+ };
+
+ // Get just the externally visible defined symbols from the bitcode
+ bool GetBitcodeSymbols(const sys::Path& fName,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+
+ Module* GetBitcodeSymbols(const char *Buffer, unsigned Length,
+ const std::string& ModuleID,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+}
+
+#endif
+
+// vim: sw=2 ai
diff --git a/contrib/llvm/lib/Archive/ArchiveReader.cpp b/contrib/llvm/lib/Archive/ArchiveReader.cpp
new file mode 100644
index 000000000000..eef6fe0b1c1d
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveReader.cpp
@@ -0,0 +1,630 @@
+//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reads standard unix archive files (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Module.h"
+#include <cstdlib>
+#include <memory>
+using namespace llvm;
+
+/// Read a variable-bit-rate encoded unsigned integer
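+/// (each byte supplies 7 low-order bits, least-significant group first, and a
+/// set high bit means another byte follows; e.g. 0xAC 0x02 decodes to 300).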
+static inline unsigned readInteger(const char*&At, const char*End) {
+ unsigned Shift = 0;
+ unsigned Result = 0;
+
+ do {
+ if (At == End)
+ return Result;
+ Result |= (unsigned)((*At++) & 0x7F) << Shift;
+ Shift += 7;
+ } while (At[-1] & 0x80);
+ return Result;
+}
+
+// Completely parse the Archive's symbol table and populate symTab member var.
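+// The table is a flat sequence of entries, each a vbr-encoded file offset
+// followed by a vbr-encoded name length and then the symbol name bytes.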
+bool
+Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
+ const char* At = (const char*) data;
+ const char* End = At + size;
+ while (At < End) {
+ unsigned offset = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab offset!";
+ return false;
+ }
+ unsigned length = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab length!";
+ return false;
+ }
+ if (At + length > End) {
+ if (error)
+ *error = "Malformed symbol table: length not consistent with size";
+ return false;
+ }
+ // we don't care if it can't be inserted (duplicate entry)
+ symTab.insert(std::make_pair(std::string(At, length), offset));
+ At += length;
+ }
+ symTabSize = size;
+ return true;
+}
+
+// This member function parses an ArchiveMemberHeader that At is presumed to
+// point to. The At pointer is updated to the byte just after the header, which
+// can be variable in size.
+ArchiveMember*
+Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
+{
+ if (At + sizeof(ArchiveMemberHeader) >= End) {
+ if (error)
+ *error = "Unexpected end of file";
+ return 0;
+ }
+
+ // Cast archive member header
+ ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
+ At += sizeof(ArchiveMemberHeader);
+
+ // Extract the size and determine if the file is
+ // compressed or not (negative length).
+ int flags = 0;
+ int MemberSize = atoi(Hdr->size);
+ if (MemberSize < 0) {
+ flags |= ArchiveMember::CompressedFlag;
+ MemberSize = -MemberSize;
+ }
+
+ // Check the size of the member for sanity
+ if (At + MemberSize > End) {
+ if (error)
+ *error = "invalid member length in archive file";
+ return 0;
+ }
+
+ // Check the member signature
+ if (!Hdr->checkSignature()) {
+ if (error)
+ *error = "invalid file member signature";
+ return 0;
+ }
+
+ // Convert and check the member name
+ // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
+ // table. The special name "//" and 14 blanks is for a string table, used
+ // for long file names. This library doesn't generate either of those but
+ // it will accept them. If the name starts with #1/ and the remainder is
+ // digits, then those digits specify the length of the name that is
+ // stored immediately following the header. The special name
+  // "#_LLVM_SYM_TAB_#" identifies the symbol table for LLVM bitcode.
+ // Anything else is a regular, short filename that is terminated with
+ // a '/' and blanks.
+
+ std::string pathname;
+ switch (Hdr->name[0]) {
+ case '#':
+ if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
+ if (isdigit(Hdr->name[3])) {
+ unsigned len = atoi(&Hdr->name[3]);
+ const char *nulp = (const char *)memchr(At, '\0', len);
+ pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len);
+ At += len;
+ MemberSize -= len;
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ } else {
+ if (error)
+ *error = "invalid long filename";
+ return 0;
+ }
+ } else if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
+ // The member is using a long file name (>15 chars) format.
+ // This format is standard for 4.4BSD and Mac OSX operating
+ // systems. LLVM uses it similarly. In this format, the
+ // remainder of the name field (after #1/) specifies the
+        // length of the file name, which occupies the first bytes of
+ // the member's data. The pathname already has the #1/ stripped.
+ pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
+ flags |= ArchiveMember::LLVMSymbolTableFlag;
+ }
+ break;
+ case '/':
+ if (Hdr->name[1]== '/') {
+ if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
+ pathname.assign(ARFILE_STRTAB_NAME);
+ flags |= ArchiveMember::StringTableFlag;
+ } else {
+ if (error)
+ *error = "invalid string table name";
+ return 0;
+ }
+ } else if (Hdr->name[1] == ' ') {
+ if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
+ pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
+ flags |= ArchiveMember::SVR4SymbolTableFlag;
+ } else {
+ if (error)
+ *error = "invalid SVR4 symbol table name";
+ return 0;
+ }
+ } else if (isdigit(Hdr->name[1])) {
+ unsigned index = atoi(&Hdr->name[1]);
+ if (index < strtab.length()) {
+ const char* namep = strtab.c_str() + index;
+ const char* endp = strtab.c_str() + strtab.length();
+ const char* p = namep;
+ const char* last_p = p;
+ while (p < endp) {
+ if (*p == '\n' && *last_p == '/') {
+ pathname.assign(namep, last_p - namep);
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ break;
+ }
+ last_p = p;
+ p++;
+ }
+ if (p >= endp) {
+ if (error)
+              *error = "missing name terminator in string table";
+ return 0;
+ }
+ } else {
+ if (error)
+ *error = "name index beyond string table";
+ return 0;
+ }
+ }
+ break;
+ case '_':
+ if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
+ pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
+ flags |= ArchiveMember::BSD4SymbolTableFlag;
+ break;
+ }
+ /* FALL THROUGH */
+
+ default:
+ char* slash = (char*) memchr(Hdr->name, '/', 16);
+ if (slash == 0)
+ slash = Hdr->name + 16;
+ pathname.assign(Hdr->name, slash - Hdr->name);
+ break;
+ }
+
+ // Determine if this is a bitcode file
+ switch (sys::IdentifyFileType(At, 4)) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ flags &= ~ArchiveMember::BitcodeFlag;
+ break;
+ }
+
+ // Instantiate the ArchiveMember to be filled
+ ArchiveMember* member = new ArchiveMember(this);
+
+ // Fill in fields of the ArchiveMember
+ member->parent = this;
+ member->path.set(pathname);
+ member->info.fileSize = MemberSize;
+ member->info.modTime.fromEpochTime(atoi(Hdr->date));
+ unsigned int mode;
+ sscanf(Hdr->mode, "%o", &mode);
+ member->info.mode = mode;
+ member->info.user = atoi(Hdr->uid);
+ member->info.group = atoi(Hdr->gid);
+ member->flags = flags;
+ member->data = At;
+
+ return member;
+}
+
+bool
+Archive::checkSignature(std::string* error) {
+ // Check the magic string at file's header
+ if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) {
+ if (error)
+ *error = "invalid signature for an archive file";
+ return false;
+ }
+ return true;
+}
+
+// This function loads the entire archive and fully populates its ilist with
+// the members of the archive file. This is typically used in preparation for
+// editing the contents of the archive.
+bool
+Archive::loadArchive(std::string* error) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ if (!checkSignature(error))
+ return false;
+
+ At += 8; // Skip the magic string.
+
+ bool seenSymbolTable = false;
+ bool foundFirstFile = false;
+ while (At < End) {
+ // parse the member header
+ const char* Save = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // check if this is the foreign symbol table
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // We just save this but don't do anything special
+ // with it. It doesn't count as the "first file".
+ if (foreignST) {
+ // What? Multiple foreign symbol tables? Just chuck it
+ // and retain the last one found.
+ delete foreignST;
+ }
+ foreignST = mbr;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ } else if (mbr->isStringTable()) {
+ // Simply suck the entire string table into a string
+ // variable. This will be used to get the names of the
+ // members that use the "/ddd" format for their names
+ // (SVR4 style long names).
+ strtab.assign(At, mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ } else if (mbr->isLLVMSymbolTable()) {
+ // This is the LLVM symbol table for the archive. If we've seen it
+      // already, it's an error. Otherwise, parse the symbol table and move on.
+ if (seenSymbolTable) {
+ if (error)
+ *error = "invalid archive: multiple symbol tables";
+ return false;
+ }
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error))
+ return false;
+ seenSymbolTable = true;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr; // We don't need this member in the list of members.
+ } else {
+      // This is just a regular file. If it's the first one, save its offset.
+ // Otherwise just push it on the list and move on to the next file.
+ if (!foundFirstFile) {
+ firstFileOffset = Save - base;
+ foundFirstFile = true;
+ }
+ members.push_back(mbr);
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+ return true;
+}
+
+// Open and completely load the archive file.
+Archive*
+Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C,
+ std::string* ErrorMessage) {
+ std::auto_ptr<Archive> result ( new Archive(file, C));
+ if (result->mapToMemory(ErrorMessage))
+ return 0;
+ if (!result->loadArchive(ErrorMessage))
+ return 0;
+ return result.release();
+}
+
+// Get all the bitcode modules from the archive
+bool
+Archive::getAllModules(std::vector<Module*>& Modules,
+ std::string* ErrMessage) {
+
+ for (iterator I=begin(), E=end(); I != E; ++I) {
+ if (I->isBitcode()) {
+ std::string FullMemberName = archPath.str() +
+ "(" + I->getPath().str() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
+
+ Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage);
+ delete Buffer;
+ if (!M)
+ return true;
+
+ Modules.push_back(M);
+ }
+ }
+ return false;
+}
+
+// Load just the symbol table from the archive file
+bool
+Archive::loadSymbolTable(std::string* ErrorMsg) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ // Make sure we're dealing with an archive
+ if (!checkSignature(ErrorMsg))
+ return false;
+
+ At += 8; // Skip signature
+
+ // Parse the first file member header
+ const char* FirstFile = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr)
+ return false;
+
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // Skip the foreign symbol table, we don't do anything with it
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+
+ // Read the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr) {
+ delete mbr;
+ return false;
+ }
+ }
+
+ if (mbr->isStringTable()) {
+ // Process the string table entry
+ strtab.assign((const char*)mbr->getData(), mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Get the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr) {
+ delete mbr;
+ return false;
+ }
+ }
+
+  // See if it's the symbol table
+ if (mbr->isLLVMSymbolTable()) {
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) {
+ delete mbr;
+ return false;
+ }
+
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Can't be any more symtab headers so just advance
+ FirstFile = At;
+ } else {
+ // There's no symbol table in the file. We have to rebuild it from scratch
+ // because the intent of this method is to get the symbol table loaded so
+ // it can be searched efficiently.
+ // Add the member to the members list
+ members.push_back(mbr);
+ }
+
+ firstFileOffset = FirstFile - base;
+ return true;
+}
+
+// Open the archive and load just the symbol tables
+Archive* Archive::OpenAndLoadSymbols(const sys::Path& file,
+ LLVMContext& C,
+ std::string* ErrorMessage) {
+ std::auto_ptr<Archive> result ( new Archive(file, C) );
+ if (result->mapToMemory(ErrorMessage))
+ return 0;
+ if (!result->loadSymbolTable(ErrorMessage))
+ return 0;
+ return result.release();
+}
+
+// Look up one symbol in the symbol table and return the module that defines
+// that symbol.
+Module*
+Archive::findModuleDefiningSymbol(const std::string& symbol,
+ std::string* ErrMsg) {
+ SymTabType::iterator SI = symTab.find(symbol);
+ if (SI == symTab.end())
+ return 0;
+
+ // The symbol table was previously constructed assuming that the members were
+ // written without the symbol table header. Because VBR encoding is used, the
+ // values could not be adjusted to account for the offset of the symbol table
+ // because that could affect the size of the symbol table due to VBR encoding.
+ // We now have to account for this by adjusting the offset by the size of the
+ // symbol table and its header.
+ unsigned fileOffset =
+ SI->second + // offset in symbol-table-less file
+ firstFileOffset; // add offset to first "real" file in archive
+
+ // See if the module is already loaded
+ ModuleMap::iterator MI = modules.find(fileOffset);
+ if (MI != modules.end())
+ return MI->second.first;
+
+ // Module hasn't been loaded yet, we need to load it
+ const char* modptr = base + fileOffset;
+ ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
+ ErrMsg);
+ if (!mbr)
+ return 0;
+
+ // Now, load the bitcode module to get the Module.
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()),
+ FullMemberName.c_str());
+
+ Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+ if (!m)
+ return 0;
+
+ modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr)));
+
+ return m;
+}
+
+// Look up multiple symbols in the symbol table and return a set of
+// Modules that define those symbols.
+bool
+Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
+ std::set<Module*>& result,
+ std::string* error) {
+ if (!mapfile || !base) {
+ if (error)
+ *error = "Empty archive invalid for finding modules defining symbols";
+ return false;
+ }
+
+ if (symTab.empty()) {
+    // We don't have a symbol table, so we must build it now, but let's also
+ // make sure that we populate the modules table as we do this to ensure
+ // that we don't load them twice when findModuleDefiningSymbol is called
+ // below.
+
+ // Get a pointer to the first file
+ const char* At = base + firstFileOffset;
+ const char* End = mapfile->getBufferEnd();
+
+ while ( At < End) {
+ // Compute the offset to be put in the symbol table
+ unsigned offset = At - base - firstFileOffset;
+
+ // Parse the file's header
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // If it contains symbols
+ if (mbr->isBitcode()) {
+ // Get the symbols
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
+ Module* M =
+ GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context,
+ symbols, error);
+
+ if (M) {
+ // Insert the module's symbols into the symbol table
+ for (std::vector<std::string>::iterator I = symbols.begin(),
+ E=symbols.end(); I != E; ++I ) {
+ symTab.insert(std::make_pair(*I, offset));
+ }
+ // Insert the Module and the ArchiveMember into the table of
+ // modules.
+ modules.insert(std::make_pair(offset, std::make_pair(M, mbr)));
+ } else {
+ if (error)
+ *error = "Can't parse bitcode member: " +
+ mbr->getPath().str() + ": " + *error;
+ delete mbr;
+ return false;
+ }
+ }
+
+ // Go to the next file location
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+
+ // At this point we have a valid symbol table (one way or another) so we
+ // just use it to quickly find the symbols requested.
+
+ for (std::set<std::string>::iterator I=symbols.begin(),
+ E=symbols.end(); I != E;) {
+ // See if this symbol exists
+ Module* m = findModuleDefiningSymbol(*I,error);
+ if (m) {
+ // The symbol exists, insert the Module into our result, duplicates will
+ // be ignored.
+ result.insert(m);
+
+      // Remove the symbol now that it's been resolved, being careful to
+ // post-increment the iterator.
+ symbols.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ return true;
+}
+
+bool Archive::isBitcodeArchive() {
+ // Make sure the symTab has been loaded. In most cases this should have been
+ // done when the archive was constructed, but still, this is just in case.
+ if (symTab.empty())
+ if (!loadSymbolTable(0))
+ return false;
+
+ // Now that we know it's been loaded, return true
+ // if it has a size
+ if (symTab.size()) return true;
+
+ // We still can't be sure it isn't a bitcode archive
+ if (!loadArchive(0))
+ return false;
+
+ std::vector<Module *> Modules;
+ std::string ErrorMessage;
+
+ // Scan the archive, trying to load a bitcode member. We only load one to
+ // see if this works.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (!I->isBitcode())
+ continue;
+
+ std::string FullMemberName =
+ archPath.str() + "(" + I->getPath().str() + ")";
+
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
+ Module *M = ParseBitcodeFile(Buffer, Context);
+ delete Buffer;
+ if (!M)
+ return false; // Couldn't parse bitcode, not a bitcode archive.
+ delete M;
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Archive/ArchiveWriter.cpp b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
new file mode 100644
index 000000000000..8fcc7aa29cc8
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
@@ -0,0 +1,489 @@
+//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up an LLVM archive file (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
+#include <fstream>
+#include <ostream>
+#include <iomanip>
+using namespace llvm;
+
+// Write an integer using variable bit rate encoding. This saves a few bytes
+// per entry in the symbol table.
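+// (7 bits per byte, least-significant group first, with the high bit set on
+// every byte except the last; e.g. 300 is written as the two bytes 0xAC 0x02).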
+static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+ while (1) {
+ if (num < 0x80) { // done?
+ ARFile << (unsigned char)num;
+ return;
+ }
+
+ // Nope, we are bigger than a character, output the next 7 bits and set the
+ // high bit to say that there is more coming...
+ ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F));
+ num >>= 7; // Shift out 7 bits now...
+ }
+}
+
+// Compute how many bytes are taken by a given VBR encoded value. This is needed
+// to pre-compute the size of the symbol table.
+static inline unsigned numVbrBytes(unsigned num) {
+
+  // Note that the following nested ifs are somewhat equivalent to a binary
+  // search. We split the range in half by comparing against 2^14 first. This
+  // lets most reasonable values be handled in 2 comparisons, where a simple
+  // linear chain would need 1 comparison for small values but 4 for large
+  // ones. We expect to see file offsets in the 2^10 to 2^24 range and symbol
+  // lengths in the 2^0 to 2^8 range, so this approach is reasonable.
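+  // (For example, numVbrBytes(300) is 2 and numVbrBytes(100000) is 3.)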
+ if (num < 1<<14) {
+ if (num < 1<<7)
+ return 1;
+ else
+ return 2;
+ }
+ if (num < 1<<21)
+ return 3;
+
+ if (num < 1<<28)
+ return 4;
+ return 5; // anything >= 2^28 takes 5 bytes
+}
+
+// Create an empty archive.
+Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) {
+ Archive* result = new Archive(FilePath, C);
+ return result;
+}
+
+// Fill the ArchiveMemberHeader with the information from a member. If
+// TruncateNames is true, names are flattened to 15 chars or less. The sz field
+// is provided here instead of coming from the mbr because the member might be
+// stored compressed and the compressed size is not the ArchiveMember's size.
+// Furthermore compressed files have negative size fields to identify them as
+// compressed.
+bool
+Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
+ int sz, bool TruncateNames) const {
+
+ // Set the permissions mode, uid and gid
+ hdr.init();
+ char buffer[32];
+ sprintf(buffer, "%-8o", mbr.getMode());
+ memcpy(hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", mbr.getUser());
+ memcpy(hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", mbr.getGroup());
+ memcpy(hdr.gid,buffer,6);
+
+ // Set the last modification date
+ uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime();
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(hdr.date,buffer,12);
+
+ // Get rid of trailing blanks in the name
+ std::string mbrPath = mbr.getPath().str();
+ size_t mbrLen = mbrPath.length();
+ while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') {
+ mbrPath.erase(mbrLen-1,1);
+ mbrLen--;
+ }
+
+ // Set the name field in one of its various flavors.
+ bool writeLongName = false;
+ if (mbr.isStringTable()) {
+ memcpy(hdr.name,ARFILE_STRTAB_NAME,16);
+ } else if (mbr.isSVR4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16);
+ } else if (mbr.isBSD4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16);
+ } else if (mbr.isLLVMSymbolTable()) {
+ memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ } else if (TruncateNames) {
+ const char* nm = mbrPath.c_str();
+ unsigned len = mbrPath.length();
+ size_t slashpos = mbrPath.rfind('/');
+ if (slashpos != std::string::npos) {
+ nm += slashpos + 1;
+ len -= slashpos +1;
+ }
+ if (len > 15)
+ len = 15;
+ memcpy(hdr.name,nm,len);
+ hdr.name[len] = '/';
+ } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) {
+ memcpy(hdr.name,mbrPath.c_str(),mbrPath.length());
+ hdr.name[mbrPath.length()] = '/';
+ } else {
+ std::string nm = "#1/";
+ nm += utostr(mbrPath.length());
+ memcpy(hdr.name,nm.data(),nm.length());
+ if (sz < 0)
+ sz -= mbrPath.length();
+ else
+ sz += mbrPath.length();
+ writeLongName = true;
+ }
+
+ // Set the size field
+ if (sz < 0) {
+ buffer[0] = '-';
+ sprintf(&buffer[1],"%-9u",(unsigned)-sz);
+ } else {
+ sprintf(buffer, "%-10u", (unsigned)sz);
+ }
+ memcpy(hdr.size,buffer,10);
+
+ return writeLongName;
+}
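+
+// For illustration, assume a hypothetical member path "lib/Analysis/Foo.o"
+// (18 characters) with TruncateNames false. The name neither fits the plain
+// 15-character form nor is free of '/', so the header name field becomes
+// "#1/18", the size field is grown by 18, and writeLongName comes back true
+// so the caller writes the full path immediately after the header.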
+
+// Insert a file into the archive before some other member. This also takes care
+// of extracting the necessary flags and information from the file.
+bool
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
+ std::string* ErrMsg) {
+ bool Exists;
+ if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
+ if (ErrMsg)
+ *ErrMsg = "Can not add a non-existent file to archive";
+ return true;
+ }
+
+ ArchiveMember* mbr = new ArchiveMember(this);
+
+ mbr->data = 0;
+ mbr->path = filePath;
+ const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
+ if (!FSInfo) {
+ delete mbr;
+ return true;
+ }
+ mbr->info = *FSInfo;
+
+ unsigned flags = 0;
+ bool hasSlash = filePath.str().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= ArchiveMember::HasPathFlag;
+ if (hasSlash || filePath.str().length() > 15)
+ flags |= ArchiveMember::HasLongFilenameFlag;
+
+ sys::LLVMFileType type;
+ if (sys::fs::identify_magic(mbr->path.str(), type))
+ type = sys::Unknown_FileType;
+ switch (type) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ break;
+ }
+ mbr->flags = flags;
+ members.insert(where,mbr);
+ return false;
+}
+
+// Write one member out to the file.
+bool
+Archive::writeMember(
+ const ArchiveMember& member,
+ std::ofstream& ARFile,
+ bool CreateSymbolTable,
+ bool TruncateNames,
+ bool ShouldCompress,
+ std::string* ErrMsg
+) {
+
+ unsigned filepos = ARFile.tellp();
+ filepos -= 8;
+
+ // Get the data and its size either from the
+ // member's in-memory data or directly from the file.
+ size_t fSize = member.getSize();
+ const char *data = (const char*)member.getData();
+ MemoryBuffer *mFile = 0;
+ if (!data) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ mFile = File.take();
+ data = mFile->getBufferStart();
+ fSize = mFile->getBufferSize();
+ }
+
+ // Now that we have the data in memory, update the
+ // symbol table if it's a bitcode file.
+ if (CreateSymbolTable && member.isBitcode()) {
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+ + ")";
+ Module* M =
+ GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
+
+ // If the bitcode parsed successfully
+ if (M) {
+ for (std::vector<std::string>::iterator SI = symbols.begin(),
+ SE = symbols.end(); SI != SE; ++SI) {
+
+ std::pair<SymTabType::iterator,bool> Res =
+ symTab.insert(std::make_pair(*SI,filepos));
+
+ if (Res.second) {
+ symTabSize += SI->length() +
+ numVbrBytes(SI->length()) +
+ numVbrBytes(filepos);
+ }
+ }
+ // We don't need this module any more.
+ delete M;
+ } else {
+ delete mFile;
+ if (ErrMsg)
+ *ErrMsg = "Can't parse bitcode member: " + member.getPath().str()
+ + ": " + *ErrMsg;
+ return true;
+ }
+ }
+
+ int hdrSize = fSize;
+
+ // Compute the fields of the header
+ ArchiveMemberHeader Hdr;
+ bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames);
+
+ // Write header to archive file
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+ // Write the long filename if it's long
+ if (writeLongName) {
+ ARFile.write(member.getPath().str().data(),
+ member.getPath().str().length());
+ }
+
+ // Write the (possibly compressed) member's content to the file.
+ ARFile.write(data,fSize);
+
+ // Make sure the member is an even length
+ if ((ARFile.tellp() & 1) == 1)
+ ARFile << ARFILE_PAD;
+
+ // Close the mapped file if it was opened
+ delete mFile;
+ return false;
+}
+
+// Write out the LLVM symbol table as an archive member to the file.
+void
+Archive::writeSymbolTable(std::ofstream& ARFile) {
+
+ // Construct the symbol table's header
+ ArchiveMemberHeader Hdr;
+ Hdr.init();
+ memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime();
+ char buffer[32];
+ sprintf(buffer, "%-8o", 0644);
+ memcpy(Hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId());
+ memcpy(Hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId());
+ memcpy(Hdr.gid,buffer,6);
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(Hdr.date,buffer,12);
+ sprintf(buffer,"%-10u",symTabSize);
+ memcpy(Hdr.size,buffer,10);
+
+ // Write the header
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+#ifndef NDEBUG
+ // Save the starting position of the symbol table's data content.
+ unsigned startpos = ARFile.tellp();
+#endif
+
+ // Write out the symbols sequentially
+ for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end();
+ I != E; ++I)
+ {
+ // Write out the file index
+ writeInteger(I->second, ARFile);
+ // Write out the length of the symbol
+ writeInteger(I->first.length(), ARFile);
+ // Write out the symbol
+ ARFile.write(I->first.data(), I->first.length());
+ }
+
+#ifndef NDEBUG
+ // Now that we're done with the symbol table, get the ending file position
+ unsigned endpos = ARFile.tellp();
+#endif
+
+ // Make sure that the amount we wrote is what we pre-computed. This is
+ // critical for file integrity purposes.
+ assert(endpos - startpos == symTabSize && "Invalid symTabSize computation");
+
+ // Make sure the symbol table is even sized
+ if (symTabSize % 2 != 0)
+ ARFile << ARFILE_PAD;
+}
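+
+// For illustration, the symbol table payload written above is a sequence of
+// (VBR file offset, VBR name length, raw name bytes) entries. A symbol "main"
+// whose member is recorded at file offset 300 takes 2 + 1 + 4 = 7 bytes:
+// 0xAC 0x02 for the offset, 0x04 for the length, then 'm' 'a' 'i' 'n'.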
+
+// Write the entire archive to the file specified when the archive was created.
+// This writes to a temporary file first. Options are for creating a symbol
+// table, flattening the file names (no directories, 15 chars max) and
+// compressing each archive member.
+bool
+Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
+ std::string* ErrMsg)
+{
+ // Make sure the caller hasn't opened the file without loading it and is now
+ // trying to write it, which would wipe out the existing archive.
+ if (members.empty() && mapfile && mapfile->getBufferSize() > 8) {
+ if (ErrMsg)
+ *ErrMsg = "Can't write an archive not opened for writing";
+ return true;
+ }
+
+ // Create a temporary file to store the archive in
+ sys::Path TmpArchive = archPath;
+ if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+
+ // Make sure the temporary gets removed if we crash
+ sys::RemoveFileOnSignal(TmpArchive);
+
+ // Create archive file for output.
+ std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
+ std::ios::binary;
+ std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
+
+ // Check for errors opening or creating archive file.
+ if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + archPath.str();
+ return true;
+ }
+
+ // If we're creating a symbol table, reset it now
+ if (CreateSymbolTable) {
+ symTabSize = 0;
+ symTab.clear();
+ }
+
+ // Write magic string to archive.
+ ArchiveFile << ARFILE_MAGIC;
+
+ // Loop over all member files, and write them out. Note that this also
+ // builds the symbol table, symTab.
+ for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
+ if (writeMember(*I, ArchiveFile, CreateSymbolTable,
+ TruncateNames, Compress, ErrMsg)) {
+ TmpArchive.eraseFromDisk();
+ ArchiveFile.close();
+ return true;
+ }
+ }
+
+ // Close archive file.
+ ArchiveFile.close();
+
+ // Write the symbol table
+ if (CreateSymbolTable) {
+ // At this point we have written a file that is a legal archive but it
+ // doesn't have a symbol table in it. To aid in faster reading and to
+ // ensure compatibility with other archivers we need to put the symbol
+ // table first in the file. Unfortunately, this means mapping the file
+ // we just wrote back in and copying it to the destination file.
+ sys::Path FinalFilePath = archPath;
+
+ // Map in the archive we just wrote.
+ {
+ OwningPtr<MemoryBuffer> arch;
+ if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ const char* base = arch->getBufferStart();
+
+ // Open another temporary file in order to avoid invalidating the
+ // mmapped data
+ if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+ sys::RemoveFileOnSignal(FinalFilePath);
+
+ std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
+ if (!FinalFile.is_open() || FinalFile.bad()) {
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
+ return true;
+ }
+
+ // Write the file magic number
+ FinalFile << ARFILE_MAGIC;
+
+ // If there is a foreign symbol table, put it into the file now. Most
+ // ar(1) implementations require the symbol table to be first but llvm-ar
+ // can deal with it being after a foreign symbol table. This ensures
+ // compatibility with other ar(1) implementations as well as allowing the
+ // archive to store both native .o and LLVM .bc files, both indexed.
+ if (foreignST) {
+ if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
+ FinalFile.close();
+ TmpArchive.eraseFromDisk();
+ return true;
+ }
+ }
+
+ // Put out the LLVM symbol table now.
+ writeSymbolTable(FinalFile);
+
+ // Copy the temporary file contents being sure to skip the file's magic
+ // number.
+ FinalFile.write(base + sizeof(ARFILE_MAGIC)-1,
+ arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1);
+
+ // Close up shop
+ FinalFile.close();
+ } // free arch.
+
+ // Move the final file over top of TmpArchive
+ if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+ return true;
+ }
+
+ // Before we replace the actual archive, we need to forget all the
+ // members, since they point to data in that old archive. We need to do
+ // this because we cannot replace an open file on Windows.
+ cleanUpMemory();
+
+ if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+ return true;
+
+ // Set correct read and write permissions after temporary file is moved
+ // to final destination path.
+ if (archPath.makeReadableOnDisk(ErrMsg))
+ return true;
+ if (archPath.makeWriteableOnDisk(ErrMsg))
+ return true;
+
+ return false;
+}
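+
+// For illustration, when CreateSymbolTable is set the file produced above is
+// laid out as: the archive magic string (ARFILE_MAGIC), then any foreign
+// symbol table member, then the LLVM symbol table member, and then every
+// ordinary member copied over from the first temporary file.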
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
new file mode 100644
index 000000000000..3c63106e8c3b
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -0,0 +1,830 @@
+//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLLexer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
+ ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions.
+//===----------------------------------------------------------------------===//
+
+// atoull - Convert an ascii string of decimal digits into the unsigned long
+// long representation... this does not have to do input error checking,
+// because we know that the input will be matched by a suitable regex...
+//
+uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; Buffer++) {
+ uint64_t OldRes = Result;
+ Result *= 10;
+ Result += *Buffer-'0';
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; ++Buffer) {
+ uint64_t OldRes = Result;
+ Result *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Result += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Result += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Result += C-'a'+10;
+
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+void LLLexer::HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[0] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[0] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[0] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[0] += C-'a'+10;
+ }
+ Pair[1] = 0;
+ for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
+ Pair[1] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[1] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[1] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[1] += C-'a'+10;
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
+
+/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
+/// { low64, high16 } as usual for an APInt.
+void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[1] = 0;
+ for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[1] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[1] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[1] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[1] += C-'a'+10;
+ }
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ Pair[0] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[0] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[0] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[0] += C-'a'+10;
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
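+
+// For illustration, assuming the usual x87 extended layout: the 20 hexits for
+// the long double 1.0 are 3FFF8000000000000000, which the routine above
+// splits into Pair[1] = 0x3FFF (sign and exponent) and
+// Pair[0] = 0x8000000000000000 (the 64-bit significand).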
+
+// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
+// appropriate character.
+static void UnEscapeLexed(std::string &Str) {
+ if (Str.empty()) return;
+
+ char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
+ char *BOut = Buffer;
+ for (char *BIn = Buffer; BIn != EndBuffer; ) {
+ if (BIn[0] == '\\') {
+ if (BIn < EndBuffer-1 && BIn[1] == '\\') {
+ *BOut++ = '\\'; // Two \ becomes one
+ BIn += 2;
+ } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
+ char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
+ *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
+ BIn[3] = Tmp; // Restore character
+ BIn += 3; // Skip over handled chars
+ ++BOut;
+ } else {
+ *BOut++ = *BIn++;
+ }
+ } else {
+ *BOut++ = *BIn++;
+ }
+ }
+ Str.resize(BOut-Buffer);
+}
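+
+// For illustration: given the lexed text a\62c, the routine above produces
+// abc, since 0x62 is 'b'; a doubled backslash collapses to a single one, and
+// a backslash not followed by two hex digits is copied through unchanged.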
+
+/// isLabelChar - Return true for [-a-zA-Z$._0-9].
+static bool isLabelChar(char C) {
+ return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
+}
+
+
+/// isLabelTail - If this pointer points at a valid label tail, return the
+/// pointer just past the terminating ':'; otherwise return null.
+static const char *isLabelTail(const char *CurPtr) {
+ while (1) {
+ if (CurPtr[0] == ':') return CurPtr+1;
+ if (!isLabelChar(CurPtr[0])) return 0;
+ ++CurPtr;
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Lexer definition.
+//===----------------------------------------------------------------------===//
+
+LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
+ LLVMContext &C)
+ : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
+ CurPtr = CurBuf->getBufferStart();
+}
+
+std::string LLLexer::getFilename() const {
+ return CurBuf->getBufferIdentifier();
+}
+
+int LLLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default: return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+
+lltok::Kind LLLexer::LexToken() {
+ TokStart = CurPtr;
+
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
+ return LexIdentifier();
+
+ return lltok::Error;
+ case EOF: return lltok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '+': return LexPositive();
+ case '@': return LexAt();
+ case '%': return LexPercent();
+ case '"': return LexQuote();
+ case '.':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ if (CurPtr[0] == '.' && CurPtr[1] == '.') {
+ CurPtr += 2;
+ return lltok::dotdotdot;
+ }
+ return lltok::Error;
+ case '$':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ return lltok::Error;
+ case ';':
+ SkipLineComment();
+ return LexToken();
+ case '!': return LexExclaim();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '-':
+ return LexDigitOrNegative();
+ case '=': return lltok::equal;
+ case '[': return lltok::lsquare;
+ case ']': return lltok::rsquare;
+ case '{': return lltok::lbrace;
+ case '}': return lltok::rbrace;
+ case '<': return lltok::less;
+ case '>': return lltok::greater;
+ case '(': return lltok::lparen;
+ case ')': return lltok::rparen;
+ case ',': return lltok::comma;
+ case '*': return lltok::star;
+ case '\\': return lltok::backslash;
+ }
+}
+
+void LLLexer::SkipLineComment() {
+ while (1) {
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
+ return;
+ }
+}
+
+/// LexAt - Lex all tokens that start with an @ character:
+/// GlobalVar @\"[^\"]*\"
+/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// GlobalVarID @[0-9]+
+lltok::Kind LLLexer::LexAt() {
+ // Handle AtStringConstant: @\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in global variable name");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart+2, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return lltok::GlobalVar;
+ }
+ }
+ }
+
+ // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::GlobalVar;
+
+ // Handle GlobalVarID: @[0-9]+
+ if (isdigit(CurPtr[0])) {
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::GlobalID;
+ }
+
+ return lltok::Error;
+}
+
+/// ReadString - Read a string until the closing quote.
+lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
+ const char *Start = CurPtr;
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in string constant");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(Start, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return kind;
+ }
+ }
+}
+
+/// ReadVarName - Read the rest of a token containing a variable name.
+bool LLLexer::ReadVarName() {
+ const char *NameStart = CurPtr;
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_') {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_')
+ ++CurPtr;
+
+ StrVal.assign(NameStart, CurPtr);
+ return true;
+ }
+ return false;
+}
+
+/// LexPercent - Lex all tokens that start with a % character:
+/// LocalVar ::= %\"[^\"]*\"
+/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// LocalVarID ::= %[0-9]+
+lltok::Kind LLLexer::LexPercent() {
+ // Handle LocalVarName: %\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+ return ReadString(lltok::LocalVar);
+ }
+
+ // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::LocalVar;
+
+ // Handle LocalVarID: %[0-9]+
+ if (isdigit(CurPtr[0])) {
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::LocalVarID;
+ }
+
+ return lltok::Error;
+}
+
+/// LexQuote - Lex all tokens that start with a " character:
+/// QuoteLabel "[^"]+":
+/// StringConstant "[^"]*"
+lltok::Kind LLLexer::LexQuote() {
+ lltok::Kind kind = ReadString(lltok::StringConstant);
+ if (kind == lltok::Error || kind == lltok::Eof)
+ return kind;
+
+ if (CurPtr[0] == ':') {
+ ++CurPtr;
+ kind = lltok::LabelStr;
+ }
+
+ return kind;
+}
+
+/// LexExclaim:
+/// !foo
+/// !
+lltok::Kind LLLexer::LexExclaim() {
+ // Lex a metadata name as a MetadataVar.
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
+ ++CurPtr;
+
+ StrVal.assign(TokStart+1, CurPtr); // Skip !
+ UnEscapeLexed(StrVal);
+ return lltok::MetadataVar;
+ }
+ return lltok::exclaim;
+}
+
+/// LexIdentifier: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// IntegerType i[0-9]+
+/// Keyword sdiv, float, ...
+/// HexIntConstant [us]0x[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexIdentifier() {
+ const char *StartChar = CurPtr;
+ const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
+ const char *KeywordEnd = 0;
+
+ for (; isLabelChar(*CurPtr); ++CurPtr) {
+ // If we decide this is an integer, remember the end of the sequence.
+ if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
+ if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
+ }
+
+ // If we stopped due to a colon, this really is a label.
+ if (*CurPtr == ':') {
+ StrVal.assign(StartChar-1, CurPtr++);
+ return lltok::LabelStr;
+ }
+
+ // Otherwise, this wasn't a label. If this was valid as an integer type,
+ // return it.
+ if (IntEnd == 0) IntEnd = CurPtr;
+ if (IntEnd != StartChar) {
+ CurPtr = IntEnd;
+ uint64_t NumBits = atoull(StartChar, CurPtr);
+ if (NumBits < IntegerType::MIN_INT_BITS ||
+ NumBits > IntegerType::MAX_INT_BITS) {
+ Error("bitwidth for integer type out of range!");
+ return lltok::Error;
+ }
+ TyVal = IntegerType::get(Context, NumBits);
+ return lltok::Type;
+ }
+
+ // Otherwise, this was a letter sequence. See which keyword this is.
+ if (KeywordEnd == 0) KeywordEnd = CurPtr;
+ CurPtr = KeywordEnd;
+ --StartChar;
+ unsigned Len = CurPtr-StartChar;
+#define KEYWORD(STR) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
+ return lltok::kw_##STR;
+
+ KEYWORD(true); KEYWORD(false);
+ KEYWORD(declare); KEYWORD(define);
+ KEYWORD(global); KEYWORD(constant);
+
+ KEYWORD(private);
+ KEYWORD(linker_private);
+ KEYWORD(linker_private_weak);
+ KEYWORD(linker_private_weak_def_auto);
+ KEYWORD(internal);
+ KEYWORD(available_externally);
+ KEYWORD(linkonce);
+ KEYWORD(linkonce_odr);
+ KEYWORD(weak);
+ KEYWORD(weak_odr);
+ KEYWORD(appending);
+ KEYWORD(dllimport);
+ KEYWORD(dllexport);
+ KEYWORD(common);
+ KEYWORD(default);
+ KEYWORD(hidden);
+ KEYWORD(protected);
+ KEYWORD(unnamed_addr);
+ KEYWORD(extern_weak);
+ KEYWORD(external);
+ KEYWORD(thread_local);
+ KEYWORD(zeroinitializer);
+ KEYWORD(undef);
+ KEYWORD(null);
+ KEYWORD(to);
+ KEYWORD(tail);
+ KEYWORD(target);
+ KEYWORD(triple);
+ KEYWORD(deplibs);
+ KEYWORD(datalayout);
+ KEYWORD(volatile);
+ KEYWORD(nuw);
+ KEYWORD(nsw);
+ KEYWORD(exact);
+ KEYWORD(inbounds);
+ KEYWORD(align);
+ KEYWORD(addrspace);
+ KEYWORD(section);
+ KEYWORD(alias);
+ KEYWORD(module);
+ KEYWORD(asm);
+ KEYWORD(sideeffect);
+ KEYWORD(alignstack);
+ KEYWORD(gc);
+
+ KEYWORD(ccc);
+ KEYWORD(fastcc);
+ KEYWORD(coldcc);
+ KEYWORD(x86_stdcallcc);
+ KEYWORD(x86_fastcallcc);
+ KEYWORD(x86_thiscallcc);
+ KEYWORD(arm_apcscc);
+ KEYWORD(arm_aapcscc);
+ KEYWORD(arm_aapcs_vfpcc);
+ KEYWORD(msp430_intrcc);
+ KEYWORD(ptx_kernel);
+ KEYWORD(ptx_device);
+
+ KEYWORD(cc);
+ KEYWORD(c);
+
+ KEYWORD(signext);
+ KEYWORD(zeroext);
+ KEYWORD(inreg);
+ KEYWORD(sret);
+ KEYWORD(nounwind);
+ KEYWORD(noreturn);
+ KEYWORD(noalias);
+ KEYWORD(nocapture);
+ KEYWORD(byval);
+ KEYWORD(nest);
+ KEYWORD(readnone);
+ KEYWORD(readonly);
+ KEYWORD(uwtable);
+
+ KEYWORD(inlinehint);
+ KEYWORD(noinline);
+ KEYWORD(alwaysinline);
+ KEYWORD(optsize);
+ KEYWORD(ssp);
+ KEYWORD(sspreq);
+ KEYWORD(noredzone);
+ KEYWORD(noimplicitfloat);
+ KEYWORD(naked);
+ KEYWORD(hotpatch);
+ KEYWORD(nonlazybind);
+
+ KEYWORD(type);
+ KEYWORD(opaque);
+
+ KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
+ KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
+ KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
+ KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
+
+ KEYWORD(x);
+ KEYWORD(blockaddress);
+#undef KEYWORD
+
+ // Keywords for types.
+#define TYPEKEYWORD(STR, LLVMTY) \
+ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
+ TyVal = LLVMTY; return lltok::Type; }
+ TYPEKEYWORD("void", Type::getVoidTy(Context));
+ TYPEKEYWORD("float", Type::getFloatTy(Context));
+ TYPEKEYWORD("double", Type::getDoubleTy(Context));
+ TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
+ TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
+ TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
+ TYPEKEYWORD("label", Type::getLabelTy(Context));
+ TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
+ TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
+#undef TYPEKEYWORD
+
+ // Keywords for instructions.
+#define INSTKEYWORD(STR, Enum) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
+ UIntVal = Instruction::Enum; return lltok::kw_##STR; }
+
+ INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
+ INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
+ INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
+ INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
+ INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
+ INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
+ INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
+ INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
+
+ INSTKEYWORD(phi, PHI);
+ INSTKEYWORD(call, Call);
+ INSTKEYWORD(trunc, Trunc);
+ INSTKEYWORD(zext, ZExt);
+ INSTKEYWORD(sext, SExt);
+ INSTKEYWORD(fptrunc, FPTrunc);
+ INSTKEYWORD(fpext, FPExt);
+ INSTKEYWORD(uitofp, UIToFP);
+ INSTKEYWORD(sitofp, SIToFP);
+ INSTKEYWORD(fptoui, FPToUI);
+ INSTKEYWORD(fptosi, FPToSI);
+ INSTKEYWORD(inttoptr, IntToPtr);
+ INSTKEYWORD(ptrtoint, PtrToInt);
+ INSTKEYWORD(bitcast, BitCast);
+ INSTKEYWORD(select, Select);
+ INSTKEYWORD(va_arg, VAArg);
+ INSTKEYWORD(ret, Ret);
+ INSTKEYWORD(br, Br);
+ INSTKEYWORD(switch, Switch);
+ INSTKEYWORD(indirectbr, IndirectBr);
+ INSTKEYWORD(invoke, Invoke);
+ INSTKEYWORD(unwind, Unwind);
+ INSTKEYWORD(unreachable, Unreachable);
+
+ INSTKEYWORD(alloca, Alloca);
+ INSTKEYWORD(load, Load);
+ INSTKEYWORD(store, Store);
+ INSTKEYWORD(getelementptr, GetElementPtr);
+
+ INSTKEYWORD(extractelement, ExtractElement);
+ INSTKEYWORD(insertelement, InsertElement);
+ INSTKEYWORD(shufflevector, ShuffleVector);
+ INSTKEYWORD(extractvalue, ExtractValue);
+ INSTKEYWORD(insertvalue, InsertValue);
+#undef INSTKEYWORD
+
+ // Check for [us]0x[0-9A-Fa-f]+, which are hexadecimal constants generated by
+ // the CFE to avoid forcing it to deal with 64-bit numbers.
+ if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
+ TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
+ int len = CurPtr-TokStart-3;
+ uint32_t bits = len * 4;
+ APInt Tmp(bits, StringRef(TokStart+3, len), 16);
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < bits)
+ Tmp = Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
+ return lltok::APSInt;
+ }
+
+ // If this is "cc1234", return this as just "cc".
+ if (TokStart[0] == 'c' && TokStart[1] == 'c') {
+ CurPtr = TokStart+2;
+ return lltok::kw_cc;
+ }
+
+ // Finally, if this isn't known, return an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+}
+
+
+/// Lex0x: Handle productions that start with 0x, knowing that it matches and
+/// that this is not a label:
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
+lltok::Kind LLLexer::Lex0x() {
+ CurPtr = TokStart + 2;
+
+ char Kind;
+ if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
+ Kind = *CurPtr++;
+ } else {
+ Kind = 'J';
+ }
+
+ if (!isxdigit(CurPtr[0])) {
+ // Bad token, return it as an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ if (Kind == 'J') {
+ // HexFPConstant - Floating point constant represented in IEEE format as a
+ // hexadecimal number for when exponential notation is not precise enough.
+ // Float and double only.
+ APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
+ return lltok::APFloat;
+ }
+
+ uint64_t Pair[2];
+ switch (Kind) {
+ default: llvm_unreachable("Unknown kind!");
+ case 'K':
+ // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
+ FP80HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(80, 2, Pair));
+ return lltok::APFloat;
+ case 'L':
+ // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(128, 2, Pair), true);
+ return lltok::APFloat;
+ case 'M':
+ // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(128, 2, Pair));
+ return lltok::APFloat;
+ }
+}
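+
+// For illustration: the token 0x3FF0000000000000 goes down the 'J' path above
+// and yields the double 1.0, since those 16 hexits are exactly the IEEE-754
+// bit pattern that BitsToDouble reinterprets.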
+
+/// LexDigitOrNegative: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// NInteger -[0-9]+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+/// PInteger [0-9]+
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexDigitOrNegative() {
+ // If the token starts with '-' and the next character is not a digit, this
+ // is probably a label.
+ if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
+ // Okay, this is not a number after the -, it's probably a label.
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+
+ return lltok::Error;
+ }
+
+ // At this point, it is either a label, int or fp constant.
+
+ // Skip digits, we have at least one.
+ for (; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ // Check to see if this really is a label after all, e.g. "-1:".
+ if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+ }
+
+ // If the next character is a '.', then it is an fp value; otherwise it's an
+ // integer.
+ if (CurPtr[0] != '.') {
+ if (TokStart[0] == '0' && TokStart[1] == 'x')
+ return Lex0x();
+ unsigned Len = CurPtr-TokStart;
+ uint32_t numBits = ((Len * 64) / 19) + 2;
+ APInt Tmp(numBits, StringRef(TokStart, Len), 10);
+ if (TokStart[0] == '-') {
+ uint32_t minBits = Tmp.getMinSignedBits();
+ if (minBits > 0 && minBits < numBits)
+ Tmp = Tmp.trunc(minBits);
+ APSIntVal = APSInt(Tmp, false);
+ } else {
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < numBits)
+ Tmp = Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, true);
+ }
+ return lltok::APSInt;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(CurPtr[0])) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(CurPtr[1]) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ CurPtr += 2;
+ while (isdigit(CurPtr[0])) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(std::atof(TokStart));
+ return lltok::APFloat;
+}
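+
+// A note on the numBits estimate above: 64 bits cover roughly 19 decimal
+// digits, so (Len * 64) / 19 bits is enough for any Len-digit literal, and
+// the + 2 leaves slack for truncation and for the sign of negative values;
+// a 20-digit literal, for example, gets (20 * 64) / 19 + 2 = 69 bits.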
+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+lltok::Kind LLLexer::LexPositive() {
+ // A '+' must be followed by a digit; unlike '-', it cannot start a label.
+ if (!isdigit(CurPtr[0]))
+ return lltok::Error;
+
+ // Skip digits.
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ // At this point, we need a '.'.
+ if (CurPtr[0] != '.') {
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(CurPtr[0])) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(CurPtr[1]) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ CurPtr += 2;
+ while (isdigit(CurPtr[0])) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(std::atof(TokStart));
+ return lltok::APFloat;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.h b/contrib/llvm/lib/AsmParser/LLLexer.h
new file mode 100644
index 000000000000..33b913572375
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLLexer.h
@@ -0,0 +1,93 @@
+//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ASMPARSER_LLLEXER_H
+#define LIB_ASMPARSER_LLLEXER_H
+
+#include "LLToken.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/SourceMgr.h"
+#include <string>
+
+namespace llvm {
+ class MemoryBuffer;
+ class Type;
+ class SMDiagnostic;
+ class LLVMContext;
+
+ class LLLexer {
+ const char *CurPtr;
+ MemoryBuffer *CurBuf;
+ SMDiagnostic &ErrorInfo;
+ SourceMgr &SM;
+ LLVMContext &Context;
+
+ // Information about the current token.
+ const char *TokStart;
+ lltok::Kind CurKind;
+ std::string StrVal;
+ unsigned UIntVal;
+ Type *TyVal;
+ APFloat APFloatVal;
+ APSInt APSIntVal;
+
+ std::string TheError;
+ public:
+ explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
+ LLVMContext &C);
+ ~LLLexer() {}
+
+ lltok::Kind Lex() {
+ return CurKind = LexToken();
+ }
+
+ typedef SMLoc LocTy;
+ LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
+ lltok::Kind getKind() const { return CurKind; }
+ const std::string &getStrVal() const { return StrVal; }
+ Type *getTyVal() const { return TyVal; }
+ unsigned getUIntVal() const { return UIntVal; }
+ const APSInt &getAPSIntVal() const { return APSIntVal; }
+ const APFloat &getAPFloatVal() const { return APFloatVal; }
+
+
+ bool Error(LocTy L, const Twine &Msg) const;
+ bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
+ std::string getFilename() const;
+
+ private:
+ lltok::Kind LexToken();
+
+ int getNextChar();
+ void SkipLineComment();
+ lltok::Kind ReadString(lltok::Kind kind);
+ bool ReadVarName();
+
+ lltok::Kind LexIdentifier();
+ lltok::Kind LexDigitOrNegative();
+ lltok::Kind LexPositive();
+ lltok::Kind LexAt();
+ lltok::Kind LexExclaim();
+ lltok::Kind LexPercent();
+ lltok::Kind LexQuote();
+ lltok::Kind Lex0x();
+
+ uint64_t atoull(const char *Buffer, const char *End);
+ uint64_t HexIntToVal(const char *Buffer, const char *End);
+ void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
+ void FP80HexToIntPair(const char *Buff, const char *End, uint64_t Pair[2]);
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
new file mode 100644
index 000000000000..cfc31f3db8a7
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -0,0 +1,3742 @@
+//===-- LLParser.cpp - Parser Class ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLParser.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static std::string getTypeString(const Type *T) {
+ std::string Result;
+ raw_string_ostream Tmp(Result);
+ Tmp << *T;
+ return Tmp.str();
+}
+
+/// Run: module ::= toplevelentity*
+bool LLParser::Run() {
+ // Prime the lexer.
+ Lex.Lex();
+
+ return ParseTopLevelEntities() ||
+ ValidateEndOfModule();
+}
+
+/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
+/// module.
+bool LLParser::ValidateEndOfModule() {
+ // Handle any instruction metadata forward references.
+ if (!ForwardRefInstMetadata.empty()) {
+ for (DenseMap<Instruction*, std::vector<MDRef> >::iterator
+ I = ForwardRefInstMetadata.begin(), E = ForwardRefInstMetadata.end();
+ I != E; ++I) {
+ Instruction *Inst = I->first;
+ const std::vector<MDRef> &MDList = I->second;
+
+ for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
+ unsigned SlotNo = MDList[i].MDSlot;
+
+ if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
+ return Error(MDList[i].Loc, "use of undefined metadata '!" +
+ Twine(SlotNo) + "'");
+ Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
+ }
+ }
+ ForwardRefInstMetadata.clear();
+ }
+
+
+ // If there are entries in ForwardRefBlockAddresses at this point, they are
+ // references after the function was defined. Resolve those now.
+ while (!ForwardRefBlockAddresses.empty()) {
+ // Okay, we are referencing an already-parsed function, resolve them now.
+ Function *TheFn = 0;
+ const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
+ if (Fn.Kind == ValID::t_GlobalName)
+ TheFn = M->getFunction(Fn.StrVal);
+ else if (Fn.UIntVal < NumberedVals.size())
+ TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
+
+ if (TheFn == 0)
+ return Error(Fn.Loc, "unknown function referenced by blockaddress");
+
+ // Resolve all these references.
+ if (ResolveForwardRefBlockAddresses(TheFn,
+ ForwardRefBlockAddresses.begin()->second,
+ 0))
+ return true;
+
+ ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
+ }
+
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
+ if (NumberedTypes[i].second.isValid())
+ return Error(NumberedTypes[i].second,
+ "use of undefined type '%" + Twine(i) + "'");
+
+ for (StringMap<std::pair<Type*, LocTy> >::iterator I =
+ NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
+ if (I->second.second.isValid())
+ return Error(I->second.second,
+ "use of undefined type named '" + I->getKey() + "'");
+
+ if (!ForwardRefVals.empty())
+ return Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '@" + ForwardRefVals.begin()->first +
+ "'");
+
+ if (!ForwardRefValIDs.empty())
+ return Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '@" +
+ Twine(ForwardRefValIDs.begin()->first) + "'");
+
+ if (!ForwardRefMDNodes.empty())
+ return Error(ForwardRefMDNodes.begin()->second.second,
+ "use of undefined metadata '!" +
+ Twine(ForwardRefMDNodes.begin()->first) + "'");
+
+
+ // Look for intrinsic functions and CallInst that need to be upgraded
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+ UpgradeCallsToIntrinsic(FI++); // must be post-increment, as the function may be removed
+
+ // Check debug info intrinsics.
+ CheckDebugInfoIntrinsics(M);
+ return false;
+}
+
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS) {
+ // Loop over all the references, resolving them.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
+ BasicBlock *Res;
+ if (PFS) {
+ if (Refs[i].first.Kind == ValID::t_LocalName)
+ Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc);
+ else
+ Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc);
+ } else if (Refs[i].first.Kind == ValID::t_LocalID) {
+ return Error(Refs[i].first.Loc,
+ "cannot take address of numeric label after the function is defined");
+ } else {
+ Res = dyn_cast_or_null<BasicBlock>(
+ TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
+ }
+
+ if (Res == 0)
+ return Error(Refs[i].first.Loc,
+ "referenced value is not a basic block");
+
+ // Get the BlockAddress for this and update references to use it.
+ BlockAddress *BA = BlockAddress::get(TheFn, Res);
+ Refs[i].second->replaceAllUsesWith(BA);
+ Refs[i].second->eraseFromParent();
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Top-Level Entities
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ParseTopLevelEntities() {
+ while (1) {
+ switch (Lex.getKind()) {
+ default: return TokError("expected top-level entity");
+ case lltok::Eof: return false;
+ case lltok::kw_declare: if (ParseDeclare()) return true; break;
+ case lltok::kw_define: if (ParseDefine()) return true; break;
+ case lltok::kw_module: if (ParseModuleAsm()) return true; break;
+ case lltok::kw_target: if (ParseTargetDefinition()) return true; break;
+ case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
+ case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
+ case lltok::LocalVar: if (ParseNamedType()) return true; break;
+ case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
+ case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
+ case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break;
+ case lltok::MetadataVar: if (ParseNamedMetadata()) return true; break;
+
+ // The global variable production with no name can have many different
+ // optional leading prefixes; the production is:
+ // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+ // OptionalAddrSpace OptionalUnnamedAddr
+ // ('constant'|'global') ...
+ case lltok::kw_private: // OptionalLinkage
+ case lltok::kw_linker_private: // OptionalLinkage
+ case lltok::kw_linker_private_weak: // OptionalLinkage
+ case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage
+ case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_weak: // OptionalLinkage
+ case lltok::kw_weak_odr: // OptionalLinkage
+ case lltok::kw_linkonce: // OptionalLinkage
+ case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_appending: // OptionalLinkage
+ case lltok::kw_dllexport: // OptionalLinkage
+ case lltok::kw_common: // OptionalLinkage
+ case lltok::kw_dllimport: // OptionalLinkage
+ case lltok::kw_extern_weak: // OptionalLinkage
+ case lltok::kw_external: { // OptionalLinkage
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", SMLoc(), Linkage, true, Visibility))
+ return true;
+ break;
+ }
+ case lltok::kw_default: // OptionalVisibility
+ case lltok::kw_hidden: // OptionalVisibility
+ case lltok::kw_protected: { // OptionalVisibility
+ unsigned Visibility;
+ if (ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", SMLoc(), 0, false, Visibility))
+ return true;
+ break;
+ }
+
+ case lltok::kw_thread_local: // OptionalThreadLocal
+ case lltok::kw_addrspace: // OptionalAddrSpace
+ case lltok::kw_constant: // GlobalType
+ case lltok::kw_global: // GlobalType
+ if (ParseGlobal("", SMLoc(), 0, false, 0)) return true;
+ break;
+ }
+ }
+}
+
+
+/// toplevelentity
+/// ::= 'module' 'asm' STRINGCONSTANT
+bool LLParser::ParseModuleAsm() {
+ assert(Lex.getKind() == lltok::kw_module);
+ Lex.Lex();
+
+ std::string AsmStr;
+ if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
+ ParseStringConstant(AsmStr)) return true;
+
+ M->appendModuleInlineAsm(AsmStr);
+ return false;
+}
+
+/// toplevelentity
+/// ::= 'target' 'triple' '=' STRINGCONSTANT
+/// ::= 'target' 'datalayout' '=' STRINGCONSTANT
+bool LLParser::ParseTargetDefinition() {
+ assert(Lex.getKind() == lltok::kw_target);
+ std::string Str;
+ switch (Lex.Lex()) {
+ default: return TokError("unknown target property");
+ case lltok::kw_triple:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target triple") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setTargetTriple(Str);
+ return false;
+ case lltok::kw_datalayout:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target datalayout") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setDataLayout(Str);
+ return false;
+ }
+}
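+
+// For illustration, this production accepts module lines such as
+//   target triple = "x86_64-unknown-linux-gnu"
+//   target datalayout = "e-p:64:64:64"
+// where the quoted strings are placeholder examples; any string constant is
+// accepted here and simply recorded on the module.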
+
+/// toplevelentity
+/// ::= 'deplibs' '=' '[' ']'
+/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
+bool LLParser::ParseDepLibs() {
+ assert(Lex.getKind() == lltok::kw_deplibs);
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after deplibs") ||
+ ParseToken(lltok::lsquare, "expected '=' after deplibs"))
+ return true;
+
+ if (EatIfPresent(lltok::rsquare))
+ return false;
+
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ M->addLibrary(Str);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseStringConstant(Str)) return true;
+ M->addLibrary(Str);
+ }
+
+ return ParseToken(lltok::rsquare, "expected ']' at end of list");
+}
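+
+// For illustration, this production accepts dependent-library lists such as
+//   deplibs = [ "m", "pthread" ]
+// where the library names are placeholder examples; each string is simply
+// passed to Module::addLibrary.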
+
+/// ParseUnnamedType:
+/// ::= LocalVarID '=' 'type' type
+bool LLParser::ParseUnnamedType() {
+ LocTy TypeLoc = Lex.getLoc();
+ unsigned TypeID = Lex.getUIntVal();
+ Lex.Lex(); // eat LocalVarID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after '='"))
+ return true;
+
+ if (TypeID >= NumberedTypes.size())
+ NumberedTypes.resize(TypeID+1);
+
+ Type *Result = 0;
+ if (ParseStructDefinition(TypeLoc, "",
+ NumberedTypes[TypeID], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
+ if (Entry.first)
+ return Error(TypeLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
+ }
+
+ return false;
+}
+
+
+/// toplevelentity
+/// ::= LocalVar '=' 'type' type
+bool LLParser::ParseNamedType() {
+ std::string Name = Lex.getStrVal();
+ LocTy NameLoc = Lex.getLoc();
+ Lex.Lex(); // eat LocalVar.
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after name"))
+ return true;
+
+ Type *Result = 0;
+ if (ParseStructDefinition(NameLoc, Name,
+ NamedTypes[Name], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
+ if (Entry.first)
+ return Error(NameLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
+ }
+
+ return false;
+}
+
+
+/// toplevelentity
+/// ::= 'declare' FunctionHeader
+bool LLParser::ParseDeclare() {
+ assert(Lex.getKind() == lltok::kw_declare);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, false);
+}
+
+/// toplevelentity
+/// ::= 'define' FunctionHeader '{' ...
+bool LLParser::ParseDefine() {
+ assert(Lex.getKind() == lltok::kw_define);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, true) ||
+ ParseFunctionBody(*F);
+}
+
+/// ParseGlobalType
+/// ::= 'constant'
+/// ::= 'global'
+bool LLParser::ParseGlobalType(bool &IsConstant) {
+ if (Lex.getKind() == lltok::kw_constant)
+ IsConstant = true;
+ else if (Lex.getKind() == lltok::kw_global)
+ IsConstant = false;
+ else {
+ IsConstant = false;
+ return TokError("expected 'global' or 'constant'");
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUnnamedGlobal:
+/// OptionalVisibility ALIAS ...
+/// OptionalLinkage OptionalVisibility ... -> global variable
+/// GlobalID '=' OptionalVisibility ALIAS ...
+/// GlobalID '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseUnnamedGlobal() {
+ unsigned VarID = NumberedVals.size();
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+
+ // Handle the GlobalID form.
+ if (Lex.getKind() == lltok::GlobalID) {
+ if (Lex.getUIntVal() != VarID)
+ return Error(Lex.getLoc(), "variable expected to be numbered '%" +
+ Twine(VarID) + "'");
+ Lex.Lex(); // eat GlobalID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name"))
+ return true;
+ }
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+/// ParseNamedGlobal:
+/// GlobalVar '=' OptionalVisibility ALIAS ...
+/// GlobalVar '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseNamedGlobal() {
+ assert(Lex.getKind() == lltok::GlobalVar);
+ LocTy NameLoc = Lex.getLoc();
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseToken(lltok::equal, "expected '=' in global variable") ||
+ ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+// MDString:
+// ::= '!' STRINGCONSTANT
+bool LLParser::ParseMDString(MDString *&Result) {
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ Result = MDString::get(Context, Str);
+ return false;
+}
+
+// MDNode:
+// ::= '!' MDNodeNumber
+//
+/// This version of ParseMDNodeID returns the slot number and null in the case
+/// of a forward reference.
+bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) {
+ // !{ ..., !42, ... }
+ if (ParseUInt32(SlotNo)) return true;
+
+ // Check existing MDNode.
+ if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0)
+ Result = NumberedMetadata[SlotNo];
+ else
+ Result = 0;
+ return false;
+}
+
+bool LLParser::ParseMDNodeID(MDNode *&Result) {
+ // !{ ..., !42, ... }
+ unsigned MID = 0;
+ if (ParseMDNodeID(Result, MID)) return true;
+
+ // If not a forward reference, just return it now.
+ if (Result) return false;
+
+ // Otherwise, create MDNode forward reference.
+ MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
+
+ if (NumberedMetadata.size() <= MID)
+ NumberedMetadata.resize(MID+1);
+ NumberedMetadata[MID] = FwdNode;
+ Result = FwdNode;
+ return false;
+}
+
+/// ParseNamedMetadata:
+/// !foo = !{ !1, !2 }
+bool LLParser::ParseNamedMetadata() {
+ assert(Lex.getKind() == lltok::MetadataVar);
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseToken(lltok::exclaim, "Expected '!' here") ||
+ ParseToken(lltok::lbrace, "Expected '{' here"))
+ return true;
+
+ NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
+ if (Lex.getKind() != lltok::rbrace)
+ do {
+ if (ParseToken(lltok::exclaim, "Expected '!' here"))
+ return true;
+
+ MDNode *N = 0;
+ if (ParseMDNodeID(N)) return true;
+ NMD->addOperand(N);
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ return false;
+}
+
+/// ParseStandaloneMetadata:
+/// !42 = !{...}
+bool LLParser::ParseStandaloneMetadata() {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+ unsigned MetadataID = 0;
+
+ LocTy TyLoc;
+ Type *Ty = 0;
+ SmallVector<Value *, 16> Elts;
+ if (ParseUInt32(MetadataID) ||
+ ParseToken(lltok::equal, "expected '=' here") ||
+ ParseType(Ty, TyLoc) ||
+ ParseToken(lltok::exclaim, "Expected '!' here") ||
+ ParseToken(lltok::lbrace, "Expected '{' here") ||
+ ParseMDNodeVector(Elts, NULL) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ MDNode *Init = MDNode::get(Context, Elts);
+
+ // See if this was forward referenced, if so, handle it.
+ std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
+ FI = ForwardRefMDNodes.find(MetadataID);
+ if (FI != ForwardRefMDNodes.end()) {
+ MDNode *Temp = FI->second.first;
+ Temp->replaceAllUsesWith(Init);
+ MDNode::deleteTemporary(Temp);
+ ForwardRefMDNodes.erase(FI);
+
+ assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
+ } else {
+ if (MetadataID >= NumberedMetadata.size())
+ NumberedMetadata.resize(MetadataID+1);
+
+ if (NumberedMetadata[MetadataID] != 0)
+ return TokError("Metadata id is already used");
+ NumberedMetadata[MetadataID] = Init;
+ }
+
+ return false;
+}
+
+/// ParseAlias:
+/// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
+/// Aliasee
+/// ::= TypeAndValue
+/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
+/// ::= 'getelementptr' 'inbounds'? '(' ... ')'
+///
+/// Everything through visibility has already been parsed.
+///
+bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
+ unsigned Visibility) {
+ assert(Lex.getKind() == lltok::kw_alias);
+ Lex.Lex();
+ unsigned Linkage;
+ LocTy LinkageLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage))
+ return true;
+
+ if (Linkage != GlobalValue::ExternalLinkage &&
+ Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::WeakODRLinkage &&
+ Linkage != GlobalValue::InternalLinkage &&
+ Linkage != GlobalValue::PrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ return Error(LinkageLoc, "invalid linkage type for alias");
+
+ Constant *Aliasee;
+ LocTy AliaseeLoc = Lex.getLoc();
+ if (Lex.getKind() != lltok::kw_bitcast &&
+ Lex.getKind() != lltok::kw_getelementptr) {
+ if (ParseGlobalTypeAndValue(Aliasee)) return true;
+ } else {
+ // A constantexpr aliasee is not prefixed with a type; its type is implied
+ // by the expression itself.
+ ValID ID;
+ if (ParseValID(ID)) return true;
+ if (ID.Kind != ValID::t_Constant)
+ return Error(AliaseeLoc, "invalid aliasee");
+ Aliasee = ID.ConstantVal;
+ }
+
+ if (!Aliasee->getType()->isPointerTy())
+ return Error(AliaseeLoc, "alias must have pointer type");
+
+ // Okay, create the alias but do not insert it into the module yet.
+ GlobalAlias* GA = new GlobalAlias(Aliasee->getType(),
+ (GlobalValue::LinkageTypes)Linkage, Name,
+ Aliasee);
+ GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+
+ // See if this value already exists in the symbol table. If so, it is either
+ // a redefinition or a definition of a forward reference.
+ if (GlobalValue *Val = M->getNamedValue(Name)) {
+ // See if this was a redefinition. If so, there is no entry in
+ // ForwardRefVals.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I == ForwardRefVals.end())
+ return Error(NameLoc, "redefinition of global named '@" + Name + "'");
+
+ // Otherwise, this was a definition of forward ref. Verify that types
+ // agree.
+ if (Val->getType() != GA->getType())
+ return Error(NameLoc,
+ "forward reference and definition of alias have different types");
+
+ // If they agree, just RAUW the old value with the alias and remove the
+ // forward ref info.
+ Val->replaceAllUsesWith(GA);
+ Val->eraseFromParent();
+ ForwardRefVals.erase(I);
+ }
+
+ // Insert into the module; we know its name won't collide now.
+ M->getAliasList().push_back(GA);
+ assert(GA->getName() == Name && "Should not be a name conflict!");
+
+ return false;
+}
+
+/// ParseGlobal
+/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
+/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
+///
+/// Everything through visibility has been parsed already.
+///
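+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   @counter = internal global i32 0, align 4
+///   @msg = constant [3 x i8] c"hi\00", section ".rodata"
+///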
+bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
+ unsigned Linkage, bool HasLinkage,
+ unsigned Visibility) {
+ unsigned AddrSpace;
+ bool ThreadLocal, IsConstant, UnnamedAddr;
+ LocTy UnnamedAddrLoc;
+ LocTy TyLoc;
+
+ Type *Ty = 0;
+ if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
+ ParseOptionalAddrSpace(AddrSpace) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
+ ParseGlobalType(IsConstant) ||
+ ParseType(Ty, TyLoc))
+ return true;
+
+ // If the linkage is specified and is external, then no initializer is
+ // present.
+ Constant *Init = 0;
+ if (!HasLinkage || (Linkage != GlobalValue::DLLImportLinkage &&
+ Linkage != GlobalValue::ExternalWeakLinkage &&
+ Linkage != GlobalValue::ExternalLinkage)) {
+ if (ParseGlobalValue(Ty, Init))
+ return true;
+ }
+
+ if (Ty->isFunctionTy() || Ty->isLabelTy())
+ return Error(TyLoc, "invalid type for global variable");
+
+ GlobalVariable *GV = 0;
+
+ // See if the global was forward referenced, if so, use the global.
+ if (!Name.empty()) {
+ if (GlobalValue *GVal = M->getNamedValue(Name)) {
+ if (!ForwardRefVals.erase(Name) || !isa<GlobalValue>(GVal))
+ return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ GV = cast<GlobalVariable>(GVal);
+ }
+ } else {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ GV = cast<GlobalVariable>(I->second.first);
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (GV == 0) {
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0,
+ Name, 0, false, AddrSpace);
+ } else {
+ if (GV->getType()->getElementType() != Ty)
+ return Error(TyLoc,
+ "forward reference and definition of global have different types");
+
+ // Move the forward-reference to the correct spot in the module.
+ M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV);
+ }
+
+ if (Name.empty())
+ NumberedVals.push_back(GV);
+
+ // Set the parsed properties on the global.
+ if (Init)
+ GV->setInitializer(Init);
+ GV->setConstant(IsConstant);
+ GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setThreadLocal(ThreadLocal);
+ GV->setUnnamedAddr(UnnamedAddr);
+
+ // Parse attributes on the global.
+ while (Lex.getKind() == lltok::comma) {
+ Lex.Lex();
+
+ if (Lex.getKind() == lltok::kw_section) {
+ Lex.Lex();
+ GV->setSection(Lex.getStrVal());
+ if (ParseToken(lltok::StringConstant, "expected global section string"))
+ return true;
+ } else if (Lex.getKind() == lltok::kw_align) {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment)) return true;
+ GV->setAlignment(Alignment);
+ } else {
+ TokError("unknown global variable property!");
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Reference/Resolution Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetGlobalVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
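+/// For illustration (an assumed example, not taken from this file): in
+///   @p = global i32* @x
+///   @x = global i32 7
+/// the use of @x precedes its definition, so a declaration with external weak
+/// linkage is created as a placeholder and resolved when @x is later parsed.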
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
+ LocTy Loc) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ // Look this name up in the normal function symbol table.
+ GlobalValue *Val =
+ cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name));
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ else
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, Name);
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
+ else
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, "");
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Routines.
+//===----------------------------------------------------------------------===//
+
+/// ParseToken - If the current token has the specified kind, eat it and return
+/// success. Otherwise, emit the specified error and return failure.
+bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) {
+ if (Lex.getKind() != T)
+ return TokError(ErrMsg);
+ Lex.Lex();
+ return false;
+}
+
+/// ParseStringConstant
+/// ::= StringConstant
+bool LLParser::ParseStringConstant(std::string &Result) {
+ if (Lex.getKind() != lltok::StringConstant)
+ return TokError("expected string constant");
+ Result = Lex.getStrVal();
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUInt32
+/// ::= uint32
+bool LLParser::ParseUInt32(unsigned &Val) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+ return TokError("expected integer");
+ uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
+ if (Val64 != unsigned(Val64))
+ return TokError("expected 32-bit integer (too large)");
+ Val = Val64;
+ Lex.Lex();
+ return false;
+}
+
+
+/// ParseOptionalAddrSpace
+/// := /*empty*/
+/// := 'addrspace' '(' uint32 ')'
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
+ AddrSpace = 0;
+ if (!EatIfPresent(lltok::kw_addrspace))
+ return false;
+ return ParseToken(lltok::lparen, "expected '(' in address space") ||
+ ParseUInt32(AddrSpace) ||
+ ParseToken(lltok::rparen, "expected ')' in address space");
+}
+
+/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
+/// indicates what kind of attribute list this is: 0: function arg, 1: result,
+/// 2: function attr.
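+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   define zeroext i8 @f(i32* nocapture %p) nounwind readonly { ... }
+/// combines a return attribute (zeroext, AttrKind 1), a parameter attribute
+/// (nocapture, AttrKind 0) and function attributes (nounwind readonly,
+/// AttrKind 2).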
+bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
+ Attrs = Attribute::None;
+ LocTy AttrLoc = Lex.getLoc();
+
+ while (1) {
+ switch (Lex.getKind()) {
+ default: // End of attributes.
+ if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
+ return Error(AttrLoc, "invalid use of function-only attribute");
+
+ // As a hack, we allow "align 2" on functions as a synonym for
+ // "alignstack 2".
+ if (AttrKind == 2 &&
+ (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
+ return Error(AttrLoc, "invalid use of attribute on a function");
+
+ if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
+ return Error(AttrLoc, "invalid use of parameter-only attribute");
+
+ return false;
+ case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
+ case lltok::kw_signext: Attrs |= Attribute::SExt; break;
+ case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
+ case lltok::kw_sret: Attrs |= Attribute::StructRet; break;
+ case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break;
+ case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break;
+ case lltok::kw_byval: Attrs |= Attribute::ByVal; break;
+ case lltok::kw_nest: Attrs |= Attribute::Nest; break;
+
+ case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break;
+ case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break;
+ case lltok::kw_uwtable: Attrs |= Attribute::UWTable; break;
+ case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
+ case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
+ case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
+ case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break;
+ case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
+ case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break;
+ case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break;
+ case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break;
+ case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break;
+ case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
+ case lltok::kw_naked: Attrs |= Attribute::Naked; break;
+ case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break;
+ case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
+
+ case lltok::kw_alignstack: {
+ unsigned Alignment;
+ if (ParseOptionalStackAlignment(Alignment))
+ return true;
+ Attrs |= Attribute::constructStackAlignmentFromInt(Alignment);
+ continue;
+ }
+
+ case lltok::kw_align: {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ Attrs |= Attribute::constructAlignmentFromInt(Alignment);
+ continue;
+ }
+
+ }
+ Lex.Lex();
+ }
+}
+
+/// ParseOptionalLinkage
+/// ::= /*empty*/
+/// ::= 'private'
+/// ::= 'linker_private'
+/// ::= 'linker_private_weak'
+/// ::= 'linker_private_weak_def_auto'
+/// ::= 'internal'
+/// ::= 'weak'
+/// ::= 'weak_odr'
+/// ::= 'linkonce'
+/// ::= 'linkonce_odr'
+/// ::= 'available_externally'
+/// ::= 'appending'
+/// ::= 'dllexport'
+/// ::= 'common'
+/// ::= 'dllimport'
+/// ::= 'extern_weak'
+/// ::= 'external'
+bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
+ HasLinkage = false;
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::ExternalLinkage; return false;
+ case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
+ case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break;
+ case lltok::kw_linker_private_weak:
+ Res = GlobalValue::LinkerPrivateWeakLinkage;
+ break;
+ case lltok::kw_linker_private_weak_def_auto:
+ Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ break;
+ case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
+ case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
+ case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
+ case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
+ case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+ case lltok::kw_available_externally:
+ Res = GlobalValue::AvailableExternallyLinkage;
+ break;
+ case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break;
+ case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break;
+ case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
+ case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break;
+ case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
+ case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
+ }
+ Lex.Lex();
+ HasLinkage = true;
+ return false;
+}
+
+/// ParseOptionalVisibility
+/// ::= /*empty*/
+/// ::= 'default'
+/// ::= 'hidden'
+/// ::= 'protected'
+///
+bool LLParser::ParseOptionalVisibility(unsigned &Res) {
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::DefaultVisibility; return false;
+ case lltok::kw_default: Res = GlobalValue::DefaultVisibility; break;
+ case lltok::kw_hidden: Res = GlobalValue::HiddenVisibility; break;
+ case lltok::kw_protected: Res = GlobalValue::ProtectedVisibility; break;
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalCallingConv
+/// ::= /*empty*/
+/// ::= 'ccc'
+/// ::= 'fastcc'
+/// ::= 'coldcc'
+/// ::= 'x86_stdcallcc'
+/// ::= 'x86_fastcallcc'
+/// ::= 'x86_thiscallcc'
+/// ::= 'arm_apcscc'
+/// ::= 'arm_aapcscc'
+/// ::= 'arm_aapcs_vfpcc'
+/// ::= 'msp430_intrcc'
+/// ::= 'ptx_kernel'
+/// ::= 'ptx_device'
+/// ::= 'cc' UINT
+///
+bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
+ switch (Lex.getKind()) {
+ default: CC = CallingConv::C; return false;
+ case lltok::kw_ccc: CC = CallingConv::C; break;
+ case lltok::kw_fastcc: CC = CallingConv::Fast; break;
+ case lltok::kw_coldcc: CC = CallingConv::Cold; break;
+ case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
+ case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+ case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
+ case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
+ case lltok::kw_arm_aapcs_vfpcc: CC = CallingConv::ARM_AAPCS_VFP; break;
+ case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
+ case lltok::kw_ptx_kernel: CC = CallingConv::PTX_Kernel; break;
+ case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break;
+ case lltok::kw_cc: {
+ unsigned ArbitraryCC;
+ Lex.Lex();
+ if (ParseUInt32(ArbitraryCC))
+ return true;
+ CC = static_cast<CallingConv::ID>(ArbitraryCC);
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseInstructionMetadata
+/// ::= !dbg !42 (',' !dbg !57)*
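+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   %v = load i32* %p, !dbg !7, !tbaa !2
+/// Each !name selects a metadata kind, and the trailing !N may be a forward
+/// reference; such references are queued in ForwardRefInstMetadata.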
+bool LLParser::ParseInstructionMetadata(Instruction *Inst,
+ PerFunctionState *PFS) {
+ do {
+ if (Lex.getKind() != lltok::MetadataVar)
+ return TokError("expected metadata after comma");
+
+ std::string Name = Lex.getStrVal();
+ unsigned MDK = M->getMDKindID(Name.c_str());
+ Lex.Lex();
+
+ MDNode *Node;
+ SMLoc Loc = Lex.getLoc();
+
+ if (ParseToken(lltok::exclaim, "expected '!' here"))
+ return true;
+
+ // This code is similar to that of ParseMetadataValue, however it needs to
+ // have special-case code for a forward reference; see the comments on
+ // ForwardRefInstMetadata for details. Also, MDStrings are not supported
+ // at the top level here.
+ if (Lex.getKind() == lltok::lbrace) {
+ ValID ID;
+ if (ParseMetadataListValue(ID, PFS))
+ return true;
+ assert(ID.Kind == ValID::t_MDNode);
+ Inst->setMetadata(MDK, ID.MDNodeVal);
+ } else {
+ unsigned NodeID = 0;
+ if (ParseMDNodeID(Node, NodeID))
+ return true;
+ if (Node) {
+ // If we got the node, add it to the instruction.
+ Inst->setMetadata(MDK, Node);
+ } else {
+ MDRef R = { Loc, MDK, NodeID };
+ // Otherwise, remember that this should be resolved later.
+ ForwardRefInstMetadata[Inst].push_back(R);
+ }
+ }
+
+ // If this is the end of the list, we're done.
+ } while (EatIfPresent(lltok::comma));
+ return false;
+}
+
+/// ParseOptionalAlignment
+/// ::= /* empty */
+/// ::= 'align' 4
+bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_align))
+ return false;
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "alignment is not a power of two");
+ if (Alignment > Value::MaximumAlignment)
+ return Error(AlignLoc, "huge alignments are not supported yet");
+ return false;
+}
+
+/// ParseOptionalCommaAlign
+/// ::=
+/// ::= ',' align 4
+///
+/// This returns with AteExtraComma set to true if it ate an excess comma at the
+/// end.
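+/// For illustration (assumed .ll syntax, not quoted from this file), in
+///   store i32 0, i32* %p, align 4, !nontemporal !1
+/// the final comma introduces instruction metadata rather than another
+/// property, which is what AteExtraComma reports back to the caller.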
+bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ // Metadata at the end is an early exit.
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+
+ if (Lex.getKind() != lltok::kw_align)
+ return Error(Lex.getLoc(), "expected metadata or 'align'");
+
+ if (ParseOptionalAlignment(Alignment)) return true;
+ }
+
+ return false;
+}
+
+/// ParseOptionalStackAlignment
+/// ::= /* empty */
+/// ::= 'alignstack' '(' 4 ')'
+bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_alignstack))
+ return false;
+ LocTy ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return Error(ParenLoc, "expected '('");
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return Error(ParenLoc, "expected ')'");
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "stack alignment is not a power of two");
+ return false;
+}
+
+/// ParseIndexList - This parses the index list for an insert/extractvalue
+/// instruction. This sets AteExtraComma in the case where we eat an extra
+/// comma at the end of the line and find that it is followed by metadata.
+/// Clients that don't allow metadata can call the version of this function that
+/// only takes one argument.
+///
+/// ParseIndexList
+/// ::= (',' uint32)+
+///
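+/// For illustration (assumed .ll syntax, not quoted from this file), the
+/// ", 1, 0" in
+///   extractvalue {i32, {i32, i8}} %agg, 1, 0
+/// is the index list parsed here.
+///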
+bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+
+ if (Lex.getKind() != lltok::comma)
+ return TokError("expected ',' as start of index list");
+
+ while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+ unsigned Idx = 0;
+ if (ParseUInt32(Idx)) return true;
+ Indices.push_back(Idx);
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Type Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseType - Parse a type.
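+/// For illustration (assumed .ll syntax, not quoted from this file), this
+/// accepts simple and suffixed forms alike, e.g. i32, i8*, i8 addrspace(1)*,
+/// [4 x float], <4 x i32>, {i32, i8*} and i32 (i8*, ...)*.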
+bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
+ SMLoc TypeLoc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default:
+ return TokError("expected type");
+ case lltok::Type:
+ // Type ::= 'float' | 'void' (etc)
+ Result = Lex.getTyVal();
+ Lex.Lex();
+ break;
+ case lltok::lbrace:
+ // Type ::= StructType
+ if (ParseAnonStructType(Result, false))
+ return true;
+ break;
+ case lltok::lsquare:
+ // Type ::= '[' ... ']'
+ Lex.Lex(); // eat the lsquare.
+ if (ParseArrayVectorType(Result, false))
+ return true;
+ break;
+ case lltok::less: // Either vector or packed struct.
+ // Type ::= '<' ... '>'
+ Lex.Lex();
+ if (Lex.getKind() == lltok::lbrace) {
+ if (ParseAnonStructType(Result, true) ||
+ ParseToken(lltok::greater, "expected '>' at end of packed struct"))
+ return true;
+ } else if (ParseArrayVectorType(Result, true))
+ return true;
+ break;
+ case lltok::LocalVar: {
+ // Type ::= %foo
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::createNamed(Context, Lex.getStrVal());
+ Entry.second = Lex.getLoc();
+ }
+ Result = Entry.first;
+ Lex.Lex();
+ break;
+ }
+
+ case lltok::LocalVarID: {
+ // Type ::= %4
+ if (Lex.getUIntVal() >= NumberedTypes.size())
+ NumberedTypes.resize(Lex.getUIntVal()+1);
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::createNamed(Context, "");
+ Entry.second = Lex.getLoc();
+ }
+ Result = Entry.first;
+ Lex.Lex();
+ break;
+ }
+ }
+
+ // Parse the type suffixes.
+ while (1) {
+ switch (Lex.getKind()) {
+ // End of type.
+ default:
+ if (!AllowVoid && Result->isVoidTy())
+ return Error(TypeLoc, "void type only allowed for function results");
+ return false;
+
+ // Type ::= Type '*'
+ case lltok::star:
+ if (Result->isLabelTy())
+ return TokError("basic block pointers are invalid");
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid - use i8* instead");
+ if (!PointerType::isValidElementType(Result))
+ return TokError("pointer to this type is invalid");
+ Result = PointerType::getUnqual(Result);
+ Lex.Lex();
+ break;
+
+ // Type ::= Type 'addrspace' '(' uint32 ')' '*'
+ case lltok::kw_addrspace: {
+ if (Result->isLabelTy())
+ return TokError("basic block pointers are invalid");
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid; use i8* instead");
+ if (!PointerType::isValidElementType(Result))
+ return TokError("pointer to this type is invalid");
+ unsigned AddrSpace;
+ if (ParseOptionalAddrSpace(AddrSpace) ||
+ ParseToken(lltok::star, "expected '*' in address space"))
+ return true;
+
+ Result = PointerType::get(Result, AddrSpace);
+ break;
+ }
+
+ // Type ::= Type '(' ArgTypeListI ')' OptFuncAttrs
+ case lltok::lparen:
+ if (ParseFunctionType(Result))
+ return true;
+ break;
+ }
+ }
+}
+
+/// ParseParameterList
+/// ::= '(' ')'
+/// ::= '(' Arg (',' Arg)* ')'
+/// Arg
+/// ::= Type OptionalAttributes Value OptionalAttributes
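+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   call void @f(i32 signext %x, i8* nocapture %p)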
+bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS) {
+ if (ParseToken(lltok::lparen, "expected '(' in call"))
+ return true;
+
+ while (Lex.getKind() != lltok::rparen) {
+ // If this isn't the first argument, we need a comma.
+ if (!ArgList.empty() &&
+ ParseToken(lltok::comma, "expected ',' in argument list"))
+ return true;
+
+ // Parse the argument.
+ LocTy ArgLoc;
+ Type *ArgTy = 0;
+ unsigned ArgAttrs1 = Attribute::None;
+ unsigned ArgAttrs2 = Attribute::None;
+ Value *V;
+ if (ParseType(ArgTy, ArgLoc))
+ return true;
+
+ // Parse the optional parameter attributes and the operand itself.
+ if (ParseOptionalAttrs(ArgAttrs1, 0) || ParseValue(ArgTy, V, PFS))
+ return true;
+ ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
+ }
+
+ Lex.Lex(); // Lex the ')'.
+ return false;
+}
+
+
+
+/// ParseArgumentList - Parse the argument list for a function type or function
+/// prototype.
+/// ::= '(' ArgTypeListI ')'
+/// ArgTypeListI
+/// ::= /*empty*/
+/// ::= '...'
+/// ::= ArgTypeList ',' '...'
+/// ::= ArgType (',' ArgType)*
+///
+bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
+ bool &isVarArg){
+ isVarArg = false;
+ assert(Lex.getKind() == lltok::lparen);
+ Lex.Lex(); // eat the (.
+
+ if (Lex.getKind() == lltok::rparen) {
+ // empty
+ } else if (Lex.getKind() == lltok::dotdotdot) {
+ isVarArg = true;
+ Lex.Lex();
+ } else {
+ LocTy TypeLoc = Lex.getLoc();
+ Type *ArgTy = 0;
+ unsigned Attrs;
+ std::string Name;
+
+ if (ParseType(ArgTy) ||
+ ParseOptionalAttrs(Attrs, 0)) return true;
+
+ if (ArgTy->isVoidTy())
+ return Error(TypeLoc, "argument can not have void type");
+
+ if (Lex.getKind() == lltok::LocalVar) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ if (!FunctionType::isValidArgumentType(ArgTy))
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+
+ while (EatIfPresent(lltok::comma)) {
+ // Handle ... at end of arg list.
+ if (EatIfPresent(lltok::dotdotdot)) {
+ isVarArg = true;
+ break;
+ }
+
+ // Otherwise must be an argument type.
+ TypeLoc = Lex.getLoc();
+ if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true;
+
+ if (ArgTy->isVoidTy())
+ return Error(TypeLoc, "argument can not have void type");
+
+ if (Lex.getKind() == lltok::LocalVar) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ } else {
+ Name = "";
+ }
+
+ if (!ArgTy->isFirstClassType())
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+ }
+ }
+
+ return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+}
+
+/// ParseFunctionType
+/// ::= Type ArgumentList OptionalAttrs
+bool LLParser::ParseFunctionType(Type *&Result) {
+ assert(Lex.getKind() == lltok::lparen);
+
+ if (!FunctionType::isValidReturnType(Result))
+ return TokError("invalid function return type");
+
+ SmallVector<ArgInfo, 8> ArgList;
+ bool isVarArg;
+ if (ParseArgumentList(ArgList, isVarArg))
+ return true;
+
+ // Reject names on the argument list.
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ if (!ArgList[i].Name.empty())
+ return Error(ArgList[i].Loc, "argument name invalid in function type");
+ if (ArgList[i].Attrs != 0)
+ return Error(ArgList[i].Loc,
+ "argument attributes invalid in function type");
+ }
+
+ SmallVector<Type*, 16> ArgListTy;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ArgListTy.push_back(ArgList[i].Ty);
+
+ Result = FunctionType::get(Result, ArgListTy, isVarArg);
+ return false;
+}
+
+/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into
+/// other structs.
+bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
+ SmallVector<Type*, 8> Elts;
+ if (ParseStructBody(Elts)) return true;
+
+ Result = StructType::get(Context, Elts, Packed);
+ return false;
+}
+
+/// ParseStructDefinition - Parse a struct in a 'type' definition.
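+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   %opaque = type opaque
+///   %node = type { i32, %node* }
+/// The recursive form is why a named StructType is created first and its body
+/// is filled in afterwards with setBody.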
+bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy) {
+ // If the type was already defined, diagnose the redefinition.
+ if (Entry.first && !Entry.second.isValid())
+ return Error(TypeLoc, "redefinition of type");
+
+ // If we have opaque, just return without filling in the definition for the
+ // struct. This counts as a definition as far as the .ll file goes.
+ if (EatIfPresent(lltok::kw_opaque)) {
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::createNamed(Context, Name);
+ ResultTy = Entry.first;
+ return false;
+ }
+
+ // If the type starts with '<', then it is either a packed struct or a vector.
+ bool isPacked = EatIfPresent(lltok::less);
+
+ // If we don't have a struct, then we have a random type alias, which we
+ // accept for compatibility with old files. These types are not allowed to be
+ // forward referenced and not allowed to be recursive.
+ if (Lex.getKind() != lltok::lbrace) {
+ if (Entry.first)
+ return Error(TypeLoc, "forward references to non-struct type");
+
+ ResultTy = 0;
+ if (isPacked)
+ return ParseArrayVectorType(ResultTy, true);
+ return ParseType(ResultTy);
+ }
+
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::createNamed(Context, Name);
+
+ StructType *STy = cast<StructType>(Entry.first);
+
+ SmallVector<Type*, 8> Body;
+ if (ParseStructBody(Body) ||
+ (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
+ return true;
+
+ STy->setBody(Body, isPacked);
+ ResultTy = STy;
+ return false;
+}
+
+
+/// ParseStructBody: Handles packed and unpacked struct bodies; the '<' and
+/// '>' around a packed struct are parsed elsewhere.
+/// StructType
+/// ::= '{' '}'
+/// ::= '{' Type (',' Type)* '}'
+/// ::= '<' '{' '}' '>'
+/// ::= '<' '{' Type (',' Type)* '}' '>'
+bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex(); // Consume the '{'
+
+ // Handle the empty struct.
+ if (EatIfPresent(lltok::rbrace))
+ return false;
+
+ LocTy EltTyLoc = Lex.getLoc();
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+ Body.push_back(Ty);
+
+ if (!StructType::isValidElementType(Ty))
+ return Error(EltTyLoc, "invalid element type for struct");
+
+ while (EatIfPresent(lltok::comma)) {
+ EltTyLoc = Lex.getLoc();
+ if (ParseType(Ty)) return true;
+
+ if (!StructType::isValidElementType(Ty))
+ return Error(EltTyLoc, "invalid element type for struct");
+
+ Body.push_back(Ty);
+ }
+
+ return ParseToken(lltok::rbrace, "expected '}' at end of struct");
+}
+
+/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// token has already been consumed.
+/// Type
+/// ::= '[' APSINTVAL 'x' Types ']'
+/// ::= '<' APSINTVAL 'x' Types '>'
+bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
+ Lex.getAPSIntVal().getBitWidth() > 64)
+ return TokError("expected number in address space");
+
+ LocTy SizeLoc = Lex.getLoc();
+ uint64_t Size = Lex.getAPSIntVal().getZExtValue();
+ Lex.Lex();
+
+ if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
+ return true;
+
+ LocTy TypeLoc = Lex.getLoc();
+ Type *EltTy = 0;
+ if (ParseType(EltTy)) return true;
+
+ if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+ "expected end of sequential type"))
+ return true;
+
+ if (isVector) {
+ if (Size == 0)
+ return Error(SizeLoc, "zero element vector is illegal");
+ if ((unsigned)Size != Size)
+ return Error(SizeLoc, "size too large for vector");
+ if (!VectorType::isValidElementType(EltTy))
+ return Error(TypeLoc, "vector element type must be fp or integer");
+ Result = VectorType::get(EltTy, unsigned(Size));
+ } else {
+ if (!ArrayType::isValidElementType(EltTy))
+ return Error(TypeLoc, "invalid array element type");
+ Result = ArrayType::get(EltTy, Size);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Semantic Analysis.
+//===----------------------------------------------------------------------===//
+
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
+ int functionNumber)
+ : P(p), F(f), FunctionNumber(functionNumber) {
+
+ // Insert unnamed arguments into the NumberedVals list.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI)
+ if (!AI->hasName())
+ NumberedVals.push_back(AI);
+}
+
+LLParser::PerFunctionState::~PerFunctionState() {
+ // If there were any forward referenced non-basicblock values, delete them.
+ for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(
+ UndefValue::get(I->second.first->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+
+ for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(
+ UndefValue::get(I->second.first->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+}
+
+bool LLParser::PerFunctionState::FinishFunction() {
+ // Check to see if someone took the address of labels in this block.
+ if (!P.ForwardRefBlockAddresses.empty()) {
+ ValID FunctionID;
+ if (!F.getName().empty()) {
+ FunctionID.Kind = ValID::t_GlobalName;
+ FunctionID.StrVal = F.getName();
+ } else {
+ FunctionID.Kind = ValID::t_GlobalID;
+ FunctionID.UIntVal = FunctionNumber;
+ }
+
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
+ FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
+ if (FRBAI != P.ForwardRefBlockAddresses.end()) {
+ // Resolve all these references.
+ if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
+ return true;
+
+ P.ForwardRefBlockAddresses.erase(FRBAI);
+ }
+ }
+
+ if (!ForwardRefVals.empty())
+ return P.Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '%" + ForwardRefVals.begin()->first +
+ "'");
+ if (!ForwardRefValIDs.empty())
+ return P.Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '%" +
+ Twine(ForwardRefValIDs.begin()->first) + "'");
+ return false;
+}
+
+
+/// GetVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
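+/// For illustration (an assumed example, not taken from this file): a branch
+/// such as
+///   br label %next
+/// may mention %next before its block is parsed; a placeholder BasicBlock (or
+/// an Argument, for non-label types) is created and resolved later.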
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
+ const Type *Ty, LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = F.getValueSymbolTable().lookup(Name);
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty->isLabelTy())
+ P.Error(Loc, "'%" + Name + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Don't make placeholders with invalid type.
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
+ else
+ FwdVal = new Argument(Ty, Name);
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
+ LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty->isLabelTy())
+ P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), "", &F);
+ else
+ FwdVal = new Argument(Ty);
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+/// SetInstName - After an instruction is parsed and inserted into its
+/// basic block, this installs its name.
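+/// For illustration (an assumed example, not taken from this file): after
+///   %0 = add i32 %a, %b
+/// the next unnamed instruction must be %1; writing %5 instead triggers the
+/// "instruction expected to be numbered" error below.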
+bool LLParser::PerFunctionState::SetInstName(int NameID,
+ const std::string &NameStr,
+ LocTy NameLoc, Instruction *Inst) {
+ // If this instruction has void type, it cannot have a name or ID specified.
+ if (Inst->getType()->isVoidTy()) {
+ if (NameID != -1 || !NameStr.empty())
+ return P.Error(NameLoc, "instructions returning void cannot have a name");
+ return false;
+ }
+
+ // If this was a numbered instruction, verify that the instruction is the
+ // expected value and resolve any forward references.
+ if (NameStr.empty()) {
+ // If neither a name nor an ID was specified, just use the next ID.
+ if (NameID == -1)
+ NameID = NumberedVals.size();
+
+ if (unsigned(NameID) != NumberedVals.size())
+ return P.Error(NameLoc, "instruction expected to be numbered '%" +
+ Twine(NumberedVals.size()) + "'");
+
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
+ ForwardRefValIDs.find(NameID);
+ if (FI != ForwardRefValIDs.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
+ ForwardRefValIDs.erase(FI);
+ }
+
+ NumberedVals.push_back(Inst);
+ return false;
+ }
+
+ // Otherwise, the instruction had a name. Resolve forward refs and set it.
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ FI = ForwardRefVals.find(NameStr);
+ if (FI != ForwardRefVals.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
+ ForwardRefVals.erase(FI);
+ }
+
+ // Set the name on the instruction.
+ Inst->setName(NameStr);
+
+ if (Inst->getName() != NameStr)
+ return P.Error(NameLoc, "multiple definition of local value named '" +
+ NameStr + "'");
+ return false;
+}
+
+/// GetBB - Get a basic block with the specified name or ID, creating a
+/// forward reference record if needed.
+BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
+ LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(Name,
+ Type::getLabelTy(F.getContext()), Loc));
+}
+
+BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(ID,
+ Type::getLabelTy(F.getContext()), Loc));
+}
+
+/// DefineBB - Define the specified basic block, which is either named or
+/// unnamed. If there is an error, this returns null; otherwise it returns
+/// the block being defined.
+BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
+ LocTy Loc) {
+ BasicBlock *BB;
+ if (Name.empty())
+ BB = GetBB(NumberedVals.size(), Loc);
+ else
+ BB = GetBB(Name, Loc);
+ if (BB == 0) return 0; // Already diagnosed error.
+
+ // Move the block to the end of the function. Forward ref'd blocks are
+ // inserted wherever they happen to be referenced.
+ F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
+
+ // Remove the block from forward ref sets.
+ if (Name.empty()) {
+ ForwardRefValIDs.erase(NumberedVals.size());
+ NumberedVals.push_back(BB);
+ } else {
+ // BB forward references are already in the function symbol table.
+ ForwardRefVals.erase(Name);
+ }
+
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Constants.
+//===----------------------------------------------------------------------===//
+
+/// ParseValID - Parse an abstract value that doesn't necessarily have a
+/// type implied. For example, if we parse "4" we don't know what integer type
+/// it has. The value will later be combined with its type and checked for
+/// sanity. PFS is used to convert function-local operands of metadata (since
+/// metadata operands are not just parsed here but also converted to values).
+/// PFS can be null when we are not parsing metadata values inside a function.
+bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
+ ID.Loc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default: return TokError("expected value token");
+ case lltok::GlobalID: // @42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_GlobalID;
+ break;
+ case lltok::GlobalVar: // @foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_GlobalName;
+ break;
+ case lltok::LocalVarID: // %42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_LocalID;
+ break;
+ case lltok::LocalVar: // %foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_LocalName;
+ break;
+ case lltok::exclaim: // !42, !{...}, or !"foo"
+ return ParseMetadataValue(ID, PFS);
+ case lltok::APSInt:
+ ID.APSIntVal = Lex.getAPSIntVal();
+ ID.Kind = ValID::t_APSInt;
+ break;
+ case lltok::APFloat:
+ ID.APFloatVal = Lex.getAPFloatVal();
+ ID.Kind = ValID::t_APFloat;
+ break;
+ case lltok::kw_true:
+ ID.ConstantVal = ConstantInt::getTrue(Context);
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_false:
+ ID.ConstantVal = ConstantInt::getFalse(Context);
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_null: ID.Kind = ValID::t_Null; break;
+ case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
+ case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
+
+ case lltok::lbrace: {
+ // ValID ::= '{' ConstVector '}'
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rbrace, "expected end of struct constant"))
+ return true;
+
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ ID.UIntVal = Elts.size();
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.Kind = ValID::t_ConstantStruct;
+ return false;
+ }
+ case lltok::less: {
+ // ValID ::= '<' ConstVector '>' --> Vector.
+ // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
+ Lex.Lex();
+ bool isPackedStruct = EatIfPresent(lltok::lbrace);
+
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ (isPackedStruct &&
+ ParseToken(lltok::rbrace, "expected end of packed struct")) ||
+ ParseToken(lltok::greater, "expected end of constant"))
+ return true;
+
+ if (isPackedStruct) {
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.UIntVal = Elts.size();
+ ID.Kind = ValID::t_PackedConstantStruct;
+ return false;
+ }
+
+ if (Elts.empty())
+ return Error(ID.Loc, "constant vector must not be empty");
+
+ if (!Elts[0]->getType()->isIntegerTy() &&
+ !Elts[0]->getType()->isFloatingPointTy())
+ return Error(FirstEltLoc,
+ "vector elements must have integer or floating point type");
+
+ // Verify that all the vector elements have the same type.
+ for (unsigned i = 1, e = Elts.size(); i != e; ++i)
+ if (Elts[i]->getType() != Elts[0]->getType())
+ return Error(FirstEltLoc,
+ "vector element #" + Twine(i) +
+ " is not of type '" + getTypeString(Elts[0]->getType()));
+
+ ID.ConstantVal = ConstantVector::get(Elts);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::lsquare: { // Array Constant
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rsquare, "expected end of array constant"))
+ return true;
+
+ // Handle the empty array case.
+ if (Elts.empty()) {
+ // Use undef instead of an array because it's inconvenient to determine
+ // the element type at this point, there being no elements to examine.
+ ID.Kind = ValID::t_EmptyArray;
+ return false;
+ }
+
+ if (!Elts[0]->getType()->isFirstClassType())
+ return Error(FirstEltLoc, "invalid array element type: " +
+ getTypeString(Elts[0]->getType()));
+
+ ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
+ // Verify all elements are correct type!
+ for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
+ if (Elts[i]->getType() != Elts[0]->getType())
+ return Error(FirstEltLoc,
+ "array element #" + Twine(i) +
+ " is not of type '" + getTypeString(Elts[0]->getType()));
+ }
+
+ ID.ConstantVal = ConstantArray::get(ATy, Elts);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_c: // c "foo"
+ Lex.Lex();
+ ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
+ if (ParseToken(lltok::StringConstant, "expected string")) return true;
+ ID.Kind = ValID::t_Constant;
+ return false;
+
+ case lltok::kw_asm: {
+ // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT
+ bool HasSideEffect, AlignStack;
+ Lex.Lex();
+ if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+ ParseOptionalToken(lltok::kw_alignstack, AlignStack) ||
+ ParseStringConstant(ID.StrVal) ||
+ ParseToken(lltok::comma, "expected comma in inline asm expression") ||
+ ParseToken(lltok::StringConstant, "expected constraint string"))
+ return true;
+ ID.StrVal2 = Lex.getStrVal();
+ ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1);
+ ID.Kind = ValID::t_InlineAsm;
+ return false;
+ }
+
+ case lltok::kw_blockaddress: {
+ // ValID ::= 'blockaddress' '(' @foo ',' %bar ')'
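+ // For illustration (assumed .ll syntax, not quoted from this file):
+ //   i8* blockaddress(@func, %target)
+ // The referenced block may live in a function that has not been parsed yet,
+ // which is why a placeholder global is created below.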
+ Lex.Lex();
+
+ ValID Fn, Label;
+ LocTy FnLoc, LabelLoc;
+
+ if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
+ ParseValID(Fn) ||
+ ParseToken(lltok::comma, "expected comma in block address expression")||
+ ParseValID(Label) ||
+ ParseToken(lltok::rparen, "expected ')' in block address expression"))
+ return true;
+
+ if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
+ return Error(Fn.Loc, "expected function name in blockaddress");
+ if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
+ return Error(Label.Loc, "expected basic block name in blockaddress");
+
+ // Make a global variable as a placeholder for this reference.
+ GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
+ ID.ConstantVal = FwdRef;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: {
+ unsigned Opc = Lex.getUIntVal();
+ Type *DestTy = 0;
+ Constant *SrcVal;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
+ ParseGlobalTypeAndValue(SrcVal) ||
+ ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
+ ParseType(DestTy) ||
+ ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
+ return true;
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
+ return Error(ID.Loc, "invalid cast opcode for cast from '" +
+ getTypeString(SrcVal->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
+ ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
+ SrcVal, DestTy);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_extractvalue: {
+ Lex.Lex();
+ Constant *Val;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
+ return true;
+
+ if (!Val->getType()->isAggregateType())
+ return Error(ID.Loc, "extractvalue operand must be aggregate type");
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for extractvalue");
+ ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_insertvalue: {
+ Lex.Lex();
+ Constant *Val0, *Val1;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+ return true;
+ if (!Val0->getType()->isAggregateType())
+ return Error(ID.Loc, "insertvalue operand must be aggregate type");
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for insertvalue");
+ ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp: {
+ unsigned PredVal, Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseCmpPredicate(PredVal, Opc) ||
+ ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+ return true;
+
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "compare operands must have the same type");
+
+ CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
+
+ if (Opc == Instruction::FCmp) {
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "fcmp requires floating point operands");
+ ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
+ if (!Val0->getType()->isIntOrIntVectorTy() &&
+ !Val0->getType()->isPointerTy())
+ return Error(ID.Loc, "icmp requires pointer or integer operands");
+ ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
+ }
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_fadd:
+ case lltok::kw_sub:
+ case lltok::kw_fsub:
+ case lltok::kw_mul:
+ case lltok::kw_fmul:
+ case lltok::kw_udiv:
+ case lltok::kw_sdiv:
+ case lltok::kw_fdiv:
+ case lltok::kw_urem:
+ case lltok::kw_srem:
+ case lltok::kw_frem:
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool NUW = false;
+ bool NSW = false;
+ bool Exact = false;
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ LocTy ModifierLoc = Lex.getLoc();
+ if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul || Opc == Instruction::Shl) {
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ if (EatIfPresent(lltok::kw_nsw)) {
+ NSW = true;
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ }
+ } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr || Opc == Instruction::AShr) {
+ if (EatIfPresent(lltok::kw_exact))
+ Exact = true;
+ }
+ if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in binary constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVectorTy()) {
+ if (NUW)
+ return Error(ModifierLoc, "nuw only applies to integer operations");
+ if (NSW)
+ return Error(ModifierLoc, "nsw only applies to integer operations");
+ }
+ // Check that the type is valid for the operator.
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc, "constexpr requires integer operands");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "constexpr requires fp operands");
+ break;
+ default: llvm_unreachable("Unknown binary operator!");
+ }
+ unsigned Flags = 0;
+ if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Exact) Flags |= PossiblyExactOperator::IsExact;
+ Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
+ ID.ConstantVal = C;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Logical Operations
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc,
+ "constexpr requires integer or integer vector operands");
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_getelementptr:
+ case lltok::kw_shufflevector:
+ case lltok::kw_insertelement:
+ case lltok::kw_extractelement:
+ case lltok::kw_select: {
+ unsigned Opc = Lex.getUIntVal();
+ SmallVector<Constant*, 16> Elts;
+ bool InBounds = false;
+ Lex.Lex();
+ if (Opc == Instruction::GetElementPtr)
+ InBounds = EatIfPresent(lltok::kw_inbounds);
+ if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
+ ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rparen, "expected ')' in constantexpr"))
+ return true;
+
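+ // For illustration (assumed .ll syntax, not quoted from this file), a
+ // getelementptr constant expression looks like
+ //   getelementptr inbounds ([10 x i32]* @arr, i64 0, i64 3)
+ // where Elts[0] is the pointer operand and the remaining elements are the
+ // indices.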
+ if (Opc == Instruction::GetElementPtr) {
+ if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+ return Error(ID.Loc, "getelementptr requires pointer operand");
+
+ if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
+ (Value**)(Elts.data() + 1),
+ Elts.size() - 1))
+ return Error(ID.Loc, "invalid indices for getelementptr");
+ ID.ConstantVal = InBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(Elts[0],
+ Elts.data() + 1,
+ Elts.size() - 1) :
+ ConstantExpr::getGetElementPtr(Elts[0],
+ Elts.data() + 1, Elts.size() - 1);
+ } else if (Opc == Instruction::Select) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to select");
+ if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
+ Elts[2]))
+ return Error(ID.Loc, Reason);
+ ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
+ } else if (Opc == Instruction::ShuffleVector) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to shufflevector");
+ if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid operands to shufflevector");
+ ID.ConstantVal =
+ ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]);
+ } else if (Opc == Instruction::ExtractElement) {
+ if (Elts.size() != 2)
+ return Error(ID.Loc, "expected two operands to extractelement");
+ if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
+ return Error(ID.Loc, "invalid extractelement operands");
+ ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
+ } else {
+ assert(Opc == Instruction::InsertElement && "Unknown opcode");
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to insertelement");
+ if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid insertelement operands");
+ ID.ConstantVal =
+ ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
+ }
+
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseGlobalValue - Parse a global value with the specified type.
+bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
+ C = 0;
+ ValID ID;
+ Value *V = NULL;
+ bool Parsed = ParseValID(ID) ||
+ ConvertValIDToValue(Ty, ID, V, NULL);
+ if (V && !(C = dyn_cast<Constant>(V)))
+ return Error(ID.Loc, "global values must be constants");
+ return Parsed;
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseGlobalValue(Ty, V);
+}
+
+/// ParseGlobalValueVector
+/// ::= /*empty*/
+/// ::= TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+ // Empty list.
+ if (Lex.getKind() == lltok::rbrace ||
+ Lex.getKind() == lltok::rsquare ||
+ Lex.getKind() == lltok::greater ||
+ Lex.getKind() == lltok::rparen)
+ return false;
+
+ Constant *C;
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+ }
+
+ return false;
+}
+
+bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex();
+
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.MDNodeVal = MDNode::get(Context, Elts);
+ ID.Kind = ValID::t_MDNode;
+ return false;
+}
+
+/// ParseMetadataValue
+/// ::= !42
+/// ::= !{...}
+/// ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+
+ // MDNode:
+ // !{ ... }
+ if (Lex.getKind() == lltok::lbrace)
+ return ParseMetadataListValue(ID, PFS);
+
+ // Standalone metadata reference
+ // !42
+ if (Lex.getKind() == lltok::APSInt) {
+ if (ParseMDNodeID(ID.MDNodeVal)) return true;
+ ID.Kind = ValID::t_MDNode;
+ return false;
+ }
+
+ // MDString:
+ // ::= '!' STRINGCONSTANT
+ if (ParseMDString(ID.MDStringVal)) return true;
+ ID.Kind = ValID::t_MDString;
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS) {
+ if (Ty->isFunctionTy())
+ return Error(ID.Loc, "functions are not values, refer to them as pointers");
+
+ switch (ID.Kind) {
+ default: llvm_unreachable("Unknown ValID!");
+ case ValID::t_LocalID:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_LocalName:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_InlineAsm: {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ const FunctionType *FTy =
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+ if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+ return Error(ID.Loc, "invalid type for inline asm constraint string");
+ V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
+ return false;
+ }
+ case ValID::t_MDNode:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDNodeVal;
+ return false;
+ case ValID::t_MDString:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDStringVal;
+ return false;
+ case ValID::t_GlobalName:
+ V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_GlobalID:
+ V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_APSInt:
+ if (!Ty->isIntegerTy())
+ return Error(ID.Loc, "integer constant must have integer type");
+ ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+ V = ConstantInt::get(Context, ID.APSIntVal);
+ return false;
+ case ValID::t_APFloat:
+ if (!Ty->isFloatingPointTy() ||
+ !ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
+ return Error(ID.Loc, "floating point constant invalid for type");
+
+ // The lexer has no type info, so builds all float and double FP constants
+ // as double. Fix this here. Long double does not need this.
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
+ Ty->isFloatTy()) {
+ bool Ignored;
+ ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ }
+ V = ConstantFP::get(Context, ID.APFloatVal);
+
+ if (V->getType() != Ty)
+ return Error(ID.Loc, "floating point constant does not have type '" +
+ getTypeString(Ty) + "'");
+
+ return false;
+ case ValID::t_Null:
+ if (!Ty->isPointerTy())
+ return Error(ID.Loc, "null must be a pointer type");
+ V = ConstantPointerNull::get(cast<PointerType>(Ty));
+ return false;
+ case ValID::t_Undef:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return Error(ID.Loc, "invalid type for undef constant");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_EmptyArray:
+ if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
+ return Error(ID.Loc, "invalid empty array initializer");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_Zero:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return Error(ID.Loc, "invalid type for null constant");
+ V = Constant::getNullValue(Ty);
+ return false;
+ case ValID::t_Constant:
+ if (ID.ConstantVal->getType() != Ty)
+ return Error(ID.Loc, "constant expression type mismatch");
+
+ V = ID.ConstantVal;
+ return false;
+ case ValID::t_ConstantStruct:
+ case ValID::t_PackedConstantStruct:
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (ST->getNumElements() != ID.UIntVal)
+ return Error(ID.Loc,
+ "initializer with struct type has wrong # elements");
+ if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
+ return Error(ID.Loc, "packed'ness of initializer and type don't match");
+
+      // Verify that the elements are compatible with the struct type.
+ for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
+ if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
+ return Error(ID.Loc, "element " + Twine(i) +
+ " of struct initializer doesn't match struct element type");
+
+ V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts,
+ ID.UIntVal));
+ } else
+ return Error(ID.Loc, "constant expression type mismatch");
+ return false;
+ }
+}
+
+bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) {
+ V = 0;
+ ValID ID;
+ return ParseValID(ID, PFS) ||
+ ConvertValIDToValue(Ty, ID, V, PFS);
+}
+
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseValue(Ty, V, PFS);
+}
+
+bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Value *V;
+ Loc = Lex.getLoc();
+ if (ParseTypeAndValue(V, PFS)) return true;
+ if (!isa<BasicBlock>(V))
+ return Error(Loc, "expected a basic block");
+ BB = cast<BasicBlock>(V);
+ return false;
+}
+
+
+/// FunctionHeader
+/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
+/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+/// OptionalAlign OptGC
+bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
+ // Parse the linkage.
+ LocTy LinkageLoc = Lex.getLoc();
+ unsigned Linkage;
+
+ unsigned Visibility, RetAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/))
+ return true;
+
+ // Verify that the linkage is ok.
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::ExternalLinkage:
+ break; // always ok.
+ case GlobalValue::DLLImportLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ if (isDefine)
+ return Error(LinkageLoc, "invalid linkage for function definition");
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::DLLExportLinkage:
+ if (!isDefine)
+ return Error(LinkageLoc, "invalid linkage for function declaration");
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::CommonLinkage:
+ return Error(LinkageLoc, "invalid function linkage type");
+ }
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "invalid function return type");
+
+ LocTy NameLoc = Lex.getLoc();
+
+ std::string FunctionName;
+ if (Lex.getKind() == lltok::GlobalVar) {
+ FunctionName = Lex.getStrVal();
+ } else if (Lex.getKind() == lltok::GlobalID) { // @42 is ok.
+ unsigned NameID = Lex.getUIntVal();
+
+ if (NameID != NumberedVals.size())
+      return TokError("function expected to be numbered '@" +
+ Twine(NumberedVals.size()) + "'");
+ } else {
+ return TokError("expected function name");
+ }
+
+ Lex.Lex();
+
+ if (Lex.getKind() != lltok::lparen)
+ return TokError("expected '(' in function argument list");
+
+ SmallVector<ArgInfo, 8> ArgList;
+ bool isVarArg;
+ unsigned FuncAttrs;
+ std::string Section;
+ unsigned Alignment;
+ std::string GC;
+ bool UnnamedAddr;
+ LocTy UnnamedAddrLoc;
+
+ if (ParseArgumentList(ArgList, isVarArg) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
+ ParseOptionalAttrs(FuncAttrs, 2) ||
+ (EatIfPresent(lltok::kw_section) &&
+ ParseStringConstant(Section)) ||
+ ParseOptionalAlignment(Alignment) ||
+ (EatIfPresent(lltok::kw_gc) &&
+ ParseStringConstant(GC)))
+ return true;
+
+ // If the alignment was parsed as an attribute, move to the alignment field.
+ if (FuncAttrs & Attribute::Alignment) {
+ Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
+ FuncAttrs &= ~Attribute::Alignment;
+ }
+
+ // Okay, if we got here, the function is syntactically valid. Convert types
+ // and do semantic checks.
+ std::vector<Type*> ParamTypeList;
+ SmallVector<AttributeWithIndex, 8> Attrs;
+
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ ParamTypeList.push_back(ArgList[i].Ty);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (FuncAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));
+
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy())
+ return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
+ const FunctionType *FT =
+ FunctionType::get(RetType, ParamTypeList, isVarArg);
+ const PointerType *PFT = PointerType::getUnqual(FT);
+
+ Fn = 0;
+ if (!FunctionName.empty()) {
+ // If this was a definition of a forward reference, remove the definition
+ // from the forward reference table and fill in the forward ref.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator FRVI =
+ ForwardRefVals.find(FunctionName);
+ if (FRVI != ForwardRefVals.end()) {
+ Fn = M->getFunction(FunctionName);
+ if (Fn->getType() != PFT)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function '" + FunctionName + "' with wrong type!");
+
+ ForwardRefVals.erase(FRVI);
+ } else if ((Fn = M->getFunction(FunctionName))) {
+ // Reject redefinitions.
+ return Error(NameLoc, "invalid redefinition of function '" +
+ FunctionName + "'");
+ } else if (M->getNamedValue(FunctionName)) {
+ return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
+ }
+
+ } else {
+ // If this is a definition of a forward referenced function, make sure the
+ // types agree.
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
+ = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ Fn = cast<Function>(I->second.first);
+ if (Fn->getType() != PFT)
+ return Error(NameLoc, "type of definition and forward reference of '@" +
+ Twine(NumberedVals.size()) + "' disagree");
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (Fn == 0)
+ Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+ else // Move the forward-reference to the correct spot in the module.
+ M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+
+ if (FunctionName.empty())
+ NumberedVals.push_back(Fn);
+
+ Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ Fn->setCallingConv(CC);
+ Fn->setAttributes(PAL);
+ Fn->setUnnamedAddr(UnnamedAddr);
+ Fn->setAlignment(Alignment);
+ Fn->setSection(Section);
+ if (!GC.empty()) Fn->setGC(GC.c_str());
+
+ // Add all of the arguments we parsed to the function.
+ Function::arg_iterator ArgIt = Fn->arg_begin();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
+ // If the argument has a name, insert it into the argument symbol table.
+ if (ArgList[i].Name.empty()) continue;
+
+ // Set the name, if it conflicted, it will be auto-renamed.
+ ArgIt->setName(ArgList[i].Name);
+
+ if (ArgIt->getName() != ArgList[i].Name)
+ return Error(ArgList[i].Loc, "redefinition of argument '%" +
+ ArgList[i].Name + "'");
+ }
+
+ return false;
+}
+
+
+/// ParseFunctionBody
+/// ::= '{' BasicBlock+ '}'
+///
+bool LLParser::ParseFunctionBody(Function &Fn) {
+ if (Lex.getKind() != lltok::lbrace)
+ return TokError("expected '{' in function body");
+ Lex.Lex(); // eat the {.
+
+ int FunctionNumber = -1;
+ if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
+
+ PerFunctionState PFS(*this, Fn, FunctionNumber);
+
+ // We need at least one basic block.
+ if (Lex.getKind() == lltok::rbrace)
+ return TokError("function body requires at least one basic block");
+
+ while (Lex.getKind() != lltok::rbrace)
+ if (ParseBasicBlock(PFS)) return true;
+
+ // Eat the }.
+ Lex.Lex();
+
+ // Verify function is ok.
+ return PFS.FinishFunction();
+}
+
+/// ParseBasicBlock
+/// ::= LabelStr? Instruction*
+bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
+ // If this basic block starts out with a name, remember it.
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+ if (Lex.getKind() == lltok::LabelStr) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+ if (BB == 0) return true;
+
+ std::string NameStr;
+
+ // Parse the instructions in this block until we get a terminator.
+ Instruction *Inst;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MetadataOnInst;
+ do {
+    // There are three possibilities for naming this instruction: a) no name
+    // specified, b) a name specified as "%foo =", c) a number specified as "%4 =".
+ LocTy NameLoc = Lex.getLoc();
+ int NameID = -1;
+ NameStr = "";
+
+ if (Lex.getKind() == lltok::LocalVarID) {
+ NameID = Lex.getUIntVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction id"))
+ return true;
+ } else if (Lex.getKind() == lltok::LocalVar) {
+ NameStr = Lex.getStrVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction name"))
+ return true;
+ }
+
+ switch (ParseInstruction(Inst, BB, PFS)) {
+ default: assert(0 && "Unknown ParseInstruction result!");
+ case InstError: return true;
+ case InstNormal:
+ BB->getInstList().push_back(Inst);
+
+ // With a normal result, we check to see if the instruction is followed by
+ // a comma and metadata.
+ if (EatIfPresent(lltok::comma))
+ if (ParseInstructionMetadata(Inst, &PFS))
+ return true;
+ break;
+ case InstExtraComma:
+ BB->getInstList().push_back(Inst);
+
+ // If the instruction parser ate an extra comma at the end of it, it
+ // *must* be followed by metadata.
+ if (ParseInstructionMetadata(Inst, &PFS))
+ return true;
+ break;
+ }
+
+ // Set the name on the instruction.
+ if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
+ } while (!isa<TerminatorInst>(Inst));
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseInstruction - Parse one of the many different instructions.
+///
+int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ lltok::Kind Token = Lex.getKind();
+ if (Token == lltok::Eof)
+ return TokError("found end of file when expecting more instructions");
+ LocTy Loc = Lex.getLoc();
+ unsigned KeywordVal = Lex.getUIntVal();
+ Lex.Lex(); // Eat the keyword.
+
+ switch (Token) {
+ default: return Error(Loc, "expected instruction opcode");
+ // Terminator Instructions.
+ case lltok::kw_unwind: Inst = new UnwindInst(Context); return false;
+ case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
+ case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
+ case lltok::kw_br: return ParseBr(Inst, PFS);
+ case lltok::kw_switch: return ParseSwitch(Inst, PFS);
+ case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
+ case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_sub:
+ case lltok::kw_mul:
+ case lltok::kw_shl: {
+ bool NUW = EatIfPresent(lltok::kw_nuw);
+ bool NSW = EatIfPresent(lltok::kw_nsw);
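+    // The wrap flags may appear in either order, so check for 'nuw' again in
+    // case 'nsw' came first.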
+ if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+
+ if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+ if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+ return false;
+ }
+ case lltok::kw_fadd:
+ case lltok::kw_fsub:
+ case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+
+ case lltok::kw_sdiv:
+ case lltok::kw_udiv:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool Exact = EatIfPresent(lltok::kw_exact);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
+ return false;
+ }
+
+ case lltok::kw_urem:
+ case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+ case lltok::kw_fdiv:
+ case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal);
+ // Casts.
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
+ // Other.
+ case lltok::kw_select: return ParseSelect(Inst, PFS);
+ case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
+ case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
+ case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
+ case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
+ case lltok::kw_phi: return ParsePHI(Inst, PFS);
+ case lltok::kw_call: return ParseCall(Inst, PFS, false);
+ case lltok::kw_tail: return ParseCall(Inst, PFS, true);
+ // Memory.
+ case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
+ case lltok::kw_load: return ParseLoad(Inst, PFS, false);
+ case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_volatile:
+ if (EatIfPresent(lltok::kw_load))
+ return ParseLoad(Inst, PFS, true);
+ else if (EatIfPresent(lltok::kw_store))
+ return ParseStore(Inst, PFS, true);
+ else
+ return TokError("expected 'load' or 'store'");
+ case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
+ case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
+ case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
+ }
+}
+
+/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
+bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+ if (Opc == Instruction::FCmp) {
+ switch (Lex.getKind()) {
+    default: return TokError("expected fcmp predicate (e.g. 'oeq')");
+ case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
+ case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
+ case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
+ case lltok::kw_ogt: P = CmpInst::FCMP_OGT; break;
+ case lltok::kw_ole: P = CmpInst::FCMP_OLE; break;
+ case lltok::kw_oge: P = CmpInst::FCMP_OGE; break;
+ case lltok::kw_ord: P = CmpInst::FCMP_ORD; break;
+ case lltok::kw_uno: P = CmpInst::FCMP_UNO; break;
+ case lltok::kw_ueq: P = CmpInst::FCMP_UEQ; break;
+ case lltok::kw_une: P = CmpInst::FCMP_UNE; break;
+ case lltok::kw_ult: P = CmpInst::FCMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::FCMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::FCMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::FCMP_UGE; break;
+ case lltok::kw_true: P = CmpInst::FCMP_TRUE; break;
+ case lltok::kw_false: P = CmpInst::FCMP_FALSE; break;
+ }
+ } else {
+ switch (Lex.getKind()) {
+    default: return TokError("expected icmp predicate (e.g. 'eq')");
+ case lltok::kw_eq: P = CmpInst::ICMP_EQ; break;
+ case lltok::kw_ne: P = CmpInst::ICMP_NE; break;
+ case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
+ case lltok::kw_sgt: P = CmpInst::ICMP_SGT; break;
+ case lltok::kw_sle: P = CmpInst::ICMP_SLE; break;
+ case lltok::kw_sge: P = CmpInst::ICMP_SGE; break;
+ case lltok::kw_ult: P = CmpInst::ICMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::ICMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::ICMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::ICMP_UGE; break;
+ }
+ }
+ Lex.Lex();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Terminator Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseRet - Parse a return instruction.
+/// ::= 'ret' void (',' !dbg, !1)*
+/// ::= 'ret' TypeAndValue (',' !dbg, !1)*
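+///
+/// For example (operand names are illustrative):
+///     ret void
+///     ret i32 %retval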
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ SMLoc TypeLoc = Lex.getLoc();
+ Type *Ty = 0;
+ if (ParseType(Ty, true /*void allowed*/)) return true;
+
+ Type *ResType = PFS.getFunction().getReturnType();
+
+ if (Ty->isVoidTy()) {
+ if (!ResType->isVoidTy())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
+ Inst = ReturnInst::Create(Context);
+ return false;
+ }
+
+ Value *RV;
+ if (ParseValue(Ty, RV, PFS)) return true;
+
+ if (ResType != RV->getType())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
+ Inst = ReturnInst::Create(Context, RV);
+ return false;
+}
+
+
+/// ParseBr
+/// ::= 'br' TypeAndValue
+/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
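+///
+/// For example (condition and labels are illustrative):
+///     br label %dest
+///     br i1 %cond, label %iftrue, label %iffalse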
+bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc, Loc2;
+ Value *Op0;
+ BasicBlock *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
+ Inst = BranchInst::Create(BB);
+ return false;
+ }
+
+ if (Op0->getType() != Type::getInt1Ty(Context))
+ return Error(Loc, "branch condition must have 'i1' type");
+
+ if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
+ ParseTypeAndBasicBlock(Op1, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after true destination") ||
+ ParseTypeAndBasicBlock(Op2, Loc2, PFS))
+ return true;
+
+ Inst = BranchInst::Create(Op1, Op2, Op0);
+ return false;
+}
+
+/// ParseSwitch
+/// Instruction
+/// ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
+/// JumpTable
+/// ::= (TypeAndValue ',' TypeAndValue)*
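+///
+/// For example (values and labels are illustrative):
+///     switch i32 %val, label %otherwise [ i32 0, label %onzero
+///                                         i32 1, label %onone ]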
+bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CondLoc, BBLoc;
+ Value *Cond;
+ BasicBlock *DefaultBB;
+ if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after switch condition") ||
+ ParseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
+ ParseToken(lltok::lsquare, "expected '[' with switch table"))
+ return true;
+
+ if (!Cond->getType()->isIntegerTy())
+ return Error(CondLoc, "switch condition must have integer type");
+
+ // Parse the jump table pairs.
+ SmallPtrSet<Value*, 32> SeenCases;
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
+ while (Lex.getKind() != lltok::rsquare) {
+ Value *Constant;
+ BasicBlock *DestBB;
+
+ if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after case value") ||
+ ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+
+ if (!SeenCases.insert(Constant))
+ return Error(CondLoc, "duplicate case value in switch");
+ if (!isa<ConstantInt>(Constant))
+ return Error(CondLoc, "case value is not a constant integer");
+
+ Table.push_back(std::make_pair(cast<ConstantInt>(Constant), DestBB));
+ }
+
+ Lex.Lex(); // Eat the ']'.
+
+ SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, Table.size());
+ for (unsigned i = 0, e = Table.size(); i != e; ++i)
+ SI->addCase(Table[i].first, Table[i].second);
+ Inst = SI;
+ return false;
+}
+
+/// ParseIndirectBr
+/// Instruction
+/// ::= 'indirectbr' TypeAndValue ',' '[' LabelList ']'
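+///
+/// For example (names are illustrative):
+///     indirectbr i8* %Addr, [ label %bb1, label %bb2 ]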
+bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy AddrLoc;
+ Value *Address;
+ if (ParseTypeAndValue(Address, AddrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
+ ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
+ return true;
+
+ if (!Address->getType()->isPointerTy())
+ return Error(AddrLoc, "indirectbr address must have pointer type");
+
+ // Parse the destination list.
+ SmallVector<BasicBlock*, 16> DestList;
+
+ if (Lex.getKind() != lltok::rsquare) {
+ BasicBlock *DestBB;
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+ }
+ }
+
+ if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ return true;
+
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, DestList.size());
+ for (unsigned i = 0, e = DestList.size(); i != e; ++i)
+ IBI->addDestination(DestList[i]);
+ Inst = IBI;
+ return false;
+}
+
+
+/// ParseInvoke
+/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
+/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
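+///
+/// For example (callee and labels are illustrative):
+///     invoke i32 @Callee(i32 %arg) to label %Cont unwind label %Cleanup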
+bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CallLoc = Lex.getLoc();
+ unsigned RetAttrs, FnAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+
+ BasicBlock *NormalBB, *UnwindBB;
+ if (ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseOptionalAttrs(FnAttrs, 2) ||
+ ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
+ ParseTypeAndBasicBlock(NormalBB, PFS) ||
+ ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
+ ParseTypeAndBasicBlock(UnwindBB, PFS))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ const PointerType *PFTy = 0;
+ const FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+ // Set up the Attributes for the function.
+ SmallVector<AttributeWithIndex, 8> Attrs;
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ const Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Finish off the Attributes and check them
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
+ II->setCallingConv(CC);
+ II->setAttributes(PAL);
+ Inst = II;
+ return false;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Binary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseArithmetic
+/// ::= ArithmeticOps TypeAndValue ',' Value
+///
+/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
+/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
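+///
+/// For example (operand names are illustrative), 'add i32 %a, %b' is parsed
+/// with OperandType 1 and 'fdiv double %x, %y' with OperandType 2.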
+bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, unsigned OperandType) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ bool Valid;
+ switch (OperandType) {
+ default: llvm_unreachable("Unknown operand type!");
+ case 0: // int or FP.
+ Valid = LHS->getType()->isIntOrIntVectorTy() ||
+ LHS->getType()->isFPOrFPVectorTy();
+ break;
+ case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
+ case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
+ }
+
+ if (!Valid)
+ return Error(Loc, "invalid operand type for instruction");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+/// ParseLogical
+///  ::= ArithmeticOps TypeAndValue ',' Value
+bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in logical operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (!LHS->getType()->isIntOrIntVectorTy())
+ return Error(Loc,"instruction requires integer or integer vector operands");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+
+/// ParseCompare
+/// ::= 'icmp' IPredicates TypeAndValue ',' Value
+/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
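+///
+/// For example (operand names are illustrative):
+///     icmp eq i32 %a, %b
+///     fcmp olt double %x, %y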
+bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ // Parse the integer/fp comparison predicate.
+ LocTy Loc;
+ unsigned Pred;
+ Value *LHS, *RHS;
+ if (ParseCmpPredicate(Pred, Opc) ||
+ ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after compare value") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (Opc == Instruction::FCmp) {
+ if (!LHS->getType()->isFPOrFPVectorTy())
+ return Error(Loc, "fcmp requires floating point operands");
+ Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
+ if (!LHS->getType()->isIntOrIntVectorTy() &&
+ !LHS->getType()->isPointerTy())
+ return Error(Loc, "icmp requires integer operands");
+ Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Instructions.
+//===----------------------------------------------------------------------===//
+
+
+/// ParseCast
+/// ::= CastOpc TypeAndValue 'to' Type
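+///
+/// For example (operand names are illustrative):
+///     trunc i32 %x to i8
+///     bitcast i8* %ptr to i32*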
+bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc;
+ Value *Op;
+ Type *DestTy = 0;
+ if (ParseTypeAndValue(Op, Loc, PFS) ||
+ ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
+ ParseType(DestTy))
+ return true;
+
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
+ return Error(Loc, "invalid cast opcode for cast from '" +
+ getTypeString(Op->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
+ }
+ Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
+ return false;
+}
+
+/// ParseSelect
+/// ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
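+///
+/// For example (names are illustrative):
+///     select i1 %cond, i32 %a, i32 %b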
+bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select condition") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
+ return Error(Loc, Reason);
+
+ Inst = SelectInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseVA_Arg
+/// ::= 'va_arg' TypeAndValue ',' Type
+bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Op;
+ Type *EltTy = 0;
+ LocTy TypeLoc;
+ if (ParseTypeAndValue(Op, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
+ ParseType(EltTy, TypeLoc))
+ return true;
+
+ if (!EltTy->isFirstClassType())
+ return Error(TypeLoc, "va_arg requires operand with first class type");
+
+ Inst = new VAArgInst(Op, EltTy);
+ return false;
+}
+
+/// ParseExtractElement
+/// ::= 'extractelement' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after extract value") ||
+ ParseTypeAndValue(Op1, PFS))
+ return true;
+
+ if (!ExtractElementInst::isValidOperands(Op0, Op1))
+ return Error(Loc, "invalid extractelement operands");
+
+ Inst = ExtractElementInst::Create(Op0, Op1);
+ return false;
+}
+
+/// ParseInsertElement
+/// ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
+ return Error(Loc, "invalid insertelement operands");
+
+ Inst = InsertElementInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseShuffleVector
+/// ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
+    return Error(Loc, "invalid shufflevector operands");
+
+ Inst = new ShuffleVectorInst(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParsePHI
+/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
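+///
+/// For example (values and labels are illustrative):
+///     phi i32 [ %a, %entry ], [ %b, %loop ]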
+int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+ Type *Ty = 0; LocTy TypeLoc;
+ Value *Op0, *Op1;
+
+ if (ParseType(Ty, TypeLoc) ||
+ ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after phi value") ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+
+ bool AteExtraComma = false;
+ SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
+ while (1) {
+ PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
+
+ if (!EatIfPresent(lltok::comma))
+ break;
+
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ break;
+ }
+
+ if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+        ParseToken(lltok::comma, "expected ',' after phi value") ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+ }
+
+ if (!Ty->isFirstClassType())
+ return Error(TypeLoc, "phi node must have first class type");
+
+ PHINode *PN = PHINode::Create(Ty, PHIVals.size());
+ for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
+ PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
+ Inst = PN;
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseCall
+/// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ParameterList OptionalAttrs
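+///
+/// For example (callee and arguments are illustrative):
+///     call i32 @Callee(i32 %x, i8* %p)
+///     tail call fastcc void @Helper()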
+bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+ bool isTail) {
+ unsigned RetAttrs, FnAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+ LocTy CallLoc = Lex.getLoc();
+
+ if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseOptionalAttrs(FnAttrs, 2))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ const PointerType *PFTy = 0;
+ const FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+ // Set up the Attributes for the function.
+ SmallVector<AttributeWithIndex, 8> Attrs;
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ const Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Finish off the Attributes and check them
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ CallInst *CI = CallInst::Create(Callee, Args);
+ CI->setTailCall(isTail);
+ CI->setCallingConv(CC);
+ CI->setAttributes(PAL);
+ Inst = CI;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseAlloc
+/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
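+///
+/// For example (names are illustrative):
+///     alloca i32
+///     alloca i32, i32 %count, align 4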
+int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Size = 0;
+ LocTy SizeLoc;
+ unsigned Alignment = 0;
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+
+ bool AteExtraComma = false;
+ if (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::kw_align) {
+ if (ParseOptionalAlignment(Alignment)) return true;
+ } else if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ } else {
+ if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+ }
+ }
+
+ if (Size && !Size->getType()->isIntegerTy())
+ return Error(SizeLoc, "element count must have integer type");
+
+ Inst = new AllocaInst(Ty, Size, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseLoad
+/// ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)?
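+///
+/// For example (names are illustrative):
+///     load i32* %ptr, align 4
+///     volatile load i32* %ptr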
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
+ bool isVolatile) {
+ Value *Val; LocTy Loc;
+ unsigned Alignment = 0;
+ bool AteExtraComma = false;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+
+ if (!Val->getType()->isPointerTy() ||
+ !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+ return Error(Loc, "load operand must be a pointer to a first class type");
+
+ Inst = new LoadInst(Val, "", isVolatile, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseStore
+/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
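+///
+/// For example (names are illustrative):
+///     store i32 %val, i32* %ptr, align 4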
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
+ bool isVolatile) {
+ Value *Val, *Ptr; LocTy Loc, PtrLoc;
+ unsigned Alignment = 0;
+ bool AteExtraComma = false;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after store operand") ||
+ ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "store operand must be a pointer");
+ if (!Val->getType()->isFirstClassType())
+ return Error(Loc, "store operand must be a first class value");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+ return Error(Loc, "stored value and pointer type do not match");
+
+ Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseGetElementPtr
+/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
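+///
+/// For example (names are illustrative):
+///     getelementptr inbounds [10 x i32]* %arr, i32 0, i32 %idx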
+int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Val; LocTy Loc, EltLoc;
+
+ bool InBounds = EatIfPresent(lltok::kw_inbounds);
+
+ if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+
+ if (!Ptr->getType()->isPointerTy())
+ return Error(Loc, "base of getelementptr must be a pointer");
+
+ SmallVector<Value*, 16> Indices;
+ bool AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ break;
+ }
+ if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
+ if (!Val->getType()->isIntegerTy())
+ return Error(EltLoc, "getelementptr index must be an integer");
+ Indices.push_back(Val);
+ }
+
+ if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
+ Indices.begin(), Indices.end()))
+ return Error(Loc, "invalid getelementptr indices");
+ Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+ if (InBounds)
+ cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseExtractValue
+/// ::= 'extractvalue' TypeAndValue (',' uint32)+
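+///
+/// For example (names are illustrative):
+///     extractvalue { i32, float } %agg, 0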
+int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy Loc;
+ SmallVector<unsigned, 4> Indices;
+ bool AteExtraComma;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseIndexList(Indices, AteExtraComma))
+ return true;
+
+ if (!Val->getType()->isAggregateType())
+ return Error(Loc, "extractvalue operand must be aggregate type");
+
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
+ return Error(Loc, "invalid indices for extractvalue");
+ Inst = ExtractValueInst::Create(Val, Indices);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseInsertValue
+/// ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+
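+///
+/// For example (names are illustrative):
+///     insertvalue { i32, float } %agg, i32 42, 0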
+int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val0, *Val1; LocTy Loc0, Loc1;
+ SmallVector<unsigned, 4> Indices;
+ bool AteExtraComma;
+ if (ParseTypeAndValue(Val0, Loc0, PFS) ||
+ ParseToken(lltok::comma, "expected comma after insertvalue operand") ||
+ ParseTypeAndValue(Val1, Loc1, PFS) ||
+ ParseIndexList(Indices, AteExtraComma))
+ return true;
+
+ if (!Val0->getType()->isAggregateType())
+ return Error(Loc0, "insertvalue operand must be aggregate type");
+
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ return Error(Loc0, "invalid indices for insertvalue");
+ Inst = InsertValueInst::Create(Val0, Val1, Indices);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+//===----------------------------------------------------------------------===//
+// Embedded metadata.
+//===----------------------------------------------------------------------===//
+
+/// ParseMDNodeVector
+/// ::= Element (',' Element)*
+/// Element
+/// ::= 'null' | TypeAndValue
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
+ PerFunctionState *PFS) {
+ // Check for an empty list.
+ if (Lex.getKind() == lltok::rbrace)
+ return false;
+
+ do {
+ // Null is a special case since it is typeless.
+ if (EatIfPresent(lltok::kw_null)) {
+ Elts.push_back(0);
+ continue;
+ }
+
+ Value *V = 0;
+ if (ParseTypeAndValue(V, PFS)) return true;
+ Elts.push_back(V);
+ } while (EatIfPresent(lltok::comma));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
new file mode 100644
index 000000000000..963065785061
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -0,0 +1,373 @@
+//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
+
+#include "LLLexer.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include <map>
+
+namespace llvm {
+ class Module;
+ class OpaqueType;
+ class Function;
+ class Value;
+ class BasicBlock;
+ class Instruction;
+ class Constant;
+ class GlobalValue;
+ class MDString;
+ class MDNode;
+ class StructType;
+
+  /// ValID - Represents a reference to a definition of some sort with no type.
+ /// There are several cases where we have to parse the value but where the
+ /// type can depend on later context. This may either be a numeric reference
+ /// or a symbolic (%var) reference. This is just a discriminated union.
+ struct ValID {
+ enum {
+ t_LocalID, t_GlobalID, // ID in UIntVal.
+ t_LocalName, t_GlobalName, // Name in StrVal.
+ t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
+ t_Null, t_Undef, t_Zero, // No value.
+ t_EmptyArray, // No value: []
+ t_Constant, // Value in ConstantVal.
+ t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
+ t_MDNode, // Value in MDNodeVal.
+ t_MDString, // Value in MDStringVal.
+ t_ConstantStruct, // Value in ConstantStructElts.
+ t_PackedConstantStruct // Value in ConstantStructElts.
+ } Kind;
+
+ LLLexer::LocTy Loc;
+ unsigned UIntVal;
+ std::string StrVal, StrVal2;
+ APSInt APSIntVal;
+ APFloat APFloatVal;
+ Constant *ConstantVal;
+ MDNode *MDNodeVal;
+ MDString *MDStringVal;
+ Constant **ConstantStructElts;
+
+ ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
+ ~ValID() {
+ if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
+ delete [] ConstantStructElts;
+ }
+
+ bool operator<(const ValID &RHS) const {
+ if (Kind == t_LocalID || Kind == t_GlobalID)
+ return UIntVal < RHS.UIntVal;
+ assert((Kind == t_LocalName || Kind == t_GlobalName ||
+ Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
+ "Ordering not defined for this ValID kind yet");
+ return StrVal < RHS.StrVal;
+ }
+ };
+
+ class LLParser {
+ public:
+ typedef LLLexer::LocTy LocTy;
+ private:
+ LLVMContext &Context;
+ LLLexer Lex;
+ Module *M;
+
+ // Instruction metadata resolution. Each instruction can have a list of
+    // MDRef info associated with it.
+ //
+ // The simpler approach of just creating temporary MDNodes and then calling
+ // RAUW on them when the definition is processed doesn't work because some
+ // instruction metadata kinds, such as dbg, get stored in the IR in an
+ // "optimized" format which doesn't participate in the normal value use
+ // lists. This means that RAUW doesn't work, even on temporary MDNodes
+ // which otherwise support RAUW. Instead, we defer resolving MDNode
+ // references until the definitions have been processed.
+ struct MDRef {
+ SMLoc Loc;
+ unsigned MDKind, MDSlot;
+ };
+ DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
+
+ // Type resolution handling data structures. The location is set when we
+ // have processed a use of the type but not a definition yet.
+ StringMap<std::pair<Type*, LocTy> > NamedTypes;
+ std::vector<std::pair<Type*, LocTy> > NumberedTypes;
+
+ std::vector<TrackingVH<MDNode> > NumberedMetadata;
+ std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
+
+ // Global Value reference information.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
+ std::vector<GlobalValue*> NumberedVals;
+
+ // References to blockaddress. The key is the function ValID, the value is
+ // a list of references to blocks in that function.
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
+ ForwardRefBlockAddresses;
+
+ public:
+ LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
+ Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
+ M(m) {}
+ bool Run();
+
+ LLVMContext &getContext() { return Context; }
+
+ private:
+
+ bool Error(LocTy L, const Twine &Msg) const {
+ return Lex.Error(L, Msg);
+ }
+ bool TokError(const Twine &Msg) const {
+ return Error(Lex.getLoc(), Msg);
+ }
+
+ /// GetGlobalVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+ // Helper Routines.
+ bool ParseToken(lltok::Kind T, const char *ErrMsg);
+ bool EatIfPresent(lltok::Kind T) {
+ if (Lex.getKind() != T) return false;
+ Lex.Lex();
+ return true;
+ }
+ bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
+ if (Lex.getKind() != T) {
+ Present = false;
+ } else {
+ if (Loc)
+ *Loc = Lex.getLoc();
+ Lex.Lex();
+ Present = true;
+ }
+ return false;
+ }
+ bool ParseStringConstant(std::string &Result);
+ bool ParseUInt32(unsigned &Val);
+ bool ParseUInt32(unsigned &Val, LocTy &Loc) {
+ Loc = Lex.getLoc();
+ return ParseUInt32(Val);
+ }
+ bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+ bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
+ bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
+ bool ParseOptionalLinkage(unsigned &Linkage) {
+ bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
+ }
+ bool ParseOptionalVisibility(unsigned &Visibility);
+ bool ParseOptionalCallingConv(CallingConv::ID &CC);
+ bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseOptionalStackAlignment(unsigned &Alignment);
+ bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
+ bool AteExtraComma;
+ if (ParseIndexList(Indices, AteExtraComma)) return true;
+ if (AteExtraComma)
+ return TokError("expected index");
+ return false;
+ }
+
+ // Top-Level Entities
+ bool ParseTopLevelEntities();
+ bool ValidateEndOfModule();
+ bool ParseTargetDefinition();
+ bool ParseDepLibs();
+ bool ParseModuleAsm();
+ bool ParseUnnamedType();
+ bool ParseNamedType();
+ bool ParseDeclare();
+ bool ParseDefine();
+
+ bool ParseGlobalType(bool &IsConstant);
+ bool ParseUnnamedGlobal();
+ bool ParseNamedGlobal();
+ bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
+ bool HasLinkage, unsigned Visibility);
+ bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
+ bool ParseStandaloneMetadata();
+ bool ParseNamedMetadata();
+ bool ParseMDString(MDString *&Result);
+ bool ParseMDNodeID(MDNode *&Result);
+ bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
+
+ // Type Parsing.
+ bool ParseType(Type *&Result, bool AllowVoid = false);
+ bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
+ Loc = Lex.getLoc();
+ return ParseType(Result, AllowVoid);
+ }
+ bool ParseAnonStructType(Type *&Result, bool Packed);
+ bool ParseStructBody(SmallVectorImpl<Type*> &Body);
+ bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy);
+
+ bool ParseArrayVectorType(Type *&Result, bool isVector);
+ bool ParseFunctionType(Type *&Result);
+
+ // Function Semantic Analysis.
+ class PerFunctionState {
+ LLParser &P;
+ Function &F;
+ std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
+ std::vector<Value*> NumberedVals;
+
+    /// FunctionNumber - If this is an unnamed function, this is its slot
+    /// number; otherwise it is -1.
+ int FunctionNumber;
+ public:
+ PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
+ ~PerFunctionState();
+
+ Function &getFunction() const { return F; }
+
+ bool FinishFunction();
+
+ /// GetVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
+ Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+ /// SetInstName - After an instruction is parsed and inserted into its
+ /// basic block, this installs its name.
+ bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
+ Instruction *Inst);
+
+ /// GetBB - Get a basic block with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// is not a BasicBlock.
+ BasicBlock *GetBB(const std::string &Name, LocTy Loc);
+ BasicBlock *GetBB(unsigned ID, LocTy Loc);
+
+ /// DefineBB - Define the specified basic block, which is either named or
+ /// unnamed. If there is an error, this returns null otherwise it returns
+ /// the block being defined.
+ BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+ };
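+  // Editor's note on PerFunctionState above (not in the upstream file): GetVal
+  // and GetBB may be called before the named entity exists, e.g. for
+  // "br label %later" where %later is defined further down the function. The
+  // placeholder recorded in ForwardRefVals/ForwardRefValIDs is patched when the
+  // real definition is parsed, and FinishFunction() reports anything left
+  // unresolved.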
+
+ bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS);
+
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS);
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+ return ParseValue(Ty, V, &PFS);
+ }
+ bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseValue(Ty, V, &PFS);
+ }
+
+ bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
+ bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+ return ParseTypeAndValue(V, &PFS);
+ }
+ bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseTypeAndValue(V, PFS);
+ }
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS);
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
+ LocTy Loc;
+ return ParseTypeAndBasicBlock(BB, Loc, PFS);
+ }
+
+
+ struct ParamInfo {
+ LocTy Loc;
+ Value *V;
+ unsigned Attrs;
+ ParamInfo(LocTy loc, Value *v, unsigned attrs)
+ : Loc(loc), V(v), Attrs(attrs) {}
+ };
+ bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS);
+
+ // Constant Parsing.
+ bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
+ bool ParseGlobalValue(const Type *Ty, Constant *&V);
+ bool ParseGlobalTypeAndValue(Constant *&V);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
+ bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
+ bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+ bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
+
+ // Function Parsing.
+ struct ArgInfo {
+ LocTy Loc;
+ Type *Ty;
+ unsigned Attrs;
+ std::string Name;
+ ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
+ : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
+ };
+ bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
+ bool ParseFunctionHeader(Function *&Fn, bool isDefine);
+ bool ParseFunctionBody(Function &Fn);
+ bool ParseBasicBlock(PerFunctionState &PFS);
+
+ // Instruction Parsing. Each instruction parsing routine can return with a
+ // normal result, an error result, or return having eaten an extra comma.
+ enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
+ int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS);
+ bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
+
+ bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+ bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
+
+ bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
+ unsigned OperandType);
+ bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
+ bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
+ bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
+ int ParsePHI(Instruction *&I, PerFunctionState &PFS);
+ bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
+ int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
+ int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
+ int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
+
+ bool ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
new file mode 100644
index 000000000000..a5f89fcce0c0
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -0,0 +1,149 @@
+//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the enums for the .ll lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBS_ASMPARSER_LLTOKEN_H
+#define LIBS_ASMPARSER_LLTOKEN_H
+
+namespace llvm {
+namespace lltok {
+ enum Kind {
+ // Markers
+ Eof, Error,
+
+ // Tokens with no info.
+ dotdotdot, // ...
+ equal, comma, // = ,
+ star, // *
+ lsquare, rsquare, // [ ]
+ lbrace, rbrace, // { }
+ less, greater, // < >
+ lparen, rparen, // ( )
+ backslash, // \ (not /)
+ exclaim, // !
+
+ kw_x,
+ kw_true, kw_false,
+ kw_declare, kw_define,
+ kw_global, kw_constant,
+
+ kw_private, kw_linker_private, kw_linker_private_weak,
+ kw_linker_private_weak_def_auto, kw_internal,
+ kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
+ kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
+ kw_default, kw_hidden, kw_protected,
+ kw_unnamed_addr,
+ kw_extern_weak,
+ kw_external, kw_thread_local,
+ kw_zeroinitializer,
+ kw_undef, kw_null,
+ kw_to,
+ kw_tail,
+ kw_target,
+ kw_triple,
+ kw_deplibs,
+ kw_datalayout,
+ kw_volatile,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
+ kw_inbounds,
+ kw_align,
+ kw_addrspace,
+ kw_section,
+ kw_alias,
+ kw_module,
+ kw_asm,
+ kw_sideeffect,
+ kw_alignstack,
+ kw_gc,
+ kw_c,
+
+ kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+ kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
+ kw_msp430_intrcc,
+ kw_ptx_kernel, kw_ptx_device,
+
+ kw_signext,
+ kw_zeroext,
+ kw_inreg,
+ kw_sret,
+ kw_nounwind,
+ kw_noreturn,
+ kw_noalias,
+ kw_nocapture,
+ kw_byval,
+ kw_nest,
+ kw_readnone,
+ kw_readonly,
+ kw_uwtable,
+
+ kw_inlinehint,
+ kw_noinline,
+ kw_alwaysinline,
+ kw_optsize,
+ kw_ssp,
+ kw_sspreq,
+ kw_noredzone,
+ kw_noimplicitfloat,
+ kw_naked,
+ kw_hotpatch,
+ kw_nonlazybind,
+
+ kw_type,
+ kw_opaque,
+
+ kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
+ kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
+ kw_ueq, kw_une,
+
+ // Instruction Opcodes (Opcode in UIntVal).
+ kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul,
+ kw_udiv, kw_sdiv, kw_fdiv,
+ kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr,
+ kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp,
+
+ kw_phi, kw_call,
+ kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp,
+ kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
+ kw_select, kw_va_arg,
+
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind,
+ kw_unreachable,
+
+ kw_alloca, kw_load, kw_store, kw_getelementptr,
+
+ kw_extractelement, kw_insertelement, kw_shufflevector,
+ kw_extractvalue, kw_insertvalue, kw_blockaddress,
+
+ // Unsigned Valued tokens (UIntVal).
+ GlobalID, // @42
+ LocalVarID, // %42
+
+ // String valued tokens (StrVal).
+ LabelStr, // foo:
+ GlobalVar, // @foo @"foo"
+ LocalVar, // %foo %"foo"
+ MetadataVar, // !foo
+ StringConstant, // "foo"
+
+ // Type valued tokens (TyVal).
+ Type,
+
+ APFloat, // APFloatVal
+ APSInt // APSInt
+ };
+} // end namespace lltok
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp
new file mode 100644
index 000000000000..59fb471f2b93
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/Parser.cpp
@@ -0,0 +1,62 @@
+//===- Parser.cpp - Main dispatch module for the Parser library -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Parser.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "LLParser.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cstring>
+using namespace llvm;
+
+Module *llvm::ParseAssembly(MemoryBuffer *F,
+ Module *M,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(F, SMLoc());
+
+ // If we are parsing into an existing module, do it.
+ if (M)
+ return LLParser(F, SM, Err, M).Run() ? 0 : M;
+
+ // Otherwise create a new module.
+ OwningPtr<Module> M2(new Module(F->getBufferIdentifier(), Context));
+ if (LLParser(F, SM, Err, M2.get()).Run())
+ return 0;
+ return M2.take();
+}
+
+Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return ParseAssembly(File.take(), 0, Err, Context);
+}
+
+Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
+ SMDiagnostic &Err, LLVMContext &Context) {
+ MemoryBuffer *F =
+ MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)),
+ "<string>");
+
+ return ParseAssembly(F, M, Err, Context);
+}
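+
+// Editor's sketch (not part of the upstream file, kept out of the build): a
+// minimal use of ParseAssemblyString above. The function name ExampleParseIR
+// and the IR text are illustrative assumptions; Err.Print is how the
+// command-line tools of this era report a parse failure.
+#if 0
+static Module *ExampleParseIR(LLVMContext &Context) {
+  SMDiagnostic Err;
+  Module *M = ParseAssemblyString("define i32 @f() {\n"
+                                  "entry:\n"
+                                  "  ret i32 0\n"
+                                  "}\n",
+                                  0, Err, Context);
+  if (M == 0)
+    Err.Print("example", errs());  // print the diagnostic with caret info
+  return M;                        // caller owns the returned module
+}
+#endif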
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
new file mode 100644
index 000000000000..15844c0041c3
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -0,0 +1,88 @@
+//===-- BitReader.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <string>
+#include <cstring>
+
+using namespace llvm;
+
+/* Builds a module from the bitcode in the specified memory buffer, returning a
+ reference to the module via the OutModule parameter. Returns 0 on success.
+ Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage) {
+ return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
+ OutMessage);
+}
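+
+/* Editor's sketch (not part of the upstream file, kept out of the build): how a
+   C client is expected to call LLVMParseBitcode above. The helper name is an
+   illustrative assumption; LLVMDisposeMessage is assumed to be visible through
+   the llvm-c headers pulled in by llvm-c/BitReader.h. */
+#if 0
+static LLVMModuleRef ExampleLoadBitcode(LLVMMemoryBufferRef MemBuf) {
+  LLVMModuleRef M;
+  char *Msg = 0;
+  if (LLVMParseBitcode(MemBuf, &M, &Msg)) {  /* nonzero means failure */
+    if (Msg)
+      LLVMDisposeMessage(Msg);               /* message was strdup'd above */
+    return 0;
+  }
+  return M;                                  /* dispose with LLVMDisposeModule */
+}
+#endif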
+
+LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutModule) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Builds a lazily-deserialized module from the bitcode in the given memory
+   buffer, returning a reference to it via the OutM parameter. Returns 0 on
+   success. Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutM) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+  return 0;
+}
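+
+/* Editor's sketch (not part of the upstream file, kept out of the build): lazy
+   loading into a caller-owned context with the function above; the helper name
+   is an illustrative assumption. */
+#if 0
+static LLVMModuleRef ExampleLazyLoad(LLVMContextRef Ctx,
+                                     LLVMMemoryBufferRef MemBuf) {
+  LLVMModuleRef M;
+  char *Msg = 0;
+  if (LLVMGetBitcodeModuleInContext(Ctx, MemBuf, &M, &Msg)) {
+    if (Msg)
+      LLVMDisposeMessage(Msg);
+    return 0;
+  }
+  /* Function bodies are deserialized on demand as they are first used. */
+  return M;
+}
+#endif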
+
+LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM,
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf,
+ reinterpret_cast<LLVMModuleRef*>(OutMP),
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModule instead. */
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf,
+ OutMP, OutMessage);
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
new file mode 100644
index 000000000000..24c29941cf16
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -0,0 +1,2824 @@
+//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "BitcodeReader.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/OperandTraits.h"
+using namespace llvm;
+
+void BitcodeReader::FreeState() {
+ if (BufferOwned)
+ delete Buffer;
+ Buffer = 0;
+ std::vector<Type*>().swap(TypeList);
+ ValueList.clear();
+ MDValueList.clear();
+
+ std::vector<AttrListPtr>().swap(MAttributes);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ DeferredFunctionInfo.clear();
+ MDKindMap.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions to implement forward reference resolution, etc.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToString - Convert a string from a record into an std::string, return
+/// true on failure.
+template<typename StrTy>
+static bool ConvertToString(SmallVector<uint64_t, 64> &Record, unsigned Idx,
+ StrTy &Result) {
+ if (Idx > Record.size())
+ return true;
+
+ for (unsigned i = Idx, e = Record.size(); i != e; ++i)
+ Result += (char)Record[i];
+ return false;
+}
+
+static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown/new linkages to external
+ case 0: return GlobalValue::ExternalLinkage;
+ case 1: return GlobalValue::WeakAnyLinkage;
+ case 2: return GlobalValue::AppendingLinkage;
+ case 3: return GlobalValue::InternalLinkage;
+ case 4: return GlobalValue::LinkOnceAnyLinkage;
+ case 5: return GlobalValue::DLLImportLinkage;
+ case 6: return GlobalValue::DLLExportLinkage;
+ case 7: return GlobalValue::ExternalWeakLinkage;
+ case 8: return GlobalValue::CommonLinkage;
+ case 9: return GlobalValue::PrivateLinkage;
+ case 10: return GlobalValue::WeakODRLinkage;
+ case 11: return GlobalValue::LinkOnceODRLinkage;
+ case 12: return GlobalValue::AvailableExternallyLinkage;
+ case 13: return GlobalValue::LinkerPrivateLinkage;
+ case 14: return GlobalValue::LinkerPrivateWeakLinkage;
+ case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ }
+}
+
+static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown visibilities to default.
+ case 0: return GlobalValue::DefaultVisibility;
+ case 1: return GlobalValue::HiddenVisibility;
+ case 2: return GlobalValue::ProtectedVisibility;
+ }
+}
+
+static int GetDecodedCastOpcode(unsigned Val) {
+ switch (Val) {
+ default: return -1;
+ case bitc::CAST_TRUNC : return Instruction::Trunc;
+ case bitc::CAST_ZEXT : return Instruction::ZExt;
+ case bitc::CAST_SEXT : return Instruction::SExt;
+ case bitc::CAST_FPTOUI : return Instruction::FPToUI;
+ case bitc::CAST_FPTOSI : return Instruction::FPToSI;
+ case bitc::CAST_UITOFP : return Instruction::UIToFP;
+ case bitc::CAST_SITOFP : return Instruction::SIToFP;
+ case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
+ case bitc::CAST_FPEXT : return Instruction::FPExt;
+ case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
+ case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
+ case bitc::CAST_BITCAST : return Instruction::BitCast;
+ }
+}
+static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
+ switch (Val) {
+ default: return -1;
+ case bitc::BINOP_ADD:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add;
+ case bitc::BINOP_SUB:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub;
+ case bitc::BINOP_MUL:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul;
+ case bitc::BINOP_UDIV: return Instruction::UDiv;
+ case bitc::BINOP_SDIV:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv;
+ case bitc::BINOP_UREM: return Instruction::URem;
+ case bitc::BINOP_SREM:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem;
+ case bitc::BINOP_SHL: return Instruction::Shl;
+ case bitc::BINOP_LSHR: return Instruction::LShr;
+ case bitc::BINOP_ASHR: return Instruction::AShr;
+ case bitc::BINOP_AND: return Instruction::And;
+ case bitc::BINOP_OR: return Instruction::Or;
+ case bitc::BINOP_XOR: return Instruction::Xor;
+ }
+}
+
+namespace llvm {
+namespace {
+  /// @brief A constant placeholder that stands in for a forward-referenced
+  /// constant until its actual definition is read.
+ class ConstantPlaceHolder : public ConstantExpr {
+ void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
+ public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context)
+ : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+ Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
+ }
+
+ /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ //static inline bool classof(const ConstantPlaceHolder *) { return true; }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+ }
+
+
+ /// Provide fast operand accessors
+ //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ };
+}
+
+// FIXME: can we inherit this from ConstantExpr?
+template <>
+struct OperandTraits<ConstantPlaceHolder> :
+ public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
+};
+}
+
+
+void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = ValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // Handle constants and non-constants (e.g. instrs) differently for
+ // efficiency.
+ if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
+ ResolveConstants.push_back(std::make_pair(PHC, Idx));
+ OldV = V;
+ } else {
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = OldV;
+ OldV->replaceAllUsesWith(V);
+ delete PrevVal;
+ }
+}
+
+
+Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
+ const Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert(Ty == V->getType() && "Type mismatch in constant table!");
+ return cast<Constant>(V);
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Constant *C = new ConstantPlaceHolder(Ty, Context);
+ ValuePtrs[Idx] = C;
+ return C;
+}
+
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
+ return V;
+ }
+
+ // No type specified, must be invalid reference.
+ if (Ty == 0) return 0;
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = new Argument(Ty);
+ ValuePtrs[Idx] = V;
+ return V;
+}
+
+/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+/// resolves any forward references. The idea behind this is that we sometimes
+/// get constants (such as large arrays) which reference *many* forward ref
+/// constants. Replacing each of these causes a lot of thrashing when
+/// building/reuniquing the constant. Instead of doing this, we look at all the
+/// uses and rewrite all the place holders at once for any constant that uses
+/// a placeholder.
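+/// For example, a large ConstantArray whose elements are all forward references
+/// would otherwise be re-uniqued once per element as each placeholder resolves;
+/// with this scheme the array is rewritten a single time with every placeholder
+/// operand replaced at once.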
+void BitcodeReaderValueList::ResolveConstantForwardRefs() {
+ // Sort the values by-pointer so that they are efficient to look up with a
+ // binary search.
+ std::sort(ResolveConstants.begin(), ResolveConstants.end());
+
+ SmallVector<Constant*, 64> NewOps;
+
+ while (!ResolveConstants.empty()) {
+ Value *RealVal = operator[](ResolveConstants.back().second);
+ Constant *Placeholder = ResolveConstants.back().first;
+ ResolveConstants.pop_back();
+
+ // Loop over all users of the placeholder, updating them to reference the
+ // new value. If they reference more than one placeholder, update them all
+ // at once.
+ while (!Placeholder->use_empty()) {
+ Value::use_iterator UI = Placeholder->use_begin();
+ User *U = *UI;
+
+ // If the using object isn't uniqued, just update the operands. This
+ // handles instructions and initializers for global variables.
+ if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
+ UI.getUse().set(RealVal);
+ continue;
+ }
+
+ // Otherwise, we have a constant that uses the placeholder. Replace that
+ // constant with a new constant that has *all* placeholder uses updated.
+ Constant *UserC = cast<Constant>(U);
+ for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
+ I != E; ++I) {
+ Value *NewOp;
+ if (!isa<ConstantPlaceHolder>(*I)) {
+ // Not a placeholder reference.
+ NewOp = *I;
+ } else if (*I == Placeholder) {
+ // Common case is that it just references this one placeholder.
+ NewOp = RealVal;
+ } else {
+ // Otherwise, look up the placeholder in ResolveConstants.
+ ResolveConstantsTy::iterator It =
+ std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
+ std::pair<Constant*, unsigned>(cast<Constant>(*I),
+ 0));
+ assert(It != ResolveConstants.end() && It->first == *I);
+ NewOp = operator[](It->second);
+ }
+
+ NewOps.push_back(cast<Constant>(NewOp));
+ }
+
+ // Make the new constant.
+ Constant *NewC;
+ if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
+ NewC = ConstantArray::get(UserCA->getType(), NewOps);
+ } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
+ NewC = ConstantStruct::get(UserCS->getType(), NewOps);
+ } else if (isa<ConstantVector>(UserC)) {
+ NewC = ConstantVector::get(NewOps);
+ } else {
+ assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
+ NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
+ }
+
+ UserC->replaceAllUsesWith(NewC);
+ UserC->destroyConstant();
+ NewOps.clear();
+ }
+
+ // Update all ValueHandles, they should be the only users at this point.
+ Placeholder->replaceAllUsesWith(RealVal);
+ delete Placeholder;
+ }
+}
+
+void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = MDValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // If there was a forward reference to this value, replace it.
+ MDNode *PrevVal = cast<MDNode>(OldV);
+ OldV->replaceAllUsesWith(V);
+ MDNode::deleteTemporary(PrevVal);
+  // Deleting PrevVal nulls out the entry at Idx in MDValuePtrs, so store the
+  // new value for Idx explicitly.
+ MDValuePtrs[Idx] = V;
+}
+
+Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = MDValuePtrs[Idx]) {
+ assert(V->getType()->isMetadataTy() && "Type mismatch in value table!");
+ return V;
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ MDValuePtrs[Idx] = V;
+ return V;
+}
+
+Type *BitcodeReader::getTypeByID(unsigned ID) {
+ // The type table size is always specified correctly.
+ if (ID >= TypeList.size())
+ return 0;
+
+ if (Type *Ty = TypeList[ID])
+ return Ty;
+
+ // If we have a forward reference, the only possible case is when it is to a
+ // named struct. Just create a placeholder for now.
+ return TypeList[ID] = StructType::createNamed(Context, "");
+}
+
+/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
+Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
+ if (ID >= TypeList.size())
+ TypeList.resize(ID+1);
+
+ return TypeList[ID];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions for parsing blocks from the bitcode file
+//===----------------------------------------------------------------------===//
+
+bool BitcodeReader::ParseAttributeBlock() {
+ if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!MAttributes.empty())
+ return Error("Multiple PARAMATTR blocks found!");
+
+ SmallVector<uint64_t, 64> Record;
+
+ SmallVector<AttributeWithIndex, 8> Attrs;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of PARAMATTR block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...]
+ if (Record.size() & 1)
+ return Error("Invalid ENTRY record");
+
+ // FIXME : Remove this autoupgrade code in LLVM 3.0.
+      // If function attributes are stored at index 0, transfer them to index
+      // ~0. Index 0 is now used for return-value attributes, but it used to
+      // hold function attributes.
+ Attributes RetAttribute = Attribute::None;
+ Attributes FnAttribute = Attribute::None;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ // FIXME: remove in LLVM 3.0
+ // The alignment is stored as a 16-bit raw value from bits 31--16.
+ // We shift the bits above 31 down by 11 bits.
+
+ unsigned Alignment = (Record[i+1] & (0xffffull << 16)) >> 16;
+ if (Alignment && !isPowerOf2_32(Alignment))
+ return Error("Alignment is not a power of two.");
+
+ Attributes ReconstitutedAttr = Record[i+1] & 0xffff;
+ if (Alignment)
+ ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment);
+ ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11;
+ Record[i+1] = ReconstitutedAttr;
+
+ if (Record[i] == 0)
+ RetAttribute = Record[i+1];
+ else if (Record[i] == ~0U)
+ FnAttribute = Record[i+1];
+ }
+
+ unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
+ Attribute::ReadOnly|Attribute::ReadNone);
+
+ if (FnAttribute == Attribute::None && RetAttribute != Attribute::None &&
+ (RetAttribute & OldRetAttrs) != 0) {
+ if (FnAttribute == Attribute::None) { // add a slot so they get added.
+ Record.push_back(~0U);
+ Record.push_back(0);
+ }
+
+ FnAttribute |= RetAttribute & OldRetAttrs;
+ RetAttribute &= ~OldRetAttrs;
+ }
+
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ if (Record[i] == 0) {
+ if (RetAttribute != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttribute));
+ } else if (Record[i] == ~0U) {
+ if (FnAttribute != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute));
+ } else if (Record[i+1] != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1]));
+ }
+
+ MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end()));
+ Attrs.clear();
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
+ return Error("Malformed block record");
+
+ return ParseTypeTableBody();
+}
+
+bool BitcodeReader::ParseTypeTableBody() {
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+ SmallVector<uint64_t, 64> Record;
+ unsigned NumRecords = 0;
+
+ SmallString<64> TypeName;
+
+ // Read all the records for this type table.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (NumRecords != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK");
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Type *ResultTy = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: return Error("unknown type in type table");
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.resize(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::getVoidTy(Context);
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::getFloatTy(Context);
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::getDoubleTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::getX86_FP80Ty(Context);
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::getFP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::getPPC_FP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::getLabelTy(Context);
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::getMetadataTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+
+ ResultTy = IntegerType::get(Context, Record[0]);
+ break;
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ ResultTy = getTypeByID(Record[0]);
+ if (ResultTy == 0) return Error("invalid element type in pointer type");
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FIXME: attrid is dead, remove it in LLVM 3.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ std::vector<Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[2]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
+ return Error("invalid type in function type");
+
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+ std::vector<Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid type in struct type");
+ ResultTy = StructType::get(Context, EltTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
+ if (ConvertToString(Record, 0, TypeName))
+ return Error("Invalid STRUCT_NAME record");
+ continue;
+
+ case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+
+ SmallVector<Type*, 8> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid STRUCT type record");
+ Res->setBody(EltTys, Record[0]);
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
+ if (Record.size() != 1)
+ return Error("Invalid OPAQUE type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct with no body.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ else
+        return Error("Invalid VECTOR type element");
+ break;
+ }
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+ assert(ResultTy && "Didn't read a type?");
+ assert(TypeList[NumRecords] == 0 && "Already read type?");
+ TypeList[NumRecords++] = ResultTy;
+ }
+}
+
+// FIXME: Remove in LLVM 3.1
+bool BitcodeReader::ParseOldTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
+ return Error("Malformed block record");
+
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+
+ // While horrible, we have no good ordering of types in the bc file. Just
+ // iteratively parse types out of the bc file in multiple passes until we get
+ // them all. Do this by saving a cursor for the start of the type block.
+ BitstreamCursor StartOfTypeBlockCursor(Stream);
+
+ unsigned NumTypesRead = 0;
+
+ SmallVector<uint64_t, 64> Record;
+RestartScan:
+ unsigned NextTypeID = 0;
+ bool ReadAnyTypes = false;
+
+ // Read all the records for this type table.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (NextTypeID != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
+
+ // If we haven't read all of the types yet, iterate again.
+ if (NumTypesRead != TypeList.size()) {
+ // If we didn't successfully read any types in this pass, then we must
+ // have an unhandled forward reference.
+ if (!ReadAnyTypes)
+ return Error("Obsolete bitcode contains unhandled recursive type");
+
+ Stream = StartOfTypeBlockCursor;
+ goto RestartScan;
+ }
+
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Type *ResultTy = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: return Error("unknown type in type table");
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.resize(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::getVoidTy(Context);
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::getFloatTy(Context);
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::getDoubleTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::getX86_FP80Ty(Context);
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::getFP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::getPPC_FP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::getLabelTy(Context);
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::getMetadataTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+ ResultTy = IntegerType::get(Context, Record[0]);
+ break;
+ case bitc::TYPE_CODE_OPAQUE: // OPAQUE
+ if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
+ ResultTy = StructType::createNamed(Context, "");
+ break;
+ case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
+ if (NextTypeID >= TypeList.size()) break;
+ // If we already read it, don't reprocess.
+ if (TypeList[NextTypeID] &&
+ !cast<StructType>(TypeList[NextTypeID])->isOpaque())
+ break;
+
+ // Set a type.
+ if (TypeList[NextTypeID] == 0)
+ TypeList[NextTypeID] = StructType::createNamed(Context, "");
+
+ std::vector<Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *Elt = getTypeByIDOrNull(Record[i]))
+ EltTys.push_back(Elt);
+ else
+ break;
+ }
+
+ if (EltTys.size() != Record.size()-1)
+ break; // Not all elements are ready.
+
+ cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
+ ResultTy = TypeList[NextTypeID];
+ TypeList[NextTypeID] = 0;
+ break;
+ }
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ if ((ResultTy = getTypeByIDOrNull(Record[0])))
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FIXME: attrid is dead, remove it in LLVM 3.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ std::vector<Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *Elt = getTypeByIDOrNull(Record[i]))
+ ArgTys.push_back(Elt);
+ else
+ break;
+ }
+ if (ArgTys.size()+3 != Record.size())
+ break; // Something was null.
+ if ((ResultTy = getTypeByIDOrNull(Record[2])))
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ if ((ResultTy = getTypeByIDOrNull(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ if ((ResultTy = getTypeByIDOrNull(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ break;
+ }
+
+ if (NextTypeID >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ if (ResultTy && TypeList[NextTypeID] == 0) {
+ ++NumTypesRead;
+ ReadAnyTypes = true;
+
+ TypeList[NextTypeID] = ResultTy;
+ }
+
+ ++NextTypeID;
+ }
+}
+
+
+bool BitcodeReader::ParseOldTypeSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this type table.
+ std::string TypeName;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type symbol table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
+ if (ConvertToString(Record, 1, TypeName))
+ return Error("Invalid TST_ENTRY record");
+ unsigned TypeID = Record[0];
+ if (TypeID >= TypeList.size())
+ return Error("Invalid Type ID in TST_ENTRY record");
+
+ // Only apply the type name to a struct type with no name.
+ if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
+ if (!STy->isAnonymous() && !STy->hasName())
+ STy->setName(TypeName);
+ TypeName.clear();
+ break;
+ }
+ }
+}
+
+bool BitcodeReader::ParseValueSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ SmallString<128> ValueName;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of value symbol table block");
+ return false;
+ }
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_ENTRY record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return Error("Invalid Value ID in VST_ENTRY record");
+ Value *V = ValueList[ValueID];
+
+ V->setName(StringRef(ValueName.data(), ValueName.size()));
+ ValueName.clear();
+ break;
+ }
+ case bitc::VST_CODE_BBENTRY: {
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_BBENTRY record");
+ BasicBlock *BB = getBasicBlock(Record[0]);
+ if (BB == 0)
+ return Error("Invalid BB ID in VST_BBENTRY record");
+
+ BB->setName(StringRef(ValueName.data(), ValueName.size()));
+ ValueName.clear();
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseMetadata() {
+ unsigned NextMDValueNo = MDValueList.size();
+
+ if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+        return Error("Error at end of METADATA block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ bool IsFunctionLocal = false;
+ // Read a record.
+ Record.clear();
+ Code = Stream.ReadRecord(Code, Record);
+ switch (Code) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_NAME: {
+      // Read the name of the named metadata.
+ unsigned NameLength = Record.size();
+ SmallString<8> Name;
+ Name.resize(NameLength);
+ for (unsigned i = 0; i != NameLength; ++i)
+ Name[i] = Record[i];
+ Record.clear();
+ Code = Stream.ReadCode();
+
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE.
+ unsigned NextBitCode = Stream.ReadRecord(Code, Record);
+ assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
+ for (unsigned i = 0; i != Size; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
+ if (MD == 0)
+ return Error("Malformed metadata record");
+ NMD->addOperand(MD);
+ }
+ break;
+ }
+ case bitc::METADATA_FN_NODE:
+ IsFunctionLocal = true;
+ // fall-through
+ case bitc::METADATA_NODE: {
+ if (Record.size() % 2 == 1)
+ return Error("Invalid METADATA_NODE record");
+
+ unsigned Size = Record.size();
+ SmallVector<Value*, 8> Elts;
+ for (unsigned i = 0; i != Size; i += 2) {
+ const Type *Ty = getTypeByID(Record[i]);
+ if (!Ty) return Error("Invalid METADATA_NODE record");
+ if (Ty->isMetadataTy())
+ Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
+ else if (!Ty->isVoidTy())
+ Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
+ else
+ Elts.push_back(NULL);
+ }
+ Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal);
+ IsFunctionLocal = false;
+ MDValueList.AssignValue(V, NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_STRING: {
+ unsigned MDStringLength = Record.size();
+ SmallString<8> String;
+ String.resize(MDStringLength);
+ for (unsigned i = 0; i != MDStringLength; ++i)
+ String[i] = Record[i];
+ Value *V = MDString::get(Context,
+ StringRef(String.data(), String.size()));
+ MDValueList.AssignValue(V, NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_KIND: {
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || RecordLength < 2)
+ return Error("Invalid METADATA_KIND record");
+ SmallString<8> Name;
+ Name.resize(RecordLength-1);
+ unsigned Kind = Record[0];
+ for (unsigned i = 1; i != RecordLength; ++i)
+ Name[i-1] = Record[i];
+
+ unsigned NewKind = TheModule->getMDKindID(Name.str());
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return Error("Conflicting METADATA_KIND records");
+ break;
+ }
+ }
+ }
+}
+
+/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in
+/// the LSB for dense VBR encoding.
+static uint64_t DecodeSignRotatedValue(uint64_t V) {
+ if ((V & 1) == 0)
+ return V >> 1;
+ if (V != 1)
+ return -(V >> 1);
+ // There is no such thing as -0 with integers. "-0" really means MININT.
+ return 1ULL << 63;
+}
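+// Worked example (editor's note): the encoding maps x >= 0 to 2*x and x < 0 to
+// 2*(-x) + 1, with the minimum 64-bit value encoded as 1. Thus
+// DecodeSignRotatedValue(0) == 0, (2) == 1, (4) == 2, (3) == (uint64_t)-1,
+// (5) == (uint64_t)-2, and (1) == 1ULL << 63.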
+
+/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
+/// values and aliases that we can.
+bool BitcodeReader::ResolveGlobalAndAliasInits() {
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
+
+ GlobalInitWorklist.swap(GlobalInits);
+ AliasInitWorklist.swap(AliasInits);
+
+ while (!GlobalInitWorklist.empty()) {
+ unsigned ValID = GlobalInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+      // Not ready to resolve this yet; it requires something later in the file.
+ GlobalInits.push_back(GlobalInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ GlobalInitWorklist.back().first->setInitializer(C);
+ else
+ return Error("Global variable initializer is not a constant!");
+ }
+ GlobalInitWorklist.pop_back();
+ }
+
+ while (!AliasInitWorklist.empty()) {
+ unsigned ValID = AliasInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ AliasInits.push_back(AliasInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ AliasInitWorklist.back().first->setAliasee(C);
+ else
+ return Error("Alias initializer is not a constant!");
+ }
+ AliasInitWorklist.pop_back();
+ }
+ return false;
+}
+
+bool BitcodeReader::ParseConstants() {
+ if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ const Type *CurTy = Type::getInt32Ty(Context);
+ unsigned NextCstNo = ValueList.size();
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK)
+ break;
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Value *V = 0;
+ unsigned BitCode = Stream.ReadRecord(Code, Record);
+ switch (BitCode) {
+ default: // Default behavior: unknown constant
+ case bitc::CST_CODE_UNDEF: // UNDEF
+ V = UndefValue::get(CurTy);
+ break;
+ case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
+ if (Record.empty())
+ return Error("Malformed CST_SETTYPE record");
+ if (Record[0] >= TypeList.size())
+ return Error("Invalid Type ID in CST_SETTYPE record");
+ CurTy = TypeList[Record[0]];
+ continue; // Skip the ValueList manipulation.
+ case bitc::CST_CODE_NULL: // NULL
+ V = Constant::getNullValue(CurTy);
+ break;
+ case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
+ if (!CurTy->isIntegerTy() || Record.empty())
+ return Error("Invalid CST_INTEGER record");
+ V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
+ break;
+ case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
+ if (!CurTy->isIntegerTy() || Record.empty())
+ return Error("Invalid WIDE_INTEGER record");
+
+ unsigned NumWords = Record.size();
+ SmallVector<uint64_t, 8> Words;
+ Words.resize(NumWords);
+ for (unsigned i = 0; i != NumWords; ++i)
+ Words[i] = DecodeSignRotatedValue(Record[i]);
+ V = ConstantInt::get(Context,
+ APInt(cast<IntegerType>(CurTy)->getBitWidth(),
+ NumWords, &Words[0]));
+ break;
+ }
+ case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
+ if (Record.empty())
+ return Error("Invalid FLOAT record");
+ if (CurTy->isFloatTy())
+ V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0])));
+ else if (CurTy->isDoubleTy())
+ V = ConstantFP::get(Context, APFloat(APInt(64, Record[0])));
+ else if (CurTy->isX86_FP80Ty()) {
+ // Bits are not stored the same way as a normal i80 APInt, compensate.
+ uint64_t Rearrange[2];
+ Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
+ Rearrange[1] = Record[0] >> 48;
+ V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange)));
+ } else if (CurTy->isFP128Ty())
+ V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true));
+ else if (CurTy->isPPC_FP128Ty())
+ V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0])));
+ else
+ V = UndefValue::get(CurTy);
+ break;
+ }
+
+ case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+
+ if (const StructType *STy = dyn_cast<StructType>(CurTy)) {
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i],
+ STy->getElementType(i)));
+ V = ConstantStruct::get(STy, Elts);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+ const Type *EltTy = VTy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantVector::get(Elts);
+ } else {
+ V = UndefValue::get(CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_STRING: { // STRING: [values]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ const ArrayType *ATy = cast<ArrayType>(CurTy);
+ const Type *EltTy = ATy->getElementType();
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+ V = ConstantArray::get(ATy, Elts);
+ break;
+ }
+ case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ const ArrayType *ATy = cast<ArrayType>(CurTy);
+ const Type *EltTy = ATy->getElementType();
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+ Elts.push_back(Constant::getNullValue(EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ break;
+ }
+ case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+ int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown binop.
+ } else {
+ Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+ Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
+ unsigned Flags = 0;
+ if (Record.size() >= 4) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
+ if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[3] & (1 << bitc::PEO_EXACT))
+ Flags |= SDivOperator::IsExact;
+ }
+ }
+ V = ConstantExpr::get(Opc, LHS, RHS, Flags);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
+ if (Record.size() < 3) return Error("Invalid CE_CAST record");
+ int Opc = GetDecodedCastOpcode(Record[0]);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown cast.
+ } else {
+ const Type *OpTy = getTypeByID(Record[1]);
+ if (!OpTy) return Error("Invalid CE_CAST record");
+ Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
+ V = ConstantExpr::getCast(Opc, Op, CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_INBOUNDS_GEP:
+ case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
+ if (Record.size() & 1) return Error("Invalid CE_GEP record");
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ const Type *ElTy = getTypeByID(Record[i]);
+ if (!ElTy) return Error("Invalid CE_GEP record");
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+ }
+ if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP)
+ V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1],
+ Elts.size()-1);
+ else
+ V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1],
+ Elts.size()-1);
+ break;
+ }
+ case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
+ if (Record.size() < 3) return Error("Invalid CE_SELECT record");
+ V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
+ Type::getInt1Ty(Context)),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
+ break;
+ case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
+ const VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ V = ConstantExpr::getExtractElement(Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
+ const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_INSERTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
+ OpTy->getElementType());
+      Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
+                                                  Type::getInt32Ty(Context));
+ V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
+ const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_SHUFFLEVEC record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ OpTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
+ const VectorType *RTy = dyn_cast<VectorType>(CurTy);
+ const VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (Record.size() < 4 || RTy == 0 || OpTy == 0)
+ return Error("Invalid CE_SHUFVEC_EX record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+ const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ RTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
+ if (Record.size() < 4) return Error("Invalid CE_CMP record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ if (OpTy == 0) return Error("Invalid CE_CMP record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+
+ if (OpTy->isFPOrFPVectorTy())
+ V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
+ else
+ V = ConstantExpr::getICmp(Record[3], Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_INLINEASM: {
+ if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ std::string AsmStr, ConstrStr;
+ bool HasSideEffects = Record[0] & 1;
+ bool IsAlignStack = Record[0] >> 1;
+ unsigned AsmStrSize = Record[1];
+ if (2+AsmStrSize >= Record.size())
+ return Error("Invalid INLINEASM record");
+ unsigned ConstStrSize = Record[2+AsmStrSize];
+ if (3+AsmStrSize+ConstStrSize > Record.size())
+ return Error("Invalid INLINEASM record");
+
+ for (unsigned i = 0; i != AsmStrSize; ++i)
+ AsmStr += (char)Record[2+i];
+ for (unsigned i = 0; i != ConstStrSize; ++i)
+ ConstrStr += (char)Record[3+AsmStrSize+i];
+ const PointerType *PTy = cast<PointerType>(CurTy);
+ V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
+ AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+ break;
+ }
+ case bitc::CST_CODE_BLOCKADDRESS:{
+ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
+ const Type *FnTy = getTypeByID(Record[0]);
+ if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
+ Function *Fn =
+ dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
+ if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
+
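+      // The target block may not have been parsed yet (function bodies are
+      // deserialized lazily), so create a placeholder global and record it;
+      // ParseFunctionBody swaps in the real BlockAddress once the blocks of
+      // this function are available.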
+ GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
+ Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
+ V = FwdRef;
+ break;
+ }
+ }
+
+ ValueList.AssignValue(V, NextCstNo);
+ ++NextCstNo;
+ }
+
+ if (NextCstNo != ValueList.size())
+ return Error("Invalid constant reference!");
+
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of constants block");
+
+ // Once all the constants have been read, go through and resolve forward
+ // references.
+ ValueList.ResolveConstantForwardRefs();
+ return false;
+}
+
+/// RememberAndSkipFunctionBody - When we see the block for a function body,
+/// remember where it is and then skip it. This lets us lazily deserialize the
+/// functions.
+bool BitcodeReader::RememberAndSkipFunctionBody() {
+ // Get the function we are talking about.
+ if (FunctionsWithBodies.empty())
+ return Error("Insufficient function protos");
+
+ Function *Fn = FunctionsWithBodies.back();
+ FunctionsWithBodies.pop_back();
+
+ // Save the current stream state.
+ uint64_t CurBit = Stream.GetCurrentBitNo();
+ DeferredFunctionInfo[Fn] = CurBit;
+
+ // Skip over the function block for now.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ return false;
+}
+
+bool BitcodeReader::ParseModule() {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+ std::vector<std::string> SectionTable;
+ std::vector<std::string> GCTable;
+
+ // Read all the records for this module.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of module block");
+
+ // Patch the initializers for globals and aliases up.
+ ResolveGlobalAndAliasInits();
+ if (!GlobalInits.empty() || !AliasInits.empty())
+ return Error("Malformed global initializer set");
+ if (!FunctionsWithBodies.empty())
+ return Error("Too few function bodies found");
+
+ // Look for intrinsic functions which need to be upgraded at some point
+ for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ Function* NewFn;
+ if (UpgradeIntrinsicFunction(FI, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ }
+
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+
+      // Force deallocation of memory for these vectors to favor clients that
+      // want lazy deserialization.
+ std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+ std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::PARAMATTR_BLOCK_ID:
+ if (ParseAttributeBlock())
+ return true;
+ break;
+ case bitc::TYPE_BLOCK_ID_NEW:
+ if (ParseTypeTable())
+ return true;
+ break;
+ case bitc::TYPE_BLOCK_ID_OLD:
+ if (ParseOldTypeTable())
+ return true;
+ break;
+ case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
+ if (ParseOldTypeSymbolTable())
+ return true;
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable())
+ return true;
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants() || ResolveGlobalAndAliasInits())
+ return true;
+ break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata())
+ return true;
+ break;
+ case bitc::FUNCTION_BLOCK_ID:
+ // If this is the first function body we've seen, reverse the
+ // FunctionsWithBodies list.
+ if (!HasReversedFunctionsWithBodies) {
+ std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
+ HasReversedFunctionsWithBodies = true;
+ }
+
+ if (RememberAndSkipFunctionBody())
+ return true;
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 is supported so far.
+ if (Record[0] != 0)
+ return Error("Unknown bitstream version!");
+ break;
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ TheModule->setTargetTriple(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DATALAYOUT record");
+ TheModule->setDataLayout(S);
+ break;
+ }
+ case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_ASM record");
+ TheModule->setModuleInlineAsm(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DEPLIB record");
+ TheModule->addLibrary(S);
+ break;
+ }
+ case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_SECTIONNAME record");
+ SectionTable.push_back(S);
+ break;
+ }
+    case bitc::MODULE_CODE_GCNAME: { // GCNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_GCNAME record");
+ GCTable.push_back(S);
+ break;
+ }
+ // GLOBALVAR: [pointer type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
+ case bitc::MODULE_CODE_GLOBALVAR: {
+ if (Record.size() < 6)
+ return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ if (!Ty->isPointerTy())
+ return Error("Global not a pointer type!");
+ unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ bool isConstant = Record[1];
+ GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]);
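+      // Alignment is encoded as log2(alignment)+1, with 0 meaning no
+      // explicit alignment; the section index below is 1-based, with 0
+      // meaning no section.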
+ unsigned Alignment = (1 << Record[4]) >> 1;
+ std::string Section;
+ if (Record[5]) {
+ if (Record[5]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Section = SectionTable[Record[5]-1];
+ }
+ GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
+ if (Record.size() > 6)
+ Visibility = GetDecodedVisibility(Record[6]);
+ bool isThreadLocal = false;
+ if (Record.size() > 7)
+ isThreadLocal = Record[7];
+
+ bool UnnamedAddr = false;
+ if (Record.size() > 8)
+ UnnamedAddr = Record[8];
+
+ GlobalVariable *NewGV =
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
+ isThreadLocal, AddressSpace);
+ NewGV->setAlignment(Alignment);
+ if (!Section.empty())
+ NewGV->setSection(Section);
+ NewGV->setVisibility(Visibility);
+ NewGV->setThreadLocal(isThreadLocal);
+ NewGV->setUnnamedAddr(UnnamedAddr);
+
+ ValueList.push_back(NewGV);
+
+ // Remember which value to use for the global initializer.
+ if (unsigned InitID = Record[2])
+ GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
+ break;
+ }
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
+ // alignment, section, visibility, gc, unnamed_addr]
+ case bitc::MODULE_CODE_FUNCTION: {
+ if (Record.size() < 8)
+ return Error("Invalid MODULE_CODE_FUNCTION record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
+ if (!Ty->isPointerTy())
+ return Error("Function not a pointer type!");
+ const FunctionType *FTy =
+ dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
+ if (!FTy)
+ return Error("Function not a pointer to function type!");
+
+ Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ "", TheModule);
+
+ Func->setCallingConv(static_cast<CallingConv::ID>(Record[1]));
+ bool isProto = Record[2];
+ Func->setLinkage(GetDecodedLinkage(Record[3]));
+ Func->setAttributes(getAttributes(Record[4]));
+
+ Func->setAlignment((1 << Record[5]) >> 1);
+ if (Record[6]) {
+ if (Record[6]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Func->setSection(SectionTable[Record[6]-1]);
+ }
+ Func->setVisibility(GetDecodedVisibility(Record[7]));
+ if (Record.size() > 8 && Record[8]) {
+        if (Record[8]-1 >= GCTable.size())
+ return Error("Invalid GC ID");
+ Func->setGC(GCTable[Record[8]-1].c_str());
+ }
+ bool UnnamedAddr = false;
+ if (Record.size() > 9)
+ UnnamedAddr = Record[9];
+ Func->setUnnamedAddr(UnnamedAddr);
+ ValueList.push_back(Func);
+
+ // If this is a function with a body, remember the prototype we are
+ // creating now, so that we can match up the body with them later.
+ if (!isProto)
+ FunctionsWithBodies.push_back(Func);
+ break;
+ }
+ // ALIAS: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
+ case bitc::MODULE_CODE_ALIAS: {
+ if (Record.size() < 3)
+ return Error("Invalid MODULE_ALIAS record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_ALIAS record");
+ if (!Ty->isPointerTy())
+ return Error("Function not a pointer type!");
+
+ GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
+ "", 0, TheModule);
+ // Old bitcode files didn't have visibility field.
+ if (Record.size() > 3)
+ NewGA->setVisibility(GetDecodedVisibility(Record[3]));
+ ValueList.push_back(NewGA);
+ AliasInits.push_back(std::make_pair(NewGA, Record[1]));
+ break;
+ }
+ /// MODULE_CODE_PURGEVALS: [numvals]
+ case bitc::MODULE_CODE_PURGEVALS:
+ // Trim down the value list to the specified size.
+ if (Record.size() < 1 || Record[0] > ValueList.size())
+ return Error("Invalid MODULE_PURGEVALS record");
+ ValueList.shrinkTo(Record[0]);
+ break;
+ }
+ Record.clear();
+ }
+
+ return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseBitcodeInto(Module *M) {
+ TheModule = 0;
+
+ unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ if (Buffer->getBufferSize() & 3) {
+ if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
+ return Error("Invalid bitcode signature");
+ else
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ }
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
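+  // The wrapper is five little-endian 32-bit words: magic, version, bitcode
+  // offset, bitcode size, and CPU type.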
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.init(BufPtr, BufEnd);
+ Stream.init(StreamFile);
+
+ // Sniff for the signature.
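+  // The signature is 'B', 'C', then the bytes 0xC0 0xDE read as four 4-bit
+  // fields (the bitstream delivers the low nibble of each byte first).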
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+
+ if (Code != bitc::ENTER_SUBBLOCK) {
+
+      // The ranlib in Xcode 4 aligns archive members by appending newlines
+      // to the end of them. If the file size is a multiple of 4 but not 8,
+      // we have to read and ignore these final 4 bytes :-(
+ if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 &&
+ Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
+ Stream.AtEndOfStream())
+ return false;
+
+ return Error("Invalid record at top-level");
+ }
+
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ // We only know the MODULE subblock ID.
+ switch (BlockID) {
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::MODULE_BLOCK_ID:
+ // Reject multiple MODULE_BLOCK's in a single bitstream.
+ if (TheModule)
+ return Error("Multiple MODULE_BLOCKs in same stream");
+ TheModule = M;
+ if (ParseModule())
+ return true;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ }
+
+ return false;
+}
+
+bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this module.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of module block");
+
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 is supported so far.
+ if (Record[0] != 0)
+ return Error("Unknown bitstream version!");
+ break;
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ Triple = S;
+ break;
+ }
+ }
+ Record.clear();
+ }
+
+ return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseTriple(std::string &Triple) {
+ if (Buffer->getBufferSize() & 3)
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+
+ unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.init(BufPtr, BufEnd);
+ Stream.init(StreamFile);
+
+ // Sniff for the signature.
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+
+ if (Code != bitc::ENTER_SUBBLOCK)
+ return Error("Invalid record at top-level");
+
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ // We only know the MODULE subblock ID.
+ switch (BlockID) {
+ case bitc::MODULE_BLOCK_ID:
+ if (ParseModuleTriple(Triple))
+ return true;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ParseMetadataAttachment - Parse metadata attachments.
+bool BitcodeReader::ParseMetadataAttachment() {
+ if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+ while(1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of PARAMATTR block");
+ break;
+ }
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+ // Read a metadata attachment record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_ATTACHMENT: {
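+      // Record layout: [instruction#, (kind#, mdnode#)*].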
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || (RecordLength - 1) % 2 == 1)
+ return Error ("Invalid METADATA_ATTACHMENT reader!");
+ Instruction *Inst = InstructionList[Record[0]];
+ for (unsigned i = 1; i != RecordLength; i = i+2) {
+ unsigned Kind = Record[i];
+ DenseMap<unsigned, unsigned>::iterator I =
+ MDKindMap.find(Kind);
+ if (I == MDKindMap.end())
+ return Error("Invalid metadata kind ID");
+ Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
+ Inst->setMetadata(I->second, cast<MDNode>(Node));
+ }
+ break;
+ }
+ }
+ }
+ return false;
+}
+
+/// ParseFunctionBody - Lazily parse the specified function body block.
+bool BitcodeReader::ParseFunctionBody(Function *F) {
+ if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
+ return Error("Malformed block record");
+
+ InstructionList.clear();
+ unsigned ModuleValueListSize = ValueList.size();
+ unsigned ModuleMDValueListSize = MDValueList.size();
+
+ // Add all the function arguments to the value table.
+ for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ValueList.push_back(I);
+
+ unsigned NextValueNo = ValueList.size();
+ BasicBlock *CurBB = 0;
+ unsigned CurBBNo = 0;
+
+ DebugLoc LastLoc;
+
+ // Read all the records.
+ SmallVector<uint64_t, 64> Record;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of function block");
+ break;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants()) return true;
+ NextValueNo = ValueList.size();
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable()) return true;
+ break;
+ case bitc::METADATA_ATTACHMENT_ID:
+ if (ParseMetadataAttachment()) return true;
+ break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata()) return true;
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Instruction *I = 0;
+ unsigned BitCode = Stream.ReadRecord(Code, Record);
+ switch (BitCode) {
+ default: // Default behavior: reject
+ return Error("Unknown instruction");
+ case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
+ if (Record.size() < 1 || Record[0] == 0)
+ return Error("Invalid DECLAREBLOCKS record");
+ // Create all the basic blocks for the function.
+ FunctionBBs.resize(Record[0]);
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
+ FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+ CurBB = FunctionBBs[0];
+ continue;
+
+ case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
+ // This record indicates that the last instruction is at the same
+ // location as the previous instruction with a location.
+ I = 0;
+
+ // Get the last instruction emitted.
+ if (CurBB && !CurBB->empty())
+ I = &CurBB->back();
+ else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
+ !FunctionBBs[CurBBNo-1]->empty())
+ I = &FunctionBBs[CurBBNo-1]->back();
+
+ if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record");
+ I->setDebugLoc(LastLoc);
+ I = 0;
+ continue;
+
+ case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
+ I = 0; // Get the last instruction emitted.
+ if (CurBB && !CurBB->empty())
+ I = &CurBB->back();
+ else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
+ !FunctionBBs[CurBBNo-1]->empty())
+ I = &FunctionBBs[CurBBNo-1]->back();
+ if (I == 0 || Record.size() < 4)
+ return Error("Invalid FUNC_CODE_DEBUG_LOC record");
+
+ unsigned Line = Record[0], Col = Record[1];
+ unsigned ScopeID = Record[2], IAID = Record[3];
+
+ MDNode *Scope = 0, *IA = 0;
+ if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
+ if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
+ LastLoc = DebugLoc::get(Line, Col, Scope, IA);
+ I->setDebugLoc(LastLoc);
+ I = 0;
+ continue;
+ }
+
+ case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 > Record.size())
+ return Error("Invalid BINOP record");
+
+ int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
+ if (Opc == -1) return Error("Invalid BINOP record");
+ I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ InstructionList.push_back(I);
+ if (OpNum < Record.size()) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
+ if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ cast<BinaryOperator>(I)->setHasNoSignedWrap(true);
+ if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true);
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[OpNum] & (1 << bitc::PEO_EXACT))
+ cast<BinaryOperator>(I)->setIsExact(true);
+ }
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid CAST record");
+
+ const Type *ResTy = getTypeByID(Record[OpNum]);
+ int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
+ if (Opc == -1 || ResTy == 0)
+ return Error("Invalid CAST record");
+ I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_INBOUNDS_GEP:
+ case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
+ unsigned OpNum = 0;
+ Value *BasePtr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+ return Error("Invalid GEP record");
+
+ SmallVector<Value*, 16> GEPIdx;
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid GEP record");
+ GEPIdx.push_back(Op);
+ }
+
+ I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end());
+ InstructionList.push_back(I);
+ if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP)
+ cast<GetElementPtrInst>(I)->setIsInBounds(true);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTVAL: {
+ // EXTRACTVAL: [opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid EXTRACTVAL record");
+
+ SmallVector<unsigned, 4> EXTRACTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid EXTRACTVAL index");
+ EXTRACTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTVAL: {
+ // INSERTVAL: [opty, opval, opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid INSERTVAL record");
+ Value *Val;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val))
+ return Error("Invalid INSERTVAL record");
+
+ SmallVector<unsigned, 4> INSERTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid INSERTVAL index");
+ INSERTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval]
+ // obsolete form of select
+ // handles select i1 ... in old bitcode
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ getValue(Record, OpNum, Type::getInt1Ty(Context), Cond))
+ return Error("Invalid SELECT record");
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred]
+ // new form of select
+ // handles select i1 or select [N x i1]
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Cond))
+ return Error("Invalid SELECT record");
+
+ // select condition can be either i1 or [N x i1]
+ if (const VectorType* vector_type =
+ dyn_cast<const VectorType>(Cond->getType())) {
+ // expect <n x i1>
+ if (vector_type->getElementType() != Type::getInt1Ty(Context))
+ return Error("Invalid SELECT condition type");
+ } else {
+ // expect i1
+ if (Cond->getType() != Type::getInt1Ty(Context))
+ return Error("Invalid SELECT condition type");
+ }
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ return Error("Invalid EXTRACTELT record");
+ I = ExtractElementInst::Create(Vec, Idx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Elt, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ getValue(Record, OpNum,
+ cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
+ getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ return Error("Invalid INSERTELT record");
+ I = InsertElementInst::Create(Vec, Elt, Idx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec1, *Vec2, *Mask;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
+ getValue(Record, OpNum, Vec1->getType(), Vec2))
+ return Error("Invalid SHUFFLEVEC record");
+
+ if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
+ return Error("Invalid SHUFFLEVEC record");
+ I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred]
+ // Old form of ICmp/FCmp returning bool
+ // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were
+ // both legal on vectors but had different behaviour.
+ case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred]
+ // FCmp/ICmp returning bool or vector of bool
+
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 != Record.size())
+ return Error("Invalid CMP record");
+
+ if (LHS->getType()->isFPOrFPVectorTy())
+ I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else
+ I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
+ {
+ unsigned Size = Record.size();
+ if (Size == 0) {
+ I = ReturnInst::Create(Context);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ unsigned OpNum = 0;
+ Value *Op = NULL;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid RET record");
+ if (OpNum != Record.size())
+ return Error("Invalid RET record");
+
+ I = ReturnInst::Create(Context, Op);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
+ if (Record.size() != 1 && Record.size() != 3)
+ return Error("Invalid BR record");
+ BasicBlock *TrueDest = getBasicBlock(Record[0]);
+ if (TrueDest == 0)
+ return Error("Invalid BR record");
+
+ if (Record.size() == 1) {
+ I = BranchInst::Create(TrueDest);
+ InstructionList.push_back(I);
+ }
+ else {
+ BasicBlock *FalseDest = getBasicBlock(Record[1]);
+ Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context));
+ if (FalseDest == 0 || Cond == 0)
+ return Error("Invalid BR record");
+ I = BranchInst::Create(TrueDest, FalseDest, Cond);
+ InstructionList.push_back(I);
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
+ if (Record.size() < 3 || (Record.size() & 1) == 0)
+ return Error("Invalid SWITCH record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Cond = getFnValueByID(Record[1], OpTy);
+ BasicBlock *Default = getBasicBlock(Record[2]);
+ if (OpTy == 0 || Cond == 0 || Default == 0)
+ return Error("Invalid SWITCH record");
+ unsigned NumCases = (Record.size()-3)/2;
+ SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
+ InstructionList.push_back(SI);
+ for (unsigned i = 0, e = NumCases; i != e; ++i) {
+ ConstantInt *CaseVal =
+ dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
+ BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
+ if (CaseVal == 0 || DestBB == 0) {
+ delete SI;
+ return Error("Invalid SWITCH record!");
+ }
+ SI->addCase(CaseVal, DestBB);
+ }
+ I = SI;
+ break;
+ }
+ case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
+ if (Record.size() < 2)
+ return Error("Invalid INDIRECTBR record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Address = getFnValueByID(Record[1], OpTy);
+ if (OpTy == 0 || Address == 0)
+ return Error("Invalid INDIRECTBR record");
+ unsigned NumDests = Record.size()-2;
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
+ InstructionList.push_back(IBI);
+ for (unsigned i = 0, e = NumDests; i != e; ++i) {
+ if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) {
+ IBI->addDestination(DestBB);
+ } else {
+ delete IBI;
+ return Error("Invalid INDIRECTBR record!");
+ }
+ }
+ I = IBI;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INVOKE: {
+ // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
+ if (Record.size() < 4) return Error("Invalid INVOKE record");
+ AttrListPtr PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
+ BasicBlock *NormalBB = getBasicBlock(Record[2]);
+ BasicBlock *UnwindBB = getBasicBlock(Record[3]);
+
+ unsigned OpNum = 4;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid INVOKE record");
+
+ const PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = !CalleeTy ? 0 :
+ dyn_cast<FunctionType>(CalleeTy->getElementType());
+
+ // Check that the right number of fixed parameters are here.
+ if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
+ Record.size() < OpNum+FTy->getNumParams())
+ return Error("Invalid INVOKE record");
+
+ SmallVector<Value*, 16> Ops;
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ if (Ops.back() == 0) return Error("Invalid INVOKE record");
+ }
+
+ if (!FTy->isVarArg()) {
+ if (Record.size() != OpNum)
+ return Error("Invalid INVOKE record");
+ } else {
+ // Read type/value pairs for varargs params.
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid INVOKE record");
+ Ops.push_back(Op);
+ }
+ }
+
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
+ InstructionList.push_back(I);
+ cast<InvokeInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo));
+ cast<InvokeInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
+ I = new UnwindInst(Context);
+ InstructionList.push_back(I);
+ break;
+ case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
+ I = new UnreachableInst(Context);
+ InstructionList.push_back(I);
+ break;
+ case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
+ if (Record.size() < 1 || ((Record.size()-1)&1))
+ return Error("Invalid PHI record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid PHI record");
+
+ PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
+ InstructionList.push_back(PN);
+
+ for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
+ Value *V = getFnValueByID(Record[1+i], Ty);
+ BasicBlock *BB = getBasicBlock(Record[2+i]);
+ if (!V || !BB) return Error("Invalid PHI record");
+ PN->addIncoming(V, BB);
+ }
+ I = PN;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
+ if (Record.size() != 4)
+ return Error("Invalid ALLOCA record");
+ const PointerType *Ty =
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ const Type *OpTy = getTypeByID(Record[1]);
+ Value *Size = getFnValueByID(Record[2], OpTy);
+ unsigned Align = Record[3];
+ if (!Ty || !Size) return Error("Invalid ALLOCA record");
+ I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid LOAD record");
+
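+      // Record[OpNum] holds the alignment (encoded as log2(align)+1, with 0
+      // meaning none) and Record[OpNum+1] holds the volatile flag.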
+ I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid STORE record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CALL: {
+ // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
+ if (Record.size() < 3)
+ return Error("Invalid CALL record");
+
+ AttrListPtr PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
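+      // Bit 0 of CCInfo is the tail-call flag; the remaining bits encode the
+      // calling convention (see the setTailCall/setCallingConv calls below).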
+
+ unsigned OpNum = 2;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid CALL record");
+
+ const PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = 0;
+ if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
+ if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
+ return Error("Invalid CALL record");
+
+ SmallVector<Value*, 16> Args;
+ // Read the fixed params.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ if (FTy->getParamType(i)->isLabelTy())
+ Args.push_back(getBasicBlock(Record[OpNum]));
+ else
+ Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ if (Args.back() == 0) return Error("Invalid CALL record");
+ }
+
+ // Read type/value pairs for varargs params.
+ if (!FTy->isVarArg()) {
+ if (OpNum != Record.size())
+ return Error("Invalid CALL record");
+ } else {
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid CALL record");
+ Args.push_back(Op);
+ }
+ }
+
+ I = CallInst::Create(Callee, Args);
+ InstructionList.push_back(I);
+ cast<CallInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo>>1));
+ cast<CallInst>(I)->setTailCall(CCInfo & 1);
+ cast<CallInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
+ if (Record.size() < 3)
+ return Error("Invalid VAARG record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Op = getFnValueByID(Record[1], OpTy);
+ const Type *ResTy = getTypeByID(Record[2]);
+ if (!OpTy || !Op || !ResTy)
+ return Error("Invalid VAARG record");
+ I = new VAArgInst(Op, ResTy);
+ InstructionList.push_back(I);
+ break;
+ }
+ }
+
+ // Add instruction to end of current BB. If there is no current BB, reject
+ // this file.
+ if (CurBB == 0) {
+ delete I;
+ return Error("Invalid instruction with no BB");
+ }
+ CurBB->getInstList().push_back(I);
+
+ // If this was a terminator instruction, move to the next block.
+ if (isa<TerminatorInst>(I)) {
+ ++CurBBNo;
+ CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ }
+
+ // Non-void values get registered in the value table for future use.
+ if (I && !I->getType()->isVoidTy())
+ ValueList.AssignValue(I, NextValueNo++);
+ }
+
+ // Check the function list for unresolved values.
+ if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
+ if (A->getParent() == 0) {
+ // We found at least one unresolved value. Nuke them all to avoid leaks.
+ for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
+ if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) {
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ delete A;
+ }
+ }
+ return Error("Never resolved value found in function!");
+ }
+ }
+
+ // FIXME: Check for unresolved forward-declared metadata references
+ // and clean up leaks.
+
+ // See if anything took the address of blocks in this function. If so,
+ // resolve them now.
+ DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
+ BlockAddrFwdRefs.find(F);
+ if (BAFRI != BlockAddrFwdRefs.end()) {
+ std::vector<BlockAddrRefTy> &RefList = BAFRI->second;
+ for (unsigned i = 0, e = RefList.size(); i != e; ++i) {
+ unsigned BlockIdx = RefList[i].first;
+ if (BlockIdx >= FunctionBBs.size())
+ return Error("Invalid blockaddress block #");
+
+ GlobalVariable *FwdRef = RefList[i].second;
+ FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
+ FwdRef->eraseFromParent();
+ }
+
+ BlockAddrFwdRefs.erase(BAFRI);
+ }
+
+ // Trim the value list down to the size it was before we parsed this function.
+ ValueList.shrinkTo(ModuleValueListSize);
+ MDValueList.shrinkTo(ModuleMDValueListSize);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GVMaterializer implementation
+//===----------------------------------------------------------------------===//
+
+
+bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ return F->isDeclaration() &&
+ DeferredFunctionInfo.count(const_cast<Function*>(F));
+ }
+ return false;
+}
+
+bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ Function *F = dyn_cast<Function>(GV);
+  // If it's not a function or is already materialized, ignore the request.
+ if (!F || !F->isMaterializable()) return false;
+
+ DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
+ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+
+ // Move the bit stream to the saved position of the deferred function body.
+ Stream.JumpToBit(DFII->second);
+
+ if (ParseFunctionBody(F)) {
+ if (ErrInfo) *ErrInfo = ErrorString;
+ return true;
+ }
+
+ // Upgrade any old intrinsic calls in the function.
+ for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
+ E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ }
+ }
+
+ return false;
+}
+
+bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F || F->isDeclaration())
+ return false;
+ return DeferredFunctionInfo.count(const_cast<Function*>(F));
+}
+
+void BitcodeReader::Dematerialize(GlobalValue *GV) {
+ Function *F = dyn_cast<Function>(GV);
+ // If this function isn't dematerializable, this is a noop.
+ if (!F || !isDematerializable(F))
+ return;
+
+ assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
+
+ // Just forget the function body, we can remat it later.
+ F->deleteBody();
+}
+
+
+bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
+ assert(M == TheModule &&
+ "Can only Materialize the Module this BitcodeReader is attached to.");
+ // Iterate over the module, deserializing any functions that are still on
+ // disk.
+ for (Module::iterator F = TheModule->begin(), E = TheModule->end();
+ F != E; ++F)
+ if (F->isMaterializable() &&
+ Materialize(F, ErrInfo))
+ return true;
+
+ // Upgrade any intrinsic calls that slipped through (should not happen!) and
+ // delete the old functions to clean up. We can't do this unless the entire
+ // module is materialized because there could always be another function body
+ // with calls to the old function.
+ for (std::vector<std::pair<Function*, Function*> >::iterator I =
+ UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ if (!I->first->use_empty())
+ I->first->replaceAllUsesWith(I->second);
+ I->first->eraseFromParent();
+ }
+ }
+ std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+
+ // Check debug info intrinsics.
+ CheckDebugInfoIntrinsics(TheModule);
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// External interface
+//===----------------------------------------------------------------------===//
+
+/// getLazyBitcodeModule - lazy function-at-a-time loading from a file.
+///
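+/// A minimal usage sketch (illustrative only; error handling abbreviated),
+/// assuming 'Buf' is a MemoryBuffer and 'Context' an LLVMContext obtained
+/// by the caller, and that the module defines a function named "foo":
+///
+///   std::string ErrMsg;
+///   Module *M = getLazyBitcodeModule(Buf, Context, &ErrMsg);
+///   if (M)
+///     if (Function *F = M->getFunction("foo"))
+///       M->Materialize(F, &ErrMsg); // Parses just this function's body.
+///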
+Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ Module *M = new Module(Buffer->getBufferIdentifier(), Context);
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ M->setMaterializer(R);
+ if (R->ParseBitcodeInto(M)) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete M; // Also deletes R.
+ return 0;
+ }
+ // Have the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(true);
+ return M;
+}
+
+/// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+/// If an error occurs, return null and fill in *ErrMsg if non-null.
+Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+ std::string *ErrMsg){
+ Module *M = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+ if (!M) return 0;
+
+ // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether
+ // there was an error.
+ static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false);
+
+ // Read in the entire module, and destroy the BitcodeReader.
+ if (M->MaterializeAllPermanently(ErrMsg)) {
+ delete M;
+ return 0;
+ }
+
+ return M;
+}
+
+std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ // Don't let the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(false);
+
+ std::string Triple("");
+ if (R->ParseTriple(Triple))
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete R;
+ return Triple;
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
new file mode 100644
index 000000000000..1b3bf1a1854a
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
@@ -0,0 +1,278 @@
+//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITCODE_READER_H
+#define BITCODE_READER_H
+
+#include "llvm/GVMaterializer.h"
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+ class MemoryBuffer;
+ class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// ResolveConstants - As we resolve forward-referenced constants, we add
+ /// information about them to this vector. This allows us to resolve them in
+  /// bulk instead of resolving them one at a time. See the code in
+ /// ResolveConstantForwardRefs for more information about this.
+ ///
+ /// The key of this vector is the placeholder constant, the value is the slot
+ /// number that holds the resolved value.
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+ LLVMContext &Context;
+public:
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) {
+ ValuePtrs.push_back(V);
+ }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, const Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, const Type *Ty);
+
+ void AssignValue(Value *V, unsigned Idx);
+
+ /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+ /// resolves any forward references.
+ void ResolveConstantForwardRefs();
+};
+
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderMDValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderMDValueList {
+ std::vector<WeakVH> MDValuePtrs;
+
+ LLVMContext &Context;
+public:
+ BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MDValuePtrs.size(); }
+ void resize(unsigned N) { MDValuePtrs.resize(N); }
+ void push_back(Value *V) { MDValuePtrs.push_back(V); }
+ void clear() { MDValuePtrs.clear(); }
+ Value *back() const { return MDValuePtrs.back(); }
+ void pop_back() { MDValuePtrs.pop_back(); }
+ bool empty() const { return MDValuePtrs.empty(); }
+
+ Value *operator[](unsigned i) const {
+ assert(i < MDValuePtrs.size());
+ return MDValuePtrs[i];
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ MDValuePtrs.resize(N);
+ }
+
+ Value *getValueFwdRef(unsigned Idx);
+ void AssignValue(Value *V, unsigned Idx);
+};
+
+class BitcodeReader : public GVMaterializer {
+ LLVMContext &Context;
+ Module *TheModule;
+ MemoryBuffer *Buffer;
+ bool BufferOwned;
+ BitstreamReader StreamFile;
+ BitstreamCursor Stream;
+
+ const char *ErrorString;
+
+ std::vector<Type*> TypeList;
+ BitcodeReaderValueList ValueList;
+ BitcodeReaderMDValueList MDValueList;
+ SmallVector<Instruction *, 64> InstructionList;
+
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+
+ /// MAttributes - The set of attributes by index. Index zero in the
+ /// file is for null, and is thus not represented here. As such all indices
+ /// are off by one.
+ std::vector<AttrListPtr> MAttributes;
+
+ /// FunctionBBs - While parsing a function body, this is a list of the basic
+ /// blocks for the function.
+ std::vector<BasicBlock*> FunctionBBs;
+
+ // When reading the module header, this list is populated with functions that
+ // have bodies later in the file.
+ std::vector<Function*> FunctionsWithBodies;
+
+ // When intrinsic functions are encountered which require upgrading they are
+ // stored here with their replacement function.
+ typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+ UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
+
+ // After the module header has been read, the FunctionsWithBodies list is
+ // reversed. This keeps track of whether we've done this yet.
+ bool HasReversedFunctionsWithBodies;
+
+ /// DeferredFunctionInfo - When function bodies are initially scanned, this
+  /// map contains info about where to find the deferred function body in
+  /// the stream.
+ DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+ /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
+ /// are resolved lazily when functions are loaded.
+ typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
+ DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
+public:
+ explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
+ : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
+ ErrorString(0), ValueList(C), MDValueList(C) {
+ HasReversedFunctionsWithBodies = false;
+ }
+ ~BitcodeReader() {
+ FreeState();
+ }
+
+ void FreeState();
+
+ /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
+ /// when the reader is destroyed.
+ void setBufferOwned(bool Owned) { BufferOwned = Owned; }
+
+ virtual bool isMaterializable(const GlobalValue *GV) const;
+ virtual bool isDematerializable(const GlobalValue *GV) const;
+ virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
+ virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0);
+ virtual void Dematerialize(GlobalValue *GV);
+
+ bool Error(const char *Str) {
+ ErrorString = Str;
+ return true;
+ }
+ const char *getErrorString() const { return ErrorString; }
+
+ /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred.
+ bool ParseBitcodeInto(Module *M);
+
+ /// @brief Cheap mechanism to just extract module triple
+ /// @returns true if an error occurred.
+ bool ParseTriple(std::string &Triple);
+private:
+ Type *getTypeByID(unsigned ID);
+ Type *getTypeByIDOrNull(unsigned ID);
+ Value *getFnValueByID(unsigned ID, const Type *Ty) {
+ if (Ty && Ty->isMetadataTy())
+ return MDValueList.getValueFwdRef(ID);
+ return ValueList.getValueFwdRef(ID, Ty);
+ }
+ BasicBlock *getBasicBlock(unsigned ID) const {
+ if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+ return FunctionBBs[ID];
+ }
+ AttrListPtr getAttributes(unsigned i) const {
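+    // Attribute index 0 means "no attributes"; for i == 0 the unsigned wrap
+    // of i-1 makes the bounds check below fail, yielding an empty list.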
+ if (i-1 < MAttributes.size())
+ return MAttributes[i-1];
+ return AttrListPtr();
+ }
+
+ /// getValueTypePair - Read a value/type pair out of the specified record from
+ /// slot 'Slot'. Increment Slot past the number of slots used in the record.
+ /// Return true on failure.
+ bool getValueTypePair(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ if (ValNo < InstNum) {
+ // If this is not a forward reference, just return the value we already
+ // have.
+ ResVal = getFnValueByID(ValNo, 0);
+ return ResVal == 0;
+ } else if (Slot == Record.size()) {
+ return true;
+ }
+
+ unsigned TypeNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ return ResVal == 0;
+ }
+ bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ const Type *Ty, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, Ty);
+ return ResVal == 0;
+ }
+
+
+ bool ParseModule();
+ bool ParseAttributeBlock();
+ bool ParseTypeTable();
+ bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
+ bool ParseTypeTableBody();
+
+ bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
+ bool ParseValueSymbolTable();
+ bool ParseConstants();
+ bool RememberAndSkipFunctionBody();
+ bool ParseFunctionBody(Function *F);
+ bool ResolveGlobalAndAliasInits();
+ bool ParseMetadata();
+ bool ParseMetadataAttachment();
+ bool ParseModuleTriple(std::string &Triple);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
new file mode 100644
index 000000000000..428842246331
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
@@ -0,0 +1,40 @@
+//===-- BitWriter.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
+ std::string ErrorInfo;
+ raw_fd_ostream OS(Path, ErrorInfo,
+ raw_fd_ostream::F_Binary);
+
+ if (!ErrorInfo.empty())
+ return -1;
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return 0;
+}
+
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+ int Unbuffered) {
+ raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return 0;
+}
+
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
+ return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
+}
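A minimal sketch of driving this C API, assuming the era's llvm-c Core entry points LLVMModuleCreateWithName and LLVMDisposeModule:

#include "llvm-c/Core.h"
#include "llvm-c/BitWriter.h"

int main(void) {
  /* Create an empty module and serialize it to a .bc file on disk. */
  LLVMModuleRef M = LLVMModuleCreateWithName("example");
  int Failed = LLVMWriteBitcodeToFile(M, "example.bc");
  LLVMDisposeModule(M);
  return Failed ? 1 : 0; /* non-zero means the output file could not be opened */
}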
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
new file mode 100644
index 000000000000..85d67ce62b9f
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -0,0 +1,1628 @@
+//===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Program.h"
+#include <cctype>
+#include <map>
+using namespace llvm;
+
+/// These are manifest constants used by the bitcode writer. They do not need to
+/// be kept in sync with the reader, but need to be consistent within this file.
+enum {
+ CurVersion = 0,
+
+ // VALUE_SYMTAB_BLOCK abbrev IDs.
+ VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ VST_ENTRY_7_ABBREV,
+ VST_ENTRY_6_ABBREV,
+ VST_BBENTRY_6_ABBREV,
+
+ // CONSTANTS_BLOCK abbrev IDs.
+ CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ CONSTANTS_INTEGER_ABBREV,
+ CONSTANTS_CE_CAST_Abbrev,
+ CONSTANTS_NULL_Abbrev,
+
+ // FUNCTION_BLOCK abbrev IDs.
+ FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_BINOP_ABBREV,
+ FUNCTION_INST_BINOP_FLAGS_ABBREV,
+ FUNCTION_INST_CAST_ABBREV,
+ FUNCTION_INST_RET_VOID_ABBREV,
+ FUNCTION_INST_RET_VAL_ABBREV,
+ FUNCTION_INST_UNREACHABLE_ABBREV
+};
+
+
+static unsigned GetEncodedCastOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown cast instruction!");
+ case Instruction::Trunc : return bitc::CAST_TRUNC;
+ case Instruction::ZExt : return bitc::CAST_ZEXT;
+ case Instruction::SExt : return bitc::CAST_SEXT;
+ case Instruction::FPToUI : return bitc::CAST_FPTOUI;
+ case Instruction::FPToSI : return bitc::CAST_FPTOSI;
+ case Instruction::UIToFP : return bitc::CAST_UITOFP;
+ case Instruction::SIToFP : return bitc::CAST_SITOFP;
+ case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
+ case Instruction::FPExt : return bitc::CAST_FPEXT;
+ case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
+ case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
+ case Instruction::BitCast : return bitc::CAST_BITCAST;
+ }
+}
+
+static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown binary instruction!");
+ case Instruction::Add:
+ case Instruction::FAdd: return bitc::BINOP_ADD;
+ case Instruction::Sub:
+ case Instruction::FSub: return bitc::BINOP_SUB;
+ case Instruction::Mul:
+ case Instruction::FMul: return bitc::BINOP_MUL;
+ case Instruction::UDiv: return bitc::BINOP_UDIV;
+ case Instruction::FDiv:
+ case Instruction::SDiv: return bitc::BINOP_SDIV;
+ case Instruction::URem: return bitc::BINOP_UREM;
+ case Instruction::FRem:
+ case Instruction::SRem: return bitc::BINOP_SREM;
+ case Instruction::Shl: return bitc::BINOP_SHL;
+ case Instruction::LShr: return bitc::BINOP_LSHR;
+ case Instruction::AShr: return bitc::BINOP_ASHR;
+ case Instruction::And: return bitc::BINOP_AND;
+ case Instruction::Or: return bitc::BINOP_OR;
+ case Instruction::Xor: return bitc::BINOP_XOR;
+ }
+}
+
+static void WriteStringRecord(unsigned Code, StringRef Str,
+ unsigned AbbrevToUse, BitstreamWriter &Stream) {
+ SmallVector<unsigned, 64> Vals;
+
+ // Code: [strchar x N]
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+ AbbrevToUse = 0;
+ Vals.push_back(Str[i]);
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+// Emit information about parameter attributes.
+static void WriteAttributeTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
+ if (Attrs.empty()) return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ const AttrListPtr &A = Attrs[i];
+ for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
+ const AttributeWithIndex &PAWI = A.getSlot(i);
+ Record.push_back(PAWI.Index);
+
+ // FIXME: remove in LLVM 3.0
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a
+ // 5-bit log2 encoded value. Shift the bits above the alignment up by
+ // 11 bits.
+ uint64_t FauxAttr = PAWI.Attrs & 0xffff;
+ if (PAWI.Attrs & Attribute::Alignment)
+ FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16);
+ FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11;
+
+ Record.push_back(FauxAttr);
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
+static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
+ SmallVector<uint64_t, 64> TypeVals;
+
+ // Abbrev for TYPE_CODE_POINTER.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
+ unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_FUNCTION.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
+ Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_ANON.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAME.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAMED.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+
+
+ // Abbrev for TYPE_CODE_ARRAY.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit an entry count so the reader can reserve space.
+ TypeVals.push_back(TypeList.size());
+ Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+ TypeVals.clear();
+
+ // Loop over all of the types, emitting each in turn.
+ for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
+ const Type *T = TypeList[i];
+ int AbbrevToUse = 0;
+ unsigned Code = 0;
+
+ switch (T->getTypeID()) {
+ default: llvm_unreachable("Unknown type!");
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::IntegerTyID:
+ // INTEGER: [width]
+ Code = bitc::TYPE_CODE_INTEGER;
+ TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+ break;
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(T);
+ // POINTER: [pointee type, address space]
+ Code = bitc::TYPE_CODE_POINTER;
+ TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+ unsigned AddressSpace = PTy->getAddressSpace();
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0) AbbrevToUse = PtrAbbrev;
+ break;
+ }
+ case Type::FunctionTyID: {
+ const FunctionType *FT = cast<FunctionType>(T);
+ // FUNCTION: [isvararg, attrid, retty, paramty x N]
+ Code = bitc::TYPE_CODE_FUNCTION;
+ TypeVals.push_back(FT->isVarArg());
+ TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0
+ TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+ TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
+ AbbrevToUse = FunctionAbbrev;
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType *ST = cast<StructType>(T);
+ // STRUCT: [ispacked, eltty x N]
+ TypeVals.push_back(ST->isPacked());
+ // Output all of the element types.
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ TypeVals.push_back(VE.getTypeID(*I));
+
+ if (ST->isAnonymous()) {
+ Code = bitc::TYPE_CODE_STRUCT_ANON;
+ AbbrevToUse = StructAnonAbbrev;
+ } else {
+ if (ST->isOpaque()) {
+ Code = bitc::TYPE_CODE_OPAQUE;
+ } else {
+ Code = bitc::TYPE_CODE_STRUCT_NAMED;
+ AbbrevToUse = StructNamedAbbrev;
+ }
+
+ // Emit the name if it is present.
+ if (!ST->getName().empty())
+ WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ StructNameAbbrev, Stream);
+ }
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType *AT = cast<ArrayType>(T);
+ // ARRAY: [numelts, eltty]
+ Code = bitc::TYPE_CODE_ARRAY;
+ TypeVals.push_back(AT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(AT->getElementType()));
+ AbbrevToUse = ArrayAbbrev;
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType *VT = cast<VectorType>(T);
+ // VECTOR [numelts, eltty]
+ Code = bitc::TYPE_CODE_VECTOR;
+ TypeVals.push_back(VT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+ break;
+ }
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+ TypeVals.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static unsigned getEncodedLinkage(const GlobalValue *GV) {
+ switch (GV->getLinkage()) {
+ default: llvm_unreachable("Invalid linkage!");
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::LinkerPrivateLinkage: return 13;
+ case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
+ }
+}
+
+static unsigned getEncodedVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default: llvm_unreachable("Invalid visibility!");
+ case GlobalValue::DefaultVisibility: return 0;
+ case GlobalValue::HiddenVisibility: return 1;
+ case GlobalValue::ProtectedVisibility: return 2;
+ }
+}
+
+// Emit top-level description of module, including target triple, inline asm,
+// descriptors for global variables, and function prototype info.
+static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ // Emit the list of dependent libraries for the Module.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
+
+ // Emit various pieces of data attached to a module.
+ if (!M->getTargetTriple().empty())
+ WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
+ 0/*TODO*/, Stream);
+ if (!M->getDataLayout().empty())
+ WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, M->getDataLayout(),
+ 0/*TODO*/, Stream);
+ if (!M->getModuleInlineAsm().empty())
+ WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(),
+ 0/*TODO*/, Stream);
+
+ // Emit information about sections and GC, computing how many there are. Also
+ // compute the maximum alignment value.
+ std::map<std::string, unsigned> SectionMap;
+ std::map<std::string, unsigned> GCMap;
+ unsigned MaxAlignment = 0;
+ unsigned MaxGlobalType = 0;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
+ MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
+
+ if (!GV->hasSection()) continue;
+ // Give section names unique IDs.
+ unsigned &Entry = SectionMap[GV->getSection()];
+ if (Entry != 0) continue;
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ MaxAlignment = std::max(MaxAlignment, F->getAlignment());
+ if (F->hasSection()) {
+ // Give section names unique IDs.
+ unsigned &Entry = SectionMap[F->getSection()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ }
+ if (F->hasGC()) {
+ // Same for GC names.
+ unsigned &Entry = GCMap[F->getGC()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+ 0/*TODO*/, Stream);
+ Entry = GCMap.size();
+ }
+ }
+ }
+
+ // Emit abbrev for globals, now that we know # sections and max alignment.
+ unsigned SimpleGVarAbbrev = 0;
+ if (!M->global_empty()) {
+ // Add an abbrev for common globals with no visibility or thread localness.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxGlobalType+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constant.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Linkage.
+ if (MaxAlignment == 0) // Alignment.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else {
+ unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxEncAlignment+1)));
+ }
+ if (SectionMap.empty()) // Section.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(SectionMap.size()+1)));
+ // Don't bother emitting vis + thread local.
+ SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ // Emit the global variable information.
+ SmallVector<unsigned, 64> Vals;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ unsigned AbbrevToUse = 0;
+
+ // GLOBALVAR: [type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
+ Vals.push_back(VE.getTypeID(GV->getType()));
+ Vals.push_back(GV->isConstant());
+ Vals.push_back(GV->isDeclaration() ? 0 :
+ (VE.getValueID(GV->getInitializer()) + 1));
+ Vals.push_back(getEncodedLinkage(GV));
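+ // Alignment is encoded as log2(alignment) + 1; zero means no alignment
+ // was specified.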
+ Vals.push_back(Log2_32(GV->getAlignment())+1);
+ Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
+ if (GV->isThreadLocal() ||
+ GV->getVisibility() != GlobalValue::DefaultVisibility ||
+ GV->hasUnnamedAddr()) {
+ Vals.push_back(getEncodedVisibility(GV));
+ Vals.push_back(GV->isThreadLocal());
+ Vals.push_back(GV->hasUnnamedAddr());
+ } else {
+ AbbrevToUse = SimpleGVarAbbrev;
+ }
+
+ Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the function proto information.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ // FUNCTION: [type, callingconv, isproto, paramattr,
+ // linkage, alignment, section, visibility, gc, unnamed_addr]
+ Vals.push_back(VE.getTypeID(F->getType()));
+ Vals.push_back(F->getCallingConv());
+ Vals.push_back(F->isDeclaration());
+ Vals.push_back(getEncodedLinkage(F));
+ Vals.push_back(VE.getAttributeID(F->getAttributes()));
+ Vals.push_back(Log2_32(F->getAlignment())+1);
+ Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+ Vals.push_back(getEncodedVisibility(F));
+ Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+ Vals.push_back(F->hasUnnamedAddr());
+
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the alias information.
+ for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
+ AI != E; ++AI) {
+ Vals.push_back(VE.getTypeID(AI->getType()));
+ Vals.push_back(VE.getValueID(AI->getAliasee()));
+ Vals.push_back(getEncodedLinkage(AI));
+ Vals.push_back(getEncodedVisibility(AI));
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+}
+
+static uint64_t GetOptimizationFlags(const Value *V) {
+ uint64_t Flags = 0;
+
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(V)) {
+ if (OBO->hasNoSignedWrap())
+ Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+ if (OBO->hasNoUnsignedWrap())
+ Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+ } else if (const PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(V)) {
+ if (PEO->isExact())
+ Flags |= 1 << bitc::PEO_EXACT;
+ }
+
+ return Flags;
+}
+
+static void WriteMDNode(const MDNode *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVector<uint64_t, 64> &Record) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i)) {
+ Record.push_back(VE.getTypeID(N->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(N->getOperand(i)));
+ } else {
+ Record.push_back(VE.getTypeID(Type::getVoidTy(N->getContext())));
+ Record.push_back(0);
+ }
+ }
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+ bitc::METADATA_NODE;
+ Stream.EmitRecord(MDCode, Record, 0);
+ Record.clear();
+}
+
+static void WriteModuleMetadata(const Module *M,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+ bool StartedMetadataBlock = false;
+ unsigned MDSAbbrev = 0;
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+
+ if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
+ if (!N->isFunctionLocal() || !N->getFunction()) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+ } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ // Abbrev for METADATA_STRING.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ MDSAbbrev = Stream.EmitAbbrev(Abbv);
+ StartedMetadataBlock = true;
+ }
+
+ // Code: [strchar x N]
+ Record.append(MDS->begin(), MDS->end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
+ Record.clear();
+ }
+ }
+
+ // Write named metadata.
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+
+ // Write name.
+ StringRef Str = NMD->getName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Record.push_back(Str[i]);
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+ Record.clear();
+
+ // Write named metadata operands.
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(NMD->getOperand(i)));
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
+ Record.clear();
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteFunctionLocalMetadata(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ bool StartedMetadataBlock = false;
+ SmallVector<uint64_t, 64> Record;
+ const SmallVector<const MDNode *, 8> &Vals = VE.getFunctionLocalMDValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (const MDNode *N = Vals[i])
+ if (N->isFunctionLocal() && N->getFunction() == &F) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteMetadataAttachment(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata attachments
+ // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ MDs.clear();
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+
+ // If no metadata, ignore instruction.
+ if (MDs.empty()) continue;
+
+ Record.push_back(VE.getInstructionID(I));
+
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ Record.push_back(MDs[i].first);
+ Record.push_back(VE.getValueID(MDs[i].second));
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata kinds
+ // METADATA_KIND - [n x [id, name]]
+ SmallVector<StringRef, 4> Names;
+ M->getMDKindNames(Names);
+
+ if (Names.empty()) return;
+
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
+ Record.push_back(MDKindID);
+ StringRef KName = Names[MDKindID];
+ Record.append(KName.begin(), KName.end());
+
+ Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteConstants(unsigned FirstVal, unsigned LastVal,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream, bool isGlobal) {
+ if (FirstVal == LastVal) return;
+
+ Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+ unsigned AggregateAbbrev = 0;
+ unsigned String8Abbrev = 0;
+ unsigned CString7Abbrev = 0;
+ unsigned CString6Abbrev = 0;
+ // If this is a constant pool for the module, emit module-specific abbrevs.
+ if (isGlobal) {
+ // Abbrev for CST_CODE_AGGREGATE.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
+ AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for CST_CODE_STRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ String8Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ CString7Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ CString6Abbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ SmallVector<uint64_t, 64> Record;
+
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+ const Type *LastTy = 0;
+ for (unsigned i = FirstVal; i != LastVal; ++i) {
+ const Value *V = Vals[i].first;
+ // If we need to switch types, do so now.
+ if (V->getType() != LastTy) {
+ LastTy = V->getType();
+ Record.push_back(VE.getTypeID(LastTy));
+ Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+ CONSTANTS_SETTYPE_ABBREV);
+ Record.clear();
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Record.push_back(unsigned(IA->hasSideEffects()) |
+ unsigned(IA->isAlignStack()) << 1);
+
+ // Add the asm string.
+ const std::string &AsmStr = IA->getAsmString();
+ Record.push_back(AsmStr.size());
+ for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
+ Record.push_back(AsmStr[i]);
+
+ // Add the constraint string.
+ const std::string &ConstraintStr = IA->getConstraintString();
+ Record.push_back(ConstraintStr.size());
+ for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
+ Record.push_back(ConstraintStr[i]);
+ Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+ Record.clear();
+ continue;
+ }
+ const Constant *C = cast<Constant>(V);
+ unsigned Code = -1U;
+ unsigned AbbrevToUse = 0;
+ if (C->isNullValue()) {
+ Code = bitc::CST_CODE_NULL;
+ } else if (isa<UndefValue>(C)) {
+ Code = bitc::CST_CODE_UNDEF;
+ } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+ if (IV->getBitWidth() <= 64) {
+ uint64_t V = IV->getSExtValue();
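+ // Fold the sign into the low bit: a non-negative value V is emitted as
+ // V << 1, a negative one as (-V << 1) | 1 (e.g. 5 -> 10, -3 -> 7).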
+ if ((int64_t)V >= 0)
+ Record.push_back(V << 1);
+ else
+ Record.push_back((-V << 1) | 1);
+ Code = bitc::CST_CODE_INTEGER;
+ AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+ } else { // Wide integers, > 64 bits in size.
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
+ unsigned NWords = IV->getValue().getActiveWords();
+ const uint64_t *RawWords = IV->getValue().getRawData();
+ for (unsigned i = 0; i != NWords; ++i) {
+ int64_t V = RawWords[i];
+ if (V >= 0)
+ Record.push_back(V << 1);
+ else
+ Record.push_back((-V << 1) | 1);
+ }
+ Code = bitc::CST_CODE_WIDE_INTEGER;
+ }
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Code = bitc::CST_CODE_FLOAT;
+ const Type *Ty = CFP->getType();
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+ Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ } else if (Ty->isX86_FP80Ty()) {
+ // A named APInt is needed to prevent premature destruction of the temporary.
+ // The bits are not in the same order as a normal i80 APInt; compensate.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back((p[1] << 48) | (p[0] >> 16));
+ Record.push_back(p[0] & 0xffffLL);
+ } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back(p[0]);
+ Record.push_back(p[1]);
+ } else {
+ assert (0 && "Unknown FP type!");
+ }
+ } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ const ConstantArray *CA = cast<ConstantArray>(C);
+ // Emit constant strings specially.
+ unsigned NumOps = CA->getNumOperands();
+ // If this is a null-terminated string, use the denser CSTRING encoding.
+ if (CA->getOperand(NumOps-1)->isNullValue()) {
+ Code = bitc::CST_CODE_CSTRING;
+ --NumOps; // Don't encode the null, which isn't allowed by char6.
+ } else {
+ Code = bitc::CST_CODE_STRING;
+ AbbrevToUse = String8Abbrev;
+ }
+ bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+ bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned char V = cast<ConstantInt>(CA->getOperand(i))->getZExtValue();
+ Record.push_back(V);
+ isCStr7 &= (V & 128) == 0;
+ if (isCStrChar6)
+ isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+ }
+
+ if (isCStrChar6)
+ AbbrevToUse = CString6Abbrev;
+ else if (isCStr7)
+ AbbrevToUse = CString7Abbrev;
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
+ isa<ConstantVector>(V)) {
+ Code = bitc::CST_CODE_AGGREGATE;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ AbbrevToUse = AggregateAbbrev;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ default:
+ if (Instruction::isCast(CE->getOpcode())) {
+ Code = bitc::CST_CODE_CE_CAST;
+ Record.push_back(GetEncodedCastOpcode(CE->getOpcode()));
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+ } else {
+ assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_BINOP;
+ Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ uint64_t Flags = GetOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
+ }
+ break;
+ case Instruction::GetElementPtr:
+ Code = bitc::CST_CODE_CE_GEP;
+ if (cast<GEPOperator>(C)->isInBounds())
+ Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ }
+ break;
+ case Instruction::Select:
+ Code = bitc::CST_CODE_CE_SELECT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::CST_CODE_CE_EXTRACTELT;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::CST_CODE_CE_INSERTELT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ // If the return type and argument types are the same, this is a
+ // standard shufflevector instruction. If the types are different,
+ // then the shuffle is widening or truncating the input vectors, and
+ // the argument type must also be encoded.
+ if (C->getType() == C->getOperand(0)->getType()) {
+ Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+ } else {
+ Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ }
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ Code = bitc::CST_CODE_CE_CMP;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(CE->getPredicate());
+ break;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Code = bitc::CST_CODE_BLOCKADDRESS;
+ Record.push_back(VE.getTypeID(BA->getFunction()->getType()));
+ Record.push_back(VE.getValueID(BA->getFunction()));
+ Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
+ } else {
+#ifndef NDEBUG
+ C->dump();
+#endif
+ llvm_unreachable("Unknown constant!");
+ }
+ Stream.EmitRecord(Code, Record, AbbrevToUse);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleConstants(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+ // Find the first constant to emit, which is the first non-globalvalue value.
+ // We know globalvalues have been emitted by WriteModuleInfo.
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ if (!isa<GlobalValue>(Vals[i].first)) {
+ WriteConstants(i, Vals.size(), VE, Stream, true);
+ return;
+ }
+ }
+}
+
+/// PushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is greater than or
+/// equal to the instruction ID, it is a forward reference, and the type ID is
+/// emitted as well.
+static bool PushValueAndType(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(ValID);
+ if (ValID >= InstID) {
+ Vals.push_back(VE.getTypeID(V->getType()));
+ return true;
+ }
+ return false;
+}
+
+/// WriteInstruction - Emit an instruction to the specified stream.
+static void WriteInstruction(const Instruction &I, unsigned InstID,
+ ValueEnumerator &VE, BitstreamWriter &Stream,
+ SmallVector<unsigned, 64> &Vals) {
+ unsigned Code = 0;
+ unsigned AbbrevToUse = 0;
+ VE.setInstructionID(&I);
+ switch (I.getOpcode()) {
+ default:
+ if (Instruction::isCast(I.getOpcode())) {
+ Code = bitc::FUNC_CODE_INST_CAST;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(GetEncodedCastOpcode(I.getOpcode()));
+ } else {
+ assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+ Code = bitc::FUNC_CODE_INST_BINOP;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+ uint64_t Flags = GetOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV)
+ AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ Code = bitc::FUNC_CODE_INST_GEP;
+ if (cast<GEPOperator>(&I)->isInBounds())
+ Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP;
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractValue: {
+ Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::InsertValue: {
+ Code = bitc::FUNC_CODE_INST_INSERTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::Select:
+ Code = bitc::FUNC_CODE_INST_VSELECT;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::FUNC_CODE_INST_INSERTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // compare returning Int1Ty or vector of Int1Ty
+ Code = bitc::FUNC_CODE_INST_CMP2;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(cast<CmpInst>(I).getPredicate());
+ break;
+
+ case Instruction::Ret:
+ {
+ Code = bitc::FUNC_CODE_INST_RET;
+ unsigned NumOperands = I.getNumOperands();
+ if (NumOperands == 0)
+ AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV;
+ else if (NumOperands == 1) {
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV;
+ } else {
+ for (unsigned i = 0, e = NumOperands; i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ }
+ }
+ break;
+ case Instruction::Br:
+ {
+ Code = bitc::FUNC_CODE_INST_BR;
+ BranchInst &II = cast<BranchInst>(I);
+ Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+ if (II.isConditional()) {
+ Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+ Vals.push_back(VE.getValueID(II.getCondition()));
+ }
+ }
+ break;
+ case Instruction::Switch:
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+ case Instruction::IndirectBr:
+ Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+
+ case Instruction::Invoke: {
+ const InvokeInst *II = cast<InvokeInst>(&I);
+ const Value *Callee(II->getCalledValue());
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Code = bitc::FUNC_CODE_INST_INVOKE;
+
+ Vals.push_back(VE.getAttributeID(II->getAttributes()));
+ Vals.push_back(II->getCallingConv());
+ Vals.push_back(VE.getValueID(II->getNormalDest()));
+ Vals.push_back(VE.getValueID(II->getUnwindDest()));
+ PushValueAndType(Callee, InstID, Vals, VE);
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3;
+ i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg
+ }
+ break;
+ }
+ case Instruction::Unwind:
+ Code = bitc::FUNC_CODE_INST_UNWIND;
+ break;
+ case Instruction::Unreachable:
+ Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+ AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
+ break;
+
+ case Instruction::PHI: {
+ const PHINode &PN = cast<PHINode>(I);
+ Code = bitc::FUNC_CODE_INST_PHI;
+ Vals.push_back(VE.getTypeID(PN.getType()));
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
+ Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ }
+ break;
+ }
+
+ case Instruction::Alloca:
+ Code = bitc::FUNC_CODE_INST_ALLOCA;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+ Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+ break;
+
+ case Instruction::Load:
+ Code = bitc::FUNC_CODE_INST_LOAD;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
+ AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+
+ Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+ Vals.push_back(cast<LoadInst>(I).isVolatile());
+ break;
+ case Instruction::Store:
+ Code = bitc::FUNC_CODE_INST_STORE;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
+ Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+ Vals.push_back(cast<StoreInst>(I).isVolatile());
+ break;
+ case Instruction::Call: {
+ const CallInst &CI = cast<CallInst>(I);
+ const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ Code = bitc::FUNC_CODE_INST_CALL;
+
+ Vals.push_back(VE.getAttributeID(CI.getAttributes()));
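+ // Pack the calling convention together with the tail-call flag in bit 0.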
+ Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
+ PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
+ i != e; ++i)
+ PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs
+ }
+ break;
+ }
+ case Instruction::VAArg:
+ Code = bitc::FUNC_CODE_INST_VAARG;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // valist.
+ Vals.push_back(VE.getTypeID(I.getType())); // restype.
+ break;
+ }
+
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+ Vals.clear();
+}
+
+// Emit names for globals/functions etc.
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ if (VST.empty()) return;
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ // FIXME: Set up the abbrev, we know how many values there are!
+ // FIXME: We know if the type names can use 7-bit ascii.
+ SmallVector<unsigned, 64> NameVals;
+
+ for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
+ SI != SE; ++SI) {
+
+ const ValueName &Name = *SI;
+
+ // Figure out the encoding to use for the name.
+ bool is7Bit = true;
+ bool isChar6 = true;
+ for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
+ C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128) {
+ is7Bit = false;
+ break; // don't bother scanning the rest.
+ }
+ }
+
+ unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+ // VST_ENTRY: [valueid, namechar x N]
+ // VST_BBENTRY: [bbid, namechar x N]
+ unsigned Code;
+ if (isa<BasicBlock>(SI->getValue())) {
+ Code = bitc::VST_CODE_BBENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else {
+ Code = bitc::VST_CODE_ENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_ENTRY_6_ABBREV;
+ else if (is7Bit)
+ AbbrevToUse = VST_ENTRY_7_ABBREV;
+ }
+
+ NameVals.push_back(VE.getValueID(SI->getValue()));
+ for (const char *P = Name.getKeyData(),
+ *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
+ NameVals.push_back((unsigned char)*P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ Stream.ExitBlock();
+}
+
+/// WriteFunction - Emit a function body to the module stream.
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+ VE.incorporateFunction(F);
+
+ SmallVector<unsigned, 64> Vals;
+
+ // Emit the number of basic blocks, so the reader can create them ahead of
+ // time.
+ Vals.push_back(VE.getBasicBlocks().size());
+ Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+ Vals.clear();
+
+ // If there are function-local constants, emit them now.
+ unsigned CstStart, CstEnd;
+ VE.getFunctionConstantRange(CstStart, CstEnd);
+ WriteConstants(CstStart, CstEnd, VE, Stream, false);
+
+ // If there is function-local metadata, emit it now.
+ WriteFunctionLocalMetadata(F, VE, Stream);
+
+ // Keep a running idea of what the instruction ID is.
+ unsigned InstID = CstEnd;
+
+ bool NeedsMetadataAttachment = false;
+
+ DebugLoc LastDL;
+
+ // Finally, emit all the instructions, in order.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ WriteInstruction(*I, InstID, VE, Stream, Vals);
+
+ if (!I->getType()->isVoidTy())
+ ++InstID;
+
+ // If the instruction has metadata, write a metadata attachment later.
+ NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+
+ // If the instruction has a debug location, emit it.
+ DebugLoc DL = I->getDebugLoc();
+ if (DL.isUnknown()) {
+ // Nothing to do.
+ } else if (DL == LastDL) {
+ // Just repeat the same debug loc as last time.
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+ } else {
+ MDNode *Scope, *IA;
+ DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
+
+ Vals.push_back(DL.getLine());
+ Vals.push_back(DL.getCol());
+ Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
+ Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Vals.clear();
+
+ LastDL = DL;
+ }
+ }
+
+ // Emit names for all the instructions etc.
+ WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
+
+ if (NeedsMetadataAttachment)
+ WriteMetadataAttachment(F, VE, Stream);
+ VE.purgeFunction();
+ Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ // We only want to emit block info records for blocks that have multiple
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
+ // blocks can define their abbrevs inline.
+ Stream.EnterBlockInfoBlock(2);
+
+ { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_8_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // 7-bit fixed width VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_7_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_6_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_BBENTRY_6_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+
+
+ { // SETTYPE abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_SETTYPE_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // INTEGER abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_INTEGER_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // CE_CAST abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_CE_CAST_Abbrev)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // NULL abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_NULL_Abbrev)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ // FIXME: This should only use space for first class types!
+
+ { // INST_LOAD abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_LOAD_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_CAST abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_CAST_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VOID_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VAL_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ Stream.ExitBlock();
+}
+
+
+/// WriteModule - Emit the specified module to the bitstream.
+static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ // Emit the version number if it is non-zero.
+ if (CurVersion) {
+ SmallVector<unsigned, 1> Vals;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+ }
+
+ // Analyze the module, enumerating globals, functions, etc.
+ ValueEnumerator VE(M);
+
+ // Emit blockinfo, which defines the standard abbreviations etc.
+ WriteBlockInfo(VE, Stream);
+
+ // Emit information about parameter attributes.
+ WriteAttributeTable(VE, Stream);
+
+ // Emit information describing all of the types in the module.
+ WriteTypeTable(VE, Stream);
+
+ // Emit top-level description of module, including target triple, inline asm,
+ // descriptors for global variables, and function prototype info.
+ WriteModuleInfo(M, VE, Stream);
+
+ // Emit constants.
+ WriteModuleConstants(VE, Stream);
+
+ // Emit metadata.
+ WriteModuleMetadata(M, VE, Stream);
+
+ // Emit function bodies.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+ if (!F->isDeclaration())
+ WriteFunction(*F, VE, Stream);
+
+ // Emit metadata.
+ WriteModuleMetadataStore(M, Stream);
+
+ // Emit names for globals/functions etc.
+ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+
+ Stream.ExitBlock();
+}
+
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver. To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+/// struct bc_header {
+/// uint32_t Magic; // 0x0B17C0DE
+/// uint32_t Version; // Version, currently always 0.
+/// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+/// uint32_t BitcodeSize; // Size of traditional bitcode file.
+/// uint32_t CPUType; // CPU specifier.
+/// ... potentially more later ...
+/// };
+enum {
+ DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+ DarwinBCHeaderSize = 5*4
+};
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
+ unsigned CPUType = ~0U;
+
+ // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
+ // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic
+ // number from /usr/include/mach/machine.h. It is ok to reproduce the
+ // specific constants here because they are implicitly part of the Darwin ABI.
+ enum {
+ DARWIN_CPU_ARCH_ABI64 = 0x01000000,
+ DARWIN_CPU_TYPE_X86 = 7,
+ DARWIN_CPU_TYPE_ARM = 12,
+ DARWIN_CPU_TYPE_POWERPC = 18
+ };
+
+ Triple::ArchType Arch = TT.getArch();
+ if (Arch == Triple::x86_64)
+ CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+ else if (Arch == Triple::x86)
+ CPUType = DARWIN_CPU_TYPE_X86;
+ else if (Arch == Triple::ppc)
+ CPUType = DARWIN_CPU_TYPE_POWERPC;
+ else if (Arch == Triple::ppc64)
+ CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+ else if (Arch == Triple::arm || Arch == Triple::thumb)
+ CPUType = DARWIN_CPU_TYPE_ARM;
+
+ // Traditional Bitcode starts after header.
+ unsigned BCOffset = DarwinBCHeaderSize;
+
+ Stream.Emit(0x0B17C0DE, 32);
+ Stream.Emit(0 , 32); // Version.
+ Stream.Emit(BCOffset , 32);
+ Stream.Emit(0 , 32); // Filled in later.
+ Stream.Emit(CPUType , 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+ // Update the size field in the header.
+ Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+
+ // If the file is not a multiple of 16 bytes, insert dummy padding.
+ while (BufferSize & 15) {
+ Stream.Emit(0, 8);
+ ++BufferSize;
+ }
+}
+
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+ std::vector<unsigned char> Buffer;
+ BitstreamWriter Stream(Buffer);
+
+ Buffer.reserve(256*1024);
+
+ WriteBitcodeToStream(M, Stream);
+
+ // Write the generated bitstream to "Out".
+ Out.write((char*)&Buffer.front(), Buffer.size());
+}
+
+/// WriteBitcodeToStream - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
+ // If this is darwin or another generic macho target, emit a file header and
+ // trailer if needed.
+ Triple TT(M->getTargetTriple());
+ if (TT.isOSDarwin())
+ EmitDarwinBCHeader(Stream, TT);
+
+ // Emit the file header.
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+
+ // Emit the module.
+ WriteModule(M, Stream);
+
+ if (TT.isOSDarwin())
+ EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+}
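For context, a minimal sketch of how a client might drive the writer above, assuming the LLVM C++ API of this vintage (LLVMContext, Module, raw_fd_ostream). The file name and the empty module are illustrative only; this is not part of the imported sources.

    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Context;
      Module M("example", Context);                 // empty module, illustrative
      std::string ErrInfo;
      raw_fd_ostream OS("example.bc", ErrInfo, raw_fd_ostream::F_Binary);
      if (!ErrInfo.empty())
        return 1;                                   // could not open the output
      WriteBitcodeToFile(&M, OS);                   // 'B','C',0x0,0xC,0xE,0xD + MODULE_BLOCK
      return 0;
    }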
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
new file mode 100644
index 000000000000..91e115cba6cc
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -0,0 +1,41 @@
+//===--- Bitcode/Writer/BitcodeWriterPass.cpp - Bitcode Writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BitcodeWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ class WriteBitcodePass : public ModulePass {
+ raw_ostream &OS; // raw_ostream to print on
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit WriteBitcodePass(raw_ostream &o)
+ : ModulePass(ID), OS(o) {}
+
+ const char *getPassName() const { return "Bitcode Writer"; }
+
+ bool runOnModule(Module &M) {
+ WriteBitcodeToFile(&M, OS);
+ return false;
+ }
+ };
+}
+
+char WriteBitcodePass::ID = 0;
+
+/// createBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
+ return new WriteBitcodePass(Str);
+}
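Similarly, a hedged sketch of using the pass form with the legacy PassManager of this era; the helper function name is mine, and the module and output stream are assumed to be set up elsewhere.

    #include "llvm/PassManager.h"
    #include "llvm/Module.h"
    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Append the bitcode writer to a pipeline so the module is serialized
    // after all other passes have run.
    static void writeWithPassManager(Module &M, raw_ostream &OS) {
      PassManager PM;
      PM.add(createBitcodeWriterPass(OS));   // the pass defined above
      PM.run(M);                             // runOnModule() calls WriteBitcodeToFile
    }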
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
new file mode 100644
index 000000000000..b68bf92d51b2
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -0,0 +1,494 @@
+//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueEnumerator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Instructions.h"
+#include <algorithm>
+using namespace llvm;
+
+static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
+ return V.first->getType()->isIntegerTy();
+}
+
+/// ValueEnumerator - Enumerate module-level information.
+ValueEnumerator::ValueEnumerator(const Module *M) {
+ // Enumerate the global variables.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ EnumerateValue(I);
+
+ // Enumerate the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ EnumerateValue(I);
+ EnumerateAttributes(cast<Function>(I)->getAttributes());
+ }
+
+ // Enumerate the aliases.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ // Remember the cutoff between global values and other constants.
+ unsigned FirstConstant = Values.size();
+
+ // Enumerate the global variable initializers.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ EnumerateValue(I->getInitializer());
+
+ // Enumerate the aliasees.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I->getAliasee());
+
+ // Insert constants and metadata that are named at module level into the slot
+ // pool so that the module symbol table can refer to them...
+ EnumerateValueSymbolTable(M->getValueSymbolTable());
+ EnumerateNamedMetadata(M);
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+
+ // Enumerate types used by function bodies and argument lists.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ EnumerateType(I->getType());
+
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal() && MD->getFunction())
+ // These will get enumerated during function-incorporation.
+ continue;
+ EnumerateOperandType(*OI);
+ }
+ EnumerateType(I->getType());
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ EnumerateAttributes(CI->getAttributes());
+ else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+ EnumerateAttributes(II->getAttributes());
+
+ // Enumerate metadata attached with this instruction.
+ MDs.clear();
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i)
+ EnumerateMetadata(MDs[i].second);
+
+ if (!I->getDebugLoc().isUnknown()) {
+ MDNode *Scope, *IA;
+ I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
+ if (Scope) EnumerateMetadata(Scope);
+ if (IA) EnumerateMetadata(IA);
+ }
+ }
+ }
+
+ // Optimize constant ordering.
+ OptimizeConstants(FirstConstant, Values.size());
+}
+
+
+unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
+ InstructionMapType::const_iterator I = InstructionMap.find(Inst);
+ assert(I != InstructionMap.end() && "Instruction is not mapped!");
+ return I->second;
+}
+
+void ValueEnumerator::setInstructionID(const Instruction *I) {
+ InstructionMap[I] = InstructionCount++;
+}
+
+unsigned ValueEnumerator::getValueID(const Value *V) const {
+ if (isa<MDNode>(V) || isa<MDString>(V)) {
+ ValueMapType::const_iterator I = MDValueMap.find(V);
+ assert(I != MDValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+ }
+
+ ValueMapType::const_iterator I = ValueMap.find(V);
+ assert(I != ValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+}
+
+// Optimize constant ordering.
+namespace {
+ struct CstSortPredicate {
+ ValueEnumerator &VE;
+ explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
+ bool operator()(const std::pair<const Value*, unsigned> &LHS,
+ const std::pair<const Value*, unsigned> &RHS) {
+ // Sort by plane.
+ if (LHS.first->getType() != RHS.first->getType())
+ return VE.getTypeID(LHS.first->getType()) <
+ VE.getTypeID(RHS.first->getType());
+ // Then by frequency.
+ return LHS.second > RHS.second;
+ }
+ };
+}
+
+/// OptimizeConstants - Reorder constant pool for denser encoding.
+void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
+ if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
+
+ CstSortPredicate P(*this);
+ std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
+
+ // Ensure that integer constants are at the start of the constant pool. This
+ // is important so that GEP structure indices come before gep constant exprs.
+ std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
+ isIntegerValue);
+
+ // Rebuild the modified portion of ValueMap.
+ for (; CstStart != CstEnd; ++CstStart)
+ ValueMap[Values[CstStart].first] = CstStart+1;
+}
+
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+ for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+ VI != VE; ++VI)
+ EnumerateValue(VI->getValue());
+}
+
+/// EnumerateNamedMetadata - Insert all of the values referenced by
+/// named metadata in the specified module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ EnumerateNamedMDNode(I);
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ EnumerateMetadata(MD->getOperand(i));
+}
+
+/// EnumerateMDNodeOperands - Enumerate all non-function-local values
+/// and types referenced by the given MDNode.
+void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (isa<MDNode>(V) || isa<MDString>(V))
+ EnumerateMetadata(V);
+ else if (!isa<Instruction>(V) && !isa<Argument>(V))
+ EnumerateValue(V);
+ } else
+ EnumerateType(Type::getVoidTy(N->getContext()));
+ }
+}
+
+void ValueEnumerator::EnumerateMetadata(const Value *MD) {
+ assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
+
+ // Enumerate the type of this value.
+ EnumerateType(MD->getType());
+
+ const MDNode *N = dyn_cast<MDNode>(MD);
+
+ // In the module-level pass, skip function-local nodes themselves, but
+ // do walk their operands.
+ if (N && N->isFunctionLocal() && N->getFunction()) {
+ EnumerateMDNodeOperands(N);
+ return;
+ }
+
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[MD];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueID = MDValues.size();
+
+ // Enumerate all non-function-local operands.
+ if (N)
+ EnumerateMDNodeOperands(N);
+}
+
+/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
+/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
+ assert(N->isFunctionLocal() && N->getFunction() &&
+ "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
+
+ // Enumerate the type of this value.
+ EnumerateType(N->getType());
+
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[N];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+ MDValues.push_back(std::make_pair(N, 1U));
+ MDValueID = MDValues.size();
+
+ // To incorporate function-local information, visit all function-local
+ // MDNodes and all function-local values they reference.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *O = dyn_cast<MDNode>(V)) {
+ if (O->isFunctionLocal() && O->getFunction())
+ EnumerateFunctionLocalMetadata(O);
+ } else if (isa<Instruction>(V) || isa<Argument>(V))
+ EnumerateValue(V);
+ }
+
+ // Also, collect all function-local MDNodes for easy access.
+ FunctionLocalMDs.push_back(N);
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+ assert(!V->getType()->isVoidTy() && "Can't insert void values!");
+ assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
+ "EnumerateValue doesn't handle Metadata!");
+
+ // Check to see if it's already in!
+ unsigned &ValueID = ValueMap[V];
+ if (ValueID) {
+ // Increment use count.
+ Values[ValueID-1].second++;
+ return;
+ }
+
+ // Enumerate the type of this value.
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<GlobalValue>(C)) {
+ // Initializers for globals are handled explicitly elsewhere.
+ } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ // Do not enumerate the initializers for an array of simple characters.
+ // The initializers just pollute the value table, and we emit the strings
+ // specially.
+ } else if (C->getNumOperands()) {
+ // If a constant has operands, enumerate them. This makes sure that if a
+ // constant has uses (for example, an array of const ints), they are
+ // inserted as well.
+
+ // We prefer to enumerate them with values before we enumerate the user
+ // itself. This makes it more likely that we can avoid forward references
+ // in the reader. We know that there can be no cycles in the constants
+ // graph that don't go through a global variable.
+ for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
+ I != E; ++I)
+ if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
+ EnumerateValue(*I);
+
+ // Finally, add the value. Doing this could leave the ValueID reference
+ // dangling, so don't reuse it.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueMap[V] = Values.size();
+ return;
+ }
+ }
+
+ // Add the value.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueID = Values.size();
+}
+
+
+void ValueEnumerator::EnumerateType(const Type *Ty) {
+ unsigned *TypeID = &TypeMap[Ty];
+
+ // We've already seen this type.
+ if (*TypeID)
+ return;
+
+ // If it is a non-anonymous struct, mark the type as being visited so that we
+ // don't recursively visit it. This is safe because we allow forward
+ // references of these in the bitcode reader.
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isAnonymous())
+ *TypeID = ~0U;
+
+ // Enumerate all of the subtypes before we enumerate this type. This ensures
+ // that the type will be enumerated in an order that can be directly built.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ EnumerateType(*I);
+
+ // Refresh the TypeID pointer in case the table rehashed.
+ TypeID = &TypeMap[Ty];
+
+ // Check to see if we got the pointer another way. This can happen when
+ // enumerating recursive types that hit the base case deeper than they start.
+ //
+ // If this is actually a struct that we are treating as forward ref'able,
+ // then emit the definition now that all of its contents are available.
+ if (*TypeID && *TypeID != ~0U)
+ return;
+
+ // Add this type now that its contents are all happily enumerated.
+ Types.push_back(Ty);
+
+ *TypeID = Types.size();
+}
+
+// Enumerate the types for the specified value. If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ // If this constant is already enumerated, ignore it, we know its type must
+ // be enumerated.
+ if (ValueMap.count(V)) return;
+
+ // This constant may have operands, make sure to enumerate the types in
+ // them.
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ const Value *Op = C->getOperand(i);
+
+ // Don't enumerate basic blocks here, this happens as operands to
+ // blockaddress.
+ if (isa<BasicBlock>(Op)) continue;
+
+ EnumerateOperandType(Op);
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *Elem = N->getOperand(i))
+ EnumerateOperandType(Elem);
+ }
+ } else if (isa<MDString>(V) || isa<MDNode>(V))
+ EnumerateMetadata(V);
+}
+
+void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
+ if (PAL.isEmpty()) return; // null is always 0.
+ // Do a lookup.
+ unsigned &Entry = AttributeMap[PAL.getRawPointer()];
+ if (Entry == 0) {
+ // Never saw this before, add it.
+ Attributes.push_back(PAL);
+ Entry = Attributes.size();
+ }
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+ InstructionCount = 0;
+ NumModuleValues = Values.size();
+ NumModuleMDValues = MDValues.size();
+
+ // Adding function arguments to the value table.
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ FirstFuncConstantID = Values.size();
+
+ // Add all function-level constants to the value table.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ EnumerateValue(*OI);
+ }
+ BasicBlocks.push_back(BB);
+ ValueMap[BB] = BasicBlocks.size();
+ }
+
+ // Optimize the constant layout.
+ OptimizeConstants(FirstFuncConstantID, Values.size());
+
+ // Add the function's parameter attributes so they are available for use in
+ // the function's instructions.
+ EnumerateAttributes(F.getAttributes());
+
+ FirstInstID = Values.size();
+
+ SmallVector<MDNode *, 8> FnLocalMDVector;
+ // Add all of the instructions.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal() && MD->getFunction())
+ // Enumerate metadata after the instructions they might refer to.
+ FnLocalMDVector.push_back(MD);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ MDNode *N = MDs[i].second;
+ if (N->isFunctionLocal() && N->getFunction())
+ FnLocalMDVector.push_back(N);
+ }
+
+ if (!I->getType()->isVoidTy())
+ EnumerateValue(I);
+ }
+ }
+
+ // Add all of the function-local metadata.
+ for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
+ EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
+}
+
+void ValueEnumerator::purgeFunction() {
+ /// Remove purged values from the ValueMap.
+ for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+ ValueMap.erase(Values[i].first);
+ for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
+ MDValueMap.erase(MDValues[i].first);
+ for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
+ ValueMap.erase(BasicBlocks[i]);
+
+ Values.resize(NumModuleValues);
+ MDValues.resize(NumModuleMDValues);
+ BasicBlocks.clear();
+ FunctionLocalMDs.clear();
+}
+
+static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
+ DenseMap<const BasicBlock*, unsigned> &IDMap) {
+ unsigned Counter = 0;
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ IDMap[BB] = ++Counter;
+}
+
+/// getGlobalBasicBlockID - This returns the function-specific ID for the
+/// specified basic block. This is relatively expensive information, so it
+/// should only be used by rare constructs such as address-of-label.
+unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
+ unsigned &Idx = GlobalBasicBlockIDs[BB];
+ if (Idx != 0)
+ return Idx-1;
+
+ IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
+ return getGlobalBasicBlockID(BB);
+}
+
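One detail worth calling out: ValueMap, MDValueMap, and TypeMap above all store IDs biased by one, so that a default-constructed entry of 0 means "not yet enumerated", and the getters subtract the bias back out. A reduced sketch of that pattern using standard containers (the enumerate() helper is mine, not part of the imported sources):

    #include <map>
    #include <vector>

    static std::map<const void*, unsigned> ValueMap;   // value -> ID+1
    static std::vector<const void*> Values;            // ID -> value

    static unsigned enumerate(const void *V) {
      unsigned &ID = ValueMap[V];   // first access default-constructs to 0
      if (ID)
        return ID - 1;              // already enumerated; unbias the stored ID
      Values.push_back(V);
      ID = Values.size();           // store as index+1 so 0 still means "absent"
      return ID - 1;
    }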
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
new file mode 100644
index 000000000000..6617b60deb26
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -0,0 +1,153 @@
+//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types unique IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VALUE_ENUMERATOR_H
+#define VALUE_ENUMERATOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Attributes.h"
+#include <vector>
+
+namespace llvm {
+
+class Type;
+class Value;
+class Instruction;
+class BasicBlock;
+class Function;
+class Module;
+class MDNode;
+class NamedMDNode;
+class AttrListPtr;
+class ValueSymbolTable;
+class MDSymbolTable;
+
+class ValueEnumerator {
+public:
+ typedef std::vector<const Type*> TypeList;
+
+ // For each value, we remember its Value* and occurrence frequency.
+ typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+private:
+ typedef DenseMap<const Type*, unsigned> TypeMapType;
+ TypeMapType TypeMap;
+ TypeList Types;
+
+ typedef DenseMap<const Value*, unsigned> ValueMapType;
+ ValueMapType ValueMap;
+ ValueList Values;
+ ValueList MDValues;
+ SmallVector<const MDNode *, 8> FunctionLocalMDs;
+ ValueMapType MDValueMap;
+
+ typedef DenseMap<void*, unsigned> AttributeMapType;
+ AttributeMapType AttributeMap;
+ std::vector<AttrListPtr> Attributes;
+
+ /// GlobalBasicBlockIDs - This map memoizes the basic block IDs referenced by
+ /// the "getGlobalBasicBlockID" method.
+ mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
+
+ typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
+ InstructionMapType InstructionMap;
+ unsigned InstructionCount;
+
+ /// BasicBlocks - This contains all the basic blocks for the currently
+ /// incorporated function. Their reverse mapping is stored in ValueMap.
+ std::vector<const BasicBlock*> BasicBlocks;
+
+ /// When a function is incorporated, this is the size of the Values list
+ /// before incorporation.
+ unsigned NumModuleValues;
+
+ /// When a function is incorporated, this is the size of the MDValues list
+ /// before incorporation.
+ unsigned NumModuleMDValues;
+
+ unsigned FirstFuncConstantID;
+ unsigned FirstInstID;
+
+ ValueEnumerator(const ValueEnumerator &); // DO NOT IMPLEMENT
+ void operator=(const ValueEnumerator &); // DO NOT IMPLEMENT
+public:
+ ValueEnumerator(const Module *M);
+
+ unsigned getValueID(const Value *V) const;
+
+ unsigned getTypeID(const Type *T) const {
+ TypeMapType::const_iterator I = TypeMap.find(T);
+ assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+ return I->second-1;
+ }
+
+ unsigned getInstructionID(const Instruction *I) const;
+ void setInstructionID(const Instruction *I);
+
+ unsigned getAttributeID(const AttrListPtr &PAL) const {
+ if (PAL.isEmpty()) return 0; // Null maps to zero.
+ AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
+ assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ /// getFunctionConstantRange - Return the range of values that corresponds to
+ /// function-local constants.
+ void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+ Start = FirstFuncConstantID;
+ End = FirstInstID;
+ }
+
+ const ValueList &getValues() const { return Values; }
+ const ValueList &getMDValues() const { return MDValues; }
+ const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+ return FunctionLocalMDs;
+ }
+ const TypeList &getTypes() const { return Types; }
+ const std::vector<const BasicBlock*> &getBasicBlocks() const {
+ return BasicBlocks;
+ }
+ const std::vector<AttrListPtr> &getAttributes() const {
+ return Attributes;
+ }
+
+ /// getGlobalBasicBlockID - This returns the function-specific ID for the
+ /// specified basic block. This is relatively expensive information, so it
+ /// should only be used by rare constructs such as address-of-label.
+ unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
+
+ /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+ /// use these two methods to get its data into the ValueEnumerator!
+ ///
+ void incorporateFunction(const Function &F);
+ void purgeFunction();
+
+private:
+ void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+
+ void EnumerateMDNodeOperands(const MDNode *N);
+ void EnumerateMetadata(const Value *MD);
+ void EnumerateFunctionLocalMetadata(const MDNode *N);
+ void EnumerateNamedMDNode(const NamedMDNode *NMD);
+ void EnumerateValue(const Value *V);
+ void EnumerateType(const Type *T);
+ void EnumerateOperandType(const Value *V);
+ void EnumerateAttributes(const AttrListPtr &PAL);
+
+ void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+ void EnumerateNamedMetadata(const Module *M);
+};
+
+} // End llvm namespace
+
+#endif
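As the incorporateFunction/purgeFunction comment above suggests, per-function data is brought in and dropped by bracketing each body; a hedged sketch of that calling pattern follows, mirroring the WriteFunction loop in the module writer. The walkFunctionIDs helper is mine, record emission is omitted, and ValueEnumerator.h is a private header of the writer directory.

    #include "ValueEnumerator.h"
    #include "llvm/Module.h"
    #include "llvm/Type.h"
    using namespace llvm;

    static void walkFunctionIDs(const Module &M) {
      ValueEnumerator VE(&M);                       // module-level enumeration
      for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration())
          continue;
        VE.incorporateFunction(*F);                 // args, constants, instructions
        for (Function::const_iterator BB = F->begin(); BB != F->end(); ++BB)
          for (BasicBlock::const_iterator I = BB->begin(); I != BB->end(); ++I)
            if (!I->getType()->isVoidTy())
              (void)VE.getValueID(&*I);             // IDs are stable until purgeFunction()
        VE.purgeFunction();                         // drop the function-local state
      }
    }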
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 000000000000..25842a7876a2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,962 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
+
+
+
+AggressiveAntiDepBreaker::
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ State(NULL) {
+ /* Collect a bitset of all registers that are only broken if they
+ are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(State == NULL);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias) {
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(dbgs() << "Observe: ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\tRegs:");
+
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+ MachineOperand& MO)
+{
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = NULL;
+ if (MO.isDef())
+ Op = MI->findRegisterUseOperand(Reg, true);
+ else
+ Op = MI->findRegisterDefOperand(Reg);
+
+ return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+ std::set<unsigned>& PassthruRegs) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ PassthruRegs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ PassthruRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+ unsigned Reg = P->getReg();
+ if (RegSet.count(Reg) == 0) {
+ Edges.push_back(&*P);
+ RegSet.insert(Reg);
+ }
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ }
+ // Repeat for subregisters.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+
+ DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+ unsigned Count,
+ std::set<unsigned>& PassthruRegs) {
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI)) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ DefIndices[AliasReg] = Count;
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // If MI's uses have a special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are
+ // being conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+ // It wasn't previously live but now it is; this is a kill. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ if (Special) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI->isKill()) {
+ DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = State->GetRegRefs().equal_range(Reg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+ QE = Range.second; Q != QE; ++Q) {
+ const TargetRegisterClass *RC = Q->second.RC;
+ if (RC == NULL) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(dbgs() << " " << RC->getName());
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector BV = GetRenameRegisters(Reg);
+ RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+
+ DEBUG(dbgs() << " ::");
+ DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
+ const TargetRegisterClass *SuperRC =
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
+
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
+ DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tFind Registers:");
+
+ if (RenameOrder.count(SuperRC) == 0)
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
+ do {
+ if (R == 0) R = Order.size();
+ --R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(dbgs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ BitVector BV = RenameRegisterMap[Reg];
+ if (!BV.test(NewReg)) {
+ DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Reg's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+ // If we fall-out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass, the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+ SU));
+ }
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ const SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ DEBUG(dbgs() << "Anti: ");
+ DEBUG(MI->dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI->isKill()) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ const SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp != NULL &&
+ "Can't find index for defined register operand");
+ if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
+ DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = RegRefs.equal_range(CurrReg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second.Operand->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
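
The reference-rewriting loop above walks every recorded use and def of CurrReg through the RegRefs multimap before retiring the old register. A minimal standalone sketch of that equal_range pattern follows, using hypothetical stand-in types (Operand, Reference, renameAll) rather than the real MachineOperand/RegisterReference structures:

#include <cstdio>
#include <map>
#include <utility>

// Hypothetical stand-ins for MachineOperand / RegisterReference; the real
// types carry far more state than a single register field.
struct Operand { unsigned Reg; };
struct Reference { Operand *Op; };

typedef std::multimap<unsigned, Reference> RefMap;

// Retarget every recorded reference of OldReg to NewReg, the same pattern
// the loop above applies to RegRefs.equal_range(CurrReg).
static unsigned renameAll(RefMap &RegRefs, unsigned OldReg, unsigned NewReg) {
  unsigned Changed = 0;
  std::pair<RefMap::iterator, RefMap::iterator> Range =
      RegRefs.equal_range(OldReg);
  for (RefMap::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) {
    Q->second.Op->Reg = NewReg;          // corresponds to setReg(NewReg)
    ++Changed;
  }
  RegRefs.erase(OldReg);                 // drop the now-stale entries
  return Changed;
}

int main() {
  Operand A = { 5 }, B = { 5 };
  Reference RA = { &A }, RB = { &B };
  RefMap RegRefs;
  RegRefs.insert(std::make_pair(5u, RA));
  RegRefs.insert(std::make_pair(5u, RB));
  unsigned N = renameAll(RegRefs, 5, 7);
  std::printf("rewrote %u refs: A=%u B=%u\n", N, A.Reg, B.Reg); // 2 refs, both 7
  return 0;
}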
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 000000000000..706778485429
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,184 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+
+ /// Class AggressiveAntiDepState
+ /// Contains all the state necessary for anti-dep breaking.
+ class AggressiveAntiDepState {
+ public:
+ /// RegisterReference - Information about a register reference
+ /// within a live range.
+ typedef struct {
+ /// Operand - The register's operand.
+ MachineOperand *Operand;
+ /// RC - The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// NumTargetRegs - Number of non-virtual target registers
+ /// (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// GroupNodes - Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
+
+ /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+ /// composed of registers that are not eligible for anti-dependency
+ /// breaking.
+ std::vector<unsigned> GroupNodeIndices;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// GetKillIndices - Return the kill indices.
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+ /// GetDefIndices - Return the define indices.
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+ /// GetRegRefs - Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // GetGroup - Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // GetGroupRegs - Return a vector of the registers belonging to a
+ // group. If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // UnionGroups - Union Reg1's and Reg2's groups to form a new
+ // group. Return the index of the GroupNode representing the
+ // group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // LeaveGroup - Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+ // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// IsLive - Return true if Reg is live
+ bool IsLive(unsigned Reg);
+ };
+
+
+ /// Class AggressiveAntiDepBreaker
+ class AggressiveAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// State - The state used to identify and rename anti-dependence
+ /// registers.
+ AggressiveAntiDepState *State;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ ~AggressiveAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+
+ /// IsImplicitDefUse - Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+ /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header =NULL, const char *footer =NULL);
+
+ void PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
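
The GroupNodes/GroupNodeIndices members described above form a small disjoint-set (union-find) structure over register numbers. A hedged, self-contained sketch of that idea follows; RegGroups and its method names are illustrative and are not part of this header:

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

// Minimal disjoint-set keyed by register number, in the spirit of
// GroupNodes/GroupNodeIndices. Group 0 stands for the "ineligible"
// registers, and unioning with it always yields group 0.
class RegGroups {
  std::vector<unsigned> Nodes;   // parent links; Nodes[N] == N means root
  std::vector<unsigned> Index;   // register -> some node in its group
public:
  explicit RegGroups(unsigned NumRegs) : Index(NumRegs, 0) {
    Nodes.push_back(0);          // node 0 is the root of group 0
  }

  unsigned find(unsigned Reg) {              // cf. GetGroup
    unsigned N = Index[Reg];
    while (Nodes[N] != N)
      N = Nodes[N];
    Index[Reg] = N;                          // path shortening
    return N;
  }

  unsigned unite(unsigned R1, unsigned R2) { // cf. UnionGroups
    unsigned G1 = find(R1), G2 = find(R2);
    if (G2 == 0)
      std::swap(G1, G2);                     // group 0 absorbs the other
    Nodes[G2] = G1;
    return G1;
  }

  unsigned leave(unsigned Reg) {             // cf. LeaveGroup
    unsigned N = (unsigned)Nodes.size();
    Nodes.push_back(N);                      // fresh node, its own root
    Index[Reg] = N;
    return N;
  }
};

int main() {
  RegGroups G(8);
  G.leave(3);
  G.leave(4);
  unsigned Joined = G.unite(3, 4);
  assert(G.find(3) == Joined && G.find(4) == Joined);
  std::printf("regs 3 and 4 now share group %u\n", Joined);
  return 0;
}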
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..1005f102bea6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,79 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "RegisterClassInfo.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo)
+ : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) {
+ const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+ std::pair<unsigned, unsigned> HintPair =
+ VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+ // HintPair.second is a register, phys or virt.
+ Hint = HintPair.second;
+
+ // Translate to physreg, or 0 if not assigned yet.
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = VRM.getPhys(Hint);
+
+ // The first hint pair component indicates a target-specific hint.
+ if (HintPair.first) {
+ const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
+ // The remaining allocation order may depend on the hint.
+ ArrayRef<unsigned> Order =
+ TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
+ VRM.getMachineFunction());
+ if (Order.empty())
+ return;
+
+ // Copy the allocation order with reserved registers removed.
+ OwnedBegin = true;
+ unsigned *P = new unsigned[Order.size()];
+ Begin = P;
+ for (unsigned i = 0; i != Order.size(); ++i)
+ if (!RCI.isReserved(Order[i]))
+ *P++ = Order[i];
+ End = P;
+
+ // Target-dependent hints require resolution.
+ Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint,
+ VRM.getMachineFunction());
+ } else {
+ // If there is no hint or just a normal hint, use the cached allocation
+ // order from RegisterClassInfo.
+ ArrayRef<unsigned> O = RCI.getOrder(RC);
+ Begin = O.begin();
+ End = O.end();
+ }
+
+ // The hint must be a valid physreg for allocation.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || RCI.isReserved(Hint)))
+ Hint = 0;
+}
+
+AllocationOrder::~AllocationOrder() {
+ if (OwnedBegin)
+ delete [] Begin;
+}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..d1e48a1f2e96
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,73 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+
+class AllocationOrder {
+ const unsigned *Begin;
+ const unsigned *End;
+ const unsigned *Pos;
+ const RegisterClassInfo &RCI;
+ unsigned Hint;
+ bool OwnedBegin;
+public:
+
+ /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Information about allocation orders and
+ /// reserved registers for the current function.
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo);
+
+ ~AllocationOrder();
+
+ /// next - Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next again after it returned 0.
+ /// It will keep returning 0 until rewind() is called.
+ unsigned next() {
+ // First take the hint.
+ if (!Pos) {
+ Pos = Begin;
+ if (Hint)
+ return Hint;
+ }
+ // Then look at the order from TRI.
+ while (Pos != End) {
+ unsigned Reg = *Pos++;
+ if (Reg != Hint)
+ return Reg;
+ }
+ return 0;
+ }
+
+ /// rewind - Start over from the beginning.
+ void rewind() { Pos = 0; }
+
+ /// isHint - Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+};
+
+} // end namespace llvm
+
+#endif
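
The next()/rewind() contract above is designed for hint-first probing by a register allocator: the hint comes out first, then the remaining order with the hint skipped, then 0. A small usage sketch with a toy order and a hypothetical tryAssign() predicate (neither is part of this header):

#include <cstdio>

// Stand-in mirroring the next()/rewind() contract described above.
class ToyOrder {
  const unsigned *Begin, *End, *Pos;
  unsigned Hint;
public:
  ToyOrder(const unsigned *B, const unsigned *E, unsigned H)
    : Begin(B), End(E), Pos(0), Hint(H) {}
  unsigned next() {
    if (!Pos) { Pos = Begin; if (Hint) return Hint; }  // hint first
    while (Pos != End) { unsigned R = *Pos++; if (R != Hint) return R; }
    return 0;                                          // order exhausted
  }
  void rewind() { Pos = 0; }
};

// Illustrative only: pretend register 12 is the only free one.
static bool tryAssign(unsigned PhysReg) { return PhysReg == 12; }

int main() {
  static const unsigned RawOrder[] = { 10, 11, 12, 13 };
  ToyOrder Order(RawOrder, RawOrder + 4, /*Hint=*/11);

  // Typical caller loop: take registers until one sticks or we hit 0.
  for (unsigned Reg = Order.next(); Reg != 0; Reg = Order.next())
    if (tryAssign(Reg)) {
      std::printf("assigned physreg %u\n", Reg);  // prints 12
      break;
    }
  return 0;
}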
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 000000000000..125e64196f15
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,303 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
+ assert(GV->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = GV->getInitializer();
+ GV = dyn_cast<GlobalVariable>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ ISD::CondCode FPC, FOC;
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ llvm_unreachable("Invalid FCmp predicate opcode!");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (NoNaNsFPMath)
+ return FOC;
+ else
+ return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ return ISD::SETNE;
+ }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+ const TargetLowering &TLI) {
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !I->isSafeToSpeculativelyExecute())
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !BBI->isSafeToSpeculativelyExecute())
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ const Function *F = ExitBB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+ U = dyn_cast<Instruction>(U->getOperand(0))) {
+ if (!U)
+ return false;
+ if (!U->hasOneUse())
+ return false;
+ if (U == I)
+ break;
+ // Check for a truly no-op truncate.
+ if (isa<TruncInst>(U) &&
+ TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+ continue;
+ // Check for a truly no-op bitcast.
+ if (isa<BitCastInst>(U) &&
+ (U->getOperand(0)->getType() == U->getType() ||
+ (U->getOperand(0)->getType()->isPointerTy() &&
+ U->getType()->isPointerTy())))
+ continue;
+ // Otherwise it's not a true no-op.
+ return false;
+ }
+
+ return true;
+}
+
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
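
ComputeLinearIndex above flattens an aggregate into scalar slots and returns the slot at which the member selected by an insertvalue/extractvalue index path begins. A hedged sketch of the same recursion over a simplified type model; ToyType and linearIndex are illustrative stand-ins, not LLVM types:

#include <cstdio>
#include <vector>

// Simplified stand-in for an aggregate type: a leaf, or a list of fields.
struct ToyType {
  std::vector<const ToyType*> Fields;   // empty => scalar leaf
};

// Same shape as ComputeLinearIndex: walk fields, counting leaves, and
// descend when the next index selects the current field.
static unsigned linearIndex(const ToyType *Ty,
                            const unsigned *Idx, const unsigned *IdxEnd,
                            unsigned Cur = 0) {
  if (Idx && Idx == IdxEnd)
    return Cur;                          // all indices consumed: start found
  if (Ty->Fields.empty())
    return Cur + 1;                      // scalar leaf occupies one slot
  for (unsigned i = 0, e = Ty->Fields.size(); i != e; ++i) {
    if (Idx && *Idx == i)
      return linearIndex(Ty->Fields[i], Idx + 1, IdxEnd, Cur);
    Cur = linearIndex(Ty->Fields[i], 0, 0, Cur);
  }
  return Cur;
}

int main() {
  // { i32, { float, double }, i64 } -- leaves are i32:0 float:1 double:2 i64:3
  ToyType I32, F32, F64, I64, Inner, Outer;
  Inner.Fields.push_back(&F32);  Inner.Fields.push_back(&F64);
  Outer.Fields.push_back(&I32);  Outer.Fields.push_back(&Inner);
  Outer.Fields.push_back(&I64);

  unsigned Path[] = { 1, 1 };            // outer field 1, then inner field 1
  std::printf("linear index = %u\n",     // prints 2 (the double)
              linearIndex(&Outer, Path, Path + 2));
  return 0;
}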
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 000000000000..df47f984d578
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,71 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+
+ virtual ~AntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+ /// BreakAntiDependencies - Identify anti-dependencies within a
+ /// basic-block region and break them by renaming registers. Return
+ /// the number of anti-dependencies broken.
+ ///
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ virtual void Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) =0;
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() =0;
+
+ /// UpdateDbgValue - Update a DBG_VALUE if the dependency breaker is
+ /// updating other machine instructions to use NewReg.
+ void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
+ assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
+ MI->getOperand(0).setReg(NewReg);
+ }
+};
+
+}
+
+#endif
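
DbgValueVector pairs each DBG_VALUE instruction with the instruction that defines the value it describes, which is what lets UpdateDbgValue retarget debug information when that defining instruction is renamed. A sketch of that bookkeeping with placeholder types; ToyInstr and updateDbgValue are illustrative only:

#include <cstdio>
#include <utility>
#include <vector>

// Placeholder for a machine instruction whose operand 0 is a register.
struct ToyInstr { unsigned Reg; bool IsDbgValue; };

typedef std::vector<std::pair<ToyInstr*, ToyInstr*> > ToyDbgValueVector;

// Mirror of UpdateDbgValue: retarget a DBG_VALUE that still names OldReg.
static void updateDbgValue(ToyInstr *DV, unsigned OldReg, unsigned NewReg) {
  if (DV->IsDbgValue && DV->Reg == OldReg)
    DV->Reg = NewReg;
}

int main() {
  ToyInstr Def = { 5, false };            // defines register 5
  ToyInstr Dbg = { 5, true };             // DBG_VALUE describing that def
  ToyDbgValueVector DbgValues;
  DbgValues.push_back(std::make_pair(&Dbg, &Def));

  // A breaker renames Def's register 5 -> 7, then fixes paired DBG_VALUEs.
  Def.Reg = 7;
  for (ToyDbgValueVector::iterator I = DbgValues.begin(), E = DbgValues.end();
       I != E; ++I)
    if (I->second == &Def)
      updateDbgValue(I->first, 5, 7);

  std::printf("DBG_VALUE now refers to reg %u\n", Dbg.Reg);  // prints 7
  return 0;
}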
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 000000000000..5861fa4817f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,87 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+ {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+ Asm->OutStreamer.EmitFnStart();
+ if (Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitCantUnwind();
+ else {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ Asm->OutStreamer.EmitHandlerData();
+
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
+
+ Asm->OutStreamer.EmitFnEnd();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 000000000000..7f314eed3ae6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1996 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+ if (P == 0)
+ P = new gcp_map_type();
+ return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = TD.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round up to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
+
+
+
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : MachineFunctionPass(ID),
+ TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer.getContext()),
+ OutStreamer(Streamer),
+ LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = 0; DE = 0; MMI = 0; LI = 0;
+ GCMetadataPrinters = 0;
+ VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+
+ if (GCMetadataPrinters != 0) {
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+ for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+ delete I->second;
+ delete &GCMap;
+ GCMetadataPrinters = 0;
+ }
+
+ delete &OutStreamer;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
+}
+
+
+/// getTargetData - Return information about data layout.
+const TargetData &AsmPrinter::getTargetData() const {
+ return *TM.getTargetData();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+ if (isVerbose())
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(*this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ OutStreamer.AddComment("Start of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ OutStreamer.AddComment("End of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ DD = new DwarfDebug(this, &M);
+
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ return false;
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ return false;
+ case ExceptionHandling::ARM:
+ DE = new ARMException(this);
+ return false;
+ case ExceptionHandling::Win64:
+ DE = new Win64Exception(this);
+ return false;
+ }
+
+ llvm_unreachable("Unknown exception type.");
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ if (MAI->getWeakDefDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->getLinkOnceDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ //NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol.
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const TargetData *TD = TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+ // Handle common and BSS local symbols (.lcomm).
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ // Handle common symbols.
+ if (GVKind.isCommon()) {
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ // Handle local BSS symbols.
+ if (MAI->hasMachoZeroFillDirective()) {
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ if (MAI->hasLCOMMDirective()) {
+ // .lcomm _foo, 42
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
+ return;
+ }
+
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .local _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+ // Handle the zerofill directive on darwin, which is a special form of BSS
+ // emission.
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .zerofill __DATA, __common, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ // Handle thread local data for mach-o which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS())
+ OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ else if (GVKind.isThreadData()) {
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitAlignment(AlignLog, GV);
+ OutStreamer.EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ const MCSection *TLVSect
+ = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer.SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV->getLinkage(), GVSym);
+ OutStreamer.EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize, 0);
+ OutStreamer.EmitIntValue(0, PtrSize, 0);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), GVSym);
+ EmitAlignment(AlignLog, GV);
+
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+ OutStreamer.AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), F,
+ /*PrintType=*/false, F->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer.AddComment("Address taken block that was later removed");
+ OutStreamer.EmitLabel(DeadBlockSyms[i]);
+ }
+
+ // Add some workaround for linkonce linkage on Cygwin/MinGW.
+ if (MAI->getLinkOnceDirective() != 0 &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
+ // FIXME: What is this?
+ MCSymbol *FakeStub =
+ OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+ CurrentFnSym->getName());
+ OutStreamer.EmitLabel(FakeStub);
+ }
+
+ // Emit pre-function debug and/or EH information.
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
+ }
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
+ }
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isUndefined())
+ return OutStreamer.EmitLabel(CurrentFnSym);
+
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+}
+
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
+
+/// EmitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+ AP.TM.getRegisterInfo()->getName(RegNo));
+ AP.OutStreamer.AddBlankLine();
+}
+
+static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str = "kill:";
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ Str += ' ';
+ Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += (Op.isDef() ? "<def>" : "<kill>");
+ }
+ AP.OutStreamer.AddComment(Str);
+ AP.OutStreamer.AddBlankLine();
+}
+
+/// EmitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 3-operand target-independent form.
+ if (MI->getNumOperands() != 3)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+ // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+ if (V.getContext().isSubprogram())
+ OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+ OS << V.getName() << " <- ";
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else {
+ assert(MI->getOperand(0).isReg() && "Unknown operand type");
+ if (MI->getOperand(0).getReg() == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+ }
+ OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+ }
+
+ OS << '+' << MI->getOperand(1).getImm();
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+}
+
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ MF->getFunction()->needsUnwindTableEntry())
+ return CFI_M_EH;
+
+ if (MMI->hasDebugInfo())
+ return CFI_M_Debug;
+
+ return CFI_M_None;
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ MF->getFunction()->needsUnwindTableEntry();
+}
+
+void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ MachineModuleInfo &MMI = MF->getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool FoundOne = false;
+ (void)FoundOne;
+ for (std::vector<MachineMove>::iterator I = Moves.begin(),
+ E = Moves.end(); I != E; ++I) {
+ if (I->getLabel() == Label) {
+ EmitCFIFrameMove(*I);
+ FoundOne = true;
+ }
+ }
+ assert(FoundOne);
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
+ bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ LastMI = II;
+
+ // Print the assembly for the instruction.
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+ !II->isDebugValue()) {
+ HasAnyRealCode = true;
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginInstruction(II);
+ }
+
+ if (isVerbose())
+ EmitComments(*II, OutStreamer.GetCommentOS());
+
+ switch (II->getOpcode()) {
+ case TargetOpcode::PROLOG_LABEL:
+ emitPrologLabel(*II);
+ break;
+
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::INLINEASM:
+ EmitInlineAsm(II);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!EmitDebugValueComment(II, *this))
+ EmitInstruction(II);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) EmitImplicitDef(II, *this);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) EmitKill(II, *this);
+ break;
+ default:
+ if (!TM.hasMCUseLoc())
+ MCLineEntry::Make(&OutStreamer, getCurrentSection());
+
+ EmitInstruction(II);
+ break;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endInstruction(II);
+ }
+ }
+ }
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ MCInst Noop;
+ TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+ if (Noop.getOpcode()) {
+ OutStreamer.AddComment("avoids zero-length function");
+ OutStreamer.EmitInstruction(Noop);
+ } else // Target not mc-ized yet.
+ OutStreamer.EmitRawText(StringRef("\tnop\n"));
+ }
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function, so we can get the size as
+ // difference between the function label and the temp label.
+ MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(FnEndLabel);
+
+ const MCExpr *SizeExp =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+ OutContext);
+ OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+ }
+
+ // Emit post-function debug information.
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
+ }
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
+ }
+ MMI->EndFunction();
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ OutStreamer.AddBlankLine();
+}
+
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Target specific DBG_VALUE instructions are handled by each target.
+ return MachineLocation();
+}
+
+/// EmitDwarfRegOp - Emit a DWARF register operation.
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+
+ for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg());
+ *SR && Reg < 0; ++SR) {
+ Reg = TRI->getDwarfRegNum(*SR, false);
+ // FIXME: Get the bit range this register uses of the superregister
+ // so that we can produce a DW_OP_bit_piece
+ }
+
+ // FIXME: Handle cases like a super register being encoded as
+ // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
+
+ // FIXME: We have no reasonable way of handling errors in here. The
+ // caller might be in the middle of a DWARF expression. We should
+ // probably assert that Reg >= 0 once debug info generation is more mature.
+
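+ // Registers 0-31 have dedicated one-byte DWARF opcodes (DW_OP_reg0+Reg, or
+ // DW_OP_breg0+Reg when there is an offset); larger register numbers need the
+ // DW_OP_regx/DW_OP_bregx forms, which take the register number as a ULEB128
+ // operand.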
+ if (int Offset = MLoc.getOffset()) {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+ EmitInt8(dwarf::DW_OP_breg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_bregx");
+ EmitInt8(dwarf::DW_OP_bregx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ EmitSLEB128(Offset);
+ } else {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_regx");
+ EmitInt8(dwarf::DW_OP_regx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ }
+
+ // FIXME: Produce a DW_OP_bit_piece if we used a superregister
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobalVariable(I);
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V, false);
+ }
+
+ // Finalize debug and EH information.
+ if (DE) {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndModule();
+ }
+ delete DE; DE = 0;
+ }
+ if (DD) {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endModule();
+ }
+ delete DD; DD = 0;
+ }
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy; it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+ }
+
+ if (MAI->hasSetDirective()) {
+ OutStreamer.AddBlankLine();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ MCSymbol *Name = Mang->getSymbol(I);
+
+ const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+ MCSymbol *Target = Mang->getSymbol(GV);
+
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (I->hasWeakLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+ EmitVisibility(Name, I->getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name,
+ MCSymbolRefExpr::Create(Target, OutContext));
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
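+ // Let each used GC strategy's metadata printer finish up, iterating the
+ // registered strategies in reverse order.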
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(*this);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer.SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = 0;
+ MMI = 0;
+
+ OutStreamer.Finish();
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = Mang->getSymbol(MF.getFunction());
+
+ if (isVerbose())
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+ // SectionCPs - Keep track of the alignment and constant pool entries per Section.
+ struct SectionCPs {
+ const MCSection *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries that go
+ // into the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16();break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+ // The number of sections is small, so just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
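+ // NewOffset rounds Offset up to the next multiple of this entry's alignment
+ // (the alignment is a power of two, so masking with ~AlignMask works).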
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+
+ const Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ bool JTInDiffSection = false;
+ if (// In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ // We should also do this if the section name is NULL or the function is
+ // declared in a discardable section.
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ F->isWeakForLinker()) {
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+ } else {
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
+ JTInDiffSection = true;
+ }
+
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+ // .set directive for each unique entry. This reduces the number of
+ // relocations the assembler will generate for the jump table.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->hasSetDirective()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = TM.getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB)) continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+ // FIXME: This doesn't have to have any specific name; any randomly named
+ // and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = 0;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+ OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // EK_BlockAddress - Each entry is a plain address of a block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // EK_LabelDifference32 - Each entry is the address of the block minus
+ // the address of the jump table. This is used for PIC jump tables where
+ // gprel32 is not supported. e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive is supported, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ if (MAI->hasSetDirective()) {
+ // If we used .set, reference the .set's symbol.
+ Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ // Otherwise, use the difference as the jump table entry.
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+ OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (MAI->hasNoDeadStrip()) // Only emit the list on targets with .no_dead_strip.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
+ EmitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
+ EmitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitLLVMUsedList - For targets that support .no_dead_strip, mark each
+/// global in the specified llvm.used list for which shouldEmitUsedDirectiveFor
+/// returns true as no-dead-strip, so the linker does not discard it.
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
+ if (!isa<ConstantArray>(List)) return;
+ const ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1));
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ // Get the Hi-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective()) {
+ OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+}
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size)
+ const {
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(Plus,
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective())
+ OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+ else {
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+ }
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+
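+ // EmitCodeAlignment lets targets pad text sections with nop encodings rather
+ // than zero bytes; data sections are simply padded with zeros.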
+ if (getCurrentSection()->getKind().isText())
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value to lower!");
+ return MCConstantExpr::Create(0, Ctx);
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using TargetData as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ return MCConstantExpr::Create(0, Ctx);
+ case Instruction::GetElementPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Generate a symbolic expression for the byte address
+ const Constant *PtrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0],
+ IdxVec.size());
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (Offset == 0)
+ return Base;
+
+ // Truncate/sext the offset to the pointer size.
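+ // e.g. on a 32-bit target the shift pair below sign-extends bit 31 of the
+ // offset through the upper 32 bits of the 64-bit value.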
+ if (TD.getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
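+ // e.g. for a 32-bit pointer, InBits is 32 and the mask is 0xFFFFFFFF.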
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+ AsmPrinter &AP);
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (AddrSpace != 0 || !CA->isString()) {
+ // Not a string. Print the values in successive locations
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ return;
+ }
+
+ // Otherwise, it can be emitted as .ascii.
+ SmallVector<char, 128> TmpVec;
+ TmpVec.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+ AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantVector(const ConstantVector *CV,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+}
+
+static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstantImpl(Field, AddrSpace, AP);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.isVerbose()) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isFloatTy()) {
+ if (AP.isVerbose()) {
+ float Val = CFP->getValueAPF().convertToFloat();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ }
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isX86_FP80Ty()) {
+ // All long double variants are printed as hex.
+ // API needed to prevent premature destruction.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.isVerbose()) {
+ // Convert to double so we can print the approximate val as a comment.
+ APFloat DoubleVal = CFP->getValueAPF();
+ bool ignored;
+ DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
+ << DoubleVal.convertToDouble() << '\n';
+ }
+
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ }
+
+ // Emit the tail padding for the long double.
+ const TargetData &TD = *AP.TM.getTargetData();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+ return;
+ }
+
+ assert(CFP->getType()->isPPC_FP128Ty() &&
+ "Floating point constant type not handled");
+ // All long double variants are printed as hex
+ // API needed to prevent premature destruction.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ }
+}
+
+static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ }
+}
+
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
+ uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ return;
+ default:
+ EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+ return;
+ }
+ }
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ return;
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+ // through the streamer with EmitValue.
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+ AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+ AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ if (Size)
+ EmitGlobalConstantImpl(CV, AddrSpace, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer.EmitIntValue(0, 1, AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+ Name);
+}
+
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ + "_" + Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix,
+ bool ForcePrivate) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+ NameStr.append(Suffix.begin(), Suffix.end());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (Loop == 0) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){
+ OS.indent((*CL)->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, *CL, FunctionNumber);
+ }
+}
+
+/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (Loop == 0) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+ // If this block is not a loop header, just print out which block is the
+ // loop header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Log2_32(Align));
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB->hasAddressTaken()) {
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (isVerbose())
+ OutStreamer.AddComment("Block address taken");
+
+ std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+ for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+ OutStreamer.EmitLabel(Syms[i]);
+ }
+
+ // Print the main label for the block.
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+ Twine(MBB->getNumber()) + ":");
+ }
+ } else {
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
+ OutStreamer.EmitLabel(MBB->getSymbol());
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check the terminators in the previous block.
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ return false;
+
+ // If this block appears as an operand of one of the branches (or the
+ // branch uses a jump table), this is not a fall-through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+ gcp_map_type::iterator GCPI = GCMap.find(S);
+ if (GCPI != GCMap.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMap.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 000000000000..dd5b0e261490
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,233 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DWARF emission parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ if (MAI->hasLEB128()) {
+ OutStreamer.EmitSLEB128IntValue(Value);
+ return;
+ }
+
+ // If we don't have .sleb128, emit as .bytes.
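+ // Each byte holds 7 bits of the value; the 0x80 continuation bit is set on
+ // all but the last byte. e.g. -2 encodes as the single byte 0x7e, since the
+ // sign/extension bit (0x40) already matches the value's sign.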
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+ OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ // FIXME: Should we add a PadTo option to the streamer?
+ if (MAI->hasLEB128() && PadTo == 0) {
+ OutStreamer.EmitULEB128IntValue(Value);
+ return;
+ }
+
+ // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
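+ // When PadTo is nonzero, the value bytes all keep their continuation bit and
+ // PadTo extra bytes follow (0x80 padding bytes and a final 0x00), which still
+ // decodes to the same value. e.g. Value=1, PadTo=2 emits 0x81 0x80 0x00.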
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ if (Value || PadTo != 0) Byte |= 0x80;
+ OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (Value);
+
+ if (PadTo) {
+ if (PadTo > 1)
+ OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+ OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+ }
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+ if (isVerbose()) {
+ if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val-dwarf::DW_CFA_offset) + ")");
+ else
+ OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ }
+ OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr: return "absptr";
+ case dwarf::DW_EH_PE_omit: return "omit";
+ case dwarf::DW_EH_PE_pcrel: return "pcrel";
+ case dwarf::DW_EH_PE_udata4: return "udata4";
+ case dwarf::DW_EH_PE_udata8: return "udata8";
+ case dwarf::DW_EH_PE_sdata4: return "sdata4";
+ case dwarf::DW_EH_PE_sdata8: return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc != 0)
+ OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer.AddComment(Twine("Encoding = ") +
+ DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
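+ // Only the low bits select the data format; signed and unsigned variants of
+ // the same width have the same size, and the pcrel/indirect application bits
+ // do not affect it.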
+ switch (Encoding & 0x07) {
+ default: assert(0 && "Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+ case dwarf::DW_EH_PE_udata2: return 2;
+ case dwarf::DW_EH_PE_udata4: return 4;
+ case dwarf::DW_EH_PE_udata8: return 8;
+ }
+}
+
+void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
+ OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section. This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+ const MCSymbol *SectionLabel) const {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
+ // FIXME: MCize.
+ OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+ return;
+ }
+
+ // Get the section that we're referring to, based on SectionLabel.
+ const MCSection &Section = SectionLabel->getSection();
+
+ // If Label has already been emitted, verify that it is in the same section
+ // as the section label, for sanity.
+ assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+ "Section offset using wrong section base for label");
+
+ // If the section in question will end up with an address of 0 anyway, we can
+ // just emit an absolute reference to save a relocation.
+ if (Section.isBaseAddressKnownZero()) {
+ OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+ return;
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitCFIFrameMove - Emit a frame instruction.
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
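+ // A Dst of VirtualFP redefines the CFA (new offset, or register + offset); a
+ // Src of VirtualFP changes only the register the CFA is computed from; any
+ // other move records that register Src is saved at Dst's offset from the CFA.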
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+ // Reg + Offset
+ OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true),
+ Src.getOffset());
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 000000000000..5ac455e1a1a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,419 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer is actually a .s file, just emit the blob textually.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText(Str);
+ return;
+ }
+
+ SourceMgr SrcMgr;
+ SrcMgrDiagInfo DiagInfo;
+
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ bool HasDiagHandler = false;
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
+ HasDiagHandler = true;
+ }
+
+ MemoryBuffer *Buffer;
+ if (isNullTerminated)
+ Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+ else
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+ // Tell SrcMgr about this buffer; it takes ownership of it.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+
+ // FIXME: It would be nice if we could avoid creating a new instance of
+ // MCSubtargetInfo here, given that TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state, e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setTargetParser(*TAP.get());
+
+ // Don't implicitly switch to the text section before the asm.
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ if (Res && !HasDiagHandler)
+ report_fatal_error("Error parsing inline asm\n");
+}
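+
+// A minimal sketch of how a frontend can hook the diagnostic path used above.
+// The setter is assumed to be the counterpart of the
+// getInlineAsmDiagnosticHandler() call made in this function; the handler and
+// context names are hypothetical:
+//
+//   static void MyInlineAsmDiagHandler(const SMDiagnostic &D, void *Ctx,
+//                                      unsigned LocCookie) {
+//     // Map LocCookie back to a frontend source location and report D there.
+//   }
+//   ...
+//   Context.setInlineAsmDiagnosticHandler(MyInlineAsmDiagHandler, &MyCookieMap);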
+
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful for seeing where empty asm blocks wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ OS << '\t';
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Emit the newline to the output.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') // labels are target independent
+ // FIXME: What if the operand isn't an MBB, report error?
+ OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+ else {
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0,
+ OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0, OS);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ OS << '\n' << (char)0; // null terminate string.
+ EmitInlineAsm(OS.str(), LocMD);
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+}
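+
+// Worked example of the scan above, using a hypothetical x86 asm string (on
+// x86, assembler dialect 0 is conventionally AT&T and 1 is Intel):
+//
+//   "$(movl $1, $0$|mov $0, $1$)"
+//
+// With AsmPrinterVariant == 0 only the first $(...$|...$) alternative
+// survives, so "movl <operand 1>, <operand 0>" is passed on to
+// EmitInlineAsm(StringRef); with AsmPrinterVariant == 1 only
+// "mov <operand 0>, <operand 1>" does. "$$" emits a literal '$', and
+// "${0:u}" forwards the 'u' modifier to PrintAsmOperand for operand 0.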
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ OS << MAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ OS << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+
+ // If this is a new instruction or a new function, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+}
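+
+// For example, an asm string such as
+//
+//   "jmp ${:private}tmp${:uid} ${:comment} resume loop"
+//
+// expands "${:private}" to the target's private-global prefix (e.g. ".L" or
+// "L"), "${:uid}" to a number that is unique per inline-asm instruction and
+// per function, and "${:comment}" to the assembler's comment string (e.g.
+// "#"); the concrete characters are taken from MCAsmInfo and are
+// target-dependent.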
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
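+
+// Targets override the two hooks above to do the real operand printing. A
+// minimal sketch for a hypothetical target (the class name and the register
+// name helper are illustrative only):
+//
+//   bool MyTargetAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+//                                            unsigned OpNo,
+//                                            unsigned AsmVariant,
+//                                            const char *ExtraCode,
+//                                            raw_ostream &O) {
+//     if (ExtraCode && ExtraCode[0])
+//       return true;                     // Unknown modifier -> error.
+//     const MachineOperand &MO = MI->getOperand(OpNo);
+//     if (MO.isReg())       O << '%' << MyTargetRegisterName(MO.getReg());
+//     else if (MO.isImm())  O << MO.getImm();
+//     else                  return true; // Unsupported operand kind.
+//     return false;                      // Printed successfully.
+//   }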
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 000000000000..21396ca37f06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,368 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ AP->EmitULEB128(0, "EOM(1)");
+ AP->EmitULEB128(0, "EOM(2)");
+}
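+
+// For example, an abbreviation for DW_TAG_variable with no children and a
+// single (DW_AT_name, DW_FORM_string) pair comes out as the ULEB128 byte
+// sequence
+//
+//   0x34 0x00 0x03 0x08 0x00 0x00
+//
+// i.e. tag, children flag, one attribute/form pair, then the 0,0 terminator
+// emitted above.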
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
+ DIEInteger *DI = new (A) DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+ return DI;
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << format("0x%lx", (long)(intptr_t)this)
+ << ", Offset: " << Offset
+ << ", Size: " << Size << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+ } else {
+ O << "Size: " << Size << "\n";
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(dbgs());
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
+ default: llvm_unreachable("DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(raw_ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << format(" 0x%llx", (unsigned long long)Integer);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
+ // Emit nul terminator.
+ AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
+}
+
+#ifndef NDEBUG
+void DIEString::print(raw_ostream &O) {
+ O << "Str: \"" << Str << "\"";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return AP->getTargetData().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIELabel::print(raw_ostream &O) {
+ O << "Lbl: " << Label->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return AP->getTargetData().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(raw_ostream &O) {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+ if (!Size) {
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ switch (Form) {
+ default: assert(0 && "Improper form for block"); break;
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 000000000000..7d61f1edff4a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,433 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class MCSymbol;
+ class raw_ostream;
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrevData - Dwarf abbreviation data; describes one attribute of a
+ /// Dwarf abbreviation.
+ class DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+ public:
+ DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 8> Data;
+
+ public:
+ DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+ /// describes its organization.
+ class DIEValue;
+
+ class DIE {
+ protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ DIE *Parent;
+
+ /// Attributes values.
+ ///
+ SmallVector<DIEValue*, 32> Values;
+
+ // Private data for print()
+ mutable unsigned IndentCount;
+ public:
+ explicit DIE(unsigned Tag)
+ : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
+ Size(0), Parent (0), IndentCount(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+ DIE *getParent() const { return Parent; }
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+
+ /// addValue - Add a value and attributes to a DIE.
+ ///
+ void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned getSiblingOffset() const { return Offset + Size; }
+
+ /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+ /// The caller is responsible for deleting the return value at or after the
+ /// same time it destroys this DIE.
+ ///
+ DIEValue *addSiblingOffset(BumpPtrAllocator &A);
+
+ /// addChild - Add a child to the DIE.
+ ///
+ void addChild(DIE *Child) {
+ if (Child->getParent()) {
+ assert (Child->getParent() == this && "Unexpected DIE Parent!");
+ return;
+ }
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ Child->Parent = this;
+ }
+
+#ifndef NDEBUG
+ void print(raw_ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class DIEValue {
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isSectionOffset,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
+ if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
+ if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
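+
+ // Illustrative picks, following the checks above: BestForm(false, 0x2A)
+ // yields DW_FORM_data1, BestForm(false, 300) yields DW_FORM_data2, and
+ // BestForm(false, 0x100000000ULL) falls through to DW_FORM_data8.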
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ uint64_t getValue() const { return Integer; }
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEString - A string value DIE. This DIE keeps only a string reference.
+ ///
+ class DIEString : public DIEValue {
+ const StringRef Str;
+ public:
+ explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
+ return Str.size() + sizeof(char); // sizeof('\0');
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->getType() == isString; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIELabel - A label expression DIE.
+ //
+ class DIELabel : public DIEValue {
+ const MCSymbol *Label;
+ public:
+ explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// getValue - Get MCSymbol.
+ ///
+ const MCSymbol *getValue() const { return Label; }
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIELabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class DIEDelta : public DIEValue {
+ const MCSymbol *LabelHi;
+ const MCSymbol *LabelLo;
+ public:
+ DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDelta *) { return true; }
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEEntry - A pointer to another debug information entry. An instance of
+ /// this class can also be used as a proxy for a debug information entry not
+ /// yet defined (i.e. types).
+ class DIEEntry : public DIEValue {
+ DIE *const Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ //
+ class DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+ /// ComputeSize - calculate the size of the block.
+ ///
+ unsigned ComputeSize(AsmPrinter *AP);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..91b7d08c6ae2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,152 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer.EmitCFISections(false, true);
+
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ return;
+
+ // Emit references to all used personality functions
+ bool AtLeastOne = false;
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ if (!Personalities[i])
+ continue;
+ MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+ TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+ AtLeastOne = true;
+ }
+
+ if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
+ // This is a temporary hack to keep sections in the same order they
+ // were before. This lets us produce bit identical outputs while
+ // transitioning to CFI.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ if (MoveType == AsmPrinter::CFI_M_EH ||
+ (MoveType == AsmPrinter::CFI_M_Debug &&
+ moveTypeModule == AsmPrinter::CFI_M_None))
+ moveTypeModule = MoveType;
+
+ shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+ Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ // Provide LSDA information.
+ if (!shouldEmitLSDA)
+ return;
+
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+}
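+
+// On an assembly streamer the calls above typically come out as the
+// corresponding CFI directives; the encoding values and symbol names below
+// are illustrative and depend on the target's personality/LSDA encodings:
+//
+//   .cfi_startproc
+//   .cfi_personality 155, ___gxx_personality_v0
+//   Leh_func_begin0:
+//   .cfi_lsda 16, Lexception0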
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality)
+ EmitExceptionTable();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 000000000000..1fe035efde3e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,1054 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// CompileUnit - Compile unit constructor.
+CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
+ : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+/// ~CompileUnit - Destructor for compile unit.
+CompileUnit::~CompileUnit() {
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+ return Value;
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps a string reference.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
+ StringRef String) {
+ DIEValue *Value = new (DIEValueAllocator) DIEString(String);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+ DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+
+/// addBlock - Add block data.
+///
+void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(Asm);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
+ // Verify variable.
+ if (!V.Verify())
+ return;
+
+ unsigned Line = V.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+ // Verify global variable.
+ if (!G.Verify())
+ return;
+
+ unsigned Line = G.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(),
+ G.getContext().getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+ // Verify subprogram.
+ if (!SP.Verify())
+ return;
+ // If the line number is 0, don't add it.
+ if (SP.getLineNumber() == 0)
+ return;
+
+ unsigned Line = SP.getLineNumber();
+ if (!SP.getContext().Verify())
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0 || !Ty.getContext().Verify())
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+ // Verify namespace.
+ if (!NS.Verify())
+ return;
+
+ unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
+ StringRef FN = NS.getFilename();
+
+ unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on the provided MachineLocation.
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+ MachineLocation Location) {
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+}
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+ int64_t Offset) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (Reg == TRI->getFrameRegister(*Asm->MF))
+ // If the variable's offset is based on the frame register, use fbreg.
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ else if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+ addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
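+
+// For illustration (DWARF register numbers are target-specific): a location
+// at offset -8 from x86-64 RBP (DWARF reg 6), when RBP is the frame register,
+// becomes "DW_OP_fbreg -8"; the same offset from RAX (DWARF reg 0) becomes
+// "DW_OP_breg0 -8"; and a register numbered 32 or higher is encoded as
+// "DW_OP_bregx <reg>" followed by the offset.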
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable,
+/// given the extra address information encoded in the DIVariable. Add the
+/// DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned N = DV->getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+ i = 2;
+ } else
+ addRegisterOp(Block, Location.getReg());
+ }
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ for (;i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
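+
+// For illustration, a DbgVariable whose complex address elements are
+// { OpPlus, 16, OpDeref } and whose location is in a register produces
+// "DW_OP_breg<reg> 16" (or "DW_OP_fbreg 16" if that register is the frame
+// register) followed by "DW_OP_deref": the leading OpPlus/offset pair is
+// folded into the base register expression above, and the remaining OpDeref
+// is appended as DW_OP_deref.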
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
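+
+/* Putting concrete (hypothetical) numbers on the recipe above: if the byref
+ variable's storage is at offset -40 from DWARF register 6, the __forwarding
+ field sits at byte offset 8, and VarName sits at byte offset 24, then
+ starting from a pointer to the struct the emitted location expression is
+
+ DW_OP_breg6 -40, DW_OP_deref, DW_OP_plus_uconst 8, DW_OP_deref,
+ DW_OP_plus_uconst 24
+
+ and the leading DW_OP_deref is dropped when we start with the struct
+ itself. */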
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read the large comment just above.
+///
+void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIType Ty = DV->getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ StringRef varName = DV->getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy);
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
+ forwardingField = Element;
+ else if (fieldName == varName)
+ varField = Element;
+ }
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned forwardingFieldOffset =
+ DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+ unsigned varFieldOffset =
+ DIDerivedType(varField).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addConstantValue - Add a constant value entry to the variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
+ DIType Ty) {
+ assert (MO.isImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned form = dwarf::DW_FORM_udata;
+ switch (Ty.getSizeInBits()) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default: break;
+ }
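+ // Widths other than 8/16/32/64 bits keep the ULEB128-encoded DW_FORM_udata
+ // default chosen above.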
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char))
+ addSInt(Block, 0, form, MO.getImm());
+ else
+ addUInt(Block, 0, form, MO.getImm());
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+ assert (MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char*)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
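+ // Iterating forwards on little-endian targets and backwards on big-endian
+ // ones emits the bytes in target memory order.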
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
+ bool Unsigned) {
+ unsigned CIBitWidth = CI->getBitWidth();
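+ // Constants up to 64 bits wide are attached directly using a fixed-size or
+ // LEB128 form; wider values are emitted below as a block of raw bytes.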
+ if (CIBitWidth <= 64) {
+ unsigned form = 0;
+ switch (CIBitWidth) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default:
+ form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
+ }
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const APInt Val = CI->getValue();
+ const char *Ptr = (const char*)Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & Ptr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addTemplateParams - Add template parameters in buffer.
+void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+ // Add template parameters.
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (Context.isType()) {
+ DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
+ ContextDIE->addChild(Die);
+ } else if (Context.isNameSpace()) {
+ DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
+ ContextDIE->addChild(Die);
+ } else if (Context.isSubprogram()) {
+ DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context));
+ ContextDIE->addChild(Die);
+ } else if (DIE *ContextDIE = getDIE(Context))
+ ContextDIE->addChild(Die);
+ else
+ addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) {
+ DIE *TyDIE = getDIE(Ty);
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
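+ // Insert into the map before constructing the type so that recursive
+ // references from the type's own members resolve to this DIE.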
+ insertDIE(Ty, TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty) {
+ if (!Ty.Verify())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *Entry = getDIEEntry(Ty);
+ // If it exists then use the existing value.
+ if (Entry) {
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ return;
+ }
+
+ // Construct type.
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+ // Set up proxy.
+ Entry = createDIEEntry(Buffer);
+ insertDIEEntry(Ty, Entry);
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+
+ // If this is a complete composite type then include it in the
+ // list of global types.
+ addGlobalType(Ty);
+}
+
+/// addGlobalType - Add a new global type to the compile unit.
+///
+void CompileUnit::addGlobalType(DIType Ty) {
+ DIDescriptor Context = Ty.getContext();
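+ // Only named, fully-defined composite types at compile unit, file or
+ // namespace scope are recorded for the pubtypes table.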
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (Context.isCompileUnit() || Context.isFile() || Context.isNameSpace()))
+ if (DIEEntry *Entry = getDIEEntry(Ty))
+ GlobalTypes[Ty.getName()] = Entry->getEntry();
+}
+
+/// addPubTypes - Add type for pubtypes section.
+void CompileUnit::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
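+ // Element 0 of a subroutine type is the return type; the remaining elements
+ // are the formal parameter types.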
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i));
+ if (!ATy.Verify())
+ continue;
+ addGlobalType(ATy);
+ }
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+ // Get core information.
+ StringRef Name = BTy.getName();
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+ // Get core information.
+ StringRef Name = DTy.getName();
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ addType(&Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ // Get core information.
+ StringRef Name = CTy.getName();
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIDescriptor Enum(Elements.getElement(i));
+ if (Enum.isEnumerator()) {
+ ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+ Buffer.addChild(ElemDie);
+ }
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ addType(&Buffer, DIType(RTy));
+
+ bool isPrototyped = true;
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Ty = Elements.getElement(i);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ Buffer.addChild(Arg);
+ }
+ }
+ // Add prototype flag.
+ if (isPrototyped)
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward-declared struct type may not have its elements available.
+ unsigned N = Elements.getNumElements();
+ if (N == 0)
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
+ ElemDie = DD->createSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+ }
+ else if (Element.isVariable()) {
+ DIVariable DV(Element);
+ ElemDie = new DIE(dwarf::DW_TAG_variable);
+ addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ DV.getName());
+ addType(ElemDie, DV.getType());
+ addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addSourceLine(ElemDie, DV);
+ } else if (Element.isDerivedType())
+ ElemDie = createMemberDIE(DIDerivedType(Element));
+ else
+ continue;
+ Buffer.addChild(ElemDie);
+ }
+
+ if (CTy.isAppleBlockExtension())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+
+ DICompositeType ContainingType = CTy.getContainingType();
+ if (DIDescriptor(ContainingType).isCompositeType())
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
+
+ if (CTy.isObjcClassComplete())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
+ dwarf::DW_FORM_flag, 1);
+
+ if (Tag == dwarf::DW_TAG_class_type)
+ addTemplateParams(Buffer, CTy.getTemplateParams());
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ {
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else {
+ // Add zero size if it is not a forward declaration.
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, CTy);
+ }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ DIE *ParamDIE = getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty())
+ addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+
+ // The L value defines the lower bound, which is typically zero for C/C++.
+ // The H value is the upper bound. Values are 64 bit; H - L + 1 is the size
+ // of the array. If L > H then do not emit the DW_AT_lower_bound and
+ // DW_AT_upper_bound attributes. If L and H are both zero then the array has
+ // one element; in that case do not emit the lower bound.
+
+ if (L > H) {
+ Buffer.addChild(DW_Subrange);
+ return;
+ }
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
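+ // Cache it on the unit so every array type shares this single index DIE.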
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
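+ // Offset is now the bit offset of the field within its storage unit.
+ // DW_AT_bit_offset counts from the most significant bit of that unit, so
+ // flip it on little-endian targets.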
+ // Maybe we need to work from the other end.
+ if (Asm->getTargetData().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the
+ // following expression to extract the appropriate offset from the vtable:
+ //   BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ StringRef PropertyName = DT.getObjCPropertyName();
+ if (!PropertyName.empty()) {
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+ PropertyName);
+ StringRef GetterName = DT.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+ dwarf::DW_FORM_string, GetterName);
+ StringRef SetterName = DT.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+ dwarf::DW_FORM_string, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (DT.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (DT.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (DT.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (DT.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (DT.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (DT.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+ }
+ return MemberDie;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 000000000000..213c7fc630d3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,280 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ DwarfDebug *DD;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+public:
+ CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+public:
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to the specified debug
+ /// information entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+
+ /// addConstantFPValue - Add constant value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+
+ /// addTemplateParams - Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+ /// addRegisterOp - Add register operand.
+ void addRegisterOp(DIE *TheDie, unsigned Reg);
+
+ /// addRegisterOffset - Add register offset.
+ void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addVariableAddress - Add DW_AT_location attribute for a
+ /// DbgVariable based on provided MachineLocation.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity.
+ void addType(DIE *Entity, DIType Ty);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(DIType Ty);
+
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(DIDerivedType DT);
+
+private:
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
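+ // DIEIntegerOne - A pre-allocated DIEInteger with value one, shared by
+ // attributes whose integer value is 1.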
+ DIEInteger *DIEIntegerOne;
+};
+
+} // end llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 000000000000..125e1e86b12f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2755 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/Path.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
+ cl::desc("Print DbgScope information for each machine instruction"));
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::init(false));
+
+namespace {
+ const char *DWARFGroupName = "DWARF Emission";
+ const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+DIType DbgVariable::getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ return Ty;
+ }
+ return Ty;
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgRange - This is used to track range of instructions with identical
+/// debug info scope.
+///
+typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange;
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class DbgScope {
+ DbgScope *Parent; // Parent to this scope.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ // Location at which this scope is inlined.
+ AssertingVH<const MDNode> InlinedAtLocation;
+ bool AbstractScope; // Abstract Scope
+ const MachineInstr *LastInsn; // Last instruction of this scope.
+ const MachineInstr *FirstInsn; // First instruction of this scope.
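+ // Depth-first search entry/exit numbers, used by dominates() below.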
+ unsigned DFSIn, DFSOut;
+ // Scopes defined in scope. Contents not owned.
+ SmallVector<DbgScope *, 4> Scopes;
+ // Variables declared in scope. Contents owned.
+ SmallVector<DbgVariable *, 8> Variables;
+ SmallVector<DbgRange, 4> Ranges;
+ // Private state for dump()
+ mutable unsigned IndentLevel;
+public:
+ DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0)
+ : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+ LastInsn(0), FirstInsn(0),
+ DFSIn(0), DFSOut(0), IndentLevel(0) {}
+ virtual ~DbgScope();
+
+ // Accessors.
+ DbgScope *getParent() const { return Parent; }
+ void setParent(DbgScope *P) { Parent = P; }
+ DIDescriptor getDesc() const { return Desc; }
+ const MDNode *getInlinedAt() const { return InlinedAtLocation; }
+ const MDNode *getScopeNode() const { return Desc; }
+ const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+ const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
+ const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
+
+ /// openInsnRange - This scope covers an instruction range starting from MI.
+ void openInsnRange(const MachineInstr *MI) {
+ if (!FirstInsn)
+ FirstInsn = MI;
+
+ if (Parent)
+ Parent->openInsnRange(MI);
+ }
+
+ /// extendInsnRange - Extend the current instruction range covered by
+ /// this scope.
+ void extendInsnRange(const MachineInstr *MI) {
+ assert (FirstInsn && "MI Range is not open!");
+ LastInsn = MI;
+ if (Parent)
+ Parent->extendInsnRange(MI);
+ }
+
+ /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected
+ /// until now. This is used when a new scope is encountered while walking
+ /// machine instructions.
+ void closeInsnRange(DbgScope *NewScope = NULL) {
+ assert (LastInsn && "Last insn missing!");
+ Ranges.push_back(DbgRange(FirstInsn, LastInsn));
+ FirstInsn = NULL;
+ LastInsn = NULL;
+ // If Parent dominates NewScope then do not close Parent's instruction
+ // range.
+ if (Parent && (!NewScope || !Parent->dominates(NewScope)))
+ Parent->closeInsnRange(NewScope);
+ }
+
+ void setAbstractScope() { AbstractScope = true; }
+ bool isAbstractScope() const { return AbstractScope; }
+
+ // Depth First Search support to walk and manipulate the DbgScope hierarchy.
+ unsigned getDFSOut() const { return DFSOut; }
+ void setDFSOut(unsigned O) { DFSOut = O; }
+ unsigned getDFSIn() const { return DFSIn; }
+ void setDFSIn(unsigned I) { DFSIn = I; }
+ bool dominates(const DbgScope *S) {
+ if (S == this)
+ return true;
+ if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
+ return true;
+ return false;
+ }
+
+ /// addScope - Add a scope to the scope.
+ ///
+ void addScope(DbgScope *S) { Scopes.push_back(S); }
+
+ /// addVariable - Add a variable to the scope.
+ ///
+ void addVariable(DbgVariable *V) { Variables.push_back(V); }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // end llvm namespace
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+ raw_ostream &err = dbgs();
+ err.indent(IndentLevel);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ N->dump();
+ if (AbstractScope)
+ err << "Abstract Scope\n";
+
+ IndentLevel += 2;
+ if (!Scopes.empty())
+ err << "Children ...\n";
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+ if (Scopes[i] != this)
+ Scopes[i]->dump();
+
+ IndentLevel -= 2;
+}
+#endif
+
+DbgScope::~DbgScope() {
+ for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+ delete Variables[j];
+}
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+ : Asm(A), MMI(Asm->MMI), FirstCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize),
+ CurrentFnDbgScope(0), PrevLabel(NULL) {
+ NextStringPoolNumber = 0;
+
+ DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+ DwarfStrSectionSym = TextSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ FunctionBeginSym = FunctionEndSym = 0;
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule(M);
+ }
+}
+
+DwarfDebug::~DwarfDebug() {
+}
+
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+ if (Entry.first) return Entry.first;
+
+ Entry.second = NextStringPoolNumber++;
+ return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+}
+
+
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+/// getRealLinkageName - If the linkage name starts with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix
+/// before the symbol name, return the linkage name with that prefix stripped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
+ CompileUnit *SPCU = getCompileUnit(SP);
+ DIE *SPDie = SPCU->getDIE(SP);
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ SPCU->insertDIE(SP, SPDie);
+
+ // Add to context owner.
+ SPCU->addToContextOwner(SPDie, SP.getContext());
+
+ // Add function template parameters.
+ SPCU->addTemplateParams(*SPDie, SP.getTemplateParams());
+
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty())
+ SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+
+ // If this DIE is going to refer declaration info using AT_specification
+ // then there is no need to add other attributes.
+ if (SP.getFunctionDeclaration().isSubprogram())
+ return SPDie;
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ SP.getName());
+
+ SPCU->addSourceLine(SPDie, SP);
+
+ if (SP.isPrototyped())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+ SPCU->addType(SPDie, SPTy);
+ else
+ SPCU->addType(SPDie, DIType(Args.getElement(0)));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
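+ // DW_AT_vtable_elem_location holds the virtual slot index as the
+ // expression DW_OP_constu <index>.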
+ DIEBlock *Block = SPCU->getDIEBlock();
+ SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType()));
+ }
+
+ if (!SP.isDefinition()) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(DIType(Args.getElement(i)));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+ if (!SP.isLocalToUnit())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ if (SP.isOptimized())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ if (unsigned isa = Asm->getISAEncoding()) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ return SPDie;
+}
+
+DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) {
+ assert(N && "Invalid Scope encoding!");
+
+ DbgScope *AScope = AbstractScopes.lookup(N);
+ if (AScope)
+ return AScope;
+
+ DbgScope *Parent = NULL;
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ Parent = getOrCreateAbstractScope(ParentDesc);
+ }
+
+ AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
+
+ if (Parent)
+ Parent->addScope(AScope);
+ AScope->setAbstractScope();
+ AbstractScopes[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+static bool isSubprogramContext(const MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(DIType(Context).getContext());
+ return false;
+}
+
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
+ CompileUnit *SPCU = getCompileUnit(SPNode);
+ DIE *SPDie = SPCU->getDIE(SPNode);
+
+ assert(SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (SPDecl.isSubprogram())
+ // Refer function declaration directly.
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ createSubprogramDIE(SPDecl));
+ else {
+ // There is no need to generate a specification DIE for a function
+ // defined at compile unit level. If a function is defined inside another
+ // function then gdb prefers the definition at the top level and does not
+ // expect a specification DIE in the parent function. So avoid creating a
+ // specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(DIType(Args.getElement(i)));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
+ // Pick up abstract subprogram DIE.
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsSPDIE);
+ SPCU->addDie(SPDie);
+ }
+
+ SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+ SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ return SPDie;
+}
+
+/// constructLexicalScopeDIE - Construct a new DW_TAG_lexical_block DIE
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return 0;
+
+ CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode());
+ SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
+ for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
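+ // Terminate this scope's entry in .debug_ranges with a pair of null
+ // symbols, which are emitted as the 0,0 end-of-list marker.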
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ return ScopeDIE;
+ }
+
+ const MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ const MCSymbol *End = getLabelAfterInsn(RI->second);
+
+ if (End == 0) return 0;
+
+ assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+ assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+
+ return ScopeDIE;
+}
+
+/// constructInlinedScopeDIE - This scope represents inlined body of
+/// a function. Construct DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
+
+ const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ assert (Ranges.empty() == false
+ && "DbgScope does not have instruction markers!");
+
+ // FIXME: The .debug_inlined section specification does not clearly state
+ // how to emit an inlined scope that is split into multiple instruction
+ // ranges. For now, use the first instruction range and emit the
+ // low_pc/high_pc pair and the corresponding .debug_inlined entry for it.
+ SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+ if (StartLabel == 0 || EndLabel == 0) {
+ assert(0 && "Unexpected Start and End labels for an inlined scope!");
+ return 0;
+ }
+ assert(StartLabel->isDefined() &&
+ "Invalid starting label for an inlined scope!");
+ assert(EndLabel->isDefined() &&
+ "Invalid end label for an inlined scope!");
+
+ if (!Scope->getScopeNode())
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ CompileUnit *TheCU = getCompileUnit(InlinedSP);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+ if (!OriginDIE) {
+ DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+ return NULL;
+ }
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
+
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
+
+ InlinedSubprogramDIEs.insert(OriginDIE);
+
+ // Track the start label for this inlined function.
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP);
+
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
+ ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP);
+ } else
+ I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
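+ // Record the call site: DW_AT_call_file/DW_AT_call_line describe where this
+ // inlined copy was called from, taken from the scope's inlined-at location.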
+ DILocation DL(Scope->getInlinedAt());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
+}
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
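+ // Look through derived types (typedefs, qualifiers) to the underlying basic
+ // type before checking its encoding.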
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
+ StringRef Name = DV->getName();
+ if (Name.empty())
+ return NULL;
+
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (DV->getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
+ }
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ CompileUnit *VariableCU = getCompileUnit(DV->getVariable());
+ DIE *AbsDIE = NULL;
+ DenseMap<const DbgVariable *, const DbgVariable *>::iterator
+ V2AVI = VarToAbstractVarMap.find(DV);
+ if (V2AVI != VarToAbstractVarMap.end())
+ AbsDIE = V2AVI->second->getDIE();
+
+ if (AbsDIE)
+ VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ else {
+ VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ Name);
+ VariableCU->addSourceLine(VariableDie, DV->getVariable());
+
+ // Add variable type.
+ VariableCU->addType(VariableDie, DV->getType());
+ }
+
+ if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
+ VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial,
+ dwarf::DW_FORM_flag, 1);
+ else if (DIVariable(DV->getVariable()).isArtificial())
+ VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial,
+ dwarf::DW_FORM_flag, 1);
+
+ if (Scope->isAbstractScope()) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
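+ // A variable whose location changes over its lifetime refers to its
+ // .debug_loc location list rather than a single location expression.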
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ UseDotDebugLocEntry.insert(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI =
+ DbgVariableToDbgInstMap.find(DV);
+ if (DVI != DbgVariableToDbgInstMap.end()) {
+ const MachineInstr *DVInsn = DVI->second;
+ bool updated = false;
+ // FIXME : Handle getNumOperands != 3
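+ // In a three-operand DBG_VALUE, operand 0 holds the value's location
+ // (register, immediate or FP immediate) and operand 1 the offset used for
+ // frame-register-relative addresses.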
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
+ FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ VariableCU->addVariableAddress(DV, VariableDie, Location);
+
+ } else if (RegOp.getReg())
+ VariableCU->addVariableAddress(DV, VariableDie,
+ MachineLocation(RegOp.getReg()));
+ updated = true;
+ }
+ else if (DVInsn->getOperand(0).isImm())
+ updated =
+ VariableCU->addConstantValue(VariableDie, DVInsn->getOperand(0),
+ DV->getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
+ VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ VariableCU->addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ isUnsignedDIType(DV->getType()));
+ } else {
+ VariableCU->addVariableAddress(DV, VariableDie,
+ Asm->getDebugValueLocation(DVInsn));
+ updated = true;
+ }
+ if (!updated) {
+ // If VariableDie was not updated then the DBG_VALUE instruction does not
+ // have valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // ... otherwise use the frame index, if available.
+ int FI = 0;
+ if (findVariableFrameIndex(DV, &FI)) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ VariableCU->addVariableAddress(DV, VariableDie, Location);
+ }
+
+ DV->setDIE(VariableDie);
+ return VariableDie;
+
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
+ if (!Scope || !Scope->getScopeNode())
+ return NULL;
+
+ SmallVector <DIE *, 8> Children;
+
+ // Collect arguments for current function.
+ if (Scope == CurrentFnDbgScope)
+ for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+ if (DbgVariable *ArgDV = CurrentFnArguments[i])
+ if (DIE *Arg = constructVariableDIE(ArgDV, Scope))
+ Children.push_back(Arg);
+
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+ Children.push_back(Variable);
+ const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(Scopes[j]))
+ Children.push_back(Nested);
+ DIScope DS(Scope->getScopeNode());
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = constructInlinedScopeDIE(Scope);
+ else if (DS.isSubprogram()) {
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
+ ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+ }
+ else
+ ScopeDIE = updateSubprogramScopeDIE(DS);
+ }
+ else {
+ // There is no need to emit empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
+ ScopeDIE = constructLexicalScopeDIE(Scope);
+ }
+
+ if (!ScopeDIE) return NULL;
+
+ // Add children
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
+
+ if (DS.isSubprogram())
+ getCompileUnit(DS)->addPubTypes(DISubprogram(DS));
+
+ return ScopeDIE;
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
+
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If the frontend did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+ // MCStreamer expects the full path name as the filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
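+
+// For example, the first file registered here, say DirName "/tmp" and
+// FileName "foo.c", is stored under its full path and gets source id 1,
+// producing a directive such as:
+//   .file 1 "/tmp/foo.c"
+// A later lookup for the same path simply returns the cached id.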
+
+/// constructCompileUnit - Create new CompileUnit for the given
+/// metadata node with tag DW_TAG_compile_unit.
+void DwarfDebug::constructCompileUnit(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(FN, Dir);
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
+ NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ DIUnit.getProducer());
+ NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit.getLanguage());
+ NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+ // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
+ // simplifies debug range entries.
+ NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+ // DW_AT_stmt_list is an offset of the line number information for this
+ // compile unit in the debug_line section.
+ if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("section_line"));
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+ if (!Dir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (DIUnit.isOptimized())
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+ unsigned RVer = DIUnit.getRunTimeVersion();
+ if (RVer)
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ if (!FirstCU)
+ FirstCU = NewCU;
+ CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompileUnit - Get the CompileUnit for the given debug info node.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+ assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
+ DIDescriptor D(N);
+ const MDNode *CUNode = NULL;
+ if (D.isCompileUnit())
+ CUNode = N;
+ else if (D.isSubprogram())
+ CUNode = DISubprogram(N).getCompileUnit();
+ else if (D.isType())
+ CUNode = DIType(N).getCompileUnit();
+ else if (D.isGlobalVariable())
+ CUNode = DIGlobalVariable(N).getCompileUnit();
+ else if (D.isVariable())
+ CUNode = DIVariable(N).getCompileUnit();
+ else if (D.isNameSpace())
+ CUNode = DINameSpace(N).getCompileUnit();
+ else if (D.isFile())
+ CUNode = DIFile(N).getCompileUnit();
+ else
+ return FirstCU;
+
+ DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+ = CUMap.find(CUNode);
+ if (I == CUMap.end())
+ return FirstCU;
+ return I->second;
+}
+
+// Return the constant expression if the value is a GEP used to access a
+// merged global constant, e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global value.
+ if (!isa<GlobalValue>(CE->getOperand(0)))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
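+
+// The offset recovered here feeds the location expression that
+// constructGlobalVariableDIE builds below for a merged global, which comes
+// out roughly as: DW_OP_addr <_MergedGlobals>, DW_OP_constu <offset>,
+// DW_OP_plus.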
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
+ DIGlobalVariable GV(N);
+
+ // If debug information is malformed then ignore it.
+ if (!GV.Verify())
+ return;
+
+ // Check for pre-existence.
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(GV))
+ return;
+
+ DIType GTy = GV.getType();
+ DIE *VariableDIE = new DIE(GV.getTag());
+
+ bool isGlobalVariable = GV.getGlobal() != NULL;
+
+ // Add name.
+ TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty() && isGlobalVariable)
+ TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ TheCU->addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ TheCU->addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ TheCU->addSourceLine(VariableDIE, GV);
+ // Add to map.
+ TheCU->insertDIE(N, VariableDIE);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ TheCU->addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ TheCU->addDie(VariableSpecDIE);
+ } else {
+ TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+ TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+ else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+ ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+
+ return;
+}
+
+/// constructSubprogramDIE - Construct a subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
+ DISubprogram SP(N);
+
+ // Check for pre-existence.
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(N))
+ return;
+
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return;
+
+ DIE *SubprogramDie = createSubprogramDIE(SP);
+
+ // Add to map.
+ TheCU->insertDIE(N, SubprogramDie);
+
+ // Add to context owner.
+ TheCU->addToContextOwner(SubprogramDie, SP.getContext());
+
+ // Expose as global.
+ TheCU->addGlobal(SP.getName(), SubprogramDie);
+
+ return;
+}
+
+/// beginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::beginModule(Module *M) {
+ if (DisableDebugInfoPrinting)
+ return;
+
+ // If the module has named metadata anchors then use them; otherwise scan
+ // the module using the debug info finder to collect debug info.
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+
+ NamedMDNode *GV_Nodes = M->getNamedMetadata("llvm.dbg.gv");
+ NamedMDNode *SP_Nodes = M->getNamedMetadata("llvm.dbg.sp");
+ if (!GV_Nodes && !SP_Nodes)
+ // If there are no global variables and no functions then
+ // there is no debug info in this module.
+ return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i)
+ constructCompileUnit(CU_Nodes->getOperand(i));
+
+ if (GV_Nodes)
+ for (unsigned i = 0, e = GV_Nodes->getNumOperands(); i != e; ++i)
+ constructGlobalVariableDIE(GV_Nodes->getOperand(i));
+
+ if (SP_Nodes)
+ for (unsigned i = 0, e = SP_Nodes->getNumOperands(); i != e; ++i)
+ constructSubprogramDIE(SP_Nodes->getOperand(i));
+
+ } else {
+
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(*M);
+
+ bool HasDebugInfo = false;
+ // Scan all the compile units to see if any are marked as the main unit.
+ // If not, we do not generate debug info.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ if (DICompileUnit(*I).isMain()) {
+ HasDebugInfo = true;
+ break;
+ }
+ }
+ if (!HasDebugInfo) return;
+
+ // Create all the compile unit DIEs.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I)
+ constructCompileUnit(*I);
+
+ // Create DIEs for each global variable.
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I)
+ constructGlobalVariableDIE(*I);
+
+ // Create DIEs for each subprogram.
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I)
+ constructSubprogramDIE(*I);
+ }
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ // Emit initial sections.
+ EmitSectionLabels();
+
+ // Create type DIEs for types referenced from the llvm.dbg.enum and
+ // llvm.dbg.ty named metadata.
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
+ }
+
+ // Prime section data.
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+}
+
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+ if (!FirstCU) return;
+ const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
+ if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+ for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+ if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+ DISubprogram SP(AllSPs->getOperand(SI));
+ if (!SP.Verify()) continue;
+
+ // Collect info for variables that were optimized out.
+ if (!SP.isDefinition()) continue;
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
+ if (!NMD) continue;
+ unsigned E = NMD->getNumOperands();
+ if (!E) continue;
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+ DeadFnScopeMap[SP] = Scope;
+ for (unsigned I = 0; I != E; ++I) {
+ DIVariable DV(NMD->getOperand(I));
+ if (!DV.Verify()) continue;
+ Scope->addVariable(new DbgVariable(DV));
+ }
+
+ // Construct subprogram DIE and add variables DIEs.
+ constructSubprogramDIE(SP);
+ DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+ if (VariableDIE)
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
+
+ // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+ if (!N) continue;
+ DIE *NDie = getCompileUnit(N)->getDIE(N);
+ if (!NDie) continue;
+ getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type,
+ dwarf::DW_FORM_ref4, NDie);
+ }
+
+ // Emit end labels for the standard text and data sections.
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->OutStreamer.SwitchSection(SectionMap[i]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+ }
+
+ // Compute DIE offsets and sizes.
+ computeSizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ emitDebugInfo();
+
+ // Emit the corresponding abbreviations into the abbrev section.
+ emitAbbreviations();
+
+ // Emit info into a debug pubnames section.
+ emitDebugPubNames();
+
+ // Emit info into a debug pubtypes section.
+ emitDebugPubTypes();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ emitDebugInlineInfo();
+
+ // Emit info into a debug str section.
+ emitDebugStr();
+
+ // clean up.
+ DeleteContainerSeconds(DeadFnScopeMap);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+ FirstCU = NULL; // Reset for the next Module, if any.
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+ DebugLoc ScopeLoc) {
+
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ LLVMContext &Ctx = Var->getContext();
+ DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var);
+ Scope->addVariable(AbsDbgVariable);
+ AbstractVariables[Var] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
+/// addCurrentFnArgument - If Var is a current function argument then add
+/// it to the CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, DbgScope *Scope) {
+ if (Scope != CurrentFnDbgScope)
+ return false;
+ DIVariable DV = Var->getVariable();
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ return false;
+ unsigned ArgNo = DV.getArgNumber();
+ if (ArgNo == 0)
+ return false;
+
+ size_t Size = CurrentFnArguments.size();
+ if (Size == 0)
+ CurrentFnArguments.resize(MF->getFunction()->arg_size());
+ // llvm::Function's argument count is not a good indicator of how many
+ // arguments the function has at the source level.
+ if (ArgNo > Size)
+ CurrentFnArguments.resize(ArgNo * 2);
+ CurrentFnArguments[ArgNo - 1] = Var;
+ return true;
+}
+
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ Processed.insert(Var);
+ DIVariable DV(Var);
+ const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+ DbgScope *Scope = findDbgScope(VP.second);
+
+ // If variable scope is not found then skip this variable.
+ if (Scope == 0)
+ continue;
+
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+ DbgVariable *RegVar = new DbgVariable(DV);
+ recordVariableFrameIndex(RegVar, VP.first);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ Scope->addVariable(RegVar);
+ if (AbsDbgVariable) {
+ recordVariableFrameIndex(AbsDbgVariable, VP.first);
+ VarToAbstractVarMap[RegVar] = AbsDbgVariable;
+ }
+ }
+}
+
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ return MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+ MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
+}
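+
+// Such a DBG_VALUE has the three-operand shape
+//   DBG_VALUE %reg, 0, !variable
+// i.e. a non-zero register, a zero offset immediate, and the DIVariable
+// metadata node as the last operand.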
+
+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
+ return DotDebugLocEntry();
+}
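+
+// For instance, an indirect "DBG_VALUE %reg, <imm>, !var" becomes an entry
+// whose machine location is the memory address <imm>(%reg), valid from
+// FLabel up to SLabel.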
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+
+ // Collect variable info from the side table maintained by MMI.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ for (SmallVectorImpl<const MDNode*>::const_iterator
+ UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+ ++UVI) {
+ const MDNode *Var = *UVI;
+ if (Processed.count(Var))
+ continue;
+
+ // History contains relevant DBG_VALUE instructions for Var and instructions
+ // clobbering it.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty())
+ continue;
+ const MachineInstr *MInsn = History.front();
+
+ DIVariable DV(Var);
+ DbgScope *Scope = NULL;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ Scope = CurrentFnDbgScope;
+ else
+ Scope = findDbgScope(MInsn->getDebugLoc());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(DV);
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+ DbgVariable *RegVar = new DbgVariable(DV);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ Scope->addVariable(RegVar);
+ if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+ DbgVariableToDbgInstMap[AbsVar] = MInsn;
+ VarToAbstractVarMap[RegVar] = AbsVar;
+ }
+
+ // Simple ranges that are fully coalesced.
+ if (History.size() <= 1 || (History.size() == 2 &&
+ MInsn->isIdenticalTo(History.back()))) {
+ DbgVariableToDbgInstMap[RegVar] = MInsn;
+ continue;
+ }
+
+ // handle multiple DBG_VALUE instructions describing one variable.
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+
+ for (SmallVectorImpl<const MachineInstr*>::const_iterator
+ HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
+ const MachineInstr *Begin = *HI;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if DBG_VALUE is truncating a range.
+ if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
+ && !Begin->getOperand(0).getReg())
+ continue;
+
+ // Compute the range for a register location.
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = 0;
+
+ if (HI + 1 == HE)
+ // If Begin is the last instruction in History then its value is valid
+ // until the end of the function.
+ SLabel = FunctionEndSym;
+ else {
+ const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
+ if (End->isDebugValue())
+ SLabel = getLabelBeforeInsn(End);
+ else {
+ // End is a normal instruction clobbering the range.
+ SLabel = getLabelAfterInsn(End);
+ assert(SLabel && "Forgot label after clobber instruction");
+ ++HI;
+ }
+ }
+
+ // The value is valid until the next DBG_VALUE or clobber.
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
+ }
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
+
+ // Collect info for variables that were optimized out.
+ const Function *F = MF->getFunction();
+ if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+ if (!DV || !Processed.insert(DV))
+ continue;
+ DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
+ }
+ }
+}
+
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ // Check if source location changes, but ignore DBG_VALUE locations.
+ if (!MI->isDebugValue()) {
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+ unsigned Flags = DWARF2_FLAG_IS_STMT;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ }
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ } else
+ recordSourceLine(0, 0, 0, 0);
+ }
+ }
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!MI->isDebugValue())
+ PrevLabel = 0;
+
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsAfterInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+/// getOrCreateDbgScope - Create DbgScope for the scope.
+DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+
+ if (!InlinedAt) {
+ DbgScope *WScope = DbgScopeMap.lookup(Scope);
+ if (WScope)
+ return WScope;
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+ DbgScopeMap.insert(std::make_pair(Scope, WScope));
+ if (DIDescriptor(Scope).isLexicalBlock()) {
+ DbgScope *Parent =
+ getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
+ WScope->setParent(Parent);
+ Parent->addScope(WScope);
+ } else if (DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(Asm->MF->getFunction()))
+ CurrentFnDbgScope = WScope;
+
+ return WScope;
+ }
+
+ getOrCreateAbstractScope(Scope);
+ DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+ if (WScope)
+ return WScope;
+
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+ DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+ InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
+ DbgScope *Parent =
+ getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
+ WScope->setParent(Parent);
+ Parent->addScope(WScope);
+ return WScope;
+}
+
+/// calculateDominanceGraph - Calculate dominance graph for DbgScope
+/// hierarchy.
+static void calculateDominanceGraph(DbgScope *Scope) {
+ assert (Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<DbgScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ DbgScope *WS = WorkStack.back();
+ const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
+ bool visitedChildren = false;
+ for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ DbgScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
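+
+// With these DFS numbers, an ancestor query on the scope tree reduces to an
+// interval check: scope A dominates scope B exactly when
+// DFSIn(A) <= DFSIn(B) and DFSOut(B) <= DFSOut(A). The
+// PrevDbgScope->dominates(S) check in extractScopeInformation relies on this.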
+
+/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
+static
+void printDbgScopeInfo(const MachineFunction *MF,
+ DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
+{
+#ifndef NDEBUG
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ unsigned PrevDFSIn = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+
+ // Check if instruction has valid location information.
+ DebugLoc MIDL = MInsn->getDebugLoc();
+ if (!MIDL.isUnknown()) {
+ MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+ dbgs() << " [ ";
+ if (InlinedAt)
+ dbgs() << "*";
+ DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+ MI2ScopeMap.find(MInsn);
+ if (DI != MI2ScopeMap.end()) {
+ DbgScope *S = DI->second;
+ dbgs() << S->getDFSIn();
+ PrevDFSIn = S->getDFSIn();
+ } else
+ dbgs() << PrevDFSIn;
+ } else
+ dbgs() << " [ x" << PrevDFSIn;
+ dbgs() << " ]";
+ MInsn->dump();
+ }
+ dbgs() << "\n";
+ }
+#endif
+}
+
+/// extractScopeInformation - Scan machine instructions in this function
+/// and collect DbgScopes. Return true if at least one scope was found.
+bool DwarfDebug::extractScopeInformation() {
+ // If scope information was extracted using .dbg intrinsics then there is no
+ // need to extract this information by scanning each instruction.
+ if (!DbgScopeMap.empty())
+ return false;
+
+ // Scan each instruction and create scopes. First build a working set of
+ // scopes.
+ SmallVector<DbgRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
+ DebugLoc PrevDL;
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+
+ // Check if instruction has valid location information.
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute any instruction to the output.
+ if (MInsn->isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen the beginning of an instruction range and the
+ // current instruction's scope does not match the scope of the first
+ // instruction in this range, then create a new instruction range.
+ DEBUG(dbgs() << "Creating new instruction range :\n");
+ DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
+ DEBUG(dbgs() << "End Range at " << *PrevMI);
+ DEBUG(dbgs() << "Next Range starting at " << *MInsn);
+ DEBUG(dbgs() << "------------------------\n");
+ DbgRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = MInsn;
+
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevDL = MIDL;
+ }
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ DbgRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
+ }
+
+ if (!CurrentFnDbgScope)
+ return false;
+
+ calculateDominanceGraph(CurrentFnDbgScope);
+ if (PrintDbgScope)
+ printDbgScopeInfo(Asm->MF, MI2ScopeMap);
+
+ // Find ranges of instructions covered by each DbgScope.
+ DbgScope *PrevDbgScope = NULL;
+ for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const DbgRange &R = *RI;
+ DbgScope *S = MI2ScopeMap.lookup(R.first);
+ assert (S && "Lost DbgScope for a machine instruction!");
+ if (PrevDbgScope && !PrevDbgScope->dominates(S))
+ PrevDbgScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevDbgScope = S;
+ }
+
+ if (PrevDbgScope)
+ PrevDbgScope->closeInsnRange();
+
+ identifyScopeMarkers();
+
+ return !DbgScopeMap.empty();
+}
+
+/// identifyScopeMarkers() -
+/// Each DbgScope has a first and a last instruction that mark the beginning
+/// and the end of the scope, respectively. Create an inverse map that lists
+/// the scopes starting (and ending) with an instruction. One instruction may
+/// start (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+ SmallVector<DbgScope *, 4> WorkList;
+ WorkList.push_back(CurrentFnDbgScope);
+ while (!WorkList.empty()) {
+ DbgScope *S = WorkList.pop_back_val();
+
+ const SmallVector<DbgScope *, 4> &Children = S->getScopes();
+ if (!Children.empty())
+ for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ if (S->isAbstractScope())
+ continue;
+
+ const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
+ if (Ranges.empty())
+ continue;
+ for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ assert(RI->first && "DbgRange does not have first instruction!");
+ assert(RI->second && "DbgRange does not have second instruction!");
+ requestLabelBeforeInsn(RI->first);
+ requestLabelAfterInsn(RI->second);
+ }
+ }
+}
+
+/// getScopeNode - Get MDNode for DebugLoc's scope.
+static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
+ if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
+ return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
+ return DL.getScope(Ctx);
+}
+
+/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
+/// line number info for the function.
+static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
+ const MDNode *Scope = getScopeNode(DL, Ctx);
+ DISubprogram SP = getDISubprogram(Scope);
+ if (SP.Verify())
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ return DebugLoc();
+}
+
+/// beginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo()) return;
+ if (!extractScopeInformation()) return;
+
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+ assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+ std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool AtBlockEntry = true;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+
+ if (MI->isDebugValue()) {
+ assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+
+ // Keep track of user variables.
+ const MDNode *Var =
+ MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ // Variable is in a register, we need to check for clobbers.
+ if (isDbgValueInDefinedReg(MI))
+ LiveUserVar[MI->getOperand(0).getReg()] = Var;
+
+ // Check the history of this variable.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty()) {
+ UserVariables.push_back(Var);
+ // The first mention of a function argument gets the FunctionBeginSym
+ // label, so arguments are visible when breaking at function entry.
+ DIVariable DV(Var);
+ if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(getDISubprogram(DV.getContext()))
+ .describes(MF->getFunction()))
+ LabelsBeforeInsn[MI] = FunctionBeginSym;
+ } else {
+ // We have seen this variable before. Try to coalesce DBG_VALUEs.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue()) {
+ // Coalesce identical entries at the end of History.
+ if (History.size() >= 2 &&
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
+ History.pop_back();
+ }
+
+ // Terminate old register assignments that don't reach MI.
+ MachineFunction::const_iterator PrevMBB = Prev->getParent();
+ if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+ isDbgValueInDefinedReg(Prev)) {
+ // Previous register assignment needs to terminate at the end of
+ // its basic block.
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end()) {
+ // Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
+ History.pop_back();
+ }
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ }
+ }
+ History.push_back(MI);
+ } else {
+ // Not a DBG_VALUE instruction.
+ if (!MI->isLabel())
+ AtBlockEntry = false;
+
+ // The first known non-DBG_VALUE location marks the beginning of the
+ // function body.
+ if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())
+ PrologEndLoc = MI->getDebugLoc();
+
+ // Check if the instruction clobbers any registers with debug vars.
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+ continue;
+ for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+ unsigned Reg = *AI; ++AI) {
+ const MDNode *Var = LiveUserVar[Reg];
+ if (!Var)
+ continue;
+ // Reg is now clobbered.
+ LiveUserVar[Reg] = 0;
+
+ // Was Var last defined by a DBG_VALUE referring to Reg?
+ DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+ if (HistI == DbgValues.end())
+ continue;
+ SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+ if (History.empty())
+ continue;
+ const MachineInstr *Prev = History.back();
+ // Sanity-check: Register assignments are terminated at the end of
+ // their block.
+ if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+ continue;
+ // Is the variable still in Reg?
+ if (!isDbgValueInDefinedReg(Prev) ||
+ Prev->getOperand(0).getReg() != Reg)
+ continue;
+ // Var is clobbered. Make sure the next instruction gets a label.
+ History.push_back(MI);
+ }
+ }
+ }
+ }
+ }
+
+ for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+ I != E; ++I) {
+ SmallVectorImpl<const MachineInstr*> &History = I->second;
+ if (History.empty())
+ continue;
+
+ // Make sure the final register assignments are terminated.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+ const MachineBasicBlock *PrevMBB = Prev->getParent();
+ MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end())
+ // Drop DBG_VALUE for empty range.
+ History.pop_back();
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ // Request labels for the full history.
+ for (unsigned i = 0, e = History.size(); i != e; ++i) {
+ const MachineInstr *MI = History[i];
+ if (MI->isDebugValue())
+ requestLabelBeforeInsn(MI);
+ else
+ requestLabelAfterInsn(MI);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = FunctionBeginSym;
+
+ // Record beginning of function.
+ if (!PrologEndLoc.isUnknown()) {
+ DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
+ MF->getFunction()->getContext());
+ recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ DWARF2_FLAG_IS_STMT);
+ }
+}
+
+/// endFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
+
+ if (CurrentFnDbgScope) {
+
+ // Define end label for subprogram.
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
+
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI) {
+ DISubprogram SP((*AI)->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ if (NamedMDNode *NMD =
+ getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+ if (!DV || !ProcessedVars.insert(DV))
+ continue;
+ DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
+ }
+ }
+ }
+ if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
+ constructScopeDIE(*AI);
+ }
+
+ DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
+
+ if (!DisableFramePointerElim(*MF))
+ getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE,
+ dwarf::DW_AT_APPLE_omit_frame_ptr,
+ dwarf::DW_FORM_flag, 1);
+
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+ MMI->getFrameMoves()));
+ }
+
+ // Clear debug info
+ CurrentFnDbgScope = NULL;
+ DeleteContainerPointers(CurrentFnArguments);
+ DbgVariableToFrameIndexMap.clear();
+ VarToAbstractVarMap.clear();
+ DbgVariableToDbgInstMap.clear();
+ InlinedDbgScopeMap.clear();
+ DeleteContainerSeconds(DbgScopeMap);
+ UserVariables.clear();
+ DbgValues.clear();
+ DeleteContainerSeconds(AbstractScopes);
+ AbstractScopesList.clear();
+ AbstractVariables.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = NULL;
+}
+
+/// recordVariableFrameIndex - Record a variable's index.
+void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
+ assert (V && "Invalid DbgVariable!");
+ DbgVariableToFrameIndexMap[V] = Index;
+}
+
+/// findVariableFrameIndex - Return true if a frame index for the variable
+/// is found, and update FI to hold the value of the index.
+bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
+ assert (V && "Invalid DbgVariable!");
+ DenseMap<const DbgVariable *, int>::iterator I =
+ DbgVariableToFrameIndexMap.find(V);
+ if (I == DbgVariableToFrameIndexMap.end())
+ return false;
+ *FI = I->second;
+ return true;
+}
+
+/// findDbgScope - Find DbgScope for the debug loc.
+DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
+ if (DL.isUnknown())
+ return NULL;
+
+ DbgScope *Scope = NULL;
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ if (MDNode *IA = DL.getInlinedAt(Ctx))
+ Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+ return Scope;
+}
+
+
+/// recordSourceLine - Register a source line with debug info. Returns the
+/// unique label that was emitted and which provides correspondence to
+/// the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Fn, Dir);
+ }
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags,
+ 0, 0, Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
+///
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If this is not the last sibling and it has children then add a sibling
+ // offset attribute.
+ if (!Last && !Children.empty())
+ Die->addSiblingOffset(DIEValueAllocator);
+
+ // Record the abbreviation.
+ assignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of the abbreviation code.
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::computeSizeAndOffsets() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ // Compute size of compile unit header.
+ unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+ computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+ }
+}
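+
+// For 32-bit DWARF this fixed header adds up to 11 bytes, so the first DIE
+// of every compile unit is placed at offset 11 within its unit.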
+
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// the start of each one.
+void DwarfDebug::EmitSectionLabels() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Dwarf sections base addresses.
+ DwarfInfoSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ DwarfAbbrevSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+ if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+ EmitSectionSym(Asm, MacroInfo);
+
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ EmitSectionSym(Asm, TLOF.getDwarfLocSection());
+ EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+ EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ DwarfStrSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+ DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ "debug_range");
+
+ DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ "section_debug_loc");
+
+ TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ EmitSectionSym(Asm, TLOF.getDataSection());
+}
+
+/// emitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::emitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Emit the code (index) for the abbreviation.
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+ Twine::utohexstr(Die->getOffset()) + ":0x" +
+ Twine::utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag()));
+ Asm->EmitULEB128(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+ switch (Attr) {
+ case dwarf::DW_AT_sibling:
+ Asm->EmitInt32(Die->getSiblingOffset());
+ break;
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr = Origin->getOffset();
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ case dwarf::DW_AT_ranges: {
+ // The DW_AT_ranges value encodes an offset into the debug_range section.
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+ if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
+ break;
+ }
+ case dwarf::DW_AT_location: {
+ if (UseDotDebugLocEntry.count(Die) != 0) {
+ DIELabel *L = cast<DIELabel>(Values[i]);
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ emitDIE(Children[j]);
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("End Of Children Mark");
+ Asm->EmitInt8(0);
+ }
+}
+
+/// emitDebugInfo - Emit the debug info section.
+///
+void DwarfDebug::emitDebugInfo() {
+ // Start debug info section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfInfoSection());
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ DIE *Die = TheCU->getCUDie();
+
+ // Emit the compile unit header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+ TheCU->getID()));
+
+ // Emit the size of the content, not including the length field itself.
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+ DwarfAbbrevSectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ emitDIE(Die);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+ }
+}
+
+/// emitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::emitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAbbrevSection());
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
+
+ // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+ // Emit the abbreviation's code (base-1 index).
+ Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128(0, "EOM(3)");
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
+ }
+}
+
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->OutStreamer.AddComment("Extended Op");
+ Asm->EmitInt8(0);
+
+ Asm->OutStreamer.AddComment("Op size");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+ Asm->OutStreamer.AddComment("DW_LNE_set_address");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+ Asm->OutStreamer.AddComment("Section end label");
+
+ Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+ Asm->getTargetData().getPointerSize(),
+ 0/*AddrSpace*/);
+
+ // Mark end of matrix.
+ Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(1);
+ Asm->EmitInt8(1);
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+ TheCU->getID()));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+ TheCU->getID()));
+ }
+}
+
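+/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+///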
+void DwarfDebug::emitDebugPubTypes() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getID()));
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getID()));
+ }
+}
+
+/// emitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::emitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (StringPool.empty()) return;
+
+ // Start the dwarf str section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfStrSection());
+
+ // Get all of the string pool entries and put them in an array by their ID so
+ // we can sort them.
+ SmallVector<std::pair<unsigned,
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+ for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+ I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &*I));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+ // Emit the string itself.
+ Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+ }
+}
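The sort above exists only to recover insertion order from the unordered StringMap: each entry is paired with the index it was assigned when pooled, and sorting on that index puts the strings back in first-use order. A standalone sketch with plain std types (names are made up):

#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

static std::vector<std::string>
orderPoolByIndexSketch(std::vector<std::pair<unsigned, std::string> > Entries) {
  // Pairs compare by the assigned pool index first, so emission order matches
  // the order in which the strings were first requested.
  std::sort(Entries.begin(), Entries.end());
  std::vector<std::string> Ordered;
  Ordered.reserve(Entries.size());
  for (std::size_t i = 0; i != Entries.size(); ++i)
    Ordered.push_back(Entries[i].second);
  return Ordered;
}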
+
+/// emitDebugLoc - Emit visible names into a debug loc section.
+///
+void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
+ // Start the dwarf loc section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+ unsigned index = 1;
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
+ if (Entry.isEmpty()) {
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+ DIVariable DV(Entry.Variable);
+ Asm->OutStreamer.AddComment("Loc expr size");
+ MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+ MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+ Asm->EmitLabelDifference(end, begin, 2);
+ Asm->OutStreamer.EmitLabel(begin);
+ if (Entry.isInt()) {
+ DIBasicType BTy(DV.getType());
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+ Asm->OutStreamer.AddComment("DW_OP_consts");
+ Asm->EmitInt8(dwarf::DW_OP_consts);
+ Asm->EmitSLEB128(Entry.getInt());
+ } else {
+ Asm->OutStreamer.AddComment("DW_OP_constu");
+ Asm->EmitInt8(dwarf::DW_OP_constu);
+ Asm->EmitULEB128(Entry.getInt());
+ }
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref)
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ else llvm_unreachable("unknown Opcode found in complex address");
+ }
+ }
+ }
+ // else ... ignore constant fp. There is no good way to represent
+ // them here in dwarf.
+ Asm->OutStreamer.EmitLabel(end);
+ }
+ }
+}
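The complex-address branch above stitches a DWARF location expression together from register, offset, and dereference pieces. As a standalone illustration of the byte form (helper names are made up; 0x70 and 0x06 are the standard DW_OP_breg0 and DW_OP_deref values), a "value lives at memory address reg + offset" location looks like this:

#include <cstdint>
#include <vector>

// Append an SLEB128-encoded value (the DWARF varint used for signed operands).
static void appendSLEB128Sketch(std::vector<uint8_t> &B, int64_t V) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;  // arithmetic shift on two's-complement targets
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    B.push_back(Byte);
  } while (More);
}

// DW_OP_breg<Reg> <SLEB128 Off>, optionally followed by DW_OP_deref for one
// extra level of indirection. Reg must be a DWARF register number in 0..31
// for the short breg form.
static std::vector<uint8_t> bregLocationSketch(unsigned Reg, int64_t Off,
                                               bool Deref) {
  std::vector<uint8_t> Expr;
  Expr.push_back(uint8_t(0x70 + Reg));  // DW_OP_breg0 + Reg
  appendSLEB128Sketch(Expr, Off);
  if (Deref)
    Expr.push_back(0x06);               // DW_OP_deref
  return Expr;
}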
+
+/// EmitDebugARanges - Emit visible names into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+}
+
+/// emitDebugRanges - Emit visible names into a debug ranges section.
+///
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ for (SmallVector<const MCSymbol *, 8>::iterator
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I != E; ++I) {
+ if (*I)
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ }
+}
+
+/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+///
+void DwarfDebug::emitDebugMacInfo() {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer.SwitchSection(LineInfo);
+ }
+}
+
+/// emitDebugInlineInfo - Emit inline info using the following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+/// otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+/// instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+ if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+ return;
+
+ if (!FirstCU)
+ return;
+
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+ Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+ Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+ Asm->OutStreamer.AddComment("Dwarf Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ const MDNode *Node = *I;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ DISubprogram SP(Node);
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
+
+ Asm->OutStreamer.AddComment("MIPS linkage name");
+ if (LName.empty()) {
+ Asm->OutStreamer.EmitBytes(Name, 0);
+ Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
+ } else
+ Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+ DwarfStrSectionSym);
+
+ Asm->OutStreamer.AddComment("Function name");
+ Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitULEB128(Labels.size(), "Inline count");
+
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(LI->second->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+ Asm->OutStreamer.EmitSymbolValue(LI->first,
+ Asm->getTargetData().getPointerSize(),0);
+ }
+ }
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
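The doc comment above describes the record layout in prose; a rough standalone mirror of one debug_inlined entry as a plain struct (field names and fixed-width choices are illustrative, not LLVM types):

#include <cstdint>
#include <utility>
#include <vector>

struct DebugInlinedEntrySketch {
  uint32_t LinkageNameStrOffset;   // __debug_str offset of the MIPS linkage
                                   // name, or of the plain name if none exists
  uint32_t FunctionNameStrOffset;  // __debug_str offset of the regular name
  // Encoded as a ULEB128 count followed by one {die_offset, low_pc} pair per
  // distinct inlined instance; here simply a vector of pairs.
  std::vector<std::pair<uint32_t, uint64_t> > Instances;
};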
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 000000000000..b2450064e3d0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,531 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class CompileUnit;
+class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class MachineOperand;
+class MCAsmInfo;
+class DIEAbbrev;
+class DIE;
+class DIEBlock;
+class DIEEntry;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+ MCSymbol *Label; // Label in code ID number.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+ : Line(L), Column(C), SourceID(S), Label(label) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ MCSymbol *getLabel() const { return Label; }
+};
+
+/// DotDebugLocEntry - This struct describes location entries emitted in
+/// .debug_loc section.
+typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ const MDNode *Variable;
+ bool Merged;
+ bool Constant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
+ Constant(false) { Constants.Int = 0;}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+
+ /// Empty entries are also used as a trigger to emit a temp label. Such
+ /// labels are referenced to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
+} DotDebugLocEntry;
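Merge() above folds an entry into its successor when the two ranges are adjacent and describe the same location. A standalone sketch of the same rule over plain integer ranges (types and names are illustrative only):

#include <cstddef>
#include <vector>

struct RangeSketch {
  int Begin, End;  // half-open range [Begin, End)
  int Loc;         // stand-in for the machine location
  bool Merged;
};

// Coalesce adjacent ranges with identical locations: [B,E) + [E,E2) -> [B,E2).
// The earlier entry is only marked, mirroring how emission skips entries with
// isMerged() set.
static void mergeAdjacentSketch(std::vector<RangeSketch> &Entries) {
  for (std::size_t i = 0; i + 1 < Entries.size(); ++i) {
    RangeSketch &Cur = Entries[i];
    RangeSketch &Next = Entries[i + 1];
    if (Cur.Loc == Next.Loc && Cur.End == Next.Begin) {
      Next.Begin = Cur.Begin;  // the successor absorbs the earlier range
      Cur.Merged = true;       // skipped later, like isMerged() entries
    }
  }
}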
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ DIE *TheDIE; // Variable DIE.
+ unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+public:
+ // AbsVar may be NULL.
+ DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
+ void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+ unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ unsigned getTag() const { return Var.getTag(); }
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const;
+};
+
+class DwarfDebug {
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+ CompileUnit *FirstCU;
+ DenseMap<const MDNode *, CompileUnit *> CUMap;
+
+ /// AbbreviationsSet - Used to uniquely define abbreviations.
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+ /// id mapped to a unique id.
+ StringMap<unsigned> SourceIdMap;
+
+ /// StringPool - A String->Symbol mapping of strings used by indirect
+ /// references.
+ StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+ unsigned NextStringPoolNumber;
+
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<const MCSection*> SectionMap;
+
+ /// CurrentFnDbgScope - Top level scope for the current function.
+ ///
+ DbgScope *CurrentFnDbgScope;
+
+ /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+ SmallVector<DbgVariable *, 8> CurrentFnArguments;
+
+ /// DbgScopeMap - Tracks the scopes in the current function. Owns the
+ /// contained DbgScope*s.
+ DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
+
+ /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
+ DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;
+
+ /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
+ /// are not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
+ DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+
+ /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ /// AbstractScopesList - Tracks abstract scopes constructed while processing
+ /// a function. This list is cleared during endFunction().
+ SmallVector<DbgScope *, 4> AbstractScopesList;
+
+ /// AbstractVariables - Collection of abstract variables. Owned by the
+ /// DbgScopes in AbstractScopes.
+ DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+ /// DbgVariableToFrameIndexMap - Tracks the frame index used to find a
+ /// variable's value.
+ DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
+
+ /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
+ /// machine instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
+
+ /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+ SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+ /// UseDotDebugLocEntry - For DIEs in this set, the DW_AT_location attribute
+ /// identifies the corresponding .debug_loc entry offset.
+ SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
+
+ /// VarToAbstractVarMap - Maps a DbgVariable to its corresponding abstract
+ /// DbgVariable, if any.
+ DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
+
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+ /// InlineInfo - Keep track of inlined functions and their location. This
+ /// information is used to populate debug_inlined section.
+ typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+ // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+ // are processed to create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+ /// LabelsBeforeInsn - Maps an instruction to the label emitted before
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ /// LabelsAfterInsn - Maps an instruction to the label emitted after
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
+ /// in order of appearance.
+ SmallVector<const MDNode*, 8> UserVariables;
+
+ /// DbgValues - For each user variable, keep a list of DBG_VALUE
+ /// instructions in order. The list can also contain normal instructions that
+ /// clobber the previous DBG_VALUE.
+ typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+ DbgValueHistoryMap;
+ DbgValueHistoryMap DbgValues;
+
+ SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+ /// Previous instruction's location information. This is used to determine
+ /// label locations that indicate scope boundaries in dwarf debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel;
+
+ /// PrologEndLoc - This location indicates the end of the function prologue
+ /// and the beginning of the function body.
+ DebugLoc PrologEndLoc;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ // Section Symbols: these are assembler temporary labels that are emitted at
+ // the beginning of each supported dwarf section. These are used to form
+ // section offsets and are created by EmitSectionLabels.
+ MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+ MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+private:
+
+ /// assignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// getOrCreateDbgScope - Create DbgScope for the scope.
+ DbgScope *getOrCreateDbgScope(DebugLoc DL);
+
+ DbgScope *getOrCreateAbstractScope(const MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+
+ /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+ /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+ /// If there are global variables in this scope then create and insert
+ /// DIEs for these variables.
+ DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
+
+ /// constructLexicalScope - Construct new DW_TAG_lexical_block
+ /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(DbgScope *Scope);
+
+ /// constructInlinedScopeDIE - This scope represents inlined body of
+ /// a function. Construct DIE to represent this concrete inlined copy
+ /// of the function.
+ DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+
+ /// constructScopeDIE - Construct a DIE for this scope.
+ DIE *constructScopeDIE(DbgScope *Scope);
+
+ /// EmitSectionLabels - Emit initial Dwarf sections with a label at
+ /// the start of each one.
+ void EmitSectionLabels();
+
+ /// emitDIE - Recursively emits a debug information entry.
+ ///
+ void emitDIE(DIE *Die);
+
+ /// computeSizeAndOffset - Compute the size and offset of a DIE.
+ ///
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
+
+ /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void computeSizeAndOffsets();
+
+ /// emitDebugInfo - Emit the debug info section.
+ ///
+ void emitDebugInfo();
+
+ /// emitAbbreviations - Emit the abbreviation section.
+ ///
+ void emitAbbreviations() const;
+
+ /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// the line matrix.
+ ///
+ void emitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// emitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void emitDebugPubNames();
+
+ /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+ ///
+ void emitDebugPubTypes();
+
+ /// emitDebugStr - Emit visible names into a debug str section.
+ ///
+ void emitDebugStr();
+
+ /// emitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void emitDebugLoc();
+
+ /// EmitDebugARanges - Emit visible names into a debug aranges section.
+ ///
+ void EmitDebugARanges();
+
+ /// emitDebugRanges - Emit visible names into a debug ranges section.
+ ///
+ void emitDebugRanges();
+
+ /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+ ///
+ void emitDebugMacInfo();
+
+ /// emitDebugInlineInfo - Emit inline info using the following format.
+ /// Section Header:
+ /// 1. length of section
+ /// 2. Dwarf version number
+ /// 3. address size.
+ ///
+ /// Entries (one "entry" for each function that was inlined):
+ ///
+ /// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+ /// otherwise offset into __debug_str for regular function name.
+ /// 2. offset into __debug_str section for regular function name.
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// instances for the function.
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// inlined instance; the die_offset points to the inlined_subroutine die in
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
+ void emitDebugInlineInfo();
+
+ /// constructCompileUnit - Create new CompileUnit for the given
+ /// metadata node with tag DW_TAG_compile_unit.
+ void constructCompileUnit(const MDNode *N);
+
+ /// getCompileUnit - Get CompileUnit DIE.
+ CompileUnit *getCompileUnit(const MDNode *N) const;
+
+ /// constructGlobalVariableDIE - Construct global variable DIE.
+ void constructGlobalVariableDIE(const MDNode *N);
+
+ /// constructSubprogramDIE - Construct subprogram DIE.
+ void constructSubprogramDIE(const MDNode *N);
+
+ /// recordSourceLine - Register a source line with debug info. Returns the
+ /// unique label that was emitted and which provides correspondence to
+ /// the source line list.
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
+
+ /// recordVariableFrameIndex - Record a variable's index.
+ void recordVariableFrameIndex(const DbgVariable *V, int Index);
+
+ /// findVariableFrameIndex - Return true if frame index for the variable
+ /// is found. Update FI to hold value of the index.
+ bool findVariableFrameIndex(const DbgVariable *V, int *FI);
+
+ /// findDbgScope - Find DbgScope for the debug loc.
+ DbgScope *findDbgScope(DebugLoc DL);
+
+ /// identifyScopeMarkers() - Identify instructions that mark the beginning
+ /// or end of a scope.
+ void identifyScopeMarkers();
+
+ /// extractScopeInformation - Scan machine instructions in this function
+ /// and collect DbgScopes. Return true if at least one scope was found.
+ bool extractScopeInformation();
+
+ /// addCurrentFnArgument - If Var is a current function argument, add it to
+ /// the CurrentFnArguments list.
+ bool addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, DbgScope *Scope);
+
+ /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+ /// collectVariableInfoFromMMITable - Collect variable information from
+ /// side table maintained by MMI.
+ void collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &P);
+
+ /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// getLabelBeforeInsn - Return Label preceding the instruction.
+ const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// getLabelAfterInsn - Return Label immediately following the instruction.
+ const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A, Module *M);
+ ~DwarfDebug();
+
+ /// beginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule(Module *M);
+
+ /// endModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void endModule();
+
+ /// beginFunction - Gather pre-function debug information. Assumes it is
+ /// being emitted immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF);
+
+ /// endFunction - Gather and emit post-function debug information.
+ ///
+ void endFunction(const MachineFunction *MF);
+
+ /// beginInstruction - Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
+
+ /// endInstruction - Process the end of an instruction.
+ void endInstruction(const MachineInstr *MI);
+
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert it
+ /// in the SourceIds map.
+ unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
+
+ /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(DISubprogram SP);
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 000000000000..1f992faaadb5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,739 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfException::DwarfException(AsmPrinter *A)
+ : Asm(A), MMI(Asm->MMI) {}
+
+DwarfException::~DwarfException() {}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
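The FilterOffsets computation above depends only on ULEB128 sizes, so it can be shown in isolation. A standalone sketch (helper names are made up; MCAsmInfo::getULEB128Size is the real counterpart): for filter ids {200, 1, 2} the recorded offsets come out as -1, -3, -4, because 200 needs two ULEB128 bytes.

#include <vector>

// Number of bytes a value occupies in ULEB128 form (7 payload bits per byte).
static unsigned getULEB128SizeSketch(unsigned Value) {
  unsigned Size = 0;
  do {
    ++Size;
    Value >>= 7;
  } while (Value != 0);
  return Size;
}

// Mirror of the FilterOffsets loop above: each filter id records the running
// (negative) byte offset, which then moves back by that id's encoded size.
static std::vector<int>
computeFilterOffsetsSketch(const std::vector<unsigned> &FilterIds) {
  std::vector<int> FilterOffsets;
  FilterOffsets.reserve(FilterIds.size());
  int Offset = -1;
  for (unsigned i = 0, e = FilterIds.size(); i != e; ++i) {
    FilterOffsets.push_back(Offset);
    Offset -= int(getULEB128SizeSketch(FilterIds[i]));
  }
  return FilterOffsets;
}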
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf exception handling (SjLj handling doesn't use this). If some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this.
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (Asm->MAI->isExceptionHandlingDwarf())
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+ }
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getTargetData().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ // Sometimes we do not want to emit the data into a separate section (e.g.
+ // ARM EHABI). In this case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(GCCETSym);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()));
+
+ if (IsSJLJ)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+ Asm->getFunctionNumber()));
+
+ // Emit the LSDA header.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does; instead, output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
+ }
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(idx) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
+ llvm::utostr(idx));
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" Action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ Asm->EmitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ Asm->EmitULEB128(S.Action);
+ }
+ } else {
+ // DWARF Exception handling
+ assert(Asm->MAI->isExceptionHandlingDwarf());
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an LEB128
+ // compressed, unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ unsigned Entry = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (BeginLabel == 0)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (EndLabel == 0)
+ EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+
+ if (!S.PadLabel) {
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ } else {
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" On action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel)
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ Asm->EmitULEB128(S.Action);
+ }
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+ llvm::utostr(++Entry) + " <<");
+ if (Action.ValueForTypeID >= 0)
+ Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
+ llvm::utostr(NextAction));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ Asm->EmitSLEB128(Action.NextAction);
+ }
+
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
+ if (GV)
+ Asm->EmitReference(GV, TTypeEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
+ 0);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
+ }
+
+ Asm->EmitAlignment(2);
+}
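The TotalSize/SizeAlign arithmetic earlier in this function rounds the header-plus-tables size up to a 4-byte boundary so the type infos land aligned, and the padding bytes are folded into whichever ULEB128 length field is emitted next. A standalone sketch of just that arithmetic (the function name is made up):

#include <cassert>

// Pad TotalSize up to the next multiple of 4. The unsigned subtraction wraps
// and masking with 3 leaves 0..3 bytes of padding, exactly as above.
static unsigned lsdaPadToFourSketch(unsigned TotalSize) {
  unsigned SizeAlign = (4 - TotalSize) & 3;
  assert((TotalSize + SizeAlign) % 4 == 0 && "not 4-byte aligned");
  return SizeAlign;
}

// For example, lsdaPadToFourSketch(10) == 2 and lsdaPadToFourSketch(12) == 0.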
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ assert(0 && "Should be implemented");
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+ assert(0 && "Should be implemented");
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ assert(0 && "Should be implemented");
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 000000000000..b5f86ab1b951
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,242 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Function;
+class AsmPrinter;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException {
+protected:
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally, there is an
+ /// index into the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. The type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This table is reverse indexed, base 1.
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// ComputeActionsTable - Compute the actions table and gather the first
+ /// action index for each landing pad site.
+ unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+ /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+ void EmitExceptionTable();
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(AsmPrinter *A);
+ virtual ~DwarfException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+ /// should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ AsmPrinter::CFIMoveType moveTypeModule;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class ARMException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ virtual ~ARMException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class Win64Exception : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if personality
+ /// info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
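
Step 2 of the table description above (check each type id against the thrown exception, follow the next-action offset, stop when the chain terminates with zero) can be illustrated with a small self-contained sketch. The DecodedAction struct and findMatchingTypeID function below are hypothetical stand-ins, not LLVM APIs, and the chain is indexed rather than offset-encoded for simplicity:

  #include <cstdio>
  #include <vector>

  // Hypothetical decoded action record: a type id plus the index of the next
  // record in the chain, with -1 standing in for the terminating zero offset.
  struct DecodedAction {
    int TypeID;
    int Next;
  };

  // Walk the chain starting at First until a record matches ThrownTypeID or
  // the chain ends; a match means the exception is passed to the landing pad.
  static int findMatchingTypeID(const std::vector<DecodedAction> &Actions,
                                int First, int ThrownTypeID) {
    for (int I = First; I != -1; I = Actions[I].Next)
      if (Actions[I].TypeID == ThrownTypeID)
        return Actions[I].TypeID;
    return 0; // no match: the frame is unwound and handling continues
  }

  int main() {
    // Two chained records (hypothetical): catch type 1, then catch type 2.
    std::vector<DecodedAction> Actions = {{1, 1}, {2, -1}};
    std::printf("%d\n", findMatchingTypeID(Actions, 0, 2)); // prints 2
    return 0;
  }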
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 000000000000..115381767751
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+ MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer.EmitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_begin");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_end");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Too many descriptors for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+ "(" + Twine(uintptr_t(&FI)) + ")");
+ }
+
+ AP.OutStreamer.AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer.AddBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count "+Twine(LiveCount)+" >= 65536.");
+ }
+
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+ AP.EmitInt16(FrameSize);
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ if (K->StackOffset >= 1<<16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
+ }
+
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ }
+ }
+}
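
To make the frametable layout in the comment above concrete, here is a rough standalone sketch (not LLVM code) of one descriptor and the number of bytes it contributes before the per-descriptor pointer alignment; the 16-bit field widths mirror the limits enforced by the fatal errors above, and all sample values are hypothetical:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // One frame descriptor: return address, frame size, and the live root
  // offsets, following the frametable layout described above.
  struct FrameDescriptor {
    std::uintptr_t ReturnAddress;
    std::uint16_t FrameSize;                // must stay below 1 << 16
    std::vector<std::uint16_t> LiveOffsets; // count must stay below 1 << 16
  };

  // Bytes emitted for one descriptor before alignment:
  // pointer + uint16 FrameSize + uint16 NumLiveOffsets + uint16 per offset.
  static std::size_t emittedSize(const FrameDescriptor &D) {
    return sizeof(std::uintptr_t) + 2 + 2 + 2 * D.LiveOffsets.size();
  }

  int main() {
    FrameDescriptor D = {0x401000, 32, {8, 16, 24}}; // hypothetical values
    std::printf("descriptor bytes: %zu\n", emittedSize(D)); // 8+2+2+6 on LP64
    return 0;
  }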
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
new file mode 100644
index 000000000000..c2ad5eba3b3c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -0,0 +1,116 @@
+//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+Win64Exception::Win64Exception(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
+ {}
+
+Win64Exception::~Win64Exception() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void Win64Exception::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void Win64Exception::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ shouldEmitMoves = Asm->needsSEHMoves();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+
+ if (!shouldEmitPersonality)
+ return;
+
+ MCSymbol *GCCHandlerSym =
+ Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
+ Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void Win64Exception::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+
+ Asm->OutStreamer.PushSection();
+ Asm->OutStreamer.EmitWin64EHHandlerData();
+ Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
+ 4);
+ EmitExceptionTable();
+ Asm->OutStreamer.PopSection();
+ }
+ Asm->OutStreamer.EmitWin64EHEndProc();
+}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 000000000000..99090a8269d4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1671 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation, it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID;
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+}
+
+char BranchFolderPass::ID = 0;
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+ return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ return OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+
+ EnableHoistCommonCode = CommonHoist;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (!I->isImplicitDef())
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ ImpDefRegs.insert(SubReg);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi) {
+ if (!tii) return false;
+
+ TriedMerging.clear();
+
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
+
+ RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (JTI == 0) {
+ delete RS;
+ return MadeChange;
+ }
+
+ // Walk the function to find jump tables that are live.
+ BitVector JTIsLive(JTI->getJumpTables().size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+
+ // Remember that this JT is live.
+ JTIsLive.set(Op.getIndex());
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens when the
+ // indirect jump was unreachable (and thus deleted).
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ delete RS;
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ // Skip debug info so it will not affect codegen.
+ while (I->isDebugValue()) {
+ if (I==MBB->begin())
+ return 0; // MBB empty except for debug info.
+ --I;
+ }
+
+ return HashMachineInstr(I);
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ // Skip debugging pseudos; necessary to avoid changing the code.
+ while (I1->isDebugValue()) {
+ if (I1==MBB1->begin()) {
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin())
+ // I1==DBG at begin; I2==DBG at begin
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+ return TailLen;
+ }
+ --I1;
+ }
+ // I1==first (untested) non-DBG preceding known match
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin()) {
+ ++I1;
+ // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+ return TailLen;
+ }
+ --I2;
+ }
+ // I1, I2==first (untested) non-DBGs preceding known match
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ // Back past possible debugging pseudos at beginning of block. This matters
+ // when one block differs from the other only by whether debugging pseudos
+ // are present at the beginning. (This way, the various checks later for
+ // I1==MBB1->begin() work as expected.)
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+ --I2;
+ while (I2->isDebugValue()) {
+ if (I2 == MBB2->begin()) {
+ return TailLen;
+ }
+ --I2;
+ }
+ ++I2;
+ }
+ if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+ --I1;
+ while (I1->isDebugValue()) {
+ if (I1 == MBB1->begin())
+ return TailLen;
+ --I1;
+ }
+ ++I1;
+ }
+ return TailLen;
+}
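
As a rough illustration of what ComputeCommonTailLength computes (ignoring its debug-value and inline-asm special cases), here is a tiny self-contained sketch that scans two toy instruction lists backwards from their ends; the opcode strings are hypothetical:

  #include <cstdio>
  #include <string>
  #include <vector>

  // Count how many trailing "instructions" two toy blocks share, walking
  // backwards from the ends until the first mismatch.
  static unsigned commonTailLength(const std::vector<std::string> &A,
                                   const std::vector<std::string> &B) {
    unsigned Len = 0;
    std::size_t I = A.size(), J = B.size();
    while (I > 0 && J > 0 && A[I - 1] == B[J - 1]) {
      --I;
      --J;
      ++Len;
    }
    return Len;
  }

  int main() {
    std::vector<std::string> BB1 = {"mul", "add", "store", "ret"};
    std::vector<std::string> BB2 = {"sub", "add", "store", "ret"};
    std::printf("common tail: %u\n", commonTailLength(BB1, BB2)); // prints 3
    return 0;
  }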
+
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls into the part starting at the
+/// iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return 0;
+
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(&CurMBB, NewMBB);
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall())
+ Time += 10;
+ else if (MCID.mayLoad() || MCID.mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ SmallVector<MachineOperand, 0>(), dl);
+}
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ else if (getHash() > o.getHash())
+ return false;
+ else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#endif
+ return false;
+ }
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return the
+/// length of the common tail and iterators to the first common instruction
+/// in each block.
+static bool ProfitableToMerge(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength,
+ unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ if (CommonTailLen == 0)
+ return false;
+ DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+ << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ << '\n');
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().getDesc().isBarrier() &&
+ !MBB2->back().getDesc().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin()))
+ return true;
+
+ return false;
+}
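
Setting aside the fall-through and whole-block fast paths checked first, the final decision above boils down to a threshold test plus a code-size exception. A stripped-down sketch of just that decision, with hypothetical inputs standing in for the real queries:

  #include <cstdio>

  // Merge if the effective common tail reaches the threshold, or if it is at
  // least 2 instructions, the function is optimized for size, and merging
  // would not require splitting a block.
  static bool worthMerging(unsigned EffectiveTailLen, unsigned MinTailLen,
                           bool OptForSize, bool NoSplitNeeded) {
    if (EffectiveTailLen >= MinTailLen)
      return true;
    return EffectiveTailLen >= 2 && OptForSize && NoSplitNeeded;
  }

  int main() {
    // 2 common instructions, threshold 3, -Os, and one tail is a whole block.
    std::printf("%s\n", worthMerging(2, 3, true, true) ? "merge" : "skip");
    return 0;
  }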
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // If the common tail includes any debug info we will take it pretty
+ // randomly from one of the inputs. Might be better to remove it?
+ DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return true;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ bool MadeChange = false;
+
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ dbgs() << "\n";
+ if (SuccBB) {
+ dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ dbgs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ if (!CreateCommonTailOnlyBlock(PredBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ bool MadeChange = false;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
+ if (I->succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() >= 2) {
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+ MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
+ // Skip blocks that loop to themselves; we can't tail merge these.
+ if (PBB == IBB)
+ continue;
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and
+ // we cannot reverse the branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ }
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
+ }
+ }
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+ }
+ return MadeChange;
+}
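
The canonicalization described inside TailMergeBlocks above ("Bcc IBB; fallthrough QBB" becomes "Bncc QBB", with only a conceptual branch to IBB left behind) can be sketched with a toy branch representation; the Branch struct and the eq/ne condition pair below are hypothetical, not target opcodes:

  #include <cstdio>
  #include <string>

  // Toy conditional branch: a condition name and an explicit target.
  struct Branch {
    std::string Cond;
    std::string Target;
  };

  // Reverse the condition and retarget the branch at the old fall-through
  // block, leaving a conceptual fall-through into the old target (IBB).
  static Branch canonicalize(const Branch &B, const std::string &FallThrough) {
    Branch R;
    R.Cond = (B.Cond == "eq") ? "ne" : "eq"; // toy condition reversal
    R.Target = FallThrough;
    return R;
  }

  int main() {
    Branch Old = {"eq", "IBB"};            // Beq IBB; fallthrough QBB
    Branch New = canonicalize(Old, "QBB"); // becomes Bne QBB
    std::printf("B%s %s\n", New.Cond.c_str(), New.Target.c_str());
    return 0;
  }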
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+ if (MBB->empty())
+ return true;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ return false;
+ }
+ return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator MBBI, MBBE;
+ for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ break;
+ }
+ return (MBBI->getDesc().isBranch());
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+
+ // If there is a clear successor ordering, we make sure that one block
+ // will fall through to the next.
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ // Neither block consists entirely of debug info (per IsEmptyBlock check),
+ // so we needn't test for falling off the beginning here.
+ MachineBasicBlock::iterator MBB1I = --MBB1->end();
+ while (MBB1I->isDebugValue())
+ --MBB1I;
+ MachineBasicBlock::iterator MBB2I = --MBB2->end();
+ while (MBB2I->isDebugValue())
+ --MBB2I;
+ return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
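
The heuristic above reduces to a single asymmetric test: prefer to fall through into the block that does not end in a call. A minimal sketch, with the call-ending flags supplied as hypothetical inputs rather than queried from real instructions:

  #include <cstdio>

  // True when falling through into block 1 is clearly better than block 2,
  // i.e. block 2 ends in a call and block 1 does not. Asymmetric by design,
  // so it never returns true for both orderings.
  static bool betterFallthrough(bool Block1EndsInCall, bool Block2EndsInCall) {
    return Block2EndsInCall && !Block1EndsInCall;
  }

  int main() {
    std::printf("%d\n", betterFallthrough(false, true)); // prints 1
    std::printf("%d\n", betterFallthrough(true, false)); // prints 0
    return 0;
  }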
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
+ DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (PrevBB.begin() != PrevBB.end()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ if (!MBBIter->isIdenticalTo(PrevBBIter))
+ break;
+ MachineInstr *DuplicateDbg = MBBIter;
+ ++MBBIter; --PrevBBIter;
+ DuplicateDbg->eraseFromParent();
+ }
+ }
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This helps with asserts in loops,
+ // moving the assert condition out of the loop body.
+ if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither have successors and if PredBB is the second from
+ // last block in the function, we'd just keep swapping the two blocks for
+ // last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+ !MBB->hasAddressTaken()) {
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+ // If the only things remaining in the block are debug info, remove these
+ // as well, so this will behave the same as an empty block in non-debug
+ // mode.
+ if (!MBB->empty()) {
+ bool NonDebugInfoFound = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (!I->isDebugValue()) {
+ NonDebugInfoFound = true;
+ break;
+ }
+ }
+ if (!NonDebugInfoFound)
+ // Make the block empty, losing the debug info (we could probably
+ // improve this in some cases.)
+ MBB->erase(MBB->begin(), MBB->end());
+ }
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while(PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator,
+/// however if the terminator is a conditional branch and its previous
+/// instruction is the flag setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(Loc))
+ return MBB->end();
+
+ for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Loc->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch; try not to separate the
+ // branch from the condition-setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && PI->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg))
+ IsDef = true;
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative: don't insert an instruction above something that may have
+ // side-effects. And since it's potentially bad to separate the flag-setting
+ // instruction from the conditional branch, just abort the optimization
+ // completely.
+ // Also avoid moving code above a predicated instruction, since it's hard to
+ // reason about register liveness with predicated instructions.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ TII->isPredicated(PI))
+ return MBB->end();
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else {
+ if (Uses.count(Reg)) {
+ Uses.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Uses.erase(*SR); // Use getSubRegisters to be conservative
+ }
+ Defs.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.insert(*AS);
+ }
+ }
+
+ return PI;
+}
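The decision made above, keeping a flag-setting instruction next to the conditional branch that reads it, reduces to a set-intersection test between the branch's uses and the previous instruction's defs. A rough standalone sketch of that test (plain std::set, not MachineOperand scanning) under that reading:

    #include <set>

    // Returns true if the instruction just before the branch defines a
    // register the branch reads (e.g. it sets the flags the branch tests),
    // in which case the hoisting point must move above that instruction.
    static bool branchDependsOnPrev(const std::set<unsigned> &BranchUses,
                                    const std::set<unsigned> &PrevDefs) {
      for (unsigned Reg : PrevDefs)
        if (BranchUses.count(Reg))
          return true;
      return false;
    }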
+
+/// HoistCommonCodeInSuccs - If the successors of MBB share a common
+/// instruction sequence at the start of their blocks, move those instructions
+/// to just before MBB's terminator if it is legal to do so.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor;
+ // there it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallSet<unsigned, 4> LocalDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(TIB))
+ // Hard to reason about register liveness with predicated instruction.
+ break;
+
+ bool IsSafe = true;
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+ // Don't hoist the instruction if the def would be clobbered by the
+ // instruction at the point of insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ } else if (!LocalDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ break;
+
+ // Track local defs so we can update liveins.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (!MO.isDead()) {
+ LocalDefs.push_back(Reg);
+ LocalDefsSet.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ LocalDefsSet.insert(*SR);
+ }
+ } else if (MO.isKill() && LocalDefsSet.count(Reg)) {
+ LocalDefsSet.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ LocalDefsSet.erase(*SR);
+ }
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Def = LocalDefs[i];
+ if (LocalDefsSet.count(Def)) {
+ TBB->addLiveIn(Def);
+ FBB->addLiveIn(Def);
+ }
+ }
+
+ ++NumHoist;
+ return true;
+}
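At its core the transformation above is "compute the longest common instruction prefix of the two successors, splice it into the predecessor, and delete it from both successors", with the register-liveness and safety checks layered on top. A simplified standalone sketch of just that core, using strings in place of instructions:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Simplified sketch of prefix hoisting; strings stand in for instructions
    // and none of the safety checks from the real pass are modeled.
    static void hoistCommonPrefix(std::vector<std::string> &Pred,
                                  std::vector<std::string> &TrueSucc,
                                  std::vector<std::string> &FalseSucc) {
      std::size_t N = 0;
      while (N < TrueSucc.size() && N < FalseSucc.size() &&
             TrueSucc[N] == FalseSucc[N])
        ++N;
      // Move the common prefix into the predecessor (the real pass splices it
      // in just before the terminator) and drop it from both successors.
      Pred.insert(Pred.end(), TrueSucc.begin(), TrueSucc.begin() + N);
      TrueSucc.erase(TrueSucc.begin(), TrueSucc.begin() + N);
      FalseSucc.erase(FalseSucc.begin(), FalseSucc.begin() + N);
    }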
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
new file mode 100644
index 000000000000..df795dfc248e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,123 @@
+//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
+
+namespace llvm {
+ class MachineFunction;
+ class MachineModuleInfo;
+ class RegScavenger;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class BranchFolder {
+ public:
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
+
+ bool OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi);
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool EnableTailMerge;
+ bool EnableHoistCommonCode;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ RegScavenger *RS;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
+ };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
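Going only by the declarations in this header, plus the MF.getTarget() accessors used elsewhere in this patch, a machine-function pass would drive the folder roughly as sketched below; how the MachineModuleInfo pointer is obtained is left to the caller and is an assumption here, not something this header specifies.

    #include "BranchFolding.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Hedged sketch of a call site; obtaining MMI is the caller's business.
    static bool runBranchFolding(MachineFunction &MF, MachineModuleInfo *MMI) {
      BranchFolder Folder(/*defaultEnableTailMerge=*/true, /*CommonHoist=*/true);
      return Folder.OptimizeFunction(MF,
                                     MF.getTarget().getInstrInfo(),
                                     MF.getTarget().getRegisterInfo(),
                                     MMI);
    }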
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 000000000000..e6b3bbca2068
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,219 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequired<MachineLoopInfo>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: "
+ << fn.getFunction()->getName() << '\n');
+
+ LiveIntervals &lis = getAnalysis<LiveIntervals>();
+ VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+ for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+ LiveInterval &li = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(li.reg))
+ vrai.CalculateWeightAndHint(li);
+ }
+ return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
+ }
+
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = MF.getRegInfo();
+ const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
+ MachineBasicBlock *mbb = 0;
+ MachineLoop *loop = 0;
+ unsigned loopDepth = 0;
+ bool isExiting = false;
+ float totalWeight = 0;
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hint and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ // Don't recompute spill weight for an unspillable register.
+ bool Spillable = li.isSpillable();
+
+ for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+ MachineInstr *mi = I.skipInstruction();) {
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi))
+ continue;
+
+ float weight = 1.0f;
+ if (Spillable) {
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = Loops.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+ }
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ float hweight = Hint[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && LIS.isAllocatable(hint))
+ bestPhys = hweight, hintPhys = hint;
+ } else {
+ if (hweight > bestVirt)
+ bestVirt = hweight, hintVirt = hint;
+ }
+ }
+
+ Hint.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weight of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // If the live interval was already unspillable, leave it that way.
+ if (!Spillable)
+ return;
+
+ // Mark li as unspillable if all live ranges are tiny.
+ if (li.isZeroLength(LIS.getSlotIndexes())) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ bool isLoad = false;
+ if (LIS.isReMaterializable(li, 0, isLoad)) {
+ if (isLoad)
+ totalWeight *= 0.9F;
+ else
+ totalWeight *= 0.5F;
+ }
+
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
+}
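For intuition, the adjustments applied after the accumulation loop compose as plain float arithmetic. The sketch below restates only those final factors, taken from the code above; the per-instruction base weights summed into totalWeight (including the 3x boost for likely induction-variable updates) are assumed to have been computed already, since their exact formula lives in LiveIntervals::getSpillWeight rather than in this file.

    // Sketch of the final weight adjustments, mirroring the factors above.
    static float adjustTotalWeight(float totalWeight, bool hasHint,
                                   bool isReMat, bool reMatIsLoad) {
      if (hasHint)
        totalWeight *= 1.01F;                      // weakly prefer hinted regs
      if (isReMat)
        totalWeight *= reMatIsLoad ? 0.9F : 0.5F;  // remat: cheaper to spill
      return totalWeight;  // the real code then normalizes by interval size
    }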
+
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
+ const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return;
+
+ // Accumulate constraints from all uses.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ // TRI doesn't have accurate enough information to model this yet.
+ if (I.getOperand().getSubReg())
+ return;
+ // Inline asm instructions don't remember their constraints.
+ if (I->isInlineAsm())
+ return;
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
+ if (OpRC)
+ NewRC = getCommonSubClass(NewRC, OpRC);
+ if (!NewRC || NewRC == OldRC)
+ return;
+ }
+ DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg)
+ << " to " << NewRC->getName() <<".\n");
+ MRI.setRegClass(reg, NewRC);
+}
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 000000000000..14eb0541dc8d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,182 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
+ const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
+ // No stack is used.
+ StackOffset = 0;
+
+ clearFirstByValReg();
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate space on the stack large enough to pass an argument
+// by value. The size and alignment information of the argument is encoded in
+// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ if (MF.getFrameInfo()->getMaxAlignment() < Align)
+ MF.getFrameInfo()->setMaxAlignment(Align);
+ TM.getTargetLowering()->HandleByVal(this, Size);
+ unsigned Offset = AllocateStack(Size, Align);
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ unsigned Reg = *Alias; ++Alias)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
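The UsedRegs update above is the usual packed-bitvector idiom: one bit per register, 32 bits per word, indexed by Reg/32 and Reg&31. A tiny self-contained sketch of the same addressing:

    #include <cassert>
    #include <vector>

    // Same bit addressing as UsedRegs[Reg/32] |= 1 << (Reg&31) above.
    static void markReg(std::vector<unsigned> &Bits, unsigned Reg) {
      Bits[Reg / 32] |= 1u << (Reg & 31);
    }
    static bool isRegMarked(const std::vector<unsigned> &Bits, unsigned Reg) {
      return (Bits[Reg / 32] >> (Reg & 31)) & 1u;
    }

    int main() {
      std::vector<unsigned> Bits((100 + 31) / 32);  // room for 100 registers
      markReg(Bits, 37);
      assert(isRegMarked(Bits, 37) && !isRegMarked(Bits, 38));
      return 0;
    }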
+
+/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// AnalyzeReturn - Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << EVT(VT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << EVT(VT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+}
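Every Analyze* loop in this file calls a target-supplied assignment callback as Fn(i, VT, VT, CCValAssign::Full, Flags, *this) and treats a true return as "unhandled". The callback's internals are target-specific and not part of this file, so the sketch below models the usual register-then-stack assignment idea with plain C++ types rather than the LLVM API:

    #include <cstdint>
    #include <vector>

    // Toy model of a calling-convention assignment callback (not LLVM types).
    struct ToyState {
      std::vector<unsigned> FreeRegs;   // argument registers still available
      uint64_t StackOffset = 0;
      struct Loc { bool InReg; unsigned RegOrOffset; };
      std::vector<Loc> Locs;
    };

    // Returns false on success, mirroring the "nonzero means unhandled"
    // convention checked by the Analyze* loops above.
    static bool toyAssign(unsigned ValNo, unsigned SizeInBytes, ToyState &S) {
      (void)ValNo;
      if (!S.FreeRegs.empty()) {
        S.Locs.push_back({true, S.FreeRegs.back()});  // pass in a register
        S.FreeRegs.pop_back();
      } else {
        S.Locs.push_back({false, (unsigned)S.StackOffset});  // spill to stack
        S.StackOffset += SizeInBytes;
      }
      return false;
    }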
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..489746cf3c72
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,59 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRALinScanPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeRenderMachineFunctionPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeLoopSplitterPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
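A tool linking the CodeGen library would normally call this initializer once, before building its pass pipeline. The snippet below is a hedged sketch of that call site; it assumes the usual PassRegistry singleton accessor and header location rather than anything stated in this file.

    #include "llvm/InitializePasses.h"
    #include "llvm/PassRegistry.h"  // assumed header for PassRegistry here

    // Register every CodeGen pass with the global registry (call once).
    static void initCodeGenOnce() {
      llvm::initializeCodeGen(*llvm::PassRegistry::getPassRegistry());
    }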
diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 000000000000..270c337ef67e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,425 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to target-specific alignment boundaries.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoopsAligned, "Number of loops aligned");
+STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
+STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
+
+namespace {
+ class CodePlacementOpt : public MachineFunctionPass {
+ const MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+
+ public:
+ static char ID;
+ CodePlacementOpt() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const {
+ return "Code Placement Optimizer";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool HasFallthrough(MachineBasicBlock *MBB);
+ bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
+ void Splice(MachineFunction &MF,
+ MachineFunction::iterator InsertPt,
+ MachineFunction::iterator Begin,
+ MachineFunction::iterator End);
+ bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+ MachineLoop *L);
+ bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+ MachineLoop *L);
+ bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
+ bool OptimizeIntraLoopEdges(MachineFunction &MF);
+ bool AlignLoops(MachineFunction &MF);
+ bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
+ };
+
+ char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+ return new CodePlacementOpt();
+}
+
+/// HasFallthrough - Test whether the given block has a fallthrough, either as
+/// a plain fallthrough or as the fallthrough case of a conditional branch.
+///
+bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ return false;
+ // This conditional branch has no fallthrough.
+ if (FBB)
+ return false;
+ // An unconditional branch has no fallthrough.
+ if (Cond.empty() && TBB)
+ return false;
+ // It has a fallthrough.
+ return true;
+}
+
+/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
+/// This is called before major changes are begun to test whether it will be
+/// possible to complete the changes.
+///
+/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
+/// whenever possible.
+///
+bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
+ // Conservatively ignore EH landing pads.
+ if (MBB->isLandingPad()) return false;
+
+ // Aggressively handle return blocks and similar constructs.
+ if (MBB->succ_empty()) return true;
+
+ // Ask the target's AnalyzeBranch if it can handle this block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ // Make sure the terminator is understood.
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ return false;
+ // Ignore blocks which look like they might have EH-related control flow.
+ // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
+ // recognize the possibility of a control transfer through an unwind.
+ // Such blocks contain EH_LABEL instructions, however they may be in the
+ // middle of the block. Instead of searching for them, just check to see
+ // if the CFG disagrees with AnalyzeBranch.
+ if (1u + !Cond.empty() != MBB->succ_size())
+ return false;
+ // Make sure we have the option of reversing the condition.
+ if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
+ return false;
+ return true;
+}
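The successor-count test above, "1u + !Cond.empty() != MBB->succ_size()", encodes a small counting argument: an analyzable block should have exactly one CFG successor when its terminator is unconditional or a pure fallthrough, and exactly two when it is conditional; anything else suggests hidden (e.g. EH) control flow. A tiny sketch of that expectation:

    #include <cassert>

    // Expected CFG successor count for an analyzable terminator, matching the
    // "1u + !Cond.empty()" expression above.
    static unsigned expectedSuccCount(bool hasCondition) {
      return 1u + (hasCondition ? 1u : 0u);
    }

    int main() {
      assert(expectedSuccCount(false) == 1); // uncond branch or fallthrough only
      assert(expectedSuccCount(true) == 2);  // cond branch plus the other dest
      return 0;
    }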
+
+/// Splice - Move the sequence of instructions [Begin,End) to just before
+/// InsertPt. Update branch instructions as needed to account for broken
+/// fallthrough edges and to take advantage of newly exposed fallthrough
+/// opportunities.
+///
+void CodePlacementOpt::Splice(MachineFunction &MF,
+ MachineFunction::iterator InsertPt,
+ MachineFunction::iterator Begin,
+ MachineFunction::iterator End) {
+ assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
+ "Splice can't change the entry block!");
+ MachineFunction::iterator OldBeginPrior = prior(Begin);
+ MachineFunction::iterator OldEndPrior = prior(End);
+
+ MF.splice(InsertPt, Begin, End);
+
+ prior(Begin)->updateTerminator();
+ OldBeginPrior->updateTerminator();
+ OldEndPrior->updateTerminator();
+}
+
+/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
+/// to the loop top to the top of the loop so that they have a fall through.
+/// This can introduce a branch on entry to the loop, but it can eliminate a
+/// branch within the loop. See the @simple case in
+/// test/CodeGen/X86/loop_blocks.ll for an example of this.
+bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+
+ bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
+
+ if (TopMBB == MF.begin() ||
+ HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
+ new_top:
+ for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
+ PE = TopMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ if (Pred == TopMBB) continue;
+ if (HasFallthrough(Pred)) continue;
+ if (!L->contains(Pred)) continue;
+
+ // Verify that we can analyze all the loop entry edges before beginning
+ // any changes which will require us to be able to analyze them.
+ if (Pred == MF.begin())
+ continue;
+ if (!HasAnalyzableTerminator(Pred))
+ continue;
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
+ continue;
+
+ // Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
+ << " to top of loop.\n");
+ Changed = true;
+
+ // Move it and all the blocks that can reach it via fallthrough edges
+ // exclusively, to keep existing fallthrough edges intact.
+ MachineFunction::iterator Begin = Pred;
+ MachineFunction::iterator End = llvm::next(Begin);
+ while (Begin != MF.begin()) {
+ MachineFunction::iterator Prior = prior(Begin);
+ if (Prior == MF.begin())
+ break;
+ // Stop when a non-fallthrough edge is found.
+ if (!HasFallthrough(Prior))
+ break;
+ // Stop if a block which could fall-through out of the loop is found.
+ if (Prior->isSuccessor(End))
+ break;
+ // If we've reached the top, stop scanning.
+ if (Prior == MachineFunction::iterator(TopMBB)) {
+ // We know top currently has a fall through (because we just checked
+ // it) which would be lost if we do the transformation, so it isn't
+ // worthwhile to do the transformation unless it would expose a new
+ // fallthrough edge.
+ if (!Prior->isSuccessor(End))
+ goto next_pred;
+ // Otherwise we can stop scanning and proceed to move the blocks.
+ break;
+ }
+ // If we hit a switch or something complicated, don't move anything
+ // for this predecessor.
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
+ break;
+ // Ok, the block prior to Begin will be moved along with the rest.
+ // Extend the range to include it.
+ Begin = Prior;
+ ++NumIntraMoved;
+ }
+
+ // Move the blocks.
+ Splice(MF, TopMBB, Begin, End);
+
+ // Update TopMBB.
+ TopMBB = L->getTopBlock();
+
+ // We have a new loop top. Iterate on it. We shouldn't have to do this
+ // too many times if BranchFolding has done a reasonable job.
+ goto new_top;
+ next_pred:;
+ }
+ }
+
+ // If the loop previously didn't exit with a fall-through and it now does,
+ // we eliminated a branch.
+ if (Changed &&
+ !BotHasFallthrough &&
+ HasFallthrough(L->getBottomBlock())) {
+ ++NumIntraElim;
+ }
+
+ return Changed;
+}
+
+/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
+/// portion of the loop contiguous with the header. This usually makes the loop
+/// contiguous, provided that AnalyzeBranch can handle all the relevant
+/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
+/// for an example of this.
+bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock *BotMBB = L->getBottomBlock();
+
+ // Determine a position to move orphaned loop blocks to. If TopMBB is not
+ // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
+ // to the top of the loop to avoid losing that fallthrough. Otherwise append
+ // them to the bottom, even if it previously had a fallthrough, on the theory
+ // that it's worth an extra branch to keep the loop contiguous.
+ MachineFunction::iterator InsertPt =
+ llvm::next(MachineFunction::iterator(BotMBB));
+ bool InsertAtTop = false;
+ if (TopMBB != MF.begin() &&
+ !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
+ HasFallthrough(BotMBB)) {
+ InsertPt = TopMBB;
+ InsertAtTop = true;
+ }
+
+ // Keep a record of which blocks are in the portion of the loop contiguous
+ // with the loop header.
+ SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
+ for (MachineFunction::iterator I = TopMBB,
+ E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+ ContiguousBlocks.insert(I);
+
+ // Find non-contiguous blocks and fix them.
+ if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
+ for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ MachineBasicBlock *BB = *BI;
+
+ // Verify that we can analyze all the loop entry edges before beginning
+ // any changes which will require us to be able to analyze them.
+ if (!HasAnalyzableTerminator(BB))
+ continue;
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
+ continue;
+
+ // If the layout predecessor is part of the loop, this block will be
+ // processed along with it. This keeps them in their relative order.
+ if (BB != MF.begin() &&
+ L->contains(prior(MachineFunction::iterator(BB))))
+ continue;
+
+ // Check to see if this block is already contiguous with the main
+ // portion of the loop.
+ if (!ContiguousBlocks.insert(BB))
+ continue;
+
+ // Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
+ << " to be contiguous with loop.\n");
+ Changed = true;
+
+ // Process this block and all loop blocks contiguous with it, to keep
+ // them in their relative order.
+ MachineFunction::iterator Begin = BB;
+ MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
+ for (; End != MF.end(); ++End) {
+ if (!L->contains(End)) break;
+ if (!HasAnalyzableTerminator(End)) break;
+ ContiguousBlocks.insert(End);
+ ++NumIntraMoved;
+ }
+
+ // If we're inserting at the bottom of the loop, and the code we're
+ // moving originally had fall-through successors, bring the successors
+ // up with the loop blocks to preserve the fall-through edges.
+ if (!InsertAtTop)
+ for (; End != MF.end(); ++End) {
+ if (L->contains(End)) break;
+ if (!HasAnalyzableTerminator(End)) break;
+ if (!HasFallthrough(prior(End))) break;
+ }
+
+ // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
+ // we don't need them anymore at this point.
+ Splice(MF, InsertPt, Begin, End);
+ }
+
+ return Changed;
+}
+
+/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+/// This code takes the approach of making minor changes to the existing
+/// layout to fix specific loop-oriented problems. Also, it depends on
+/// AnalyzeBranch, which can't understand complex control instructions.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+
+ // Do optimization for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+ // Do optimization for this loop.
+ Changed |= EliminateUnconditionalJumpsToTop(MF, L);
+ Changed |= MoveDiscontiguousLoopBlocks(MF, L);
+
+ return Changed;
+}
+
+/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
+ bool Changed = false;
+
+ if (!TLI->shouldOptimizeCodePlacement())
+ return Changed;
+
+ // Do optimization for each loop in the function.
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ if (!(*I)->getParentLoop())
+ Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+ return Changed;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return false; // Don't care about loop alignment.
+
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ return Changed;
+}
+
+/// AlignLoop - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
+ unsigned Align) {
+ bool Changed = false;
+
+ // Do alignment for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ L->getTopBlock()->setAlignment(Align);
+ Changed = true;
+ ++NumLoopsAligned;
+
+ return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+ MLI = &getAnalysis<MachineLoopInfo>();
+ if (MLI->empty())
+ return false; // No loops.
+
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Changed = OptimizeIntraLoopEdges(MF);
+
+ Changed |= AlignLoops(MF);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 000000000000..84c4d59c0e41
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,663 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.clear();
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prologue.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.clear();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ if (MI->isDebugValue())
+ return;
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
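A standalone sketch of the selection rule above, using toy node and edge types rather than the real SUnit/SDep classes: among a node's predecessors, follow the edge with the greatest predecessor depth plus edge latency, and on a tie prefer an anti-dependence edge.

  #include <cstddef>
  #include <vector>

  struct ToyEdge { std::size_t Pred; unsigned Latency; bool IsAnti; };
  struct ToyNode { unsigned Depth; std::vector<ToyEdge> Preds; };

  // Return the predecessor edge to follow next on the bottom-up critical
  // path, or nullptr if the node has no predecessors.
  static const ToyEdge *criticalPathStep(const std::vector<ToyNode> &Nodes,
                                         const ToyNode &N) {
    const ToyEdge *Next = nullptr;
    unsigned NextDepth = 0;
    for (const ToyEdge &E : N.Preds) {
      unsigned Total = Nodes[E.Pred].Depth + E.Latency;
      if (NextDepth < Total || (NextDepth == Total && E.IsAnti)) {
        NextDepth = Total;
        Next = &E;
      }
    }
    return Next;
  }

  int main() {
    // n0 (depth 0) --latency 2--> n2,  n1 (depth 1) --latency 2, anti--> n2
    std::vector<ToyNode> Nodes(3);
    Nodes[1].Depth = 1;
    Nodes[2].Preds = {{0, 2, false}, {1, 2, true}};
    const ToyEdge *E = criticalPathStep(Nodes, Nodes[2]);
    // Picks the edge from n1: 1 + 2 = 3 beats 0 + 2 = 2.
    return (E && E->Pred == 1) ? 0 : 1;
  }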
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // It's not safe to change register allocation for source operands
+ // that have special allocation requirements. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *Alias;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ if (MO.isUse() && Special) {
+ if (KeepRegs.insert(Reg)) {
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ KeepRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defined but not used in this
+ // instruction are now dead.
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to
+ // two-address updates.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It wasn't previously live but now it is; this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+// Check all machine operands that reference the anti-dependent register and
+// that must be replaced by NewReg. Return true if any of their parent
+// instructions may clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/post-increment loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC)
+{
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If any instruction that defines AntiDepReg also defines NewReg, it's
+ // not suitable. For example, an instruction with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
+ // Find the node at the bottom of the critical path.
+ const SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ const SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!RegClassInfo.isAllocatable(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.count(AntiDepReg))
+ // Don't break anti-dependencies if a use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI))
+ // If this instruction's defs have special allocation requirements, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC)) {
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
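The long comment in BreakAntiDependencies above motivates tracking LastNewReg so that consecutive repairs of anti-dependences on the same register do not all pick the same replacement. A standalone sketch of that heuristic, with a toy register pool of plain integers rather than real target registers:

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned AntiDepReg = 0;        // "A" in the comment above
    std::vector<unsigned> Pool = {1, 2};  // free candidates, "B" and "C"
    unsigned LastNewReg = 0;              // 0 == no previous repair

    for (int Repair = 0; Repair < 4; ++Repair) {
      unsigned NewReg = 0;
      for (unsigned Cand : Pool) {
        if (Cand == AntiDepReg || Cand == LastNewReg)
          continue;                       // never reuse the previous choice
        NewReg = Cand;
        break;
      }
      std::printf("repair %d: rename r%u -> r%u\n", Repair, AntiDepReg, NewReg);
      LastNewReg = NewReg;                // remember for the next repair
    }
    // The renames alternate r0 -> r1, r0 -> r2, r0 -> r1, r0 -> r2, so no two
    // consecutive repairs recreate the hazard on the same replacement register.
    return 0;
  }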
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 000000000000..07107802972d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,110 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during the post-RA scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+ class CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ std::vector<const TargetRegisterClass*> Classes;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ SmallSet<unsigned, 4> KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+ ~CriticalAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC);
+ };
+}
+
+#endif
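A toy model of the KillIndices/DefIndices invariant documented above and asserted throughout CriticalAntiDepBreaker.cpp: scanning a block bottom-up, exactly one of the two indices is ~0u for every register; a live register carries a kill index, a dead one carries a def index. This is an illustration-only class, not the real breaker.

  #include <cassert>
  #include <vector>

  struct ToyLiveness {
    std::vector<unsigned> KillIndices, DefIndices;
    // Initially nothing is live: every kill index is ~0u and every def index
    // holds a valid sentinel (the block size in the real pass, 0 here).
    explicit ToyLiveness(unsigned NumRegs)
        : KillIndices(NumRegs, ~0u), DefIndices(NumRegs, 0) {}

    // Bottom-up, the first use seen of a dead register is its kill.
    void use(unsigned Reg, unsigned Count) {
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      check(Reg);
    }

    // Bottom-up, a def ends the live range that the kill opened.
    void def(unsigned Reg, unsigned Count) {
      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      check(Reg);
    }

    void check(unsigned Reg) const {
      assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
  };

  int main() {
    ToyLiveness L(4);
    L.use(3, 7);  // instruction 7 kills r3 (seen first when walking upwards)
    L.def(3, 2);  // instruction 2 defines r3; r3 is dead above this point
    return 0;
  }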
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 000000000000..6de6c0cb81bd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,201 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-dce"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false)
+
+FunctionPass *llvm::createDeadMachineInstructionElimPass() {
+ return new DeadMachineInstructionElim();
+}
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm without side effects and with no defs can
+ // still be deleted. But there is so much bad inline asm code out there that
+ // we should leave it alone.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
+ LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Treat reserved registers as always live.
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = ReservedRegs;
+
+ // Also add any explicit live-out physregs for this block.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
+ LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
+ unsigned Reg = *LOI;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ LivePhysRegs.set(Reg);
+ }
+
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ assert(Use.isDebug());
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+ AnyChanges = true;
+ MI->eraseFromParent();
+ ++NumDeletes;
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs)
+ LivePhysRegs.reset(*SubRegs);
+ }
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.set(Reg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ *AliasSet; ++AliasSet)
+ LivePhysRegs.set(*AliasSet);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
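A standalone sketch of the bottom-up scan above, on a toy instruction form rather than MachineInstr: walking in reverse, an instruction whose result has no live use below it is deleted; otherwise its def is cleared from the live set before its uses are added, the same defs-before-uses ordering the pass is careful about for physregs.

  #include <cstdio>
  #include <set>
  #include <vector>

  // Each toy instruction defines one register and uses zero or more others.
  struct ToyInst { unsigned Def; std::vector<unsigned> Uses; };

  int main() {
    // r1 = ...        <- dead: nothing below reads r1
    // r2 = ...
    // r0 = use r2     <- r0 is live out of the block
    std::vector<ToyInst> Block = {{1, {}}, {2, {}}, {0, {2}}};
    std::set<unsigned> Live = {0};

    for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
      if (!Live.count(I->Def)) {                     // no live use below
        std::printf("deleting dead def of r%u\n", I->Def);
        continue;
      }
      Live.erase(I->Def);                            // record the def first,
      Live.insert(I->Uses.begin(), I->Uses.end());   // then the uses
    }
    // Prints only "deleting dead def of r1"; r2's def is kept because the
    // bottom instruction uses it.
    return 0;
  }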
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 000000000000..03604b0a170f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,685 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLowering *TLI;
+
+ // The eh.exception intrinsic.
+ Function *ExceptionValueIntrinsic;
+
+ // The eh.selector intrinsic.
+ Function *SelectorIntrinsic;
+
+ // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
+ Constant *URoR;
+
+ // The EH language-specific catch-all type.
+ GlobalVariable *EHCatchAllValue;
+
+ // _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ // We both use and preserve dominator info.
+ DominatorTree *DT;
+
+ // The function we are running on.
+ Function *F;
+
+ // The landing pads for this function.
+ typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+ BBSet LandingPads;
+
+ bool NormalizeLandingPads();
+ bool LowerUnwindsAndResumes();
+ bool MoveExceptionValueCalls();
+
+ Instruction *CreateExceptionValueCall(BasicBlock *BB);
+
+ /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+ /// use the "llvm.eh.catch.all.value" call need to be converted to use its
+ /// initializer instead.
+ bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+ bool HasCatchAllInSelector(IntrinsicInst *);
+
+ /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+ void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
+
+ /// FindAllURoRInvokes - Find all URoR invokes in the function.
+ void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+ /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+ /// "_Unwind_SjLj_Resume" calls. The "unwind" part of such an invoke jumps to
+ /// a landing pad within the current function. This makes it a candidate for
+ /// merging the selector associated with the URoR invoke with the one from
+ /// the URoR's landing pad.
+ bool HandleURoRInvokes();
+
+ /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
+ /// with the eh.exception call. This recursively looks past instructions
+ /// which don't change the EH pointer value, like casts or PHI nodes.
+ bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+ SmallPtrSet<PHINode*, 32> &SeenPHIs);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+ ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+ URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+
+ // getAnalysisUsage - We need the dominator tree for handling URoR.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
+}
+
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+ if (!EHCatchAllValue) return false;
+
+ unsigned ArgIdx = II->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+ return GV == EHCatchAllValue;
+}
+
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
+ for (Value::use_iterator
+ I = SelectorIntrinsic->use_begin(),
+ E = SelectorIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *II = cast<IntrinsicInst>(*I);
+
+ if (II->getParent()->getParent() != F)
+ continue;
+
+ if (!HasCatchAllInSelector(II))
+ Sels.insert(II);
+ else
+ CatchAllSels.insert(II);
+ }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+ for (Value::use_iterator
+ I = URoR->use_begin(),
+ E = URoR->use_end(); I != E; ++I) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
+ URoRInvokes.insert(II);
+ }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
+/// the "llvm.eh.catch.all.value" call need to be converted to use its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+ if (!EHCatchAllValue) return false;
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ bool Changed = false;
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+ IntrinsicInst *Sel = *I;
+
+ // Index of the "llvm.eh.catch.all.value" variable.
+ unsigned OpIdx = Sel->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
+ if (GV != EHCatchAllValue) continue;
+ Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call, and indicate whether there is a URoR "invoke" associated
+/// with the eh.exception call. This recursively looks past instructions which
+/// don't change the EH pointer value, like casts or PHI nodes.
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+ SmallPtrSet<PHINode*, 32> &SeenPHIs) {
+ bool Changed = false;
+
+ for (Value::use_iterator
+ I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+ Instruction *II = dyn_cast<Instruction>(*I);
+ if (!II || II->getParent()->getParent() != F) continue;
+
+ if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+ if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+ SelCalls.insert(Sel);
+ } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+ if (Invoke->getCalledFunction() == URoR)
+ URoRInvoke = true;
+ } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+ Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
+ } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+ if (SeenPHIs.insert(PN))
+ // Don't process a PHI node more than once.
+ Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
+ }
+ }
+
+ return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of such an invoke jumps to a
+/// landing pad within the current function. This makes it a candidate for
+/// merging the selector associated with the URoR invoke with the one from the
+/// URoR's landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+ if (!EHCatchAllValue) {
+ EHCatchAllValue =
+ F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
+ if (!EHCatchAllValue) return false;
+ }
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ SmallPtrSet<IntrinsicInst*, 32> Sels;
+ SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+ FindAllCleanupSelectors(Sels, CatchAllSels);
+
+ if (!URoR) {
+ URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+ if (!URoR) return CleanupSelectors(CatchAllSels);
+ }
+
+ SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+ FindAllURoRInvokes(URoRInvokes);
+
+ SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+ const BasicBlock *SelBB = (*SI)->getParent();
+ for (SmallPtrSet<InvokeInst*, 32>::iterator
+ UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+ const BasicBlock *URoRBB = (*UI)->getParent();
+ if (DT->dominates(SelBB, URoRBB)) {
+ SelsToConvert.insert(*SI);
+ break;
+ }
+ }
+ }
+
+ bool Changed = false;
+
+ if (Sels.size() != SelsToConvert.size()) {
+ // If we haven't been able to convert all of the clean-up selectors, then
+ // loop through the slow way to see if they still need to be converted.
+ if (!ExceptionValueIntrinsic) {
+ ExceptionValueIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+ if (!ExceptionValueIntrinsic)
+ return CleanupSelectors(CatchAllSels);
+ }
+
+ for (Value::use_iterator
+ I = ExceptionValueIntrinsic->use_begin(),
+ E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
+ if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+ bool URoRInvoke = false;
+ SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+ SmallPtrSet<PHINode*, 32> SeenPHIs;
+ Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
+
+ if (URoRInvoke) {
+ // This EH pointer is being used by an invoke of an URoR instruction and
+ // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
+ // need to convert it to a 'catch-all'.
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
+ if (!HasCatchAllInSelector(*SI))
+ SelsToConvert.insert(*SI);
+ }
+ }
+ }
+
+ if (!SelsToConvert.empty()) {
+ // Convert all clean-up eh.selectors, which are associated with "invokes" of
+ // URoR calls, into catch-all eh.selectors.
+ Changed = true;
+
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelsToConvert.begin(), SE = SelsToConvert.end();
+ SI != SE; ++SI) {
+ IntrinsicInst *II = *SI;
+
+ // Use the exception object pointer and the personality function
+ // from the original selector.
+ CallSite CS(II);
+ IntrinsicInst::op_iterator I = CS.arg_begin();
+ IntrinsicInst::op_iterator E = CS.arg_end();
+ IntrinsicInst::op_iterator B = prior(E);
+
+ // Exclude last argument if it is an integer.
+ if (isa<ConstantInt>(B)) E = B;
+
+ // Add exception object pointer (front).
+ // Add personality function (next).
+ // Add in any filter IDs (rest).
+ SmallVector<Value*, 8> Args(I, E);
+
+ Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
+
+ CallInst *NewSelector =
+ CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
+
+ NewSelector->setTailCall(II->isTailCall());
+ NewSelector->setAttributes(II->getAttributes());
+ NewSelector->setCallingConv(II->getCallingConv());
+
+ II->replaceAllUsesWith(NewSelector);
+ II->eraseFromParent();
+ }
+ }
+
+ Changed |= CleanupSelectors(CatchAllSels);
+ return Changed;
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions. If we inlined
+/// through an invoke we could have a normal branch from the previous
+/// unwind block through to the landing pad for the original invoke.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
+bool DwarfEHPrepare::NormalizeLandingPads() {
+ bool Changed = false;
+
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
+ bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<InvokeInst>(TI))
+ continue;
+ BasicBlock *LPad = TI->getSuccessor(1);
+ // Skip landing pads that have already been normalized.
+ if (LandingPads.count(LPad))
+ continue;
+
+ // Check that only invoke unwind edges end at the landing pad.
+ bool OnlyUnwoundTo = true;
+ bool SwitchOK = usingSjLjEH;
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+ PI != PE; ++PI) {
+ TerminatorInst *PT = (*PI)->getTerminator();
+ // The SjLj dispatch block uses a switch instruction. This is effectively
+ // an unwind edge, so we can disregard it here. There will only ever
+ // be one dispatch, however, so if there are multiple switches, one
+ // of them truly is a normal edge, not an unwind edge.
+ if (SwitchOK && isa<SwitchInst>(PT)) {
+ SwitchOK = false;
+ continue;
+ }
+ if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+ OnlyUnwoundTo = false;
+ break;
+ }
+ }
+
+ if (OnlyUnwoundTo) {
+ // Only unwind edges lead to the landing pad. Remember the landing pad.
+ LandingPads.insert(LPad);
+ continue;
+ }
+
+ // At least one normal edge ends at the landing pad. Redirect the unwind
+ // edges to a new basic block which falls through into this one.
+
+ // Create the new basic block.
+ BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
+ LPad->getName() + "_unwind_edge");
+
+ // Insert it into the function right before the original landing pad.
+ LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+ // Redirect unwind edges from the original landing pad to NewBB.
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+ TerminatorInst *PT = (*PI++)->getTerminator();
+ if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+ // Unwind to the new block.
+ PT->setSuccessor(1, NewBB);
+ }
+
+ // If there are any PHI nodes in LPad, we need to update them so that they
+ // merge incoming values from NewBB instead.
+ for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+ // Check to see if all of the values coming in via unwind edges are the
+ // same. If so, we don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(*PB);
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal == 0) {
+ // Different unwind edges have different values. Create a new PHI node
+ // in NewBB.
+ PHINode *NewPN = PHINode::Create(PN->getType(),
+ PN->getNumIncomingValues(),
+ PN->getName()+".unwind", NewBB);
+ // Add an entry for each unwind edge, using the value from the old PHI.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+ // Now use this new PHI as the common incoming value for NewBB in PN.
+ InVal = NewPN;
+ }
+
+ // Revector exactly one entry in the PHI node to come from NewBB
+ // and delete all other entries that come from unwind edges. If
+ // there are both normal and unwind edges from the same predecessor,
+ // this leaves an entry for the normal edge.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ PN->removeIncomingValue(*PI);
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ // Add a fallthrough from NewBB to the original landing pad.
+ BranchInst::Create(LPad, NewBB);
+
+ // Now update DominatorTree analysis information.
+ DT->splitBlock(NewBB);
+
+ // Remember the newly constructed landing pad. The original landing pad
+ // LPad is no longer a landing pad now that all unwind edges have been
+ // revectored to NewBB.
+ LandingPads.insert(NewBB);
+ ++NumLandingPadsSplit;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// LowerUnwindsAndResumes - Turn unwind instructions and eh.resume calls into
+/// calls to _Unwind_Resume, rethrowing any previously caught exception. This
+/// will crash horribly at runtime if there is no such exception: using unwind
+/// to throw a new exception is currently not supported.
+bool DwarfEHPrepare::LowerUnwindsAndResumes() {
+ SmallVector<Instruction*, 16> ResumeInsts;
+
+ for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
+ for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){
+ if (isa<UnwindInst>(bi))
+ ResumeInsts.push_back(bi);
+ else if (CallInst *call = dyn_cast<CallInst>(bi))
+ if (Function *fn = dyn_cast<Function>(call->getCalledValue()))
+ if (fn->getName() == "llvm.eh.resume")
+ ResumeInsts.push_back(bi);
+ }
+ }
+
+ if (ResumeInsts.empty()) return false;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = ResumeInsts[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ bool Changed = false;
+
+ for (SmallVectorImpl<Instruction*>::iterator
+ I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) {
+ Instruction *RI = *I;
+
+ // Replace the resuming instruction with a call to _Unwind_Resume (or the
+ // appropriate target equivalent).
+
+ llvm::Value *ExnValue;
+ if (isa<UnwindInst>(RI))
+ ExnValue = CreateExceptionValueCall(RI->getParent());
+ else
+ ExnValue = cast<CallInst>(RI)->getArgOperand(0);
+
+ // Create the call...
+ CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // ...followed by an UnreachableInst, if it was an unwind.
+ // Calls to llvm.eh.resume are typically already followed by this.
+ if (isa<UnwindInst>(RI))
+ new UnreachableInst(RI->getContext(), RI);
+
+ if (isa<UnwindInst>(RI))
+ ++NumUnwindsLowered;
+ else
+ ++NumResumesLowered;
+
+ // Nuke the resume instruction.
+ RI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with direct use of
+/// a register holding the appropriate value; this requires adding calls inside
+/// all landing pads to initialize the register. Also, move eh.exception calls
+/// inside landing pads to the start of the landing pad (optional, but may make
+/// things simpler for later passes).
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+ // If the eh.exception intrinsic is not declared in the module then there is
+ // nothing to do. Speed up compilation by checking for this common case.
+ if (!ExceptionValueIntrinsic &&
+ !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+ return false;
+
+ bool Changed = false;
+
+ // Move calls to eh.exception that are inside a landing pad to the start of
+ // the landing pad.
+ for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ BasicBlock *LP = *LI;
+ for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception.
+ if (!EI->use_empty()) {
+ // If there is already a call to eh.exception at the start of the
+ // landing pad, then get hold of it; otherwise create such a call.
+ Value *CallAtStart = CreateExceptionValueCall(LP);
+
+ // If the call was at the start of a landing pad then leave it alone.
+ if (EI == CallAtStart)
+ continue;
+ EI->replaceAllUsesWith(CallAtStart);
+ }
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
+ }
+
+ // Look for calls to eh.exception that are not in a landing pad. If one is
+ // found, then a register that holds the exception value will be created in
+ // each landing pad, and the SSAUpdater will be used to compute the values
+ // returned by eh.exception calls outside of landing pads.
+ SSAUpdater SSA;
+
+ // Remember where we found the eh.exception call, to avoid rescanning earlier
+ // basic blocks which we already know contain no eh.exception calls.
+ bool FoundCallOutsideLandingPad = false;
+ Function::iterator BB = F->begin();
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II)
+ if (isa<EHExceptionInst>(II)) {
+ SSA.Initialize(II->getType(), II->getName());
+ FoundCallOutsideLandingPad = true;
+ break;
+ }
+
+ if (FoundCallOutsideLandingPad)
+ break;
+ }
+
+ // If all calls to eh.exception are in landing pads then we are done.
+ if (!FoundCallOutsideLandingPad)
+ return Changed;
+
+ // Add a call to eh.exception at the start of each landing pad, and tell the
+ // SSAUpdater that this is the value produced by the landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI)
+ SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
+
+ // Now turn all calls to eh.exception that are not in a landing pad into a use
+ // of the appropriate register.
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception, replace it with the value from any
+ // upstream landing pad(s).
+ EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ }
+ }
+
+ return true;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHIOrDbg();
+ // Is this a call to eh.exception?
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+ // Reuse the existing call.
+ return Start;
+
+ // Find the eh.exception intrinsic if we didn't already.
+ if (!ExceptionValueIntrinsic)
+ ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::eh_exception);
+
+ // Create the call.
+ return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+
+ // Initialize internal state.
+ DT = &getAnalysis<DominatorTree>();
+ F = &Fn;
+
+ // Ensure that only unwind edges end at landing pads (a landing pad is a
+ // basic block where an invoke unwind edge ends).
+ Changed |= NormalizeLandingPads();
+
+ // Turn unwind instructions and eh.resume calls into libcalls.
+ Changed |= LowerUnwindsAndResumes();
+
+ // TODO: Move eh.selector calls to landing pads and combine them.
+
+ // Move eh.exception calls to landing pads.
+ Changed |= MoveExceptionValueCalls();
+
+ Changed |= HandleURoRInvokes();
+
+ LandingPads.clear();
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/ELF.h b/contrib/llvm/lib/CodeGen/ELF.h
new file mode 100644
index 000000000000..5b634682cc87
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELF.h
@@ -0,0 +1,227 @@
+//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+ // Version 1.2, May 1995. The ELF64 details are based on the HP/Intel
+ // definition of the ELF-64 object file format, Version 1.5, Draft 2,
+ // May 27, 1998.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+ class GlobalValue;
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+
+ // An ELF symbol is related to an LLVM value through one of the two
+ // pointers below; for the other kinds (section, file, func) a null
+ // pointer is assumed by default.
+ union {
+ const GlobalValue *GV; // If this is a pointer to a GV
+ const char *Ext; // If this is a pointer to a named symbol
+ } Source;
+
+ // Describes which kind of source this ELF symbol comes from: a
+ // GlobalValue, an external symbol, or neither.
+ enum {
+ isGV, // The Source.GV field is valid.
+ isExtSym, // The Source.Ext field is valid.
+ isOther // Not a GlobalValue or External Symbol
+ };
+ unsigned SourceType;
+
+ bool isGlobalValue() const { return SourceType == isGV; }
+ bool isExternalSym() const { return SourceType == isExtSym; }
+
+ // getGlobalValue - If this is a global value which originated the
+ // ELF symbol, return it.
+ const GlobalValue *getGlobalValue() const {
+ assert(SourceType == isGV && "This is not a global value");
+ return Source.GV;
+ }
+
+ // getExternalSymbol - If this is an external symbol which originated the
+ // ELF symbol, return it.
+ const char *getExternalSymbol() const {
+ assert(SourceType == isExtSym && "This is not an external symbol");
+ return Source.Ext;
+ }
+
+ // getGV - Create and return an ELF symbol to represent the given global value
+ static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
+ unsigned Type, unsigned Visibility) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(Type);
+ Sym->setVisibility(Visibility);
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // getExtSym - Create and return an ELF symbol to represent an
+ // external symbol
+ static ELFSym *getExtSym(const char *Ext) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.Ext = Ext;
+ Sym->setBind(ELF::STB_GLOBAL);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SourceType = isExtSym;
+ return Sym;
+ }
+
+ // getSectionSym - Returns an ELF symbol to represent an ELF section
+ static ELFSym *getSectionSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_SECTION);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getFileSym - Returns an ELF symbol to represent the module identifier
+ static ELFSym *getFileSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_FILE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getUndefGV - Returns an STT_NOTYPE symbol for an undefined global value
+ static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // ELF specific fields
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ uint8_t Info;
+ uint8_t Other;
+ unsigned short SectionIdx;
+
+ // Symbol index into the Symbol table
+ unsigned SymTabIdx;
+
+ ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
+ Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
+ SymTabIdx(0) {}
+
+ unsigned getBind() const { return (Info >> 4) & 0xf; }
+ unsigned getType() const { return Info & 0xf; }
+ bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
+ bool isFileType() const { return getType() == ELF::STT_FILE; }
+
+ void setBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+
+ void setType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
+
+ void setVisibility(unsigned V) {
+ assert(V == (V & 0x3) && "Visibility value out of range!");
+ Other = V;
+ }
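+
+ // A minimal worked example of the Info packing above (values from the ELF
+ // spec, shown for illustration only): for a global function symbol,
+ //   setBind(ELF::STB_GLOBAL);   // STB_GLOBAL == 1
+ //   setType(ELF::STT_FUNC);     // STT_FUNC   == 2
+ // leaves Info == (1 << 4) | 2 == 0x12, which matches the standard st_info
+ // encoding ELF{32,64}_ST_INFO(bind, type) == (bind << 4) | (type & 0xf).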
+ };
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ class ELFSection : public BinaryObject {
+ public:
+ // ELF specific fields
+ unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
+ unsigned Type; // sh_type - Section contents & semantics
+ unsigned Flags; // sh_flags - Section flags.
+ uint64_t Addr; // sh_addr - The mem addr this section is in.
+ unsigned Offset; // sh_offset - Offset from the file start
+ unsigned Size; // sh_size - The section size.
+ unsigned Link; // sh_link - Section header table index link.
+ unsigned Info; // sh_info - Auxiliary information.
+ unsigned Align; // sh_addralign - Alignment of section.
+ unsigned EntSize; // sh_entsize - Size of each entry in the section, if any
+
+ /// SectionIdx - The number of the section in the Section Table.
+ unsigned short SectionIdx;
+
+ /// Sym - The symbol to represent this section if it has one.
+ ELFSym *Sym;
+
+ /// getSymbolTableIndex - Returns the symbol table index of the symbol
+ /// representing this section.
+ unsigned getSymbolTableIndex() const {
+ assert(Sym && "section not present in the symbol table");
+ return Sym->SymTabIdx;
+ }
+
+ ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
+ : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
+ Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
+ };
+
+ /// ELFRelocation - This class contains all the information necessary
+ /// to generate any 32-bit or 64-bit ELF relocation entry.
+ class ELFRelocation {
+ uint64_t r_offset; // offset in the section of the object this applies to
+ uint32_t r_symidx; // symbol table index of the symbol to use
+ uint32_t r_type; // machine specific relocation type
+ int64_t r_add; // explicit relocation addend
+ bool r_rela; // if true then the addend is part of the entry
+ // otherwise the addend is at the location specified
+ // by r_offset
+ public:
+ uint64_t getInfo(bool is64Bit) const {
+ if (is64Bit)
+ return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
+ else
+ return (r_symidx << 8) + (r_type & 0xFFL);
+ }
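+
+ // Worked example for getInfo above (illustrative values): for symbol table
+ // index 5 and relocation type 2 (e.g. R_X86_64_PC32 on x86-64), the packed
+ // r_info field is
+ //   64-bit: (5 << 32) | 2 == 0x0000000500000002   (ELF64_R_INFO)
+ //   32-bit: (5 <<  8) | 2 == 0x502                (ELF32_R_INFO)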
+
+ uint64_t getOffset() const { return r_offset; }
+ int64_t getAddend() const { return r_add; }
+
+ ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
+ bool rela = true, int64_t addend = 0) :
+ r_offset(off), r_symidx(sym), r_type(type),
+ r_add(addend), r_rela(rela) {}
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp
new file mode 100644
index 000000000000..3fb087c5ea8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp
@@ -0,0 +1,205 @@
+//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfce"
+
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "processing function: "
+ << MF.getFunction()->getName() << "\n");
+
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getTextSection(MF.getFunction());
+
+ // Set the desired binary object to be used by the code emitters
+ setBinaryObject(ES);
+
+ // Get the function alignment in bytes
+ unsigned Align = (1 << MF.getAlignment());
+
+ // The function must start on its required alignment
+ ES->emitAlignment(Align);
+
+ // Update the section alignment if needed.
+ ES->Align = std::max(ES->Align, Align);
+
+ // Record the function start offset
+ FnStartOff = ES->getCurrentPCOffset();
+
+ // Emit constant pool and jump tables to their appropriate sections.
+ // They need to be emitted before the function because on some targets
+ // the latter may reference a JT or CP entry address.
+ emitConstantPool(MF.getConstantPool());
+ if (MF.getJumpTableInfo())
+ emitJumpTables(MF.getJumpTableInfo());
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Add a symbol to represent the function.
+ const Function *F = MF.getFunction();
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
+ EW.getGlobalELFVisibility(F));
+ FnSym->SectionIdx = ES->SectionIdx;
+ FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
+ EW.AddPendingGlobalSymbol(F, true);
+
+ // Offset from start of Section
+ FnSym->Value = FnStartOff;
+
+ if (!F->hasPrivateLinkage())
+ EW.SymbolList.push_back(FnSym);
+
+ // Patch up Jump Table Section relocations to use the real MBB offsets
+ // now that the MBB label offsets inside the function are known.
+ if (MF.getJumpTableInfo()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+ MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+ uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setResultPointer((void*)MBBOffset);
+ MR.setConstantVal(ES->SectionIdx);
+ JTSection.addRelocation(MR);
+ }
+ }
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pool entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses later.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+ if (MR.isGlobalValue()) {
+ EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+ } else if (MR.isExternalSymbol()) {
+ EW.AddPendingExternalSymbol(MR.getExternalSymbol());
+ } else if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(ES->SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(JTSection.SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else {
+ llvm_unreachable("Unhandled relocation type");
+ }
+ ES->addRelocation(MR);
+ }
+
+ // Clear per-function data structures.
+ JTRelocations.clear();
+ Relocations.clear();
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // TODO: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf constant pools!");
+
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+
+ // Record the constant pool location and the section index
+ ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+ CPLocations.push_back(CstPool.size());
+ CPSections.push_back(CstPool.SectionIdx);
+
+ if (CPE.isMachineConstantPoolEntry())
+ assert("CPE.isMachineConstantPoolEntry not supported yet");
+
+ // Emit the constant to constant pool section
+ EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
+ }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf jump tables!");
+
+ const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+ unsigned EntrySize = 4; //MJTI->getEntrySize();
+
+ // Get the ELF Section to emit the jump table
+ ELFSection &JTSection = EW.getJumpTableSection();
+
+ // For each JT, record its offset from the start of the section
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+
+ // Record JT 'i' offset in the JT section
+ JTLocations.push_back(JTSection.size());
+
+ // Each MBB entry in the Jump table section has a relocation entry
+ // against the current text section.
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
+ MachineRelocation MR =
+ MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
+
+ // Add the relocation to the Jump Table section
+ JTRelocations.push_back(MR);
+
+ // Output placeholder for MBB in the JT section
+ for (unsigned s=0; s < EntrySize; ++s)
+ JTSection.emitByte(0);
+ }
+ }
+}
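+
+ // Worked example (assumed sizes): a jump table with three MBB targets adds
+ // 3 * EntrySize == 12 zero bytes to the jump table section together with
+ // three absolute-label relocations; finishFunction() later rewrites each of
+ // these relocations with the real MBB offset inside the text section before
+ // handing them to the jump table section.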
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h
new file mode 100644
index 000000000000..2ec1f6e873d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h
@@ -0,0 +1,78 @@
+//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFCODEEMITTER_H
+#define ELFCODEEMITTER_H
+
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+ class ELFWriter;
+ class ELFSection;
+
+ /// ELFCodeEmitter - This class is used by the ELFWriter to
+ /// emit the code for functions to the ELF file.
+ class ELFCodeEmitter : public ObjectCodeEmitter {
+ ELFWriter &EW;
+
+ /// Target machine description
+ TargetMachine &TM;
+
+ /// Section containing code for functions
+ ELFSection *ES;
+
+ /// Relocations - Record relocations needed by the current function
+ std::vector<MachineRelocation> Relocations;
+
+ /// JTRelocations - Record relocations needed by the jump table
+ /// section.
+ std::vector<MachineRelocation> JTRelocations;
+
+ /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+ uintptr_t FnStartOff;
+ public:
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+
+ /// addRelocation - Register new relocations for this function
+ void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ /// emitConstantPool - For each constant pool entry, figure out which
+ /// section the constant should live in and emit data to it
+ void emitConstantPool(MachineConstantPool *MCP);
+
+ /// emitJumpTables - Emit all the jump tables for a given jump table
+ /// info and record them to the appropriate section.
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ /// emitLabel - Emits a label
+ virtual void emitLabel(MCSymbol *Label) {
+ assert("emitLabel not implemented");
+ }
+
+ /// getLabelAddress - Return the address of the specified label,
+ /// only usable after the label has been emitted.
+ virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+ assert("getLabelAddress not implemented");
+ return 0;
+ }
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
+
+}; // end class ELFCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 000000000000..d977651c32f7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,1105 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfwriter"
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(ID), O(o), TM(tm),
+ OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
+ TLOF(TM.getTargetLowering()->getObjFileLowering()),
+ is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+ isLittleEndian(TM.getTargetData()->isLittleEndian()),
+ ElfHdr(isLittleEndian, is64Bit) {
+
+ MAI = TM.getMCAsmInfo();
+ TEW = TM.getELFWriterInfo();
+
+ // Create the object code emitter object for this target.
+ ElfCE = new ELFCodeEmitter(*this);
+
+ // Initial number of sections
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete ElfCE;
+ delete &OutContext;
+
+ while(!SymbolList.empty()) {
+ delete SymbolList.back();
+ SymbolList.pop_back();
+ }
+
+ while(!PrivateSyms.empty()) {
+ delete PrivateSyms.back();
+ PrivateSyms.pop_back();
+ }
+
+ while(!SectionList.empty()) {
+ delete SectionList.back();
+ SectionList.pop_back();
+ }
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // ELF Header
+ // ----------
+ // The e_shnum and e_shstrndx fields are only known after all sections
+ // have been emitted. Their locations in the output buffer are recorded
+ // so they can be patched up later.
+ //
+ // Note
+ // ----
+ // The emitWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter for *_off and *_addr ELF types.
+
+ ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+ ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
+ ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
+ ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
+
+ ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+ ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
+ ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
+
+ ElfHdr.emitWord16(ELF::ET_REL); // e_type
+ ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+ ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
+ ElfHdr.emitWord(0); // e_entry, no entry point in .o file
+ ElfHdr.emitWord(0); // e_phoff, no program header for .o
+ ELFHdr_e_shoff_Offset = ElfHdr.size();
+ ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
+ ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
+ ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
+ ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
+ ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ ElfHdr.emitWord16(TEW->getSHdrSize());
+
+ // e_shnum = # of section header ents
+ ELFHdr_e_shnum_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // e_shstrndx = Section # of '.shstrtab'
+ ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // Add the null section, which is required to be first in the file.
+ getNullSection();
+
+ // The first entry in the symtab is the null symbol and the second
+ // is a local symbol containing the module/file name
+ SymbolList.push_back(new ELFSym());
+ SymbolList.push_back(ELFSym::getFileSym());
+
+ return false;
+}
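+
+ // Sketch of the e_ident bytes emitted above for a little-endian 64-bit
+ // target (illustrative; the actual values come from the TargetELFWriterInfo):
+ //   0x7f 'E' 'L' 'F'    EI_MAG0..EI_MAG3
+ //   0x02                EI_CLASS   == ELFCLASS64
+ //   0x01                EI_DATA    == ELFDATA2LSB
+ //   0x01                EI_VERSION == EV_CURRENT
+ //   0x00 ... 0x00       padding up to the 16-byte e_ident
+ // followed by e_type == ET_REL (1), since this writer only produces
+ // relocatable object files.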
+
+// AddPendingGlobalSymbol - Add a global to be processed and to
+ // the global symbol lookup; use a zero index because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup /* = false */) {
+ PendingGlobals.insert(GV);
+ if (AddToLookup)
+ GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external to be processed
+ // and to the external symbol lookup; use a zero index because
+// the symbol table index will be determined later.
+void ELFWriter::AddPendingExternalSymbol(const char *External) {
+ PendingExternals.insert(External);
+ ExtSymLookup[External] = 0;
+}
+
+ELFSection &ELFWriter::getDataSection() {
+ const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
+ return getSection(Data->getSectionName(), Data->getType(),
+ Data->getFlags(), 4);
+}
+
+ELFSection &ELFWriter::getBSSSection() {
+ const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
+ return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
+}
+
+// getCtorSection - Get the static constructor section
+ELFSection &ELFWriter::getCtorSection() {
+ const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
+ return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
+}
+
+// getDtorSection - Get the static destructor section
+ELFSection &ELFWriter::getDtorSection() {
+ const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
+ return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
+}
+
+// getTextSection - Get the text section for the specified function
+ELFSection &ELFWriter::getTextSection(const Function *F) {
+ const MCSectionELF *Text =
+ (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
+ return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
+}
+
+// getJumpTableSection - Get a read only section for constants when
+// emitting jump tables. TODO: add PIC support
+ELFSection &ELFWriter::getJumpTableSection() {
+ const MCSectionELF *JT =
+ (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
+ return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
+ TM.getTargetData()->getPointerABIAlignment());
+}
+
+// getConstantPoolSection - Get a constant pool section based on the machine
+// constant pool entry type and relocation info.
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSectionELF *CPSect =
+ (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
+ return getSection(CPSect->getSectionName(), CPSect->getType(),
+ CPSect->getFlags(), CPE.getAlignment());
+}
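+
+ // For example (the concrete section names depend on the target's
+ // TargetLoweringObjectFile): a 4-byte float constant with no relocations
+ // (case 0, alloc size 4) maps to SectionKind::getMergeableConst4() and
+ // typically ends up in ".rodata.cst4", while an entry whose value needs a
+ // dynamic relocation (case 2) is placed in a read-only-with-relocations
+ // section such as ".data.rel.ro".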
+
+ // getRelocSection - Return the relocation section of section 'S'. The
+ // returned section holds entries with addends if the target uses RELA
+ // relocations.
+ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
+ unsigned SectionType = TEW->hasRelocationAddend() ?
+ ELF::SHT_RELA : ELF::SHT_REL;
+
+ std::string SectionName(".rel");
+ if (TEW->hasRelocationAddend())
+ SectionName.append("a");
+ SectionName.append(S.getName());
+
+ return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
+// getGlobalELFVisibility - Returns the ELF specific visibility type
+unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default:
+ llvm_unreachable("unknown visibility type");
+ case GlobalValue::DefaultVisibility:
+ return ELF::STV_DEFAULT;
+ case GlobalValue::HiddenVisibility:
+ return ELF::STV_HIDDEN;
+ case GlobalValue::ProtectedVisibility:
+ return ELF::STV_PROTECTED;
+ }
+ return 0;
+}
+
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
+ if (GV->hasInternalLinkage())
+ return ELF::STB_LOCAL;
+
+ if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
+ return ELF::STB_WEAK;
+
+ return ELF::STB_GLOBAL;
+}
+
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+ if (GV->isDeclaration())
+ return ELF::STT_NOTYPE;
+
+ if (isa<Function>(GV))
+ return ELF::STT_FUNC;
+
+ return ELF::STT_OBJECT;
+}
+
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+ // no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+ return GV->isDeclaration() || (isa<Function>(GV));
+}
+
+ // AddToSymbolList - Update the symbol lookup and, if the symbol is
+ // private, add it to the PrivateSyms list, otherwise to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+ assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+ const GlobalValue *GV = GblSym->getGlobalValue();
+ if (GV->hasPrivateLinkage()) {
+ // For private symbols, keep track of the index inside the
+ // private list since they will never go to the symbol table
+ // and won't be patched up later.
+ PrivateSyms.push_back(GblSym);
+ GblSymLookup[GV] = PrivateSyms.size()-1;
+ } else {
+ // Non-private symbols are left with zero indices until they
+ // are patched up during symbol table emission (where the
+ // indices are assigned).
+ SymbolList.push_back(GblSym);
+ GblSymLookup[GV] = 0;
+ }
+}
+
+ /// HasCommonSymbols - True if this section holds common symbols; this is
+ /// indicated in the ELF object file by a symbol with the SHN_COMMON section
+ /// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+ // FIXME: this is wrong, a common symbol can be in .data for example.
+ if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+ return true;
+
+ return false;
+}
+
+
+ // EmitGlobal - Choose the right section for the global and emit it
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
+
+ // Check if the referenced symbol is already emitted
+ if (GblSymLookup.find(GV) != GblSymLookup.end())
+ return;
+
+ // Handle ELF Bind, Visibility and Type for the current symbol
+ unsigned SymBind = getGlobalELFBinding(GV);
+ unsigned SymType = getGlobalELFType(GV);
+ bool IsUndefSym = IsELFUndefSym(GV);
+
+ ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
+ : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+ if (!IsUndefSym) {
+ assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ // Handle special llvm globals
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ // Get the ELF section where this global belongs from TLOF
+ const MCSectionELF *S =
+ (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+ ELFSection &ES =
+ getSection(S->getSectionName(), S->getType(), S->getFlags());
+ SectionKind Kind = S->getKind();
+
+ // The symbol alignment should update the section alignment if needed
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPreferredAlignment(GVar);
+ unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+ GblSym->Size = Size;
+
+ if (HasCommonSymbols(*S)) { // Symbol must go to a common section
+ GblSym->SectionIdx = ELF::SHN_COMMON;
+
+ // A new linkonce section is created for each global in the
+ // common section; the default alignment is 1 and the symbol
+ // value contains its alignment.
+ ES.Align = 1;
+ GblSym->Value = Align;
+
+ } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // Round the size up to the alignment so that the next object can
+ // start at the right offset in the section.
+ if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+ ES.Align = std::max(ES.Align, Align);
+
+ // GblSym->Value should contain the virtual offset inside the section.
+ // Virtual because the BSS space is not allocated on ELF objects
+ GblSym->Value = ES.Size;
+ ES.Size += Size;
+
+ } else { // The symbol must go to some kind of data section
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // GblSym->Value should contain the symbol offset inside the section,
+ // and all symbols should start on their required alignment boundary
+ ES.Align = std::max(ES.Align, Align);
+ ES.emitAlignment(Align);
+ GblSym->Value = ES.size();
+
+ // Emit the global to the data section 'ES'
+ EmitGlobalConstant(GVar->getInitializer(), ES);
+ }
+ }
+
+ AddToSymbolList(GblSym);
+}
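+
+ // Worked example of the BSS path above (illustrative numbers): with
+ // ES.Size == 5 and a global requiring Align == 4 and Size == 8, the rounding
+ //   ES.Size = (5 + 4 - 1) & ~(4 - 1) == 8
+ // places the symbol at Value == 8 and leaves ES.Size == 16 afterwards; no
+ // bytes are emitted because BSS space is not allocated in the object file.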
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS) {
+
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CVS->getType());
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+ uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstant(field, GblS);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ GblS.emitZeros(padSize);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
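+
+ // Worked example (assumes the usual 8-byte alignment for i64): for a
+ // { i32, i64 } struct the layout is offset 0 for the i32, offset 8 for the
+ // i64, total size 16. The first iteration computes
+ //   padSize = (8 - 0) - 4 == 4
+ // so four zero bytes follow the i32; the last iteration computes
+ //   padSize = (16 - 8) - 8 == 0.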
+
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CV->getType());
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), GblS);
+ return;
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ GblS.emitZeros(Size);
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ EmitGlobalConstantStruct(CVS, GblS);
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ APInt Val = CFP->getValueAPF().bitcastToAPInt();
+ if (CFP->getType()->isDoubleTy())
+ GblS.emitWord64(Val.getZExtValue());
+ else if (CFP->getType()->isFloatTy())
+ GblS.emitWord32(Val.getZExtValue());
+ else if (CFP->getType()->isX86_FP80Ty()) {
+ unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+ TD->getTypeStoreSize(CFP->getType());
+ GblS.emitWordFP80(Val.getRawData(), PadSize);
+ } else if (CFP->getType()->isPPC_FP128Ty())
+ llvm_unreachable("PPC_FP128Ty global emission not implemented");
+ return;
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (Size == 1)
+ GblS.emitByte(CI->getZExtValue());
+ else if (Size == 2)
+ GblS.emitWord16(CI->getZExtValue());
+ else if (Size == 4)
+ GblS.emitWord32(CI->getZExtValue());
+ else
+ EmitGlobalConstantLargeInt(CI, GblS);
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ const VectorType *PTy = CP->getType();
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I), GblS);
+ return;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Resolve the constant expression into a (Constant, Offset) pair.
+ // If 'Res.first' is a GlobalValue, emit a relocation with the
+ // offset 'Res.second'; otherwise emit a global constant as is
+ // always done for non-constant-expression types.
+ CstExprResTy Res = ResolveConstantExpr(CE);
+ const Constant *Op = Res.first;
+
+ if (isa<GlobalValue>(Op))
+ EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
+ TD->getTypeAllocSize(Op->getType()),
+ GblS, Res.second);
+ else
+ EmitGlobalConstant(Op, GblS);
+
+ return;
+ } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+ // Fill the data entry with zeros or emit a relocation entry
+ if (isa<ConstantPointerNull>(CV))
+ GblS.emitZeros(Size);
+ else
+ EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
+ Size, GblS);
+ return;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function and
+ // therefore must be referenced using a relocation entry.
+ EmitGlobalDataRelocation(GV, Size, GblS);
+ return;
+ }
+
+ std::string msg;
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << "Constant unimp for type: " << *CV->getType();
+ report_fatal_error(ErrorMsg.str());
+}
+
+ // ResolveConstantExpr - Resolve the constant expression until it stops
+ // yielding other constant expressions.
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+ const TargetData *TD = TM.getTargetData();
+
+ // There is no constant expression nested inside anymore.
+ if (!isa<ConstantExpr>(CV))
+ return std::make_pair(CV, 0);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ return ResolveConstantExpr(CE->getOperand(0));
+
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size());
+ return std::make_pair(ptrVal, Offset);
+ }
+ case Instruction::IntToPtr: {
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return ResolveConstantExpr(Op);
+ }
+ case Instruction::PtrToInt: {
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot greater than or equal to the size of the pointer.
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+ return ResolveConstantExpr(Op);
+
+ llvm_unreachable("Integer size less then pointer size");
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ // Only handle cases where there's a constant expression with GlobalValue
+ // as first operand and ConstantInt as second, which are the cases we can
+ // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+ // 1) Instruction::Add => (global) + CstInt
+ // 2) Instruction::Sub => (global) + -CstInt
+ const Constant *Op0 = CE->getOperand(0);
+ const Constant *Op1 = CE->getOperand(1);
+ assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+ CstExprResTy Res = ResolveConstantExpr(Op0);
+ assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+ const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ return std::make_pair(Res.first, RHS.getSExtValue());
+ case Instruction::Sub:
+ return std::make_pair(Res.first, (-RHS).getSExtValue());
+ }
+ }
+ }
+
+ report_fatal_error(CE->getOpcodeName() +
+ StringRef(": Unsupported ConstantExpr type"));
+
+ return std::make_pair(CV, 0); // silence warning
+}
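+
+ // Illustrative example (hypothetical IR): for an initializer such as
+ //   i8* getelementptr ([16 x i8]* @buf, i32 0, i32 12)
+ // the GetElementPtr case folds the indices with TargetData::getIndexedOffset
+ // and returns the pair (@buf, 12); EmitGlobalConstant then turns this into a
+ // data relocation against @buf with addend 12.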
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset) {
+ // Create the relocation entry for the global value
+ MachineRelocation MR =
+ MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+ TEW->getAbsoluteLabelMachineRelTy(),
+ const_cast<GlobalValue*>(GV),
+ Offset);
+
+ // Fill the data entry with zeros
+ GblS.emitZeros(Size);
+
+ // Add the relocation entry for the current data section
+ GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ ELFSection &S) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
+ const uint64_t *RawData = CI->getValue().getRawData();
+ uint64_t Val = 0;
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+ S.emitWord64(Val);
+ }
+}
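+
+ // Example (illustrative): an i128 constant is held by APInt as two 64-bit
+ // words with the least significant word first. On a little-endian target the
+ // loop above emits RawData[0] then RawData[1]; on a big-endian target it
+ // emits RawData[1] then RawData[0], so the bytes land in the file in target
+ // order.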
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used")
+ llvm_unreachable("not implemented yet");
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPointerPrefAlignment();
+ if (GV->getName() == "llvm.global_ctors") {
+ ELFSection &Ctor = getCtorSection();
+ Ctor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Ctor);
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ ELFSection &Dtor = getDtorSection();
+ Dtor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Dtor);
+ return true;
+ }
+
+ return false;
+}
+
+ /// EmitXXStructorList - Emit the ctor or dtor list. This just emits the
+/// function pointers, ignoring the init priority.
+void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
+ // Should be an array of '{ i32, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
+ if (List->isNullValue()) return;
+ const ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (InitList->getOperand(i)->isNullValue())
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+
+ if (CS->getOperand(1)->isNullValue())
+ continue;
+
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1), Xtor);
+ }
+}
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the ElfCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Emit .data section placeholder
+ getDataSection();
+
+ // Emit .bss section placeholder
+ getBSSSection();
+
+ // Build and emit data, bss and "common" sections.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit all pending globals
+ for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
+ I != E; ++I)
+ EmitGlobal(*I);
+
+ // Emit all pending externals
+ for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
+ I != E; ++I)
+ SymbolList.push_back(ELFSym::getExtSym(*I));
+
+ // Emit a symbol for each section created until now; skip the null section
+ for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ ELFSym *SectionSym = ELFSym::getSectionSym();
+ SectionSym->SectionIdx = ES.SectionIdx;
+ SymbolList.push_back(SectionSym);
+ ES.Sym = SymbolList.back();
+ }
+
+ // Emit string table
+ EmitStringTable(M.getModuleIdentifier());
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // Emit the relocation sections.
+ EmitRelocations();
+
+ // Emit the sections string table.
+ EmitSectionTableStringTable();
+
+ // Dump the sections and section table to the .o file.
+ OutputSectionsAndSectionTable();
+
+ return false;
+}
+
+ // RelocateField - Patch the relocatable field at 'Offset' in 'BO'
+ // using a 'Value' of known 'Size'.
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+ int64_t Value, unsigned Size) {
+ if (Size == 32)
+ BO.fixWord32(Value, Offset);
+ else if (Size == 64)
+ BO.fixWord64(Value, Offset);
+ else
+ llvm_unreachable("don't know howto patch relocatable field");
+}
+
+/// EmitRelocations - Emit relocations
+void ELFWriter::EmitRelocations() {
+
+ // True if the target uses the relocation entry to hold the addend,
+ // otherwise the addend is written directly to the relocatable field.
+ bool HasRelA = TEW->hasRelocationAddend();
+
+ // Create Relocation sections for each section which needs it.
+ for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+ ELFSection &S = *SectionList[i];
+
+ // This section does not have relocations
+ if (!S.hasRelocations()) continue;
+ ELFSection &RelSec = getRelocSection(S);
+
+ // 'Link' - Section hdr idx of the associated symbol table
+ // 'Info' - Section hdr idx of the section to which the relocation applies
+ ELFSection &SymTab = getSymbolTableSection();
+ RelSec.Link = SymTab.SectionIdx;
+ RelSec.Info = S.SectionIdx;
+ RelSec.EntSize = TEW->getRelocationEntrySize();
+
+ // Get the relocations from Section
+ std::vector<MachineRelocation> Relos = S.getRelocations();
+ for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
+ MRE = Relos.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+
+ // Relocatable field offset from the section start
+ unsigned RelOffset = MR.getMachineCodeOffset();
+
+ // Symbol index in the symbol table
+ unsigned SymIdx = 0;
+
+ // Target specific relocation field type and size
+ unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
+ unsigned RelTySize = TEW->getRelocationTySize(RelType);
+ int64_t Addend = 0;
+
+ // There are several machine relocation types, and each one of
+ // them needs a different approach to retrieve the symbol table index.
+ if (MR.isGlobalValue()) {
+ const GlobalValue *G = MR.getGlobalValue();
+ int64_t GlobalOffset = MR.getConstantVal();
+ SymIdx = GblSymLookup[G];
+ if (G->hasPrivateLinkage()) {
+ // If the target uses a section offset in the relocation:
+ // SymIdx + Addend = section sym for global + section offset
+ unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+ Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+ } else {
+ Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+ }
+ } else if (MR.isExternalSymbol()) {
+ const char *ExtSym = MR.getExternalSymbol();
+ SymIdx = ExtSymLookup[ExtSym];
+ Addend = TEW->getDefaultAddendForRelTy(RelType);
+ } else {
+ // Get the symbol index for the section symbol
+ unsigned SectionIdx = MR.getConstantVal();
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+ // The symbol offset inside the section
+ int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+ // For PC-relative relocations where symbols are defined in the same
+ // section in which they are referenced, ignore the relocation entry and
+ // patch the relocatable field with the symbol offset directly.
+ if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
+ int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
+ RelocateField(S, RelOffset, Value, RelTySize);
+ continue;
+ }
+
+ Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
+ }
+
+ // On targets that do not store the addend in the relocation entry,
+ // the relocatable field itself must be patched to contain the addend;
+ // otherwise write zeros to make sure there is no garbage there.
+ RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
+
+ // Get the relocation entry and emit to the relocation section
+ ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
+ EmitRelocation(RelSec, Rel, HasRelA);
+ }
+ }
+}
+
+ /// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
+void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
+ bool HasRelA) {
+ RelSec.emitWord(Rel.getOffset());
+ RelSec.emitWord(Rel.getInfo(is64Bit));
+ if (HasRelA)
+ RelSec.emitWord(Rel.getAddend());
+}
+
+/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
+void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
+ if (is64Bit) {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ SymbolTable.emitWord64(Sym.Value);
+ SymbolTable.emitWord64(Sym.Size);
+ } else {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitWord32(Sym.Value);
+ SymbolTable.emitWord32(Sym.Size);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ }
+}
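+
+ // The two field orders above mirror the on-disk layouts from the ELF spec:
+ //   Elf64_Sym: st_name(4) st_info(1) st_other(1) st_shndx(2) st_value(8) st_size(8)
+ //   Elf32_Sym: st_name(4) st_value(4) st_size(4) st_info(1) st_other(1) st_shndx(2)
+ // (sizes in bytes), which is why the 32-bit and 64-bit cases emit the same
+ // data in a different order.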
+
+ /// EmitSectionHeader - Write the header of section 'SHdr' into the
+ /// 'SHdrTab' Section Header Table
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+ const ELFSection &SHdr) {
+ SHdrTab.emitWord32(SHdr.NameIdx);
+ SHdrTab.emitWord32(SHdr.Type);
+ if (is64Bit) {
+ SHdrTab.emitWord64(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord64(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord64(SHdr.Align);
+ SHdrTab.emitWord64(SHdr.EntSize);
+ } else {
+ SHdrTab.emitWord32(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord32(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord32(SHdr.Align);
+ SHdrTab.emitWord32(SHdr.EntSize);
+ }
+}
+
+/// EmitStringTable - If the current symbol table is non-empty, emit the string
+/// table for it
+void ELFWriter::EmitStringTable(const std::string &ModuleName) {
+ if (!SymbolList.size()) return; // Empty symbol table.
+ ELFSection &StrTab = getStringTableSection();
+
+ // Set the zeroth entry of the string table to a null byte, as required.
+ StrTab.emitByte(0);
+
+ // Walk the symbol list and write the symbol names into the string table.
+ unsigned Index = 1;
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ ELFSym &Sym = *(*I);
+
+ std::string Name;
+ if (Sym.isGlobalValue()) {
+ SmallString<40> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
+ Name.append(NameStr.begin(), NameStr.end());
+ } else if (Sym.isExternalSym())
+ Name.append(Sym.getExternalSymbol());
+ else if (Sym.isFileType())
+ Name.append(ModuleName);
+
+ if (Name.empty()) {
+ Sym.NameIdx = 0;
+ } else {
+ Sym.NameIdx = Index;
+ StrTab.emitString(Name);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTab.size());
+ StrTab.Size = Index;
+}
+
+ // SortSymbols - In the symbol table, local symbols must come before
+ // all other symbols with non-local bindings. The return value is
+ // the position of the first non-local symbol.
+unsigned ELFWriter::SortSymbols() {
+ unsigned FirstNonLocalSymbol;
+ std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ if ((*I)->isLocalBind())
+ LocalSyms.push_back(*I);
+ else
+ OtherSyms.push_back(*I);
+ }
+ SymbolList.clear();
+ FirstNonLocalSymbol = LocalSyms.size();
+
+ for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+ SymbolList.push_back(LocalSyms[i]);
+
+ for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+ SymbolList.push_back(*I);
+
+ LocalSyms.clear();
+ OtherSyms.clear();
+
+ return FirstNonLocalSymbol;
+}
+
+/// EmitSymbolTable - Emit the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (!SymbolList.size()) return; // Empty symbol table.
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSymbolTableSection();
+ SymTab.Align = TEW->getPrefELFAlignment();
+
+ // Section Index of .strtab.
+ SymTab.Link = getStringTableSection().SectionIdx;
+
+ // Size of each symtab entry.
+ SymTab.EntSize = TEW->getSymTabEntrySize();
+
+ // Reorder the symbol table with local symbols first!
+ unsigned FirstNonLocalSymbol = SortSymbols();
+
+ // Emit all the symbols to the symbol table.
+ for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+ ELFSym &Sym = *SymbolList[i];
+
+ // Emit symbol to the symbol table
+ EmitSymbol(SymTab, Sym);
+
+ // Record the symbol table index for each symbol
+ if (Sym.isGlobalValue())
+ GblSymLookup[Sym.getGlobalValue()] = i;
+ else if (Sym.isExternalSym())
+ ExtSymLookup[Sym.getExternalSymbol()] = i;
+
+ // Keep track of the symbol's index in the symbol table
+ Sym.SymTabIdx = i;
+ }
+
+ // One greater than the symbol table index of the last local symbol
+ SymTab.Info = FirstNonLocalSymbol;
+ SymTab.Size = SymTab.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSectionHeaderStringTableSection();
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ S.NameIdx = Index;
+ SHStrTab.emitString(S.getName());
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += S.getName().size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == SHStrTab.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = ElfHdr.size(); // File header first.
+
+ // Adjust the alignment of all sections if needed; skip the null section.
+ for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ if (!ES.size()) {
+ ES.Offset = FileOff;
+ continue;
+ }
+
+ // Update Section size
+ if (!ES.Size)
+ ES.Size = ES.size();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (ES.Align)
+ FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
+
+ ES.Offset = FileOff;
+ FileOff += ES.Size;
+ }
+
+ // Align Section Header.
+ unsigned TableAlign = TEW->getPrefELFAlignment();
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
+ FileOff = ElfHdr.size();
+
+ // Section Header Table blob
+ BinaryObject SHdrTable(isLittleEndian, is64Bit);
+
+ // Emit all of the sections to the file and build the section header table.
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+ << ", Size: " << S.Size << ", Offset: " << S.Offset
+ << ", SectionData Size: " << S.size() << "\n");
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.size()) {
+ if (S.Align) {
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+ }
+ O.write((char *)&S.getData()[0], S.Size);
+ FileOff += S.Size;
+ }
+
+ EmitSectionHeader(SHdrTable, S);
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+
+ // Emit the section table itself.
+ O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
+}
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.h b/contrib/llvm/lib/CodeGen/ELFWriter.h
new file mode 100644
index 000000000000..6f7fbace8aba
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,251 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <map>
+
+namespace llvm {
+ class BinaryObject;
+ class Constant;
+ class ConstantInt;
+ class ConstantStruct;
+ class ELFCodeEmitter;
+ class ELFRelocation;
+ class ELFSection;
+ struct ELFSym;
+ class GlobalVariable;
+ class JITDebugRegisterer;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachineConstantPoolEntry;
+ class ObjectCodeEmitter;
+ class MCAsmInfo;
+ class TargetELFWriterInfo;
+ class TargetLoweringObjectFile;
+ class raw_ostream;
+ class SectionKind;
+ class MCContext;
+ class TargetMachine;
+
+ typedef std::vector<ELFSym*>::iterator ELFSymIter;
+ typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+ typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+ typedef SetVector<const char *>::const_iterator PendingExtsIter;
+ typedef std::pair<const Constant *, int64_t> CstExprResTy;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ friend class JITDebugRegisterer;
+ public:
+ static char ID;
+
+ /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+ ObjectCodeEmitter *getObjectCodeEmitter() {
+ return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
+ }
+
+ ELFWriter(raw_ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ raw_ostream &O;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// Context object for machine code objects.
+ MCContext &OutContext;
+
+ /// Target Elf Writer description.
+ const TargetELFWriterInfo *TEW;
+
+ /// Mang - The object used to perform name mangling for this module.
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *ElfCE;
+
+ /// TLOF - Target Lowering Object File, provide section names for globals
+ /// and other object file specific stuff
+ const TargetLoweringObjectFile &TLOF;
+
+ /// MAI - Target Asm Info, provide information about section names for
+ /// globals and other target specific stuff.
+ const MCAsmInfo *MAI;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //===------------------------------------------------------------------===//
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ /// Blob containing the Elf header
+ BinaryObject ElfHdr;
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::vector<ELFSection*> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to the corresponding
+ /// ELFSection in SectionList. Used to quickly find a section (and hence its
+ /// index) from the names provided by MAI.
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// PendingGlobals - Globals not processed as symbols yet.
+ SetVector<const GlobalValue*> PendingGlobals;
+
+ /// GblSymLookup - This is a mapping from global value to a symbol index
+ /// in the symbol table or private symbols list. This is useful since reloc
+ /// symbol references must be quickly mapped to their indices on the lists.
+ std::map<const GlobalValue*, uint32_t> GblSymLookup;
+
+ /// PendingExternals - Externals not processed as symbols yet.
+ SetVector<const char *> PendingExternals;
+
+ /// ExtSymLookup - This is a mapping from externals to a symbol index
+ /// in the symbol table list. This is useful since reloc symbol references
+ /// must be quickly mapped to their symbol table indices.
+ std::map<const char *, uint32_t> ExtSymLookup;
+
+ /// SymbolList - This is the list of symbols emitted to the symbol table.
+ /// When the SymbolList is finally built, local symbols must be placed in
+ /// the beginning while non-locals at the end.
+ std::vector<ELFSym*> SymbolList;
+
+ /// PrivateSyms - Record private symbols; a symbol recorded here must never
+ /// appear in the SymbolList.
+ std::vector<ELFSym*> PrivateSyms;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name, unsigned Type,
+ unsigned Flags = 0, unsigned Align = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
+ SN = SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ SN->Link = ELF::SHN_UNDEF;
+ SN->Align = Align;
+ return *SN;
+ }
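+
+ // Repeated calls with the same name return the same ELFSection; only the
+ // first call assigns a SectionIdx. A (hypothetical) caller could write, e.g.,
+ // getSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC, 4) to lazily
+ // create a 4-byte aligned read-only data section.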
+
+ ELFSection &getNonExecStackSection() {
+ return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
+ }
+
+ ELFSection &getSymbolTableSection() {
+ return getSection(".symtab", ELF::SHT_SYMTAB, 0);
+ }
+
+ ELFSection &getStringTableSection() {
+ return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getSectionHeaderStringTableSection() {
+ return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getNullSection() {
+ return getSection("", ELF::SHT_NULL, 0);
+ }
+
+ ELFSection &getDataSection();
+ ELFSection &getBSSSection();
+ ELFSection &getCtorSection();
+ ELFSection &getDtorSection();
+ ELFSection &getJumpTableSection();
+ ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
+ ELFSection &getTextSection(const Function *F);
+ ELFSection &getRelocSection(ELFSection &S);
+
+ // Helpers for obtaining ELF specific info.
+ unsigned getGlobalELFBinding(const GlobalValue *GV);
+ unsigned getGlobalELFType(const GlobalValue *GV);
+ unsigned getGlobalELFVisibility(const GlobalValue *GV);
+
+ // AddPendingGlobalSymbol - Add a global to be processed and to
+ // the global symbol lookup, use a zero index because the table
+ // index will be determined later.
+ void AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup = false);
+
+ // AddPendingExternalSymbol - Add the external to be processed
+ // and to the external symbol lookup, use a zero index because
+ // the symbol table index will be determined later.
+ void AddPendingExternalSymbol(const char *External);
+
+ // AddToSymbolList - Update the symbol lookup and, if the symbol is private,
+ // add it to the PrivateSyms list; otherwise add it to SymbolList.
+ void AddToSymbolList(ELFSym *GblSym);
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offset in ELFHeader of these various pieces to update and other
+ // locations in the file.
+ unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
+
+ private:
+ void EmitGlobal(const GlobalValue *GV);
+ void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+ void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS);
+ void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+ void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset = 0);
+ bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+ void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
+ void EmitRelocations();
+ void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
+ void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
+ void EmitSectionTableStringTable();
+ void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+ void EmitSymbolTable();
+ void EmitStringTable(const std::string &ModuleName);
+ void OutputSectionsAndSectionTable();
+ void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+ unsigned Size);
+ unsigned SortSymbols();
+ CstExprResTy ResolveConstantExpr(const Constant *CV);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..a7aba89b87f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,97 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, 0);
+ unsigned b1 = getBundle(i, 1);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
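+
+// Numbering sketch: block N owns two equivalence-class nodes, 2*N for its
+// ingoing bundle and 2*N+1 for its outgoing bundle. For example, if BB#0 has
+// successors BB#1 and BB#2, the joins above merge node 1 (out of BB#0) with
+// nodes 2 (into BB#1) and 4 (into BB#2), so both outgoing edges of BB#0 end
+// up in the same bundle.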
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph; the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const std::string &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 000000000000..a67140ece4a5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,82 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Expand ISel Pseudo-instructions";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand CodeGen Pseudo-instructions", false, false)
+
+FunctionPass *llvm::createExpandISelPseudosPass() {
+ return new ExpandISelPseudos();
+}
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ const MCInstrDesc &MCID = MI->getDesc();
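+ // usesCustomInsertionHook() is set for instructions whose TableGen
+ // definition marks usesCustomInserter = 1; their expansion may create new
+ // basic blocks, which is why the block and iterators are re-seeded below.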
+ if (MCID.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 000000000000..d757cf409d50
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,213 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+ public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ };
+
+ class Deleter : public FunctionPass {
+ static char ID;
+
+ public:
+ Deleter();
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (Name == I->getName()) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ dbgs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("Unknown GC point kind");
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (!F.hasGC()) return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": "
+ << DescKind(PI->Kind) << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI); RI != RE;) {
+ OS << " " << RI->Num;
+ if (++RI == RE)
+ break;
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+ return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(ID) {}
+
+const char *Deleter::getPassName() const {
+ return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+ return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 000000000000..f80e9ced0bc9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 000000000000..766c6ee542a9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,416 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class MachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable(0);
+ return 0;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
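+
+// In IR terms the rewrite amounts to the following (names hypothetical): for
+// an entry block containing
+//   %root = alloca i8*
+//   call void @llvm.gcroot(i8** %root, i8* null)
+// with no store to %root before the first potential safe point, a
+//   store i8* null, i8** %root
+// is inserted right after the alloca, so the collector never scans an
+// uninitialized root slot.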
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action, or if the GC roots need to be initialized by the default lowering.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
+ MadeChange |= S.performCustomLowering(F);
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
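+
+// In IR terms the default lowering above is roughly (operand names
+// hypothetical):
+//   %v = call i8* @llvm.gcread(i8* %obj, i8** %slot)     -->  %v = load i8** %slot
+//   call void @llvm.gcwrite(i8* %v, i8* %obj, i8** %slot) --> store i8* %v, i8** %slot
+// while llvm.gcroot calls are kept and merely recorded for root
+// initialization.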
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
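+
+// The emitted machine code around each call is bracketed roughly as follows
+// (labels hypothetical, each emitted only if the strategy asked for that
+// safe point kind):
+//   GC_LABEL <pre>    ; GC::PreCall
+//   CALL ...
+//   GC_LABEL <post>   ; GC::PostCall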
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->getDesc().isCall())
+ VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 000000000000..6cb22778caf9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1513 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
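+
+ // Shape summary: "simple" predicates a single side block that never rejoins
+ // the other path; "triangle" predicates TrueBB, which then flows into FalseBB
+ // where the two paths rejoin; "diamond" predicates both TrueBB and FalseBB,
+ // which rejoin at a common tail block. The *False/*Rev variants apply the
+ // same shapes to the false path and/or with the branch condition reversed.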
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the result of
+ /// if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// classification, and common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false, i.e. the block's
+ /// terminators could be analyzed.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Some instructions are slower when predicated
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB - See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineLoopInfo *MLI;
+ bool MadeChange;
+ int FnNum;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If Converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ const BranchProbability &Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ const BranchProbability &Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ InstrItins = MF.getTarget().getInstrItineraryData();
+ if (!TII) return false;
+
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// 'Dups', the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks if 'true' block's false branch
+/// branches to the 'false' block rather than the other way around. It also
+/// returns, in 'Dups', the number of instructions that the ifcvt would need
+/// to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB))
+ break;
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->getDesc().isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->getDesc().isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
+ break;
+ }
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
+ break;
+ }
+ if (!TIE->isIdenticalTo(FIE))
+ break;
+ ++Dups2;
+ --TIE;
+ --FIE;
+ }
+
+ return true;
+}
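+
+// For illustration (hypothetical pseudo-MIR, not tied to any target): if both
+// halves of the diamond begin with the same "r0 = LDR ..." and end with the
+// same "STR r1, ...", Dups1 and Dups2 each become 1. IfConvertDiamond then
+// emits the shared prefix and suffix only once, and only the differing middle
+// instructions of each half are predicated.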
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless the clobbering instruction is
+/// the last one in the block.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // A predicate-modifying instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(I)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
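+
+// For illustration (hypothetical pseudo-code, not tied to any target):
+//   r0 = ADD r1, r2
+//   CMP r0, #0          ; defines the predicate (sets ClobbersPred)
+//   r3 = SUB r4, r5     ; unpredicated instruction after the clobber
+// marks the block IsUnpredicable, because SUB could no longer be predicated
+// correctly once CMP has rewritten the condition. If CMP were the last
+// non-branch instruction, the block would remain a candidate.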
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable, or ends with a fallthrough or unconditional branch, or is
+ // no longer considered for ifcvt.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+
+ // Try to predict the branch, using loop info to guide us.
+ // General heuristics are:
+ // - backedge -> 90% taken
+ // - early exit -> 20% taken
+ // - branch predictor confidence -> 90%
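+ // For example, BranchProbability(9, 10) below encodes "taken 9 times out
+ // of 10"; the default Prediction(5, 10) is an even 50/50 guess.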
+ BranchProbability Prediction(5, 10);
+ MachineLoop *Loop = MLI->getLoopFor(BB);
+ if (Loop) {
+ if (TrueBBI.BB == Loop->getHeader())
+ Prediction = BranchProbability(9, 10);
+ else if (FalseBBI.BB == Loop->getHeader())
+ Prediction = BranchProbability(1, 10);
+
+ MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
+ MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
+ if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
+ Prediction = BranchProbability(2, 10);
+ else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
+ Prediction = BranchProbability(8, 10);
+ }
+
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ AnalyzeBlock(BB, Tokens);
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (I != TI) {
+ // Check isSuccessor to avoid case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
+ return false;
+ PI = I++;
+ }
+ return true;
+}
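+
+// For illustration: BB can "fall through" to ToBB across a chain of empty
+// blocks, e.g. BB -> (empty) -> (empty) -> ToBB, provided each block in the
+// chain is an actual CFG successor of the one before it.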
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
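+///
+/// For illustration (hypothetical, target-independent): once predicated,
+///   r0 = ADDeq r1, r2
+/// only writes r0 when the predicate holds, so r0's previous value may
+/// survive. The def must therefore also be treated as a read of r0, which is
+/// the implicit use UpdatePredRedefs adds when AddImpUse is set.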
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Redefs.insert(*Subreg);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.erase(*SR);
+ }
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (Redefs.count(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/,false/*IsKill*/));
+ } else {
+ Redefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.insert(*SR);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, which will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ ++DI2;
+ if (!DI2->isDebugValue())
+ --NumDups1;
+ }
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
+ --DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB);
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (I->isDebugValue() || TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit use so the def is modeled as read + write.
+ UpdatePredRedefs(I, Redefs, TRI, true);
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const MCInstrDesc &MCID = I->getDesc();
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && MCID.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit use so the def is modeled as read + write.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (AddEdges)
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 000000000000..5547f735ba5e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,1058 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "Spiller.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of snippets included in spills");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills after reloads");
+STATISTIC(NumHoistLocal, "Number of locally hoisted spills");
+STATISTIC(NumHoistGlobal, "Number of globally hoisted spills");
+STATISTIC(NumRedundantSpills, "Number of redundant spills identified");
+
+namespace {
+class InlineSpiller : public Spiller {
+ MachineFunctionPass &Pass;
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveRangeEdit *Edit;
+ LiveInterval *StackInt;
+ int StackSlot;
+ unsigned Original;
+
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<unsigned, 8> RegsToSpill;
+
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+ // Information about a value that was defined by a copy from a sibling
+ // register.
+ struct SibValueInfo {
+ // True when all reaching defs were reloads: No spill is necessary.
+ bool AllDefsAreReloads;
+
+ // The preferred register to spill.
+ unsigned SpillReg;
+
+ // The value of SpillReg that should be spilled.
+ VNInfo *SpillVNI;
+
+ // A defining instruction that is not a sibling copy or a reload, or NULL.
+ // This can be used as a template for rematerialization.
+ MachineInstr *DefMI;
+
+ SibValueInfo(unsigned Reg, VNInfo *VNI)
+ : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {}
+ };
+
+ // Values in RegsToSpill defined by sibling copies.
+ typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+ SibValueMap SibValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ ~InlineSpiller() {}
+
+public:
+ InlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm)
+ : Pass(pass),
+ MF(mf),
+ LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()),
+ VRM(vrm),
+ MFI(*mf.getFrameInfo()),
+ MRI(mf.getRegInfo()),
+ TII(*mf.getTarget().getInstrInfo()),
+ TRI(*mf.getTarget().getRegisterInfo()) {}
+
+ void spill(LiveRangeEdit &);
+
+private:
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
+
+ bool isRegToSpill(unsigned Reg) {
+ return std::find(RegsToSpill.begin(),
+ RegsToSpill.end(), Reg) != RegsToSpill.end();
+ }
+
+ bool isSibling(unsigned Reg);
+ MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void analyzeSiblingValues();
+
+ bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
+ void reMaterializeAll();
+
+ bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
+ bool foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI = 0);
+ void insertReload(LiveInterval &NewLI, SlotIndex,
+ MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(unsigned Reg);
+ void spillAll();
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance
+// for spill slots, which can be important in tight loops.
+
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
+ if (!MI->isFullCopy())
+ return 0;
+ if (MI->getOperand(0).getReg() == Reg)
+ return MI->getOperand(1).getReg();
+ if (MI->getOperand(1).getReg() == Reg)
+ return MI->getOperand(0).getReg();
+ return 0;
+}
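+
+// For illustration: "%vreg5 = COPY %vreg7" is a full copy of either register,
+// so isFullCopyOf returns the other operand; a copy that only reads or writes
+// a sub-register (e.g. "%vreg5:sub_lo = COPY ...") is not a full copy and
+// yields 0. (Register names here are hypothetical.)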
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ unsigned Reg = Edit->getReg();
+
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills. We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ MachineInstr *UseMI = 0;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ RI = MRI.reg_nodbg_begin(SnipLI.reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Allow copies to/from Reg.
+ if (isFullCopyOf(MI, Reg))
+ continue;
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && MI != UseMI)
+ return false;
+ UseMI = MI;
+ }
+ return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ unsigned Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ unsigned SnipReg = isFullCopyOf(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
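+//
+// For illustration (hypothetical virtual registers): after splitting,
+//   %A = ...             ; original def
+//   %B = COPY %A         ; sibling copy
+//   spill %B
+// tracing %B's value back through the copy to %A lets the spiller either
+// rematerialize the original def or notice that the value is already on the
+// stack, instead of blindly storing %B.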
+
+bool InlineSpiller::isSibling(unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ VRM.getOriginal(Reg) == Original;
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+ VNInfo *OrigVNI) {
+ DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << '\n');
+ SmallPtrSet<VNInfo*, 8> Visited;
+ SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+ // Best spill candidate seen so far. This must dominate UseVNI.
+ SibValueInfo SVI(UseReg, UseVNI);
+ MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
+ MachineBasicBlock *SpillMBB = UseMBB;
+ unsigned SpillDepth = Loops.getLoopDepth(SpillMBB);
+ bool SeenOrigPHI = false; // Original PHI met.
+
+ do {
+ unsigned Reg;
+ VNInfo *VNI;
+ tie(Reg, VNI) = WorkList.pop_back_val();
+ if (!Visited.insert(VNI))
+ continue;
+
+ // Is this value a better spill candidate?
+ if (!isRegToSpill(Reg)) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ if (MBB == SpillMBB) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ if (VNI->def < SVI.SpillVNI->def) {
+ DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": "
+ << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << '\n');
+ SVI.SpillReg = Reg;
+ SVI.SpillVNI = VNI;
+ }
+ } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
+ // This is a valid spill location dominating UseVNI.
+ // Prefer to spill at a smaller loop depth.
+ unsigned Depth = Loops.getLoopDepth(MBB);
+ if (Depth < SpillDepth) {
+ DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg)
+ << ':' << VNI->id << '@' << VNI->def << '\n');
+ SVI.SpillReg = Reg;
+ SVI.SpillVNI = VNI;
+ SpillMBB = MBB;
+ SpillDepth = Depth;
+ }
+ }
+ }
+
+ // Trace through PHI-defs created by live range splitting.
+ if (VNI->isPHIDef()) {
+ if (VNI->def == OrigVNI->def) {
+ DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':'
+ << VNI->id << '@' << VNI->def << '\n');
+ SeenOrigPHI = true;
+ continue;
+ }
+ // Get values live-out of predecessors.
+ LiveInterval &LI = LIS.getInterval(Reg);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ if (PVNI)
+ WorkList.push_back(std::make_pair(Reg, PVNI));
+ }
+ continue;
+ }
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+
+ // Trace through sibling copies.
+ if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(SrcReg)) {
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex());
+ assert(SrcVNI && "Copy from non-existing value");
+ DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def << '\n');
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ continue;
+ }
+ }
+
+ // Track reachable reloads.
+ int FI;
+ if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':'
+ << VNI->id << "@" << VNI->def << '\n');
+ SVI.AllDefsAreReloads = true;
+ continue;
+ }
+
+ // We have an 'original' def. Don't record trivial cases.
+ if (VNI == UseVNI) {
+ DEBUG(dbgs() << "Not a sibling copy.\n");
+ return MI;
+ }
+
+ // Potential remat candidate.
+ DEBUG(dbgs() << " def " << PrintReg(Reg) << ':'
+ << VNI->id << '@' << VNI->def << '\t' << *MI);
+ SVI.DefMI = MI;
+ } while (!WorkList.empty());
+
+ if (SeenOrigPHI || SVI.DefMI)
+ SVI.AllDefsAreReloads = false;
+
+ DEBUG({
+ if (SVI.AllDefsAreReloads)
+ dbgs() << "All defs are reloads.\n";
+ else
+ dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n';
+ });
+ SibValues.insert(std::make_pair(UseVNI, SVI));
+ return SVI.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
+///
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+ SibValues.clear();
+
+ // No siblings at all?
+ if (Edit->getReg() == Original)
+ return;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+ VE = LI.vni_end(); VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = 0;
+ // Check possible sibling copies.
+ if (VNI->isPHIDef() || VNI->getCopy()) {
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
+ if (OrigVNI->def != VNI->def)
+ DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+ }
+ if (!DefMI && !VNI->isPHIDef())
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+ DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+ << VNI->def << " may remat from " << *DefMI);
+ }
+ }
+ }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
+ assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+ SibValueMap::iterator I = SibValues.find(VNI);
+ if (I == SibValues.end())
+ return false;
+
+ const SibValueInfo &SVI = I->second;
+
+ // Let the normal folding code deal with the boring case.
+ if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ return false;
+
+ // SpillReg may have been deleted by remat and DCE.
+ if (!LIS.hasInterval(SVI.SpillReg)) {
+ DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+ if (!SibLI.containsValue(SVI.SpillVNI)) {
+ DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // Already spilled everywhere.
+ if (SVI.AllDefsAreReloads) {
+ ++NumOmitReloadSpill;
+ return true;
+ }
+ // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SVI.SpillVNI->isPHIDef())
+ MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+ MRI.getRegClass(SVI.SpillReg), &TRI);
+ --MII; // Point to store instruction.
+ LIS.InsertMachineInstrInMaps(MII);
+ VRM.addSpillSlotUse(StackSlot, MII);
+ DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+
+ if (MBB == CopyMI->getParent())
+ ++NumHoistLocal;
+ else
+ ++NumHoistGlobal;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ tie(LI, VNI) = WorkList.pop_back_val();
+ unsigned Reg = LI->reg;
+ DEBUG(dbgs() << "Checking redundant spills for "
+ << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = UI.skipInstruction();) {
+ if (!MI->isCopy() && !MI->getDesc().mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(MI);
+ ++NumRedundantSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies.
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI))
+ continue;
+
+ if (VNI->isPHIDef()) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+ MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx);
+
+ if (!ParentVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(MI))
+ return false;
+
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
+ if (Writes) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+ }
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // Allocate a new register for the remat.
+ LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
+ NewLI.markNotSpillable();
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ LIS, TII, TRI);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewLI.reg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(LIS, TII, AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();)
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+
+ // Get rid of deleted and empty intervals.
+ for (unsigned i = RegsToSpill.size(); i != 0; --i) {
+ unsigned Reg = RegsToSpill[i-1];
+ if (!LIS.hasInterval(Reg)) {
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (!LI.empty())
+ continue;
+ Edit->eraseVirtReg(Reg, LIS);
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+ DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// coalesceStackAccess - If MI is a load or store of StackSlot, it can be
+/// removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
+ int FI = 0;
+ unsigned InstrReg;
+ if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) &&
+ !(InstrReg = TII.isStoreToStackSlot(MI, FI)))
+ return false;
+
+ // We have a stack access. Is it the right register and slot?
+ if (InstrReg != Reg || FI != StackSlot)
+ return false;
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ return true;
+}
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
+/// @param MI Instruction using or defining the current register.
+/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success, and MI will be erased.
+bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) {
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned Idx = Ops[i];
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit())
+ continue;
+ // FIXME: Teach targets to deal with subregs.
+ if (MO.getSubReg())
+ return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
+ if (!FoldMI)
+ return false;
+ LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
+ if (!LoadMI)
+ VRM.addSpillSlotUse(StackSlot, FoldMI);
+ MI->eraseFromParent();
+ DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+ ++NumFolded;
+ return true;
+}
+
+/// insertReload - Insert a reload of NewLI.reg before MI.
+void InlineSpiller::insertReload(LiveInterval &NewLI,
+ SlotIndex Idx,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to load instruction.
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+ VRM.addSpillSlotUse(StackSlot, MI);
+ DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
+ LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+ ++NumReloads;
+}
+
+/// insertSpill - Insert a spill of NewLI.reg after MI.
+void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex Idx, MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to store instruction.
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+ VRM.addSpillSlotUse(StackSlot, MI);
+ DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+ ++NumSpills;
+}
+
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+ DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
+
+ // Iterate over instructions using Reg.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else {
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+ continue;
+ }
+
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(MI))
+ continue;
+
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI, Reg))
+ continue;
+
+ // Analyze instruction.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ unsigned SibReg = isFullCopyOf(MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ SnippetCopies.insert(MI);
+ continue;
+ }
+ if (Writes) {
+ // Hoist the spill of a sib-reg copy.
+ if (hoistSpill(OldLI, MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI->getOperand(0).setIsDead();
+ DeadDefs.push_back(MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(MI, Ops))
+ continue;
+
+ // Allocate interval around instruction.
+ // FIXME: Infer regclass from instruction alone.
+ LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
+ NewLI.markNotSpillable();
+
+ if (Reads)
+ insertReload(NewLI, Idx, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ MO.setReg(NewLI.reg);
+ if (MO.isUse()) {
+ if (!MI->isRegTiedToDefOperand(Ops[i]))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (Writes && hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ }
+}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+ StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
+ StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ spillAroundUses(RegsToSpill[i]);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
+ MachineInstr *MI = RI.skipInstruction();) {
+ assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ VRM.RemoveMachineInstrFromMaps(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ Edit->eraseVirtReg(RegsToSpill[i], LIS);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = 0;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << MRI.getRegClass(edit.getReg())->getName()
+ << ':' << edit.getParent() << "\nFrom original "
+ << LIS.getInterval(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ analyzeSiblingValues();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, LIS, Loops);
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 000000000000..a09bb39f8336
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,166 @@
+//===-- InterferenceCache.cpp - Caching per-block interference -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "InterferenceCache.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ PhysRegEntries.assign(TRI->getNumRegs(), 0);
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate();
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate() {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
+ Aliases[i].second = Aliases[i].first->getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+ // LIU's changed, invalidate cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+ Aliases.clear();
+ for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+ LiveIntervalUnion *LIU = LIUArray + *AS;
+ Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
+ }
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ unsigned e = Aliases.size();
+ Iters.resize(e);
+ for (unsigned i = 0; i != e; ++i)
+ Iters[i].setMap(Aliases[i].first->getMap());
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ unsigned i = 0, e = Aliases.size();
+ for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+ LiveIntervalUnion *LIU = LIUArray + *AS;
+ if (i == e || Aliases[i].first != LIU)
+ return false;
+ if (LIU->changedSince(Aliases[i].second))
+ return false;
+ }
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Use advanceTo only when possible.
+ if (PrevPos != Start) {
+ if (!PrevPos.isValid() || Start < PrevPos)
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+ Iters[i].find(Start);
+ else
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+ Iters[i].advanceTo(Start);
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ BlockInterference *BI = &Blocks[MBBNum];
+ for (;;) {
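+ // Scan forward through the function, caching blocks with no interference
+ // as we go, and stop at the first block that has some.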
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference.
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+ Iter &I = Iters[i];
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+ Iter &I = Iters[i];
+ if (!I.valid() || I.start() >= Stop)
+ continue;
+ I.advanceTo(Stop);
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 000000000000..7f0a27a41baa
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,204 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INTERFERENCECACHE
+#define LLVM_CODEGEN_INTERFERENCECACHE
+
+#include "LiveIntervalUnion.h"
+
+namespace llvm {
+
+class InterferenceCache {
+ const TargetRegisterInfo *TRI;
+ LiveIntervalUnion *LIUArray;
+ SlotIndexes *Indexes;
+ MachineFunction *MF;
+
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ BlockInterference() : Tag(0) {}
+ unsigned Tag;
+ SlotIndex First;
+ SlotIndex Last;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ unsigned PhysReg;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// Aliases - A LiveIntervalUnion pointer and tag for each alias of
+ /// PhysReg.
+ SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+
+ typedef LiveIntervalUnion::SegmentIter Iter;
+
+ /// Iters - An iterator for each alias.
+ SmallVector<Iter, 8> Iters;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate();
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register; that would use
+ // too much memory. Instead, a fixed number of cache entries is used in a
+ // round-robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ SmallVector<unsigned char, 2> PhysRegEntries;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry;
+ BlockInterference *Current;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry->get(MBBNum);
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 000000000000..611886ff16a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,570 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Check whether the program already contains a function with this name,
+ // and insert a prototype if it does not.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
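+ // Mirror the four bytes: bytes 0 and 3 trade places via 24-bit shifts,
+ // bytes 1 and 2 via masked 8-bit shifts, and the pieces are OR'd together.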
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
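+ // Same approach as the 32-bit case: shift each of the eight bytes to its
+ // mirrored position, mask off the stray bits, and OR everything together.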
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
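+ // The classic parallel bit-count masks: each step pairs up adjacent groups
+ // of 1, 2, 4, 8, 16, and 32 bits and adds them in place.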
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
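+ // Wider integers are handled 64 bits at a time; each word's partial count
+ // is accumulated into Count.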
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
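+/// The expansion ORs V with ever larger right-shifts of itself, smearing the
+/// highest set bit into all lower positions; the leading zeros then remain as
+/// the only set bits of the complement, which LowerCTPOP counts.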
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
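+ // For example, X = 0b01101000: X-1 = 0b01100111, so ~X & (X-1) = 0b00000111
+ // and ctpop gives 3, the number of trailing zeros.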
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different from eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 000000000000..f985af8ba83e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,498 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+namespace llvm {
+ bool EnableFastISel;
+}
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+ cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable post-regalloc Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+ cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+ cl::desc("Show instruction structure in .s output"));
+static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
+ cl::desc("Enable MC API logging"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+ switch (AsmVerbose) {
+ default:
+ case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+}
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS)
+ : TargetMachine(T, Triple, CPU, FS) {
+ AsmInfo = T.createMCAsmInfo(Triple);
+}
+
+// Set the default code model for the JIT for a generic target.
+// FIXME: Is small right here? or .is64Bit() ? Large : Small?
+void LLVMTargetMachine::setCodeModelForJIT() {
+ setCodeModel(CodeModel::Small);
+}
+
+// Set the default code model for static compilation for a generic target.
+void LLVMTargetMachine::setCodeModelForStatic() {
+ setCodeModel(CodeModel::Small);
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ TargetAsmBackend *TAB = 0;
+ if (ShowMCEncoding) {
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context);
+ TAB = getTarget().createAsmBackend(getTargetTriple());
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, TAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
+ *Context);
+ TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
+ if (MCE == 0 || TAB == 0)
+ return true;
+
+ AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(),
+ *Context, *TAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for performance analysis and testing, not for
+ // real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ if (EnableMCLogging)
+ AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ // Make sure the code model is set.
+ setCodeModelForStatic();
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ // Add common CodeGen passes.
+ MCContext *Ctx = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ return true;
+
+ addCodeEmitter(PM, OptLevel, JCE);
+ PM.add(createGCInfoDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer which can be
+/// used to build custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ raw_ostream &Out,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ return true;
+
+ if (hasMCSaveTempLabels())
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
+ if (MCE == 0 || TAB == 0)
+ return true;
+
+ OwningPtr<MCStreamer> AsmStreamer;
+ AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx,
+ *TAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ return false; // success!
+}
+
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
+static void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(Banner));
+}
+
+/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
+/// emitting to assembly files or machine code output.
+///
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext) {
+ // Standard LLVM-Level Passes.
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None && !DisableLSR) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+ switch (getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit; the cleanups done apply to both.
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPass(getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(this));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+
+ if (OptLevel != CodeGenOpt::None && !DisableCGP)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ TargetAsmInfo *TAI = new TargetAsmInfo(*this);
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI);
+ PM.add(MMI);
+ OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
+ EnableFastISel = true;
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
+
+ if (!DisableMachineLICM)
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ if (!DisableMachineSink)
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
+
+ // Perform register allocation.
+ PM.add(createRegisterAllocator(OptLevel));
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ if (!DisableSSC)
+ PM.add(createStackSlotColoringPass(false));
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ if (!DisablePostRAMachineLICM)
+ PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createLowerSubregsPass());
+ printAndVerify(PM, "After LowerSubregs");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
+ PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
+ PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 000000000000..0eb009ddac29
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,152 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
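+
+// As an illustration (node numbers and latencies are hypothetical): for
+//   SUnit A with NodeNum 3 and latency 7, SUnit B with NodeNum 9 and latency 4
+// the comparator returns false for (A, B) and true for (B, A), so pop() below
+// treats A, the node on the longer critical path, as the better candidate.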
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push(SUnit *SU) {
+  // Look at all of the successors of this node. Count the number of
+  // successors for which this node is the sole unscheduled predecessor.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push_back(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
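+
+// Sketch of the effect (names are hypothetical): if P is the single
+// unscheduled predecessor of SU and P is already in the queue, then
+//
+//   remove(P);
+//   push(P);
+//
+// recomputes NumNodesSolelyBlocking[P->NodeNum], which latency_sort reads via
+// getNumSolelyBlockNodes(), so P now wins ties against equal-latency nodes.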
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return NULL;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..5d38c83b49c2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,966 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class LDVImpl;
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
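+
+  // Small worked example (UserValues A, B, C are hypothetical): starting from
+  // three singleton classes, merge(merge(A, B), C) leaves A as the leader with
+  // the next-chain A -> C -> B and every member's leader field pointing at A,
+  // so getLeader() resolves each of them to A in a single step.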
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return ~0u;
+      // For register locations we don't care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg())
+ locations.back().setIsUse();
+ return locations.size() - 1;
+ }
+
+ /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// End points where VNI is no longer live are added to Kills.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param Kills Append end points of VNI's live range to Kills.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+  /// addDefsFromCopies - The value in LI/LocNo may be copied to other
+ /// registers. Determine if any of the copies are available at the kill
+ /// points, and add defs if possible.
+ /// @param LI Scan for copies of the value in LI->reg.
+ /// @param LocNo Location number of LI->reg.
+ /// @param Kills Points where the range of LocNo could be extended.
+ /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(MachineRegisterInfo &MRI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// live. Returns true if any changes were made.
+  bool splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+  void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction. A
+  /// variable may have more than one corresponding DBG_VALUE instruction;
+  /// only the first one needs a DebugLoc to identify the variable's lexical
+  /// scope in the source file.
+ DebugLoc findDebugLoc();
+ void print(raw_ostream&, const TargetMachine*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+  /// @param Idx Last valid SlotIndex before the instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Replace all references to OldReg with NewRegs.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+ if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+ OS << "!\"" << MDS->getString() << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TM);
+ }
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, &MF->getTarget());
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
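+
+// Worked example (locations are hypothetical): with locations
+// {0: %reg1, 1: <fi#3>, 2: %reg1}, coalesceLocation(2) finds Loc2 identical to
+// Loc0, erases Loc2, rewrites interval values 2 -> 0, and shifts any value
+// greater than 2 down by one so the location numbering stays dense.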
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
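+
+// For reference, the operand layout the check above expects (a representative
+// example, not taken from any particular test case):
+//
+//   DBG_VALUE %reg16, 0, !"x"
+//             operand 0: location, operand 1: byte offset, operand 2: variable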
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ continue;
+ }
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+ } while (!Todo.empty());
+}
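+
+// In outline, extendDef is a worklist walk over the dominator tree (block
+// names are hypothetical): starting from the def in BB0 it fills locInts up to
+// the next def, the end of VNI's live range, or the end of BB0; only when the
+// block end is reached does it push the start index of each of BB0's
+// dominator-tree children (BB1, BB2, ...) onto Todo and continue from there.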
+
+void
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ if (Kills.empty())
+ return;
+ // Don't track copies from physregs, there are too many uses.
+ if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(LI->reg),
+ UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+ // Copies of the full value.
+ if (UI.getOperand().getSubReg() || !UI->isCopy())
+ continue;
+ MachineInstr *MI = &*UI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+    // arguments, and the argument registers are always call-clobbered. We are
+    // better off in the source register, which could be a callee-saved register,
+ // or it could be spilled.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ continue;
+
+ // Is LocNo extended to reach this copy? If not, another def may be blocking
+ // it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ LocMap::iterator I = locInts.find(Idx.getUseIndex());
+ if (!I.valid() || I.value() != LocNo)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
+ assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+ CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ }
+
+ if (CopyValues.empty())
+ return;
+
+ DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+ // Try to add defs of the copied values for each kill point.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ SlotIndex Idx = Kills[i];
+ for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+ LiveInterval *DstLI = CopyValues[j].first;
+ const VNInfo *DstVNI = CopyValues[j].second;
+ if (DstLI->getVNInfoAt(Idx) != DstVNI)
+ continue;
+ // Check that there isn't already a def at Idx
+ LocMap::iterator I = locInts.find(Idx);
+ if (I.valid() && I.start() <= Idx)
+ continue;
+ DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+ unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+ I.insert(Idx, Idx.getNextSlot(), LocNo);
+ NewDefs.push_back(std::make_pair(Idx, LocNo));
+ break;
+ }
+ }
+}
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ // Register locations are constrained to where the register value is live.
+ if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+ LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ SmallVector<SlotIndex, 16> Kills;
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT);
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ } else
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << ((Value*)mf.getFunction())->getName()
+ << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = locations.size(); i; --i) {
+ unsigned LocNo = i - 1;
+ MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg() || Loc.getReg() != OldReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ Loc.substPhysReg(NewReg, *TRI);
+ else
+ Loc.substVirtReg(NewReg, SubIdx, *TRI);
+ coalesceLocation(LocNo);
+ }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ UserValue *UV = lookupVirtReg(OldReg);
+ if (!UV)
+ return;
+
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ mapVirtReg(NewReg, UV);
+ virtRegToEqClass.erase(OldReg);
+
+ do {
+ UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+ UV = UV->getNext();
+ } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
+ DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), 0);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (unsigned i = 0; i != NewRegs.size(); ++i) {
+ LiveInterval *LI = NewRegs[i];
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = ~0u;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == ~0u) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(NewLocNo);
+
+ // Re-insert any removed OldLocNo ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldLocNo);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+ locations.erase(locations.begin() + OldLocNo);
+ LocMapI.goToBegin();
+ while (LocMapI.valid()) {
+ unsigned v = LocMapI.value();
+ if (v == OldLocNo) {
+ DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
+ LocMapI.erase();
+ } else {
+ if (v > OldLocNo)
+ LocMapI.setValueUnchecked(v-1);
+ ++LocMapI;
+ }
+ }
+
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+ return DidChange;
+}
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+  // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i]->reg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+ VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+    while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..3ce3c398bd4b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,70 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
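+
+// A minimal usage sketch from a register allocator's perspective (the
+// surrounding pass boilerplate and register values are assumed, not part of
+// this interface):
+//
+//   LiveDebugVariables &LDV = getAnalysis<LiveDebugVariables>();
+//   LDV.renameRegister(OldVReg, NewVReg, 0);  // after coalescing
+//   LDV.splitRegister(OldVReg, NewIntervals); // after live range splitting
+//   LDV.emitDebugValues(&VRM);                // once rewriting is done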
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 000000000000..cfade24b8d87
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,775 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ // This algorithm is basically std::upper_bound.
+ // Unfortunately, std::upper_bound cannot be used with mixed types until we
+ // adopt C++0x. Many libraries can do it, but not all.
+ if (empty() || Pos >= endIndex())
+ return end();
+ iterator I = begin();
+ size_t Len = ranges.size();
+ do {
+ size_t Mid = Len >> 1;
+ if (Pos < I[Mid].end)
+ Len = Mid;
+ else
+ I += Mid + 1, Len -= Mid + 1;
+ } while (Len);
+ return I;
+}
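+
+// For comparison, a sketch of the C++0x form alluded to above (illustrative
+// only; it assumes a lambda and a heterogeneous comparison are acceptable):
+//
+//   return std::upper_bound(begin(), end(), Pos,
+//                           [](SlotIndex P, const LiveRange &LR) {
+//                             return P < LR.end;
+//                           });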
+
+/// killedInRange - Return true if the interval has kills in [Start,End).
+bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
+ Ranges::const_iterator r =
+ std::lower_bound(ranges.begin(), ranges.end(), End);
+
+ // Now r points to the first interval with start >= End, or ranges.end().
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ // Now r points to the last interval with end <= End.
+ // r->end is the kill point.
+ return r->end >= Start && r->end < End;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty interval");
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = std::lower_bound(begin(), end(), End);
+ return I != begin() && (--I)->end > Start;
+}
+
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as unused so
+/// it can be nuked later.
+void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->setIsUnused(true);
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ bool seenPHIDef = false;
+ valnos.clear();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ VNInfo *VNI = I->valno;
+ if (!Seen.insert(VNI))
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live range");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ VNI->setHasPHIKill(false);
+ if (VNI->isPHIDef())
+ seenPHIDef = true;
+ }
+
+ // Recompute phi kill flags.
+ if (!seenPHIDef)
+ return;
+ for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (!VNI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def);
+ assert(PHIBB && "No basic block for phi-def");
+ for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(),
+ PE = PHIBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot());
+ if (KVNI)
+ KVNI->setHasPHIKill(true);
+ }
+ }
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = llvm::next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ Ranges::iterator Next = llvm::next(I);
+ if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
+ I->end = Next->end;
+ ranges.erase(Next);
+ }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(llvm::next(MergeTo), llvm::next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ SlotIndex Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
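+
+// Worked example (indices and value numbers are made up): if the interval
+// already holds [4,12) and [16,32) for the same value number, adding [10,18)
+// lands in extendIntervalEndTo, which grows [4,12) to [4,18) and then notices
+// that the result touches [16,32), folding everything into a single [4,32)
+// range.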
+
+/// extendInBlock - If this interval is live before UseIdx in the basic
+/// block that starts at StartIdx, extend it to be live at UseIdx and return
+/// the value. If there is no live range before UseIdx, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) {
+ if (empty())
+ return 0;
+ iterator I = std::upper_bound(begin(), end(), UseIdx);
+ if (I == begin())
+ return 0;
+ --I;
+ if (I->end <= StartIdx)
+ return 0;
+ if (I->end <= UseIdx)
+ extendIntervalEndTo(I, UseIdx.getNextSlot());
+ return I->valno;
+}
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
+ assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+/// findDefinedVNInfoForRegInt - Find the VNInfo defined by the specified
+/// index (register interval).
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->def == Idx)
+ return *i;
+ }
+
+ return 0;
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ MustMapCurValNos = true;
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+  // Remember assignments because val# ids are changing.
+ SmallVector<unsigned, 16> OtherAssignments;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ unsigned RangeNo = 0;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+ // Map the valno in the other live range to the current live range.
+ I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+ assert(I->valno && "Adding a dead range?");
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ ComputeJoinedWeight(Other);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; it will replace the value numbers of the overlapped
+/// live ranges with the specified value number.
+void LiveInterval::MergeValueInAsValue(
+ const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ if (I->valno != RHSValNo)
+ continue;
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
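+/// For illustration: if V1 owns [0,4) and V2 owns the adjacent [4,8), the
+/// result is a single [0,8) range owned by the surviving value number, which
+/// keeps V2's definition but may be renumbered to the smaller of the two ids.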
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Merge the relevant flags.
+ V2->mergeFlags(V1);
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
+
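+/// Copy - Overwrite this interval with the contents of RHS: the register
+/// allocation hint, the spill weight, fresh copies of all value numbers, and
+/// all live ranges (remapped onto the copied value numbers).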
+void LiveInterval::Copy(const LiveInterval &RHS,
+ MachineRegisterInfo *MRI,
+ VNInfo::Allocator &VNInfoAllocator) {
+ ranges.clear();
+ valnos.clear();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+ MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
+ weight = RHS.weight;
+ for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+ const VNInfo *VNI = RHS.getValNumInfo(i);
+ createValueCopy(VNI, VNInfoAllocator);
+ }
+ for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+ const LiveRange &LR = RHS.ranges[i];
+ addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+ }
+}
+
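+/// getSize - Return the total SlotIndex distance covered by this interval,
+/// i.e. the sum of the sizes of its live ranges.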
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->start.distance(I->end);
+ return Sum;
+}
+
+/// ComputeJoinedWeight - Set the weight of a live interval Joined
+/// after Other has been merged into it.
+void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
+ // If either of these intervals was spilled, the weight is the
+ // weight of the non-spilled interval. This can only happen with
+ // iterative coalescers.
+
+ if (Other.weight != HUGE_VALF) {
+ weight += Other.weight;
+ }
+ else if (weight == HUGE_VALF &&
+ !TargetRegisterInfo::isPhysicalRegister(reg)) {
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining to spilled interval");
+ weight = Other.weight;
+ }
+ else {
+ // Otherwise the weight stays the same
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining from spilled interval");
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+
+void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << PrintReg(reg, TRI);
+ if (weight != 0)
+ OS << ',' << weight;
+
+ if (empty())
+ OS << " EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->isUnused()) {
+ OS << "x";
+ } else {
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phidef";
+ if (vni->hasPHIKill())
+ OS << "-phikill";
+ if (vni->hasRedefByEC())
+ OS << "-ec";
+ }
+ }
+ }
+}
+
+void LiveInterval::dump() const {
+ dbgs() << *this << "\n";
+}
+
+
+void LiveRange::print(raw_ostream &os) const {
+ os << *this;
+}
+
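+/// Classify - Group the value numbers of LI into connected components. A
+/// phi-def is joined with the values live out of its predecessors, a normal
+/// def is joined with any value live immediately before it (a two-addr or
+/// early-clobber redef), and all unused values are lumped in with the last
+/// used value. Returns the number of classes. Illustrative use (names are
+/// hypothetical):
+/// unsigned NumComp = ConEQ.Classify(&LI);
+/// if (NumComp > 1) {
+/// // create NumComp-1 empty intervals, with LIV[0] = &LI
+/// ConEQ.Distribute(LIV, MRI);
+/// }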
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI =
+ LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
+
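+/// Distribute - After Classify has found more than one class, move each live
+/// range and value number belonging to class N > 0 from LIV[0] into LIV[N],
+/// and rewrite the register operands of the affected instructions to use the
+/// register of their new interval.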
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // Rewrite instructions.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MO.isUse() && MO.isUndef())
+ continue;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ const VNInfo *VNI = LI.getVNInfoAt(Idx);
+ assert(VNI && "Interval not live at use.");
+ MO.setReg(LIV[getEqClass(VNI)]->reg);
+ }
+
+ // Move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && EqClass[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = EqClass[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && EqClass[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = EqClass[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 000000000000..9257191f7fc0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2171 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals , "Number of original intervals");
+STATISTIC(numFolds , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+
+ if (!StrongPHIElim) {
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequiredID(PHIEliminationID);
+ }
+
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ AU.addPreserved<ProcessImplicitDefs>();
+ AU.addRequired<ProcessImplicitDefs>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+ E = r2iMap_.end(); I != E; ++I)
+ delete I->second;
+
+ r2iMap_.clear();
+
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ while (!CloneMIs.empty()) {
+ MachineInstr *MI = CloneMIs.back();
+ CloneMIs.pop_back();
+ mf_->DeleteMachineInstr(MI);
+ }
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &mf_->getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ aa_ = &getAnalysis<AliasAnalysis>();
+ lv_ = &getAnalysis<LiveVariables>();
+ indexes_ = &getAnalysis<SlotIndexes>();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+ computeIntervals();
+
+ numIntervals += getNumIntervals();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second->print(OS, tri_);
+ OS << "\n";
+ }
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+ mf_->print(OS, indexes_);
+}
+
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+
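+/// conflictsWithPhysReg - Return true if the live interval li, which is
+/// expected to cover a single range within one basic block, contains an
+/// instruction with an operand that maps to a physical register overlapping
+/// reg (copies to or from li.reg are ignored). Anything more complicated,
+/// such as multiple ranges or a range crossing a block boundary or
+/// terminator, is conservatively reported as a conflict.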
+bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
+ VirtRegMap &vrm, unsigned reg) {
+ // We don't handle fancy stuff crossing basic block boundaries
+ if (li.ranges.size() != 1)
+ return true;
+ const LiveRange &range = li.ranges.front();
+ SlotIndex idx = range.start.getBaseIndex();
+ SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
+
+ // Skip deleted instructions
+ MachineInstr *firstMI = getInstructionFromIndex(idx);
+ while (!firstMI && idx != end) {
+ idx = idx.getNextIndex();
+ firstMI = getInstructionFromIndex(idx);
+ }
+ if (!firstMI)
+ return false;
+
+ // Find last instruction in range
+ SlotIndex lastIdx = end.getPrevIndex();
+ MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
+ while (!lastMI && lastIdx != idx) {
+ lastIdx = lastIdx.getPrevIndex();
+ lastMI = getInstructionFromIndex(lastIdx);
+ }
+ if (!lastMI)
+ return false;
+
+ // Range cannot cross basic block boundaries or terminators
+ MachineBasicBlock *MBB = firstMI->getParent();
+ if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
+ return true;
+
+ MachineBasicBlock::const_iterator E = lastMI;
+ ++E;
+ for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Allow copies to and from li.reg
+ if (MI.isCopy())
+ if (MI.getOperand(0).getReg() == li.reg ||
+ MI.getOperand(1).getReg() == li.reg)
+ continue;
+
+ // Check for operands using reg
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& mop = MI.getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned PhysReg = mop.getReg();
+ if (PhysReg == 0 || PhysReg == li.reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+ if (!vrm.hasPhys(PhysReg))
+ continue;
+ PhysReg = vrm.getPhys(PhysReg);
+ }
+ if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+ return true;
+ }
+ }
+
+ // No conflicts found.
+ return false;
+}
+
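+/// conflictsWithAliasRef - Return true if any instruction covered by li's
+/// live ranges (ignoring instructions in JoinedCopies) has an operand using
+/// a physical register that overlaps Reg.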
+bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
+ SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (SlotIndex index = I->start.getBaseIndex(),
+ end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+ index != end;
+ index = index.getNextIndex()) {
+ MachineInstr *MI = getInstructionFromIndex(index);
+ if (!MI)
+ continue; // skip deleted instructions
+
+ if (JoinedCopies.count(MI))
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned PhysReg = MO.getReg();
+ if (PhysReg == 0 || PhysReg == Reg ||
+ TargetRegisterInfo::isVirtualRegister(PhysReg))
+ continue;
+ if (tri_->regsOverlap(Reg, PhysReg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static
+bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
+ unsigned Reg = MI.getOperand(MOIdx).getReg();
+ for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == Reg && MO.isDef()) {
+ assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
+ MI.getOperand(MOIdx).getSubReg() &&
+ (MO.getSubReg() || MO.isImplicit()));
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isPartialRedef - Return true if the specified def at the specific index is
+/// partially re-defining the specified live interval. A common case of this is
+/// a definition of the sub-register.
+bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
+ LiveInterval &interval) {
+ if (!MO.getSubReg() || MO.isEarlyClobber())
+ return false;
+
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ if (DefMI != 0) {
+ return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
+ }
+ return false;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator mi,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx,
+ LiveInterval &interval) {
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+
+ // Virtual registers may be defined multiple times (due to phi
+ // elimination and 2-addr elimination). Much of what we do only has to be
+ // done once for the vreg. We use an empty interval to detect the first
+ // time we see a vreg.
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+ if (interval.empty()) {
+ // Get the Idx of the defining instructions.
+ SlotIndex defIndex = MIIdx.getDefIndex();
+ // Earlyclobbers move back one, so that they overlap the live range
+ // of inputs.
+ if (MO.isEarlyClobber())
+ defIndex = MIIdx.getUseIndex();
+
+ // Make sure the first definition is not a partial redefinition. Add an
+ // <imp-def> of the full register.
+ if (MO.getSubReg())
+ mi->addRegisterDefined(interval.reg);
+
+ MachineInstr *CopyMI = NULL;
+ if (mi->isCopyLike()) {
+ CopyMI = mi;
+ }
+
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ SlotIndex killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ else
+ killIdx = defIndex.getStoreIndex();
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.empty() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << "\n");
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
+ DEBUG(dbgs() << " +" << NewLR);
+ interval.addRange(NewLR);
+
+ bool PHIJoin = lv_->isPHIJoin(interval.reg);
+
+ if (PHIJoin) {
+ // A phi join register is killed at the end of the MBB and revived as a new
+ // valno in the killing blocks.
+ assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
+ DEBUG(dbgs() << " phi-join");
+ ValNo->setHasPHIKill(true);
+ } else {
+ // Iterate over all of the blocks that the variable is completely
+ // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ SlotIndex Start = getMBBStartIdx(Kill->getParent());
+ SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+
+ // Create an interval with a NEW value number. Note that this value
+ // number isn't actually defined by an instruction, weird huh? :)
+ if (PHIJoin) {
+ assert(getInstructionFromIndex(Start) == 0 &&
+ "PHI def index points at actual instruction.");
+ ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
+ ValNo->setIsPHIDef(true);
+ }
+ LiveRange LR(Start, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+
+ } else {
+ if (MultipleDefsBySameMI(*mi, MOIdx))
+ // Multiple defs of the same virtual register by the same instruction.
+ // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+ // This is likely due to elimination of REG_SEQUENCE instructions. Return
+ // here since there is nothing to do.
+ return;
+
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination. If this is
+ // the result of two address elimination, then the vreg is one of the
+ // def-and-use register operands.
+
+ // It may also be partial redef like this:
+ // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+ // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+ bool PartReDef = isPartialRedef(MIIdx, MO, interval);
+ if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
+ // If this is a two-address definition, then we have already processed
+ // the live range. The only problem is that we didn't realize there
+ // are actually two values in the live interval. Because of this we
+ // need to take the LiveRegion that defines this register and split it
+ // into two values.
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
+ if (MO.isEarlyClobber())
+ RedefIndex = MIIdx.getUseIndex();
+
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ VNInfo *OldValNo = OldLR->valno;
+ SlotIndex DefIndex = OldValNo->def.getDefIndex();
+
+ // Delete the previous value, which should be short and continuous,
+ // because the 2-addr copy must be in the same MBB as the redef.
+ interval.removeRange(DefIndex, RedefIndex);
+
+ // The new value number (#1) is defined by the instruction we claimed
+ // defined value #0.
+ VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
+
+ // Value#0 is now defined by the 2-addr instruction.
+ OldValNo->def = RedefIndex;
+ OldValNo->setCopy(0);
+
+ // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
+ if (PartReDef && mi->isCopyLike())
+ OldValNo->setCopy(&*mi);
+
+ // Add the new live interval which replaces the range for the input copy.
+ LiveRange LR(DefIndex, RedefIndex, ValNo);
+ DEBUG(dbgs() << " replace range with " << LR);
+ interval.addRange(LR);
+
+ // If this redefinition is dead, we need to add a dummy unit live
+ // range covering the def slot.
+ if (MO.isDead())
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ OldValNo));
+
+ DEBUG({
+ dbgs() << " RESULT: ";
+ interval.print(dbgs(), tri_);
+ });
+ } else if (lv_->isPHIJoin(interval.reg)) {
+ // In the case of PHI elimination, each variable definition is only
+ // live until the end of the block. We've already taken care of the
+ // rest of the live range.
+
+ SlotIndex defIndex = MIIdx.getDefIndex();
+ if (MO.isEarlyClobber())
+ defIndex = MIIdx.getUseIndex();
+
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ if (mi->isCopyLike())
+ CopyMI = mi;
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+ SlotIndex killIndex = getMBBEndIdx(mbb);
+ LiveRange LR(defIndex, killIndex, ValNo);
+ interval.addRange(LR);
+ ValNo->setHasPHIKill(true);
+ DEBUG(dbgs() << " phi-join +" << LR);
+ } else {
+ llvm_unreachable("Multiply defined register");
+ }
+ }
+
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator mi,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ LiveInterval &interval,
+ MachineInstr *CopyMI) {
+ // A physical register cannot be live across basic blocks, so its
+ // lifetime must end somewhere in its defining basic block.
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex.getDefIndex();
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ start = MIIdx.getUseIndex();
+ SlotIndex end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ // For earlyclobbers, the defSlot was pushed back one; the extra
+ // advance below compensates.
+ if (MO.isDead()) {
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ baseIndex = baseIndex.getNextIndex();
+ while (++mi != MBB->end()) {
+
+ if (mi->isDebugValue())
+ continue;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ goto exit;
+ } else {
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
+ if (DefIdx != -1) {
+ if (mi->isRegTiedToUseOperand(DefIdx)) {
+ // Two-address instruction.
+ end = baseIndex.getDefIndex();
+ } else {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that
+ // defines it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ }
+ goto exit;
+ }
+ }
+
+ baseIndex = baseIndex.getNextIndex();
+ }
+
+ // The only case where we should have a dead physreg here without a killing
+ // instruction or other evidence that it's dead is when it is live-in to the
+ // function and never used. Another possible case is that the implicit use of
+ // the physical register has been deleted by the two-address pass.
+ end = start.getStoreIndex();
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ VNInfo *ValNo = interval.getVNInfoAt(start);
+ bool Extend = ValNo != 0;
+ if (!Extend)
+ ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (Extend && MO.isEarlyClobber())
+ ValNo->setHasRedefByEC(true);
+ LiveRange LR(start, end, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
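+/// handleRegisterDef - Update the live interval for the register defined by
+/// operand MO of MI, dispatching to the virtual or physical register handler
+/// as appropriate.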
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+ getOrCreateInterval(MO.getReg()));
+ else {
+ MachineInstr *CopyMI = NULL;
+ if (MI->isCopyLike())
+ CopyMI = MI;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(MO.getReg()), CopyMI);
+ }
+}
+
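+/// handleLiveInRegister - Add a live range for a register that is live in to
+/// MBB, running from MIIdx to its first kill or redefinition in the block, or
+/// through the whole block if neither is found (alias live-ins that are never
+/// used are instead marked dead). The value is created as a phi-def since no
+/// defining instruction is visible here.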
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+ SlotIndex MIIdx,
+ LiveInterval &interval, bool isAlias) {
+ DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
+
+ // Look for kills; if the register reaches a def before it's killed, it
+ // shouldn't be considered a live-in.
+ MachineBasicBlock::iterator mi = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ // Skip over DBG_VALUE at the start of the MBB.
+ if (mi != E && mi->isDebugValue()) {
+ while (++mi != E && mi->isDebugValue())
+ ;
+ if (mi == E)
+ // MBB is empty except for DBG_VALUE's.
+ return;
+ }
+
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ SlotIndex end = baseIndex;
+ bool SeenDefUse = false;
+
+ while (mi != E) {
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ SeenDefUse = true;
+ break;
+ } else if (mi->definesRegister(interval.reg, tri_)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ SeenDefUse = true;
+ break;
+ }
+
+ while (++mi != E && mi->isDebugValue())
+ // Skip over DBG_VALUE.
+ ;
+ if (mi != E)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+ }
+
+ // Live-in register might not be used at all.
+ if (!SeenDefUse) {
+ if (isAlias) {
+ DEBUG(dbgs() << " dead");
+ end = MIIdx.getStoreIndex();
+ } else {
+ DEBUG(dbgs() << " live through");
+ end = getMBBEndIdx(MBB);
+ }
+ }
+
+ SlotIndex defIdx = getMBBStartIdx(MBB);
+ assert(getInstructionFromIndex(defIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *vni =
+ interval.getNextValue(defIdx, 0, VNInfoAllocator);
+ vni->setIsPHIDef(true);
+ LiveRange LR(start, end, vni);
+
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+/// computeIntervals - Compute the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N], a
+/// live interval is an interval [i, j) where 1 <= i <= j < N for
+/// which a variable is live.
+void LiveIntervals::computeIntervals() {
+ DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ SmallVector<unsigned, 8> UndefUses;
+ for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (MBB->empty())
+ continue;
+
+ // Track the index of the current machine instr.
+ SlotIndex MIIndex = getMBBStartIdx(MBB);
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << ":\t\t# derived from " << MBB->getName() << "\n");
+
+ // Create intervals for live-ins to this BB first.
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end(); LI != LE; ++LI) {
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+ // Multiple live-ins can alias the same register.
+ for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
+ if (!hasInterval(*AS))
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+ true);
+ }
+
+ // Skip over empty initial indices.
+ if (getInstructionFromIndex(MIIndex) == 0)
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+
+ for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+ MI != miEnd; ++MI) {
+ DEBUG(dbgs() << MIIndex << "\t" << *MI);
+ if (MI->isDebugValue())
+ continue;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ // handle register defs - build intervals
+ if (MO.isDef())
+ handleRegisterDef(MBB, MI, MIIndex, MO, i);
+ else if (MO.isUndef())
+ UndefUses.push_back(MO.getReg());
+ }
+
+ // Move to the next instr slot.
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ }
+ }
+
+ // Create empty intervals for registers defined by implicit_def's (except
+ // for those implicit_defs that define values which are live out of their
+ // blocks).
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
+ unsigned UndefReg = UndefUses[i];
+ (void)getOrCreateInterval(UndefReg);
+ }
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+ LiveInterval *NewLI = createInterval(li->reg);
+ NewLI->Copy(*li, mri_, getVNInfoAllocator());
+ return NewLI;
+}
+
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
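+/// A typical calling pattern (illustrative only; LIS stands for this
+/// LiveIntervals analysis, LI for a virtual register's interval, and the
+/// exact cleanup is up to the caller):
+///   SmallVector<MachineInstr*, 8> Dead;
+///   if (LIS->shrinkToUses(&LI, &Dead)) {
+///     // The interval may now consist of several connected components.
+///   }
+///   for (unsigned i = 0, e = Dead.size(); i != e; ++i) {
+///     LIS->RemoveMachineInstrFromMaps(Dead[i]);
+///     Dead[i]->eraseFromParent();
+///   }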
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can't only shrink physical registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+ VNInfo *VNI = li->getVNInfoAt(Idx);
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ DEBUG(dbgs() << Idx << '\t' << *UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ if (VNI->def == Idx) {
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ Idx = Idx.getPrevSlot();
+ VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Early-clobber tied value not available");
+ }
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ // We may eliminate PHI values, so recompute PHIKill flags.
+ VNI->setHasPHIKill(false);
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+
+ // A use tied to an early-clobber def ends at the load slot and isn't caught
+ // above. Catch it here instead. This probably only ever happens for inline
+ // assembly.
+ if (VNI->def.isUse())
+ if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
+ WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+ }
+
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+ (void)ExtVNI;
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ VNInfo *PVNI = li->getVNInfoAt(Stop);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (PVNI) {
+ PVNI->setHasPHIKill(true);
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ bool CanSeparate = false;
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getNextSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->setIsUnused(true);
+ NewLI.removeRange(*LII);
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ CanSeparate = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, tri_);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+ MachineBasicBlock *mbb) const {
+ const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+ // If li is not live into a landing pad, we can insert spill code before the
+ // first terminator.
+ if (!lpad || !isLiveInToMBB(li, lpad))
+ return mbb->getFirstTerminator();
+
+ // When there is a landing pad, spill code must go before the call instruction
+ // that can throw.
+ MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+ while (I != B) {
+ --I;
+ if (I->getDesc().isCall())
+ return I;
+ }
+ // The block contains no calls that can throw, so use the first terminator.
+ return mbb->getFirstTerminator();
+}
+
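+/// addKillFlags - Mark the instruction at the end of each live range of every
+/// virtual register as killing that register. Range ends at block boundaries
+/// (LOAD slots) are skipped.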
+void LiveIntervals::addKillFlags() {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (mri_->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = I->second;
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A LOAD index indicates an MBB edge.
+ if (RI->end.isLoad())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+ MachineInstr *MI) const {
+ unsigned RegOp = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == li.reg)
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !allocatableRegs_[Reg])
+ continue;
+ // FIXME: For now, only remat MI with at most one register operand.
+ assert(!RegOp &&
+ "Can't rematerialize instruction with multiple register operand!");
+ RegOp = MO.getReg();
+#ifndef NDEBUG
+ break;
+#endif
+ }
+ return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+ SlotIndex UseIdx) const {
+ VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+ return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ bool &isLoad) {
+ if (DisableReMat)
+ return false;
+
+ if (!tii_->isTriviallyReMaterializable(MI, aa_))
+ return false;
+
+ // Target-specific code can mark an instruction as being rematerializable
+ // if it has one virtual reg use, though it had better be something like
+ // a PIC base register which is likely to be live everywhere.
+ unsigned ImpUse = getReMatImplicitUse(li, MI);
+ if (ImpUse) {
+ const LiveInterval &ImpLi = getInterval(ImpUse);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
+ ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ SlotIndex UseIdx = getInstructionIndex(UseMI);
+ if (li.getVNInfoAt(UseIdx) != ValNo)
+ continue;
+ if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+ return false;
+ }
+
+ // If a register operand of the re-materialized instruction is going to
+ // be spilled next, then it's not legal to re-materialize this instruction.
+ if (SpillIs)
+ for (unsigned i = 0, e = SpillIs->size(); i != e; ++i)
+ if (ImpUse == (*SpillIs)[i]->reg)
+ return false;
+ }
+ return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI) {
+ bool Dummy2;
+ return isReMaterializable(li, ValNo, MI, 0, Dummy2);
+}
+
+/// isReMaterializable - Returns true if every definition of MI of every
+/// val# of the specified interval is re-materializable.
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ bool &isLoad) {
+ isLoad = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ if (!ReMatDefMI)
+ return false;
+ bool DefIsLoad = false;
+ if (!ReMatDefMI ||
+ !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+ return false;
+ isLoad |= DefIsLoad;
+ }
+ return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ unsigned &MRInfo,
+ SmallVector<unsigned, 2> &FoldOps) {
+ MRInfo = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned OpIdx = Ops[i];
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ // FIXME: fold subreg use.
+ if (MO.getSubReg())
+ return true;
+ if (MO.isDef())
+ MRInfo |= (unsigned)VirtRegMap::isMod;
+ else {
+ // Filter out two-address use operand(s).
+ if (MI->isRegTiedToDefOperand(OpIdx)) {
+ MRInfo = VirtRegMap::isModRef;
+ continue;
+ }
+ MRInfo |= (unsigned)VirtRegMap::isRef;
+ }
+ FoldOps.push_back(OpIdx);
+ }
+ return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into ith operand of specified
+/// MI. If it is successful, MI is updated with the newly created MI and
+/// true is returned.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+ VirtRegMap &vrm, MachineInstr *DefMI,
+ SlotIndex InstrIdx,
+ SmallVector<unsigned, 2> &Ops,
+ bool isSS, int Slot, unsigned Reg) {
+ // If it is an implicit def instruction, just delete it.
+ if (MI->isImplicitDef()) {
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++numFolds;
+ return true;
+ }
+
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // The only time it's safe to fold into a two address instruction is when
+ // it's folding reload and spill from / into a spill stack slot.
+ if (DefMI && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
+ if (fmi) {
+ // Remember this instruction uses the spill slot.
+ if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+ // Attempt to fold the memory reference into the instruction. If
+ // we can do this, we don't need to insert spill code.
+ if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+ vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+ vrm.transferSpillPts(MI, fmi);
+ vrm.transferRestorePts(MI, fmi);
+ vrm.transferEmergencySpills(MI, fmi);
+ ReplaceMachineInstrInMaps(MI, fmi);
+ MI->eraseFromParent();
+ MI = fmi;
+ ++numFolds;
+ return true;
+ }
+ return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ bool ReMat) const {
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // It's only legal to remat for a use, not a def.
+ if (ReMat && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
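+/// intervalIsInOneMBB - Return true if all live ranges of li are contained in
+/// a single basic block.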
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+ LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+
+ MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb == 0)
+ return false;
+
+ for (++itr; itr != li.ranges.end(); ++itr) {
+ MachineBasicBlock *mbb2 =
+ indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb2 != mbb)
+ return false;
+ }
+
+ return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+ MachineInstr *MI, unsigned NewVReg,
+ VirtRegMap &vrm) {
+ // There is an implicit use. That means one of the other operands is
+ // being remat'ed and the remat'ed instruction has li.reg as a
+ // use operand. Make sure we rewrite that as well.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (!vrm.isReMaterialized(Reg))
+ continue;
+ MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+ MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+ if (UseMO)
+ UseMO->setReg(NewVReg);
+ }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+ bool TrySplit, SlotIndex index, SlotIndex end,
+ MachineInstr *MI,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool CanFold = false;
+ RestartInstruction:
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned Reg = mop.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (Reg != li.reg)
+ continue;
+
+ bool TryFold = !DefIsReMat;
+ bool FoldSS = true; // Default behavior unless it's a remat.
+ int FoldSlot = Slot;
+ if (DefIsReMat) {
+ // If this is the rematerializable definition MI itself and
+ // all of its uses are rematerialized, simply delete it.
+ if (MI == ReMatOrigDefMI && CanDelete) {
+ DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
+ << *MI << '\n');
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // If def for this use can't be rematerialized, then try folding.
+ // If def is rematerializable and it's a load, also try folding.
+ TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+ if (isLoad) {
+ // Try fold loads (from stack slot, constant pool, etc.) into uses.
+ FoldSS = isLoadSS;
+ FoldSlot = LdSlot;
+ }
+ }
+
+ // Scan all of the operands of this instruction rewriting operands
+ // to use NewVReg instead of li.reg as appropriate. We do this for
+ // two reasons:
+ //
+ // 1. If the instr reads the same spilled vreg multiple times, we
+ // want to reuse the NewVReg.
+ // 2. If the instr is a two-addr instruction, we are required to
+ // keep the src/dst regs pinned.
+ //
+ // Keep track of whether we replace a use and/or def so that we can
+ // create the spill interval with the appropriate range.
+ SmallVector<unsigned, 2> Ops;
+ tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+ // Create a new virtual register for the spill interval.
+ // Create the new register now so we can map the fold instruction
+ // to the new register so when it is unfolded we get the correct
+ // answer.
+ bool CreatedNewVReg = false;
+ if (NewVReg == 0) {
+ NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ CreatedNewVReg = true;
+
+ // The new virtual register should get the same allocation hints as the
+ // old one.
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
+ if (Hint.first || Hint.second)
+ mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
+ }
+
+ if (!TryFold)
+ CanFold = false;
+ else {
+ // Do not fold load / store here if we are splitting. We'll find an
+ // optimal point to insert a load / store later.
+ if (!TrySplit) {
+ if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, FoldSS, FoldSlot, NewVReg)) {
+ // Folding the load/store can completely change the instruction in
+ // unpredictable ways, rescan it from the beginning.
+
+ if (FoldSS) {
+ // We need to give the new vreg the same stack slot as the
+ // spilled interval.
+ vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+ }
+
+ HasUse = false;
+ HasDef = false;
+ CanFold = false;
+ if (isNotInMIMap(MI))
+ break;
+ goto RestartInstruction;
+ }
+ } else {
+ // We'll try to fold it later if it's profitable.
+ CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+ }
+ }
+
+ mop.setReg(NewVReg);
+ if (mop.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+ // Reuse NewVReg for other reads.
+ bool HasEarlyClobber = false;
+ for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+ MachineOperand &mopj = MI->getOperand(Ops[j]);
+ mopj.setReg(NewVReg);
+ if (mopj.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+ if (mopj.isEarlyClobber())
+ HasEarlyClobber = true;
+ }
+
+ if (CreatedNewVReg) {
+ if (DefIsReMat) {
+ vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
+ if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+ // Each valnum may have its own remat id.
+ ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+ } else {
+ vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+ }
+ if (!CanDelete || (HasUse && HasDef)) {
+ // If this is a two-addr instruction then its use operands are
+ // rematerializable but its def is not. It should be assigned a
+ // stack slot.
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else {
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else if (HasUse && HasDef &&
+ vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+ // If this interval hasn't been assigned a stack slot (because earlier
+ // def is a deleted remat def), do it now.
+ assert(Slot != VirtRegMap::NO_STACK_SLOT);
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI.
+ if (DefIsReMat && ImpUse)
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+ // Create a new register interval for this spill / remat.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (CreatedNewVReg) {
+ NewLIs.push_back(&nI);
+ MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+ if (TrySplit)
+ vrm.setIsSplitFromReg(NewVReg, li.reg);
+ }
+
+ if (HasUse) {
+ if (CreatedNewVReg) {
+ LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ } else {
+ // Extend the split live interval to this def / use.
+ SlotIndex End = index.getDefIndex();
+ LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+ nI.getValNumInfo(nI.getNumValNums()-1));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ }
+ }
+ if (HasDef) {
+ // An early clobber starts at the use slot, except for an early clobber
+ // tied to a use operand (yes, that is a thing).
+ LiveRange LR(HasEarlyClobber && !HasUse ?
+ index.getUseIndex() : index.getDefIndex(),
+ index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ }
+
+ DEBUG({
+ dbgs() << "\t\t\t\tAdded new interval: ";
+ nI.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+ }
+ return CanFold;
+}
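+
+/// anyKillInMBBAfterIdx - Return true if the live interval li is killed
+/// anywhere in MBB after the slot index Idx.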
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+ const VNInfo *VNI,
+ MachineBasicBlock *MBB,
+ SlotIndex Idx) const {
+ return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+ struct RewriteInfo {
+ SlotIndex Index;
+ MachineInstr *MI;
+ RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
+ };
+
+ struct RewriteInfoCompare {
+ bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+ LiveInterval::Ranges::const_iterator &I,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ BitVector &SpillMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool AllCanFold = true;
+ unsigned NewVReg = 0;
+ SlotIndex start = I->start.getBaseIndex();
+ SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+
+ // First collect all the def / use in this live range that will be rewritten.
+ // Make sure they are sorted according to instruction index.
+ std::vector<RewriteInfo> RewriteMIs;
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineInstr *MI = &*ri;
+ MachineOperand &O = ri.getOperand();
+ ++ri;
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ int FI = isLoadSS ? LdSlot : (int)Slot;
+ if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ ReplaceMachineInstrInMaps(MI, NewDV);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ continue;
+ }
+ assert(!(O.isImplicit() && O.isUse()) &&
+ "Spilling register that's used as implicit use?");
+ SlotIndex index = getInstructionIndex(MI);
+ if (index < start || index >= end)
+ continue;
+
+ if (O.isUndef())
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons, e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and both target registers would overlap.
+ continue;
+ RewriteMIs.push_back(RewriteInfo(index, MI));
+ }
+ std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+ unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+ // Now rewrite the defs and uses.
+ for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+ RewriteInfo &rwi = RewriteMIs[i];
+ ++i;
+ SlotIndex index = rwi.Index;
+ MachineInstr *MI = rwi.MI;
+ // If MI defines and/or uses the same register multiple times, then there
+ // are multiple entries.
+ while (i != e && RewriteMIs[i].MI == MI) {
+ assert(RewriteMIs[i].Index == index);
+ ++i;
+ }
+ MachineBasicBlock *MBB = MI->getParent();
+
+ if (ImpUse && MI != ReMatDefMI) {
+ // Re-matting an instruction with virtual register use. Prevent interval
+ // from being spilled.
+ getInterval(ImpUse).markNotSpillable();
+ }
+
+ unsigned MBBId = MBB->getNumber();
+ unsigned ThisVReg = 0;
+ if (TrySplit) {
+ DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+ if (NVI != MBBVRegsMap.end()) {
+ ThisVReg = NVI->second;
+ // One common case:
+ // x = use
+ // ...
+ // ...
+ // def = ...
+ // = use
+ // It's better to start a new interval to avoid artificially
+ // extending the new interval.
+ if (MI->readsWritesVirtualRegister(li.reg) ==
+ std::make_pair(false,true)) {
+ MBBVRegsMap.erase(MBB->getNumber());
+ ThisVReg = 0;
+ }
+ }
+ }
+
+ bool IsNew = ThisVReg == 0;
+ if (IsNew) {
+ // This ends the previous live interval. If all of its def / use
+ // can be folded, give it a low spill weight.
+ if (NewVReg && TrySplit && AllCanFold) {
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+ AllCanFold = true;
+ }
+ NewVReg = ThisVReg;
+
+ bool HasDef = false;
+ bool HasUse = false;
+ bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+ index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+ ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+ if (!HasDef && !HasUse)
+ continue;
+
+ AllCanFold &= CanFold;
+
+ // Update weight of spill interval.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (!TrySplit) {
+ // The spill weight is now infinity as it cannot be spilled again.
+ nI.markNotSpillable();
+ continue;
+ }
+
+ // Keep track of the last def and first use in each MBB.
+ if (HasDef) {
+ if (MI != ReMatOrigDefMI || !CanDelete) {
+ bool HasKill = false;
+ if (!HasUse)
+ HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
+ else {
+ // If this is a two-address instruction, then this index starts a new VNInfo.
+ const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
+ if (VNI)
+ HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
+ }
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (!HasKill) {
+ if (SII == SpillIdxes.end()) {
+ std::vector<SRInfo> S;
+ S.push_back(SRInfo(index, NewVReg, true));
+ SpillIdxes.insert(std::make_pair(MBBId, S));
+ } else if (SII->second.back().vreg != NewVReg) {
+ SII->second.push_back(SRInfo(index, NewVReg, true));
+ } else if (index > SII->second.back().index) {
+ // If there is an earlier def and this is a two-address
+ // instruction, then it's not possible to fold the store (which
+ // would also fold the load).
+ SRInfo &Info = SII->second.back();
+ Info.index = index;
+ Info.canFold = !HasUse;
+ }
+ SpillMBBs.set(MBBId);
+ } else if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ index > SII->second.back().index) {
+ // There is an earlier def that's not killed (must be two-address).
+ // The spill is no longer needed.
+ SII->second.pop_back();
+ if (SII->second.empty()) {
+ SpillIdxes.erase(MBBId);
+ SpillMBBs.reset(MBBId);
+ }
+ }
+ }
+ }
+
+ if (HasUse) {
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ index > SII->second.back().index)
+ // There are use(s) following the last def; it's not safe to fold the spill.
+ SII->second.back().canFold = false;
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+ RestoreIdxes.find(MBBId);
+ if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+ // If we are splitting live intervals, only fold if it's the first
+ // use and there isn't another use later in the MBB.
+ RII->second.back().canFold = false;
+ else if (IsNew) {
+ // Only need a reload if there isn't an earlier def / use.
+ if (RII == RestoreIdxes.end()) {
+ std::vector<SRInfo> Infos;
+ Infos.push_back(SRInfo(index, NewVReg, true));
+ RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+ } else {
+ RII->second.push_back(SRInfo(index, NewVReg, true));
+ }
+ RestoreMBBs.set(MBBId);
+ }
+ }
+
+ // Update spill weight.
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ }
+
+ if (NewVReg && TrySplit && AllCanFold) {
+ // If all of its def / use can be folded, give it a low spill weight.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+}
+
+bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return false;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index &&
+ Restores[i].vreg == vr &&
+ Restores[i].canFold)
+ return true;
+ return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index && Restores[i].vreg == vr)
+ Restores[i].index = SlotIndex();
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ std::vector<LiveInterval*> &NewLIs) {
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineOperand &O = ri.getOperand();
+ MachineInstr *MI = &*ri;
+ ++ri;
+ if (MI->isDebugValue()) {
+ // Remove debug info for now.
+ O.setReg(0U);
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ continue;
+ }
+ if (O.isDef()) {
+ assert(MI->isImplicitDef() &&
+ "Register def was not rewritten?");
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ } else {
+ // This must be a use of an implicit_def, so it's not part of the live
+ // interval. Create a new empty live interval for it.
+ // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.setIsImplicitlyDefined(NewVReg);
+ NewLIs.push_back(&getOrCreateInterval(NewVReg));
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg) {
+ MO.setReg(NewVReg);
+ MO.setIsUndef();
+ }
+ }
+ }
+ }
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
+
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
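+ // For a sense of scale (rounded values): d=1 gives a factor of about 10.1,
+ // d=2 about 87, d=3 about 657, and d=10 about 6.0e7, whereas a plain 10^d
+ // would already overflow a float around d=39.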
+ // By the way, powf() might be unavailable here, so for consistency we
+ // use pow(double, double) instead.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
+
+ return (isDef + isUse) * lc;
+}
+
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
+ NewLIs[i]->weight =
+ normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+ assert(li.isSpillable() && "attempt to spill already spilled interval!");
+
+ DEBUG({
+ dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+
+ // Each bit specifies whether a spill is required in the corresponding MBB.
+ BitVector SpillMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+ BitVector RestoreMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+ DenseMap<unsigned,unsigned> MBBVRegsMap;
+ std::vector<LiveInterval*> NewLIs;
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ unsigned NumValNums = li.getNumValNums();
+ SmallVector<MachineInstr*, 4> ReMatDefs;
+ ReMatDefs.resize(NumValNums, NULL);
+ SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+ ReMatOrigDefs.resize(NumValNums, NULL);
+ SmallVector<int, 4> ReMatIds;
+ ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+ BitVector ReMatDelete(NumValNums);
+ unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+ // Spilling a split live interval. It cannot be split any further. It's
+ // also guaranteed to be a single val# / range interval.
+ if (vrm.getPreSplitReg(li.reg)) {
+ vrm.setIsSplitFromReg(li.reg, 0);
+ // Unset the split kill marker on the last use.
+ SlotIndex KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx != SlotIndex()) {
+ MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+ assert(KillMI && "Last use disappeared?");
+ int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+ assert(KillOp != -1 && "Last use disappeared?");
+ KillMI->getOperand(KillOp).setIsKill(false);
+ }
+ vrm.removeKillPoint(li.reg);
+ bool DefIsReMat = vrm.isReMaterialized(li.reg);
+ Slot = vrm.getStackSlot(li.reg);
+ assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+ MachineInstr *ReMatDefMI = DefIsReMat ?
+ vrm.getReMaterializedMI(li.reg) : NULL;
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+ bool IsFirstRange = true;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ // If this is a split live interval with multiple ranges, it means there
+ // are two-address instructions that re-defined the value. Only the
+ // first def can be rematerialized!
+ if (IsFirstRange) {
+ // Note ReMatOrigDefMI has already been deleted.
+ rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ } else {
+ rewriteInstructionsForSpills(li, false, I, NULL, 0,
+ Slot, 0, false, false, false,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+ IsFirstRange = false;
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
+ return NewLIs;
+ }
+
+ bool TrySplit = !intervalIsInOneMBB(li);
+ if (TrySplit)
+ ++numSplits;
+ bool NeedStackSlot = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ bool dummy;
+ if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+ // Remember how to remat the def of this val#.
+ ReMatOrigDefs[VN] = ReMatDefMI;
+ // Original def may be modified so we have to make a copy here.
+ MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+ CloneMIs.push_back(Clone);
+ ReMatDefs[VN] = Clone;
+
+ bool CanDelete = true;
+ if (VNI->hasPHIKill()) {
+ // A kill is a phi node, so not all of its uses can be rematerialized.
+ // It must not be deleted.
+ CanDelete = false;
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ if (CanDelete)
+ ReMatDelete.set(VN);
+ } else {
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ }
+
+ // One stack slot per live interval.
+ if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+ if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+ Slot = vrm.assignVirt2StackSlot(li.reg);
+
+ // This case only occurs when the prealloc splitter has already assigned
+ // a stack slot to this vreg.
+ else
+ Slot = vrm.getStackSlot(li.reg);
+ }
+
+ // Create new intervals and rewrite defs and uses.
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+ MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+ bool DefIsReMat = ReMatDefMI != NULL;
+ bool CanDelete = ReMatDelete[I->valno->id];
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+ rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+
+ // Insert spills / restores if we are splitting.
+ if (!TrySplit) {
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
+ return NewLIs;
+ }
+
+ SmallPtrSet<LiveInterval*, 4> AddedKill;
+ SmallVector<unsigned, 2> Ops;
+ if (NeedStackSlot) {
+ int Id = SpillMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &spills = SpillIdxes[Id];
+ for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+ SlotIndex index = spills[i].index;
+ unsigned VReg = spills[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ bool FoundUse = false;
+ Ops.clear();
+ if (spills[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ Ops.push_back(j);
+ if (MO.isDef())
+ continue;
+ if (isReMat ||
+ (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+ RestoreMBBs, RestoreIdxes))) {
+ // MI has two-address uses of the same register. If the use
+ // isn't the first and only use in the BB, then we can't fold
+ // it. FIXME: Move this to rewriteInstructionsForSpills.
+ CanFold = false;
+ break;
+ }
+ FoundUse = true;
+ }
+ }
+ // Fold the store into the def if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
+ Folded = true;
+ if (FoundUse) {
+ // Also folded uses, do not issue a load.
+ eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+ }
+ nI.removeRange(index.getDefIndex(), index.getStoreIndex());
+ }
+ }
+
+ // Otherwise tell the spiller to issue a spill.
+ if (!Folded) {
+ LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+ bool isKill = LR->end == index.getStoreIndex();
+ if (!MI->registerDefIsDead(nI.reg))
+ // No need to spill a dead def.
+ vrm.addSpillPoint(VReg, isKill, MI);
+ if (isKill)
+ AddedKill.insert(&nI);
+ }
+ }
+ Id = SpillMBBs.find_next(Id);
+ }
+ }
+
+ int Id = RestoreMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+ SlotIndex index = restores[i].index;
+ if (index == SlotIndex())
+ continue;
+ unsigned VReg = restores[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ Ops.clear();
+ if (restores[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ if (MO.isDef()) {
+ // If this restore were to be folded, it would have been folded
+ // already.
+ CanFold = false;
+ break;
+ }
+ Ops.push_back(j);
+ }
+ }
+
+ // Fold the load into the use if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (!isReMat)
+ Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
+ else {
+ MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+ int LdSlot = 0;
+ bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ // If the rematerializable def is a load, also try to fold it.
+ if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+ Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, isLoadSS, LdSlot, VReg);
+ if (!Folded) {
+ unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+ if (ImpUse) {
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI and mark the register
+ // interval as unspillable.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.markNotSpillable();
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+ }
+ }
+ }
+ }
+ // If folding is not possible / failed, then tell the spiller to issue a
+ // load / rematerialization for us.
+ if (Folded)
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+ else
+ vrm.addRestorePoint(VReg, MI);
+ }
+ Id = RestoreMBBs.find_next(Id);
+ }
+
+ // Finalize intervals: add kills, finalize spill weights, and filter out
+ // dead intervals.
+ std::vector<LiveInterval*> RetNewLIs;
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+ LiveInterval *LI = NewLIs[i];
+ if (!LI->empty()) {
+ if (!AddedKill.count(LI)) {
+ LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+ SlotIndex LastUseIdx = LR->end.getBaseIndex();
+ MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+ int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+ assert(UseIdx != -1);
+ if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+ LastUse->getOperand(UseIdx).setIsKill();
+ vrm.addKillPoint(LI->reg, LastUseIdx);
+ }
+ }
+ RetNewLIs.push_back(LI);
+ }
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ normalizeSpillWeights(RetNewLIs);
+ return RetNewLIs;
+}
+
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+ if (allocatableRegs_[*AS] && hasInterval(*AS))
+ return true;
+ return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+ // Find the largest super-register that is allocatable.
+ unsigned BestReg = Reg;
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+ unsigned SuperReg = *AS;
+ if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+ BestReg = SuperReg;
+ break;
+ }
+ }
+ return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+ unsigned PhysReg) const {
+ unsigned NumConflicts = 0;
+ const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (MI->isDebugValue())
+ continue;
+ SlotIndex Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index))
+ ++NumConflicts;
+ }
+ return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+ unsigned PhysReg, VirtRegMap &vrm) {
+ unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+ DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+ << " represented by " << tri_->getName(SpillReg) << '\n');
+
+ for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+ // If there are registers that alias PhysReg but are not sub-registers
+ // of the chosen representative super register, assert since we can't
+ // handle them yet.
+ assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+ tri_->isSuperRegister(*AS, SpillReg));
+
+ bool Cut = false;
+ SmallVector<unsigned, 4> PRegs;
+ if (hasInterval(SpillReg))
+ PRegs.push_back(SpillReg);
+ for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+ if (hasInterval(*SR))
+ PRegs.push_back(*SR);
+
+ DEBUG({
+ dbgs() << "Trying to spill:";
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+ dbgs() << ' ' << tri_->getName(PRegs[i]);
+ dbgs() << '\n';
+ });
+
+ SmallPtrSet<MachineInstr*, 8> SeenMIs;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (MI->isDebugValue() || SeenMIs.count(MI))
+ continue;
+ SeenMIs.insert(MI);
+ SlotIndex Index = getInstructionIndex(MI);
+ bool LiveReg = false;
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
+ unsigned PReg = PRegs[i];
+ LiveInterval &pli = getInterval(PReg);
+ if (!pli.liveAt(Index))
+ continue;
+ LiveReg = true;
+ SlotIndex StartIdx = Index.getLoadIndex();
+ SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
+ if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, tm_);
+ }
+ report_fatal_error(Msg.str());
+ }
+ pli.removeRange(StartIdx, EndIdx);
+ LiveReg = true;
+ }
+ if (!LiveReg)
+ continue;
+ DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+ vrm.addEmergencySpill(SpillReg, MI);
+ Cut = true;
+ }
+ return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ startInst, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..70003e7cc86a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,325 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << "LIU " << PrintReg(RepReg, TRI);
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ OS << '[' << start() << ';' << stop() << "):"
+ << PrintReg(interference()->reg, TRI);
+}
+
+void LiveIntervalUnion::Query::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ OS << "Interferences with ";
+ LiveUnion->print(OS, TRI);
+ InterferenceResult IR = firstInterference();
+ while (isInterference(IR)) {
+ OS << " ";
+ IR.print(OS, TRI);
+ OS << '\n';
+ nextInterference(IR);
+ }
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Private interface accessed by Query.
+//
+// Find a pair of segments that intersect, one in the live virtual register
+// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
+// is responsible for advancing the LiveIntervalUnion segments to find a
+// "notable" intersection, which requires query-specific logic.
+//
+// This design assumes only a fast mechanism for intersecting a single live
+// virtual register segment with a set of LiveIntervalUnion segments. This may
+// be ok since most virtual registers have very few segments. If we had a data
+// structure that optimized MxN intersection of segments, then we would bypass
+// the loop that advances within the LiveInterval.
+//
+// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
+// segment whose start point is greater than LiveInterval's end point.
+//
+// Assumes that segments are sorted by start position in both
+// LiveInterval and LiveSegments.
+void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
+ // Search until reaching the end of the LiveUnion segments.
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ if (IR.VirtRegI == VirtRegEnd)
+ return;
+ while (IR.LiveUnionI.valid()) {
+ // Slowly advance the live virtual reg iterator until we surpass the next
+ // segment in LiveUnion.
+ //
+ // Note: If this is ever used for coalescing of fixed registers and we have
+ // a live vreg with thousands of segments, then change this code to use
+ // upperBound instead.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break; // Retain current (nonoverlapping) LiveUnionI
+
+ // VirtRegI may have advanced far beyond LiveUnionI, catch up.
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+ // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
+ if (!IR.LiveUnionI.valid())
+ break;
+ if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "upperBound postcondition");
+ break;
+ }
+ }
+ if (!IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtRegEnd;
+}
+
+// Find the first intersection, and cache interference info
+// (retain segment iterators into both VirtReg and LiveUnion).
+const LiveIntervalUnion::InterferenceResult &
+LiveIntervalUnion::Query::firstInterference() {
+ if (CheckedFirstInterference)
+ return FirstInterference;
+ CheckedFirstInterference = true;
+ InterferenceResult &IR = FirstInterference;
+ IR.LiveUnionI.setMap(LiveUnion->getMap());
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ IR.VirtRegI = VirtReg->end();
+ } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
+ // VirtReg starts first, perform double binary search.
+ IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
+ if (IR.VirtRegI != VirtReg->end())
+ IR.LiveUnionI.find(IR.VirtRegI->start);
+ } else {
+ // LiveUnion starts first, perform double binary search.
+ IR.LiveUnionI.find(VirtReg->beginIndex());
+ if (IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
+ else
+ IR.VirtRegI = VirtReg->end();
+ }
+ findIntersection(FirstInterference);
+ assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
+ && "Uninitialized iterator");
+ return FirstInterference;
+}
+
+// Treat the result as an iterator and advance to the next interfering pair
+// of segments. This is a plain iterator with no filter.
+bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
+ assert(isInterference(IR) && "iteration past end of interferences");
+
+ // Advance either the VirtReg or LiveUnion segment to ensure that we visit all
+ // unique overlapping pairs.
+ if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
+ if (++IR.VirtRegI == VirtReg->end())
+ return false;
+ }
+ else {
+ if (!(++IR.LiveUnionI).valid()) {
+ IR.VirtRegI = VirtReg->end();
+ return false;
+ }
+ }
+ // Short-circuit findIntersection() if possible.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI))
+ return true;
+
+ // Find the next intersection.
+ findIntersection(IR);
+ return isInterference(IR);
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Count the number of virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The number of times that we either advance IR.VirtRegI or call
+// LiveUnion.upperBound() will be no more than the number of holes in
+// VirtReg. So each invocation of collectInterferingVRegs() takes
+// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+//
+// For comments on how to speed it up, see Query::findIntersection().
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ InterferenceResult IR = firstInterference();
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentInterferingVReg = NULL;
+ if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
+ // Advance the union's iterator to reach an unseen interfering vreg.
+ do {
+ if (IR.LiveUnionI.value() == RecentInterferingVReg)
+ continue;
+
+ if (!isSeenInterference(IR.LiveUnionI.value()))
+ break;
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+
+ } while ((++IR.LiveUnionI).valid());
+ if (!IR.LiveUnionI.valid())
+ break;
+
+ // Advance the VirtReg iterator until surpassing the next segment in
+ // LiveUnion.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break;
+
+ // Check for intersection with the union's segment.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
+
+ if (!IR.LiveUnionI.value()->isSpillable())
+ SeenUnspillableVReg = true;
+
+ if (InterferingVRegs.size() == MaxInterferingRegs)
+ // Leave SeenAllInterferences set to false to indicate that at least one
+ // interference exists beyond those we collected.
+ return MaxInterferingRegs;
+
+ InterferingVRegs.push_back(IR.LiveUnionI.value());
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+ ++IR.LiveUnionI;
+
+ continue;
+ }
+ // VirtRegI may have advanced far beyond LiveUnionI,
+ // do a fast intersection test to "catch up"
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+ // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+ // overlaps.
+ IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+ Overlaps(LiveUnion->getMap(), Loop->getMap());
+ if (!Overlaps.valid())
+ return false;
+
+ // The loop is overlapping an LIU assignment. Check VirtReg as well.
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+ for (;;) {
+ if (VRI == VirtReg->end())
+ return false;
+ if (VRI->start < Overlaps.stop())
+ return true;
+
+ Overlaps.advanceTo(VRI->start);
+ if (!Overlaps.valid())
+ return false;
+ if (Overlaps.start() < VRI->end)
+ return true;
+
+ VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 000000000000..5e78d5e85029
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,264 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+ const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+ return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
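+
+// For example, segments [4,12) and [10,20) satisfy 4 < 20 && 10 < 12 and so
+// overlap, while [4,10) against [10,20) gives 10 < 10 == false: because both
+// comparisons are strict, segments that merely touch at an endpoint are not
+// considered interfering.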
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+ // A set of live virtual register segments that supports fast insertion,
+ // intersection, and removal.
+ // Maps SlotIndex intervals to the live virtual registers (LiveInterval*)
+ // that occupy them.
+ typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+ // SegmentIter can advance to the next segment ordered by starting position
+ // which may belong to a different live virtual register. We also must be able
+ // to reach the current segment's containing virtual register.
+ typedef LiveSegments::iterator SegmentIter;
+
+ // LiveIntervalUnions share an external allocator.
+ typedef LiveSegments::Allocator Allocator;
+
+ class InterferenceResult;
+ class Query;
+
+private:
+ const unsigned RepReg; // representative register number
+ unsigned Tag; // unique tag for current contents.
+ LiveSegments Segments; // union of virtual reg segments
+
+public:
+ LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+ {}
+
+ // Iterate over all segments in the union of live virtual registers ordered
+ // by their starting position.
+ SegmentIter begin() { return Segments.begin(); }
+ SegmentIter end() { return Segments.end(); }
+ SegmentIter find(SlotIndex x) { return Segments.find(x); }
+ bool empty() const { return Segments.empty(); }
+ SlotIndex startIndex() const { return Segments.start(); }
+
+ // Provide public access to the underlying map to allow overlap iteration.
+ typedef LiveSegments Map;
+ const Map &getMap() { return Segments; }
+
+ /// getTag - Return an opaque tag representing the current state of the union.
+ unsigned getTag() const { return Tag; }
+
+ /// changedSince - Return true if the union has changed since getTag returned tag.
+ bool changedSince(unsigned tag) const { return tag != Tag; }
+
+ // Add a live virtual register to this union and merge its segments.
+ void unify(LiveInterval &VirtReg);
+
+ // Remove a live virtual register's segments from this union.
+ void extract(LiveInterval &VirtReg);
+
+ // Remove all inserted virtual registers.
+ void clear() { Segments.clear(); ++Tag; }
+
+ // Print union, using TRI to translate register names
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+ // Verify the live intervals in this union and add them to the visited set.
+ void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+ /// Cache a single interference test result in the form of two intersecting
+ /// segments. This allows efficiently iterating over the interferences. The
+ /// iteration logic is handled by LiveIntervalUnion::Query which may
+ /// filter interferences depending on the type of query.
+ class InterferenceResult {
+ friend class Query;
+
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
+
+ // Internal ctor.
+ InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
+ : VirtRegI(VRegI), LiveUnionI(UnionI) {}
+
+ public:
+ // Public default ctor.
+ InterferenceResult(): VirtRegI(), LiveUnionI() {}
+
+ /// start - Return the start of the current overlap.
+ SlotIndex start() const {
+ return std::max(VirtRegI->start, LiveUnionI.start());
+ }
+
+ /// stop - Return the end of the current overlap.
+ SlotIndex stop() const {
+ return std::min(VirtRegI->end, LiveUnionI.stop());
+ }
+
+ /// interference - Return the register that is interfering here.
+ LiveInterval *interference() const { return LiveUnionI.value(); }
+
+ // Note: this interface provides raw access to the iterators because the
+ // result has no way to tell if it's valid to dereference them.
+
+ // Access the VirtReg segment.
+ LiveInterval::iterator virtRegPos() const { return VirtRegI; }
+
+ // Access the LiveUnion segment.
+ const SegmentIter &liveUnionPos() const { return LiveUnionI; }
+
+ bool operator==(const InterferenceResult &IR) const {
+ return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
+ }
+ bool operator!=(const InterferenceResult &IR) const {
+ return !operator==(IR);
+ }
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+ };
+
+ /// Query interferences between a single live virtual register and a live
+ /// interval union.
+ class Query {
+ LiveIntervalUnion *LiveUnion;
+ LiveInterval *VirtReg;
+ InterferenceResult FirstInterference;
+ SmallVector<LiveInterval*,4> InterferingVRegs;
+ bool CheckedFirstInterference;
+ bool SeenAllInterferences;
+ bool SeenUnspillableVReg;
+ unsigned Tag, UserTag;
+
+ public:
+ Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
+
+ Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+ LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+ SeenAllInterferences(false), SeenUnspillableVReg(false)
+ {}
+
+ void clear() {
+ LiveUnion = NULL;
+ VirtReg = NULL;
+ InterferingVRegs.clear();
+ CheckedFirstInterference = false;
+ SeenAllInterferences = false;
+ SeenUnspillableVReg = false;
+ Tag = 0;
+ UserTag = 0;
+ }
+
+ void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
+ assert(VReg && LIU && "Invalid arguments");
+ if (UserTag == UTag && VirtReg == VReg &&
+ LiveUnion == LIU && !LIU->changedSince(Tag)) {
+ // Retain cached results, e.g. firstInterference.
+ return;
+ }
+ clear();
+ LiveUnion = LIU;
+ VirtReg = VReg;
+ Tag = LIU->getTag();
+ UserTag = UTag;
+ }
+
+ LiveInterval &virtReg() const {
+ assert(VirtReg && "uninitialized");
+ return *VirtReg;
+ }
+
+ bool isInterference(const InterferenceResult &IR) const {
+ if (IR.VirtRegI != VirtReg->end()) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "invalid segment iterators");
+ return true;
+ }
+ return false;
+ }
+
+ // Does this live virtual register interfere with the union?
+ bool checkInterference() { return isInterference(firstInterference()); }
+
+ // Get the first pair of interfering segments, or a noninterfering result.
+ // This initializes the FirstInterference cache.
+ const InterferenceResult &firstInterference();
+
+ // Treat the result as an iterator and advance to the next interfering pair
+ // of segments. Visiting each unique interfering pair means that the same
+ // VirtReg or LiveUnion segment may be visited multiple times.
+ bool nextInterference(InterferenceResult &IR) const;
+
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VReg) const;
+
+ // Did collectInterferingVRegs collect all interferences?
+ bool seenAllInterferences() const { return SeenAllInterferences; }
+
+ // Did collectInterferingVRegs encounter an unspillable vreg?
+ bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+ // Vector generated by collectInterferingVRegs.
+ const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+ return InterferingVRegs;
+ }
+
+ /// checkLoopInterference - Return true if there is interference overlapping
+ /// Loop.
+ bool checkLoopInterference(MachineLoopRange*);
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
+ private:
+ Query(const Query&); // DO NOT IMPLEMENT
+ void operator=(const Query&); // DO NOT IMPLEMENT
+
+ // Private interface for queries
+ void findIntersection(InterferenceResult &IR) const;
+ };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
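As a rough illustration of how the pieces above fit together, here is a hedged sketch of the intended call pattern: a register allocator keeps one LiveIntervalUnion per physical register, merges assigned virtual registers into it with unify(), and uses a Query to test whether another virtual register can be assigned. Only the API declared above is used; the helper names, the eviction heuristic, and the surrounding allocator state are illustrative assumptions, not part of this change.

// Sketch only: helper names and thresholds are invented for illustration.
#include "LiveIntervalUnion.h"
using namespace llvm;

// Decide whether VirtReg could live in the physical register modeled by Union.
static bool canAssign(LiveInterval &VirtReg, LiveIntervalUnion &Union,
                      unsigned UserTag) {
  LiveIntervalUnion::Query Q;
  Q.init(UserTag, &VirtReg, &Union);   // bind the query to this (vreg, union) pair
  if (!Q.checkInterference())
    return true;                       // no overlap with already assigned vregs
  // Look at a handful of interfering vregs to judge whether eviction is cheap.
  Q.collectInterferingVRegs(8);
  return !Q.seenUnspillableVReg() && Q.interferingVRegs().size() <= 2;
}

static void assign(LiveInterval &VirtReg, LiveIntervalUnion &Union) {
  Union.unify(VirtReg);                // merge the vreg's segments into the union
}

static void unassign(LiveInterval &VirtReg, LiveIntervalUnion &Union) {
  Union.extract(VirtReg);              // remove them again, e.g. on eviction
}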
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..b385fb36bbf1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,327 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) {
+ MachineRegisterInfo &MRI = VRM.getRegInfo();
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ VRM.grow();
+ VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+ LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+ newRegs_.push_back(&LI);
+ return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ assert(DefMI && "Missing instruction");
+ scannedRemattable_ = true;
+ if (!tii.isTriviallyReMaterializable(DefMI, aa))
+ return false;
+ remattable_.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+ E = parent_.vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(VNI, DefMI, tii, aa);
+ }
+ scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ if (!scannedRemattable_)
+ scanRemattable(lis, tii, aa);
+ return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx,
+ LiveIntervals &lis) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef())
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ continue;
+ // We cannot depend on virtual registers in uselessRegs_.
+ if (uselessRegs_)
+ for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
+ if ((*uselessRegs_)[ui]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &li = lis.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis) {
+ assert(scannedRemattable_ && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!remattable_.count(RM.ParentVNI))
+ return false;
+
+ // No defining instruction provided.
+ SlotIndex DefIdx;
+ if (RM.OrigMI)
+ DefIdx = lis.getInstructionIndex(RM.OrigMI);
+ else {
+ DefIdx = RM.ParentVNI->def;
+ RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ }
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri,
+ bool Late) {
+ assert(RM.OrigMI && "Invalid remat");
+ tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ rematted_.insert(RM.ParentVNI);
+ return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getDefIndex();
+}
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+ if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineInstr *DefMI = 0, *UseMI = 0;
+
+ // Check that there is a single def and a single use.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->getDesc().canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+ if (!FoldMI)
+ return false;
+ DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg, 0);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ LiveIntervals &LIS, VirtRegMap &VRM,
+ const TargetInstrInfo &TII) {
+ SetVector<LiveInterval*,
+ SmallVector<LiveInterval*, 8>,
+ SmallPtrSet<LiveInterval*, 8> > ToShrink;
+ MachineRegisterInfo &MRI = VRM.getRegInfo();
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty()) {
+ MachineInstr *MI = Dead.pop_back_val();
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+ DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Check for live intervals that may shrink
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if (MI->readsVirtualRegister(Reg) &&
+ (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+ LI.killedAt(Idx)))
+ ToShrink.insert(&LI);
+
+ // Remove defined value.
+ if (MOI->isDef()) {
+ if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+ if (delegate_)
+ delegate_->LRE_WillShrinkVirtReg(LI.reg);
+ LI.removeValNo(VNI);
+ if (LI.empty()) {
+ ToShrink.remove(&LI);
+ eraseVirtReg(Reg, LIS);
+ }
+ }
+ }
+ }
+
+ if (delegate_)
+ delegate_->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.back();
+ ToShrink.pop_back();
+ if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+ continue;
+ if (delegate_)
+ delegate_->LRE_WillShrinkVirtReg(LI->reg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues(LIS);
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ continue;
+ ++NumFracRanges;
+ bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
+ DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
+ SmallVector<LiveInterval*, 8> Dups(1, LI);
+ for (unsigned i = 1; i != NumComp; ++i) {
+ Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (IsOriginal)
+ VRM.setIsSplitFromReg(Dups.back()->reg, 0);
+ if (delegate_)
+ delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ }
+ ConEQ.Distribute(&Dups[0], MRI);
+ }
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ LiveIntervals &LIS,
+ const MachineLoopInfo &Loops) {
+ VirtRegAuxInfo VRAI(MF, LIS, Loops);
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ LiveInterval &LI = **I;
+ VRAI.CalculateRegClass(LI.reg);
+ VRAI.CalculateWeightAndHint(LI);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 000000000000..db6740c1fbcd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,206 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+public:
+ /// Callback methods for LiveRangeEdit owners.
+ struct Delegate {
+ /// Called immediately before erasing a dead machine instruction.
+ virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
+
+ /// Called when a virtual register is no longer used. Return false to defer
+ /// its deletion from LiveIntervals.
+ virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+
+ /// Called before shrinking the live range of a virtual register.
+ virtual void LRE_WillShrinkVirtReg(unsigned) {}
+
+ /// Called after cloning a virtual register.
+ /// This is used for new registers representing connected components of Old.
+ virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+
+ virtual ~Delegate() {}
+ };
+
+private:
+ LiveInterval &parent_;
+ SmallVectorImpl<LiveInterval*> &newRegs_;
+ Delegate *const delegate_;
+ const SmallVectorImpl<LiveInterval*> *uselessRegs_;
+
+ /// firstNew_ - Index of the first register added to newRegs_.
+ const unsigned firstNew_;
+
+ /// scannedRemattable_ - true when remattable values have been identified.
+ bool scannedRemattable_;
+
+ /// remattable_ - Values defined by remattable instructions as identified by
+ /// tii.isTriviallyReMaterializable().
+ SmallPtrSet<const VNInfo*,4> remattable_;
+
+ /// rematted_ - Values that were actually rematted, and so need to have their
+ /// live range trimmed or entirely removed.
+ SmallPtrSet<const VNInfo*,4> rematted_;
+
+ /// scanRemattable - Identify the parent_ values that may rematerialize.
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+ /// foldAsLoad - If LI has a single use and a single def that can be folded as
+ /// a load, eliminate the register by folding the def into the use.
+ bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
+ MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+ /// empty initially; any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ Delegate *delegate = 0,
+ const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
+ : parent_(parent), newRegs_(newRegs),
+ delegate_(delegate),
+ uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()),
+ scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ ArrayRef<LiveInterval*> regs() const {
+ return ArrayRef<LiveInterval*>(newRegs_).slice(firstNew_);
+ }
+
+ /// FIXME: Temporary accessors until we can get rid of
+ /// LiveIntervals::AddIntervalsForSpills
+ SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
+ const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
+ return uselessRegs_;
+ }
+
+ /// createFrom - Create a new virtual register based on OldReg.
+ LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
+
+ /// create - Create a new register with the same class and original slot as
+ /// parent.
+ LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
+ return createFrom(getReg(), LIS, VRM);
+ }
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// checkRematerializable - Manually add VNI to the list of rematerializable
+ /// values if DefMI may be rematerializable.
+ bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
+ const TargetInstrInfo&, AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+ /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&,
+ bool Late = false);
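+
+ // A typical rematerialization sequence looks like the following sketch
+ // (illustrative only; Edit, MBB, MI, DestReg, UseIdx, ParentVNI, LIS, TII
+ // and TRI are assumed to be supplied by the caller):
+ //
+ //   LiveRangeEdit::Remat RM(ParentVNI);
+ //   if (Edit.canRematerializeAt(RM, UseIdx, false, LIS)) {
+ //     SlotIndex DefIdx =
+ //       Edit.rematerializeAt(MBB, MI, DestReg, RM, LIS, TII, TRI);
+ //     (void)DefIdx; // The new def is mapped; update liveness separately.
+ //   }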
+
+ /// markRematerialized - Explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(const VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(const VNInfo *ParentVNI) const {
+ return rematted_.count(ParentVNI);
+ }
+
+ /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
+ /// to erase it from LIS.
+ void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
+
+ /// eliminateDeadDefs - Try to delete machine instructions that are now dead
+ /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
+ /// and further dead defs to be eliminated.
+ void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ LiveIntervals&, VirtRegMap&,
+ const TargetInstrInfo&);
+
+ /// calculateRegClassAndHint - Recompute register class and hint for each new
+ /// register.
+ void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
+ const MachineLoopInfo&);
+};
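+
+// Example client (illustrative sketch only; ExampleClient and its members are
+// hypothetical and not part of LLVM). A splitter or spiller typically owns the
+// vector of new intervals, registers itself as the Delegate, and lets
+// eliminateDeadDefs() clean up after rewriting:
+//
+//   struct ExampleClient : LiveRangeEdit::Delegate {
+//     SmallVector<LiveInterval*, 8> NewRegs;
+//     void run(LiveInterval &Parent, LiveIntervals &LIS, VirtRegMap &VRM,
+//              const TargetInstrInfo &TII) {
+//       LiveRangeEdit Edit(Parent, NewRegs, this);
+//       // ... split or spill Parent, remembering now-dead instructions ...
+//       SmallVector<MachineInstr*, 8> Dead;
+//       Edit.eliminateDeadDefs(Dead, LIS, VRM, TII);
+//     }
+//     // Keep client-side data structures in sync with the edit.
+//     virtual void LRE_WillEraseInstruction(MachineInstr *MI) { /* forget MI */ }
+//   };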
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 000000000000..c75196a47210
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,82 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
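+ // Stack slots reuse the LiveInterval machinery: index2StackSlot folds the
+ // slot index into the register number space that LiveInterval expects.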
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << RC->getName() << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 000000000000..20bad60dedda
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,770 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
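+//
+// Example query (illustrative sketch only; Reg and MBB are assumed to be
+// supplied by a client pass that declares AU.addRequired<LiveVariables>()):
+//
+//   LiveVariables &LV = getAnalysis<LiveVariables>();
+//   LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); // Reg must be virtual.
+//   if (LV.isLiveOut(Reg, *MBB))
+//     ; // Reg is live into, or killed in, some successor of MBB.
+//   else if (MachineInstr *Kill = VI.findKill(MBB))
+//     ; // Reg dies at Kill inside MBB.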
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+ dbgs() << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(RegIdx);
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+ VRInfo.NumUses++;
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, so it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return 0;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ PartDefRegs.insert(DefReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PartDefRegs.insert(SubReg);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial defs into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def, all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ }
+ else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super-register; add an implicit def of Reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg,
+ true/*IsDef*/, true/*IsImp*/));
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PhysRegUse[SubReg] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return 0;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ PhysRegUse[SubReg] = LastRefOrPartRef;
+ for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ unsigned SSReg = *SSRegs; ++SSRegs)
+ PhysRegUse[SSReg] = LastRefOrPartRef;
+ }
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.erase(*SS);
+ }
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ Live.insert(*SS);
+ } else {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ // If a register isn't itself defined, but all of the parts that make it up
+ // are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ ReservedRegisters = TRI->getReservedRegs(mf);
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ PHIJoins.clear();
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ MO.setIsKill(false);
+ UseRegs.push_back(MOReg);
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // Finally, if the last instruction in the block is a return, make sure to
+ // mark it as using all of the live-out values in the function.
+ // Things marked both call and return are tail calls; do not do this for
+ // them. The tail callee need not take the same registers as input
+ // that it produces as output, and there are dependencies for its input
+ // registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()
+ && !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register!");
+ HandlePhysRegUse(*I, Ret);
+
+ // Add live-out registers as implicit uses.
+ if (!Ret->readsRegister(*I))
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (PhysRegDef[i] || PhysRegUse[i])
+ HandlePhysRegDef(i, 0, Defs);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+ else
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If any are found, it is due to a bug in the instruction selector
+ // or some other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ removed = true;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
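+ // PHI operands come in (incoming register, predecessor MBB) pairs starting
+ // at operand 1, so operand i is the register and operand i+1 its source
+ // block; hence the stride of 2.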
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+
+ // Update info for all live variables
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ VarInfo &VI = getVarInfo(Reg);
+ if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 000000000000..1318d6212497
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,359 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const {
+ return "Local Stack Slot Allocation";
+ }
+
+ private:
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+ return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a slightly better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
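+ // For example, when the stack grows down, an 8-byte object with 8-byte
+ // alignment placed while Offset is 5 gives 5 + 8 = 13, which rounds up to
+ // 16 and is recorded below as local offset -16.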
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+ // Check if the relative offset from where the base register points to the
+ // target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+ MachineBasicBlock *Entry = Fn.begin();
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp
new file mode 100644
index 000000000000..7871ba9c17e4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp
@@ -0,0 +1,223 @@
+//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineFunction pass which runs after register
+// allocation that turns subreg insert/extract instructions into register
+// copies, as needed. This ensures correct codegen even if the coalescer
+// isn't able to remove all subreg instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersubregs"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct LowerSubregsInstructionPass : public MachineFunctionPass {
+ private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSubregsInstructionPass() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const {
+ return "Subregister lowering instruction pass";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI);
+ void TransferImplicitDefs(MachineInstr *MI);
+ };
+
+ char LowerSubregsInstructionPass::ID = 0;
+}
+
+FunctionPass *llvm::createLowerSubregsPass() {
+ return new LowerSubregsInstructionPass();
+}
+
+/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the dead flag.
+void
+LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
+ unsigned DstReg,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterDead(DstReg, TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyPhysReg output doesn't reference destination register!");
+ }
+}
+
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+ }
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ // Watch out for a case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "subreg: " << *(--dMI);
+ });
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
+ }
+
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+ if (DstMO.isDead())
+ TransferDeadFlag(MI, DstMO.getReg(), TRI);
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** LOWERING SUBREG INSTRS **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ MachineInstr *MI = mi;
+ assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear");
+ assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ "EXTRACT_SUBREG should no longer appear");
+ if (MI->isSubregToReg()) {
+ MadeChange |= LowerSubregToReg(MI);
+ } else if (MI->isCopy()) {
+ MadeChange |= LowerCopy(MI);
+ }
+ mi = nmi;
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 000000000000..8f0fb46879ac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,791 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
+ AddressTaken(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getSymbol - Return the MCSymbol for this basic block.
+///
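+/// For example, with a private-global prefix of ".L" this yields names of the
+/// form ".LBB<function number>_<block number>", such as ".LBB3_5".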
+MCSymbol *MachineBasicBlock::getSymbol() const {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) + "_" +
+ Twine(getNumber()));
+}
+
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ N->RemoveRegOperandsFromUseLists();
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &fromList,
+ MachineBasicBlock::iterator first,
+ MachineBasicBlock::iterator last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ iterator I = begin();
+ while (I != end() && I->isPHI())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator I = end();
+ while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != end() && !I->getDesc().isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ iterator B = begin(), I = end();
+ while (I != B) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
+}
+
+void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ WriteAsOperand(OS, LBB, /*PrintType=*/false);
+ Comma = ", ";
+ }
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ OS << '\n';
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Live Ins:";
+ for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
+ OS << '\n';
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " BB#" << (*PI)->getNumber();
+ OS << '\n';
+ }
+
+ for (const_iterator I = begin(); I != end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
+ OS << '\t';
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ OS << " BB#" << (*SI)->getNumber();
+ OS << '\n';
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ std::vector<unsigned>::iterator I =
+ std::find(LiveIns.begin(), LiveIns.end(), Reg);
+ assert(I != LiveIns.end() && "Not a live in!");
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch.
+ TBB = *succ_begin();
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, 0, Cond, dl);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ // The block has a fallthrough conditional branch.
+ MachineBasicBlock *MBBA = *succ_begin();
+ MachineBasicBlock *MBBB = *llvm::next(succ_begin());
+ if (MBBA == TBB) std::swap(MBBB, MBBA);
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
+ return;
+ }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
+ } else if (!isLayoutSuccessor(MBBA)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, MBBA, Cond, dl);
+ }
+ }
+ }
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+ // The first time we see a non-zero weight, we know the Weights list is in
+ // use, so fill in 0 weights for all existing successors.
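+ // For example, after addSuccessor(A) and addSuccessor(B, 16), the lists are
+ // Successors == {A, B} and Weights == {0, 16}.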
+ if (weight != 0 && Weights.empty())
+ Weights.resize(Successors.size());
+
+ if (weight != 0 || !Weights.empty())
+ Weights.push_back(weight);
+
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+ }
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ uint32_t weight = 0;
+ succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(SI);
+ weight = *WI;
+ }
+
+ // Update the successor information.
+ removeSuccessor(SI);
+ addSuccessor(New, weight);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ uint32_t weight = 0;
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!fromMBB->Weights.empty())
+ weight = *fromMBB->Weights.begin();
+
+ addSuccessor(Succ, weight);
+ fromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ addSuccessor(Succ);
+ fromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == fromMBB)
+ MO.setMBB(this);
+ }
+ }
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
+ return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ MachineFunction::iterator Fallthrough = this;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(Fallthrough))
+ return false;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
+ return empty() || !back().getDesc().isBarrier() ||
+ back().getDesc().isPredicable();
+ }
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ MachineFunction *MF = getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+ return NULL;
+
+ // Avoid bugpoint weirdness: A block may end with a conditional branch that
+ // jumps to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "Splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<unsigned, 4> KilledRegs;
+ if (LV)
+ for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
+ continue;
+ unsigned Reg = OI->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LV->getVarInfo(Reg).removeKill(MI)) {
+ KilledRegs.push_back(Reg);
+ DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ OI->setIsKill(false);
+ }
+ }
+ }
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+ updateTerminator();
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ // Update LiveVariables.
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ unsigned Reg = KilledRegs.pop_back_val();
+ for (iterator I = end(), E = begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
+ continue;
+ LV->getVarInfo(Reg).Kills.push_back(I);
+ DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
+ LV->addNewBlock(NMBB, this, Succ);
+ }
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+ MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ);
+
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+ if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+ MDT->changeImmediateDominator(SucccDTNode, NewDTNode);
+ }
+
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::iterator I = end();
+ while (I != begin()) {
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ replaceSuccessor(Old, New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ // The values of DestA and DestB frequently come from a call to the
+ // 'TargetInstrInfo::AnalyzeBranch' method, and their initial values are
+ // interpreted accordingly:
+ //
+ // 1. If both DestA and DestB are null, then the block ends with no branches
+ // (it falls through to its successor).
+ // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // with only an unconditional branch.
+ // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // with a conditional branch that falls through to a successor (DestB).
+ // 4. If DestA and DestB are set and isCond is true, then the block ends with a
+ // conditional branch followed by an unconditional branch. DestA is the
+ // 'true' destination and DestB is the 'false' destination.
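+ //
+ // For example, if branch folding turned a conditional branch into an
+ // unconditional one, the old conditional target may still be listed as a
+ // successor; such stale edges are removed below (unless they are landing
+ // pads).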
+
+ bool Changed = false;
+
+ MachineFunction::iterator FallThru =
+ llvm::next(MachineFunction::iterator(this));
+
+ if (DestA == 0 && DestB == 0) {
+ // Block falls through to successor.
+ DestA = FallThru;
+ DestB = FallThru;
+ } else if (DestA != 0 && DestB == 0) {
+ if (isCond)
+ // Block ends in conditional jump that falls through to successor.
+ DestB = FallThru;
+ } else {
+ assert(DestA && DestB && isCond &&
+ "CFG in a bad state. Cannot correct CFG edges");
+ }
+
+ // Remove superfluous edges. I.e., those which aren't destinations of this
+ // basic block, duplicate edges, or landing pads.
+ SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ while (SI != succ_end()) {
+ const MachineBasicBlock *MBB = *SI;
+ if (!SeenMBBs.insert(MBB) ||
+ (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ // This is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ Changed = true;
+ } else {
+ ++SI;
+ }
+ }
+
+ return Changed;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+ DebugLoc DL;
+ MachineBasicBlock::iterator E = end();
+ if (MBBI != E) {
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ MachineBasicBlock::iterator MBBI2 = MBBI;
+ while (MBBI2 != E && MBBI2->isDebugValue())
+ MBBI2++;
+ if (MBBI2 != E)
+ DL = MBBI2->getDebugLoc();
+ }
+ return DL;
+}
+
+/// getSuccWeight - Return the weight of the edge from this block to succ.
+///
+uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+ if (Weights.empty())
+ return 0;
+
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ return *getWeightIterator(I);
+}
+
+/// getWeightIterator - Return the weight iterator corresponding to the given
+/// successor iterator.
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp
new file mode 100644
index 000000000000..893a320a6a63
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp
@@ -0,0 +1,59 @@
+//====----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequency.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequency::ID = 0;
+
+
+MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry());
+ MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequency::~MachineBlockFrequency() {
+ delete MBFI;
+}
+
+void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI->doFunction(&F, &MBPI);
+ return false;
+}
+
+/// getBlockFreq - Return the block frequency. Never returns 0; the value is
+/// always positive. Note that the initial frequency is 1024, so the value
+/// should not be interpreted on its own, only in comparison with other block
+/// frequencies. We do this to avoid using floating point.
+///
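+/// For example, a block that executes roughly eight times per entry into the
+/// function would typically report a frequency near 8 * 1024 = 8192.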
+uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) {
+ return MBFI->getBlockFreq(MBB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..c13fa6bc5333
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,113 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+uint32_t
+MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t Weight = Src->getSuccWeight(Dst);
+ if (!Weight)
+ return DEFAULT_WEIGHT;
+ return Weight;
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
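+ // For example, weights {7, 1} make the 7-weight edge hot (7*5 = 35 > 8*4 =
+ // 32), while weights {3, 1} do not (3*5 = 15 < 4*4 = 16).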
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ MachineBasicBlock *MaxSucc = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 000000000000..3a60a37af443
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,535 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+
+using namespace llvm;
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg-referencing common subexprs eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ MachineRegisterInfo *MRI;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ virtual void releaseMemory() {
+ ScopeMap.clear();
+ Exps.clear();
+ }
+
+ private:
+ const unsigned LookAheadLimit;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
+ DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+ ScopedHTType VNT;
+ SmallVector<MachineInstr*, 64> Exps;
+ unsigned CurrVN;
+
+ bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool isCSECandidate(MachineInstr *MI);
+ bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+ bool PerformCSE(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+
+FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
+
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // Only coalesce single-use copies. This ensures the copy will be
+ // deleted.
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() != MBB)
+ continue;
+ if (!DefMI->isCopy())
+ continue;
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ // Reached end of block, register is obviously dead.
+ return true;
+
+ bool SeenDef = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ // Found a use!
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // See a def of Reg (or an alias) before encountering any use, it's
+ // trivially dead.
+ return true;
+
+ --LookAheadLeft;
+ ++I;
+ }
+ return false;
+}
+
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers).
+/// The physical registers it references (other than trivially dead defs),
+/// together with their aliases, are collected in PhysRefs.
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before LiveVariables. We can scan forward a
+ // few instructions and check if it is obviously dead.
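+ // A typical example is a flags register such as EFLAGS defined by an
+ // instruction whose flag results are never read.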
+ if (MO.isDef() &&
+ (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
+ continue;
+ PhysRefs.insert(Reg);
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ PhysRefs.insert(*Alias);
+ }
+
+ return !PhysRefs.empty();
+}
+
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ if (CSMI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ return true;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+ return false;
+
+ // Ignore copies.
+ if (MI->isCopyLike())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ if (MCID.mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, it is
+ // safe to CSE it like any other side-effect-free instruction.
+ if (!MI->isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+ return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI) {
+ // FIXME: Heuristics that work around the lack of live range splitting.
+
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computations to be spilled.
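+ // For example, rematerializing a cheap immediate in each block is usually
+ // better than keeping a single copy live across a long stretch of code.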
+ if (MI->getDesc().isAsCheapAsAMove()) {
+ MachineBasicBlock *CSBB = CSMI->getParent();
+ MachineBasicBlock *BB = MI->getParent();
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
+ return false;
+ }
+
+ // Heuristics #2: If the expression doesn't use a virtual register and the
+ // only uses of the redundant computation are copies, do not CSE.
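+ // Such expressions typically feed copies into physical argument registers;
+ // CSE would only extend the live range feeding those copies.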
+ bool HasVRegUse = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ HasVRegUse = true;
+ break;
+ }
+ }
+ if (!HasVRegUse) {
+ bool HasNonCopyUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ // Ignore copies.
+ if (!Use->isCopyLike()) {
+ HasNonCopyUse = true;
+ break;
+ }
+ }
+ if (!HasNonCopyUse)
+ return false;
+ }
+
+ // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+ // it unless the defined value is already used in the BB of the new use.
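+ // Reusing a value that is only referenced by PHIs could otherwise extend
+ // its live range into blocks where it was not previously live.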
+ bool HasPHI = false;
+ SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ HasPHI |= Use->isPHI();
+ CSBBs.insert(Use->getParent());
+ }
+
+ if (!HasPHI)
+ return true;
+ return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ // Delete the scope before erasing the map entry; erase() invalidates SI.
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isCSECandidate(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Look for trivial copy coalescing opportunities.
+ if (PerformTrivialCoalescing(MI, MBB)) {
+ Changed = true;
+
+ // After coalescing MI itself may become a copy.
+ if (MI->isCopyLike())
+ continue;
+ FoundCSE = VNT.count(MI);
+ }
+ }
+
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->getDesc().isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ // It's also not safe if the instruction uses physical registers.
+ SmallSet<unsigned,8> PhysRefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+ FoundCSE = false;
+
+ // ... Unless the common subexpression is local and none of the physical
+ // registers it references are clobbered between it and MI.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ FoundCSE = true;
+ }
+
+ if (!FoundCSE) {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ DEBUG(dbgs() << "Examining: " << *MI);
+ DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+ // Check if it's profitable to perform this CSE.
+ bool DoCSE = true;
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned OldReg = MO.getReg();
+ unsigned NewReg = CSMI->getOperand(i).getReg();
+ if (OldReg == NewReg)
+ continue;
+ assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+ TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ "Do not CSE physical register defs!");
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DoCSE = false;
+ break;
+ }
+ CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+ --NumDefs;
+ }
+
+ // Actually perform the elimination.
+ if (DoCSE) {
+ for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
+ MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+ MRI->clearKillFlags(CSEPairs[i].second);
+ }
+ MI->eraseFromParent();
+ ++NumCSEs;
+ if (!PhysRefs.empty())
+ ++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
+ } else {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ }
+ CSEPairs.clear();
+ }
+
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose children are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ CurrVN = 0;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ OpenChildren[Node] = NumChildren;
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
+ bool Changed = false;
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlock(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ return PerformCSE(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 000000000000..04c8ecbf9bdc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,59 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+}
+
+char MachineDominatorTree::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 000000000000..cd2515652831
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,744 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Config/config.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
+ if (Fn->hasFnAttr(Attribute::StackAlignment))
+ FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+ Fn->getAttributes().getFnAttributes()));
+ ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+ Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ TM.getTargetLowering()->getPrefFunctionAlignment());
+ FunctionNumber = FunctionNum;
+ JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+ BasicBlocks.clear();
+ InstructionRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+ FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+ ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator)
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
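+
+// Editorial sketch, not part of the upstream file: a transformation that
+// splices in a new block is expected to renumber from that point so the MBB
+// numbers stay dense, e.g. assuming MachineFunction &MF and a freshly created
+// block NewMBB already linked into the function:
+//
+//   MF.RenumberBlocks(NewMBB);  // renumbers NewMBB and every block after it
+//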
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(MCID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ MI->~MachineInstr();
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
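+
+// Editorial sketch, not part of the upstream file: a pass that splits a
+// combined load/store could keep only the load memory references on the new
+// load instruction, assuming MachineInstr *MI (the original) and *LoadMI:
+//
+//   std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> P =
+//     MF.extractLoadMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+//   LoadMI->setMemRefs(P.first, P.second);
+//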
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of store mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+void MachineFunction::dump() const {
+ print(dbgs());
+}
+
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ OS << "# Machine code for function " << Fn->getName() << ":\n";
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ OS << PrintReg(I->first, TRI);
+ if (I->second)
+ OS << " in " << PrintReg(I->second, TRI);
+ if (llvm::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+ if (RegInfo && !RegInfo->liveout_empty()) {
+ OS << "Function Live Outs:";
+ for (MachineRegisterInfo::liveout_iterator
+ I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
+ OS << '\n';
+ }
+
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ OS << '\n';
+ BB->print(OS, Indexes);
+ }
+
+ OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
+}
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getNameStr());
+#else
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
+#else
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = getRegInfo();
+ unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
+ return VReg;
+}
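+
+// Editorial sketch, not part of the upstream file: calling-convention
+// lowering typically maps each incoming physical argument register to a
+// virtual register this way, assuming PhysReg and RC come from the target's
+// argument-assignment tables:
+//
+//   unsigned VReg = MF.addLiveIn(PhysReg, RC);
+//   // VReg now carries the value of PhysReg on function entry.
+//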
+
+/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ assert(JumpTableInfo && "No jump tables");
+
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+ const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
+
+ const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
+ MAI.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
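+
+// Editorial note: with a Darwin-style private prefix "L", function number 1
+// and jump-table index 0, the code above produces the symbol "LJTI1_0"; with
+// isLinkerPrivate and an "l" prefix it would be "lJTI1_0".
+//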
+
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = TFI.getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/false, false));
+ return -++NumFixedObjects;
+}
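+
+// Editorial sketch, not part of the upstream file: frame lowering for
+// incoming stack arguments creates fixed objects before any ordinary stack
+// objects, e.g. a 4-byte argument located 8 bytes above the incoming SP:
+//
+//   int FI = MFI->CreateFixedObject(4, 8, /*Immutable=*/true);
+//   // The first fixed object gets index -1, the second -2, and so on.
+//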
+
+
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+ assert(MBB && "MBB must be valid");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB must be part of a MachineFunction");
+ const TargetMachine &TM = MF->getTarget();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // The entry MBB always has all CSRs pristine.
+ if (MBB == &MF->front())
+ return BV;
+
+ // On other MBBs the saved CSRs are not pristine.
+ const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I)
+ BV.reset(I->getReg());
+
+ return BV;
+}
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getEntrySize - Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ case MachineJumpTableInfo::EK_Inline:
+ return 0;
+ }
+ assert(0 && "Unknown jump table encoding!");
+ return ~0;
+}
+
+/// getEntryAlignment - Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32);
+ case MachineJumpTableInfo::EK_Inline:
+ return 1;
+ }
+ assert(0 && "Unknown jump table encoding!");
+ return ~0;
+}
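+
+// Editorial note: for EK_BlockAddress entries on a target with 8-byte
+// pointers, getEntrySize reports 8 and getEntryAlignment reports the pointer
+// ABI alignment; the 32-bit entry kinds always report a 4-byte size.
+//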
+
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ }
+
+ OS << '\n';
+}
+
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getRelocationInfo();
+ return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+ for (DenseSet<MachineConstantPoolValue*>::iterator I =
+ MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+ I != E; ++I)
+ delete *I;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+ const TargetData *TD) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // If they have the same type but weren't the same constant, quickly
+ // reject them.
+ if (A->getType() == B->getType()) return false;
+
+ // For now, only support constants with the same size.
+ if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ return false;
+
+ // If a floating-point value and an integer value have the same encoding,
+ // they can share a constant-pool entry.
+ if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+ if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
+ return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
+ if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+ if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
+ return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
+
+ // Two vectors can share an entry if each pair of corresponding
+ // elements could.
+ if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
+ if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+ if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
+ return false;
+ for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
+ if (!CanShareConstantPoolEntry(AV->getOperand(i),
+ BV->getOperand(i), TD))
+ return false;
+ return true;
+ }
+
+ // TODO: Handle other cases.
+
+ return false;
+}
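+
+// Editorial note: as a concrete example, a ConstantFP holding float 1.0 (bit
+// pattern 0x3F800000) and a ConstantInt holding i32 0x3F800000 have the same
+// store size and encoding, so the test above allows them to share one entry.
+//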
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. The caller must specify the minimum required alignment
+/// (in bytes) for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
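+
+// Editorial sketch, not part of the upstream file: callers normally request
+// an index using the constant's ABI alignment in bytes, assuming a
+// MachineConstantPool *CP, a Constant *C and the function's TargetData *TD:
+//
+//   unsigned Align = TD->getABITypeAlignment(C->getType());
+//   unsigned Idx = CP->getConstantPoolIndex(C, Align);
+//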
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
+ return (unsigned)Idx;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << ", align=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 000000000000..054c750c9f2b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,58 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+using namespace llvm;
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI && "MMI not around yet??");
+ MMI->setModule(&M);
+ NextFnNum = 0;
+ return false;
+}
+
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM, NextFnNum++,
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 000000000000..e5a491270a8c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,56 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all; they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("memdep");
+ AU.addPreserved("live-values");
+ AU.addPreserved("domtree");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("loops");
+ AU.addPreserved("lda");
+
+ FunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 000000000000..2aaa798a02c1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,60 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ OS << "# " << Banner << ":\n";
+ MF.print(OS);
+ return false;
+ }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner){
+ return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
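+
+// Editorial sketch, not part of the upstream file: a codegen pipeline can
+// dump machine code at any point by scheduling this printer, assuming a
+// PassManagerBase &PM that is being populated:
+//
+//   PM.add(createMachineFunctionPrinterPass(dbgs(), "After instruction selection"));
+//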
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 000000000000..143a29b08a1e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1743 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Value.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/FoldingSet.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// AddRegOperandToRegInfo - Add this register operand to the specified
+/// MachineRegisterInfo. If it is null, then the next/prev fields should be
+/// explicitly nulled out.
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+ assert(isReg() && "Can only add reg operand to use lists");
+
+ // If the reginfo pointer is null, just explicitly null out our next/prev
+ // pointers, to ensure they are not garbage.
+ if (RegInfo == 0) {
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+ return;
+ }
+
+ // Otherwise, add this operand to the head of the registers use/def list.
+ MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+ // For SSA values, we prefer to keep the definition at the start of the list.
+ // We do this by skipping over the definition if it is at the head of the
+ // list.
+ if (*Head && (*Head)->isDef())
+ Head = &(*Head)->Contents.Reg.Next;
+
+ Contents.Reg.Next = *Head;
+ if (Contents.Reg.Next) {
+ assert(getReg() == Contents.Reg.Next->getReg() &&
+ "Different regs on the same list!");
+ Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+ }
+
+ Contents.Reg.Prev = Head;
+ *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+ assert(isOnRegUseList() && "Reg operand is not on a use list");
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *NextOp = Contents.Reg.Next;
+ *Contents.Reg.Prev = NextOp;
+ if (NextOp) {
+ assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+ NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+ }
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ RemoveRegOperandFromRegInfo();
+ SmallContents.RegNo = Reg;
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && getParent() && getParent()->getParent() &&
+ getParent()->getParent()->getParent())
+ RemoveRegOperandFromRegInfo();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ // If this operand is already a register operand, use setReg to update the
+ // register's use/def lists.
+ if (isReg()) {
+ assert(!isEarlyClobber());
+ setReg(Reg);
+ } else {
+ // Otherwise, change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ }
+
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsUndef = isUndef;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ default: llvm_unreachable("Unrecognized operand type");
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress();
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ OS << PrintReg(getReg(), TRI, getSubReg());
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+ isEarlyClobber()) {
+ OS << '<';
+ bool NeedComma = false;
+ if (isDef()) {
+ if (NeedComma) OS << ',';
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
+ OS << "def";
+ NeedComma = true;
+ } else if (isImplicit()) {
+ OS << "imp-use";
+ NeedComma = true;
+ }
+
+ if (isKill() || isDead() || isUndef()) {
+ if (NeedComma) OS << ',';
+ if (isKill()) OS << "kill";
+ if (isDead()) OS << "dead";
+ if (isUndef()) {
+ if (isKill() || isDead())
+ OS << ',';
+ OS << "undef";
+ }
+ }
+ OS << '>';
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy())
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ else
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "<BB#" << getMBB()->getNumber() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<MCSym=" << *getMCSymbol() << '>';
+ break;
+ default:
+ llvm_unreachable("Unrecognized operand type");
+ }
+
+ if (unsigned TF = getTargetFlags())
+ OS << "[TF=" << TF << ']';
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(getOffset());
+ ID.AddInteger(Size);
+ ID.AddPointer(getValue());
+ ID.AddInteger(Flags);
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ Flags = (Flags & ((1 << MOMaxBits) - 1)) |
+ ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+ assert((MMO.isLoad() || MMO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MMO.isVolatile())
+ OS << "Volatile ";
+
+ if (MMO.isLoad())
+ OS << "LD";
+ if (MMO.isStore())
+ OS << "ST";
+ OS << MMO.getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (!MMO.getValue())
+ OS << "<unknown>";
+ else
+ WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (MMO.getBaseAlignment() != MMO.getAlignment())
+ OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+ if (MMO.getOffset() != 0)
+ OS << "+" << MMO.getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+ MMO.getBaseAlignment() != MMO.getSize())
+ OS << "(align=" << MMO.getAlignment() << ")";
+
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print nontemporal info.
+ if (MMO.isNonTemporal())
+ OS << "(nontemporal)";
+
+ return OS;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// MCID NULL and no operands.
+MachineInstr::MachineInstr()
+ : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0),
+ Parent(0) {
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+ if (MCID->ImplicitDefs)
+ for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID->ImplicitUses)
+ for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+ addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0) {
+ if (!NoImp)
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
+ bool NoImp)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ if (!NoImp)
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+ const MCInstrDesc &tid)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ Parent(0), debugLoc(MI.getDebugLoc()) {
+ Operands.reserve(MI.getNumOperands());
+
+ // Add operands
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MI.getOperand(i));
+ NumImplicitOps = MI.NumImplicitOps;
+
+ // Copy all the flags.
+ Flags = MI.Flags;
+
+ // Set parent to null.
+ Parent = 0;
+
+ LeakDetector::addGarbageObject(this);
+}
+
+MachineInstr::~MachineInstr() {
+ LeakDetector::removeGarbageObject(this);
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
+ assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
+ "Reg operand def/use list corrupted");
+ }
+#endif
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists() {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(&RegInfo);
+ }
+}
+
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ assert((isImpReg || !OperandsComplete()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *RegInfo = getRegInfo();
+
+ // If we are adding the operand to the end of the list, our job is simpler.
+ // This is true most of the time, so this is a reasonable optimization.
+ if (isImpReg || NumImplicitOps == 0) {
+ // We can only do this optimization if we know that the operand list won't
+ // reallocate.
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
+ Operands.push_back(Op);
+
+ // Set the parent of the operand.
+ Operands.back().ParentMI = this;
+
+ // If the operand is a register, update the operand's use list.
+ if (Op.isReg()) {
+ Operands.back().AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as early, mark the operand as such
+ unsigned OpNo = Operands.size() - 1;
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, we have to insert a real operand before any implicit ones.
+ unsigned OpNo = Operands.size()-NumImplicitOps;
+
+ // If this instruction isn't embedded into a function, then we don't need to
+ // update any operand lists.
+ if (RegInfo == 0) {
+ // Simple insertion, no reginfo update needed for other register operands.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Do explicitly set the reginfo for this operand though, to ensure the
+ // next/prev fields are properly nulled out.
+ if (Operands[OpNo].isReg()) {
+ Operands[OpNo].AddRegOperandToRegInfo(0);
+ // If the register operand is flagged as early, mark the operand as such
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ } else if (Operands.size()+1 <= Operands.capacity()) {
+ // Otherwise, we have to remove register operands from their register use
+ // list, add the operand, then add the register operands back to their use
+ // list. This also must handle the case when the operand list reallocates
+ // to somewhere else.
+
+ // If insertion of this operand won't cause reallocation of the operand
+ // list, just remove the implicit operands, add the operand, then re-add all
+ // the rest of the operands.
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+
+ // Add the operand. If it is a register, add it to the reg list.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ if (Operands[OpNo].isReg()) {
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as early, mark the operand as such
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ // Re-add all the implicit ops.
+ for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ } else {
+ // Otherwise, we will be reallocating the operand list. Remove all reg
+ // operands from their list, then re-add them after the operand list is
+ // reallocated.
+ RemoveRegOperandsFromUseLists();
+
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Re-add all the operands.
+ AddRegOperandsToUseLists(*RegInfo);
+
+ // If the register operand is flagged as early, mark the operand as such
+ if (Operands[OpNo].isReg()
+ && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+}
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < Operands.size() && "Invalid operand number");
+
+ // Special case removing the last one.
+ if (OpNo == Operands.size()-1) {
+ // If needed, remove from the reg def/use list.
+ if (Operands.back().isReg() && Operands.back().isOnRegUseList())
+ Operands.back().RemoveRegOperandFromRegInfo();
+
+ Operands.pop_back();
+ return;
+ }
+
+ // Otherwise, we are removing an interior operand. If we have reginfo to
+ // update, remove all operands that will be shifted down from their reg lists,
+ // move everything down, then re-add them.
+ MachineRegisterInfo *RegInfo = getRegInfo();
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+ }
+
+ Operands.erase(Operands.begin()+OpNo);
+
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ }
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ mmo_iterator OldMemRefsEnd = MemRefsEnd;
+
+ size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+ mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
+
+ std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+
+ MemRefs = NewMemRefs;
+ MemRefsEnd = NewMemRefsEnd;
+}
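+
+// Editorial sketch, not part of the upstream file: attaching a memory
+// reference describing a 4-byte, 4-byte-aligned spill-slot load, assuming a
+// frame index FI, MachineFunction &MF and MachineInstr *MI:
+//
+//   MachineMemOperand *MMO =
+//     MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+//                             MachineMemOperand::MOLoad, 4, 4);
+//   MI->addMemOperand(MF, MMO);
+//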
+
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other->getOpcode() != getOpcode() ||
+ Other->getNumOperands() != getNumOperands())
+ return false;
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, the machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ else if (Check == IgnoreVRegDefs) {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
+ }
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+ && getDebugLoc() != Other->getDebugLoc())
+ return false;
+ return true;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing basic
+/// block, and deletes it.
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand
+///
+bool MachineInstr::OperandsComplete() const {
+ unsigned short NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+ return true; // Broken: we have all the operands of this instruction!
+ return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+/// the specified register, or -1 if it is not found. It further tightens
+/// the search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg())
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
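+// Worked example (editorial; register names and sub-index are assumed): for a
+// partial redefinition such as
+//   %vreg1:sub_lo<def> = COPY %vreg2
+// readsWritesVirtualRegister(vreg1) returns (true, true): the sub-register def
+// writes vreg1, and since there is no full def the untouched lanes count as a
+// read as well.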
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+ if (isInlineAsm()) {
+ assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
+ const MachineOperand &MO = getOperand(DefOpIdx);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ return false;
+ // Determine the actual operand index that corresponds to this index.
+ unsigned DefNo = 0;
+ unsigned DefPart = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i < e; ) {
+ const MachineOperand &FMO = getOperand(i);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!FMO.isImm())
+ return false;
+ // Skip over this def.
+ unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
+ unsigned PrevDef = i + 1;
+ i = PrevDef + NumOps;
+ if (i > DefOpIdx) {
+ DefPart = DefOpIdx - PrevDef;
+ break;
+ }
+ ++DefNo;
+ }
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &FMO = getOperand(i);
+ if (!FMO.isImm())
+ continue;
+ if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+ continue;
+ unsigned Idx;
+ if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+ Idx == DefNo) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i + 1 + DefPart;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+ const MCInstrDesc &MCID = getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i;
+ return true;
+ }
+ }
+ return false;
+}
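+// For orientation (editorial note; layout as inferred from the loops above):
+// an INLINEASM MachineInstr is laid out as
+//   INLINEASM <asm string>, <extra-info imm>,
+//             <flag imm>, <reg ops...>, <flag imm>, <reg ops...>, ...
+// i.e. each operand group is one immediate flag followed by
+// InlineAsm::getNumOperandRegisters(flag) register operands, with the tied-to
+// relation encoded in the flag, which is what this decoding walks.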
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+ if (isInlineAsm()) {
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+ return false;
+
+ // Find the flag operand corresponding to UseOpIdx
+ unsigned FlagIdx, NumOps=0;
+ for (FlagIdx = InlineAsm::MIOp_FirstOperand;
+ FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!UFMO.isImm())
+ return false;
+ NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm());
+ assert(NumOps < getNumOperands() && "Invalid inline asm flag");
+ if (UseOpIdx < FlagIdx+NumOps+1)
+ break;
+ }
+ if (FlagIdx >= UseOpIdx)
+ return false;
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ unsigned DefNo;
+ if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+ if (!DefOpIdx)
+ return true;
+
+ unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
+ // Remember to adjust the index. First operand is asm string, second is
+ // the HasSideEffects and AlignStack bits, then there is a flag for each.
+ while (DefNo) {
+ const MachineOperand &FMO = getOperand(DefIdx);
+ assert(FMO.isImm());
+ // Skip over this def.
+ DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ --DefNo;
+ }
+ *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
+ return true;
+ }
+ return false;
+ }
+
+ const MCInstrDesc &MCID = getDesc();
+ if (UseOpIdx >= MCID.getNumOperands())
+ return false;
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
+ if (DefIdx == -1)
+ return false;
+ if (DefOpIdx)
+ *DefOpIdx = (unsigned)DefIdx;
+ return true;
+}
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+ continue;
+ for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+ MachineOperand &MOp = getOperand(j);
+ if (!MOp.isIdenticalTo(MO))
+ continue;
+ if (MO.isKill())
+ MOp.setIsKill();
+ else
+ MOp.setIsDead();
+ break;
+ }
+ }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ // Predicate operands must be the last operands.
+ addOperand(MI->getOperand(i));
+ }
+ }
+}
+
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ if (MCID->mayStore() || MCID->isCall()) {
+ SawStore = true;
+ return false;
+ }
+
+ if (isLabel() || isDebugValue() ||
+ MCID->isTerminator() || hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended
+ // destination. The check for isInvariantLoad gives the target a chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (MCID->mayLoad() && !isInvariantLoad(AA))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // the end of the block, or if the load is volatile, we can't move it.
+ return !SawStore && !hasVolatileMemoryRef();
+
+ return true;
+}
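+// Usage sketch (editorial, illustrative): callers scan from the instruction's
+// current position toward the intended destination and thread SawStore through
+// each query, e.g.
+//   bool SawStore = false;
+//   for (/* each instruction between source and destination */)
+//     if (!MI->isSafeToMove(TII, AA, SawStore))
+//       break;
+// so any intervening store or call pins every later non-invariant load.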
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ unsigned DstReg) const {
+ bool SawStore = false;
+ if (!TII->isTriviallyReMaterializable(this, AA) ||
+ !isSafeToMove(TII, AA, SawStore))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+ // Later on, we can loosen the restriction if the register operands have
+ // not been modified between the def and use. Note, this is different from
+ // MachineSink because the code is no longer in two-address form (at least
+ // partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!MCID->mayStore() &&
+ !MCID->mayLoad() &&
+ !MCID->isCall() &&
+ !hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for volatile references.
+ for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+ if ((*I)->isVolatile())
+ return true;
+
+ return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!MCID->mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (mmo_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I) {
+ if ((*I)->isVolatile()) return false;
+ if ((*I)->isStore()) return false;
+
+ if (const Value *V = (*I)->getValue()) {
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV->isConstant(MFI))
+ continue;
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
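+// Example (editorial; register names assumed): for
+//   %vreg7 = PHI %vreg2, <BB#1>, %vreg2, <BB#3>
+// isConstantValuePHI() returns vreg2, while a PHI merging different source
+// registers returns 0.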
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (getDesc().hasUnmodeledSideEffects())
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MO);
+ }
+}
+
+void MachineInstr::dump() const {
+ dbgs() << " " << *this;
+}
+
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ const MachineRegisterInfo *MRI = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ if (MF)
+ MRI = &MF->getRegInfo();
+ }
+
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, TM);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ VirtRegs.push_back(Reg);
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
+ OS << getDesc().getName();
+
+ // Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm()) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && getDesc().isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ bool HasAliasLive = false;
+ for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+ if (i < getDesc().NumOperands) {
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
+ OS << "pred:";
+ if (MCOI.isOptionalDef())
+ OS << "opt:";
+ }
+ if (isDebugValue() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE instructions.
+ const MDNode *MD = MO.getMetadata();
+ if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
+ OS << "!\"" << MDS->getString() << '\"';
+ else
+ MO.print(OS, TM);
+ } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm]"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem]"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break;
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " [tiedto:$" << TiedTo << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ } else
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (!FirstOp) OS << ",";
+ OS << " ...";
+ }
+
+ bool HaveSemi = false;
+ if (Flags) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " flags: ";
+
+ if (Flags & FrameSetup)
+ OS << "FrameSetup";
+ }
+
+ if (!memoperands_empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+
+ OS << " mem:";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ }
+
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << PrintReg(VirtRegs[j]);
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
+ // Print debug location information.
+ if (!debugLoc.isUnknown() && MF) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " dbg:";
+ printDebugLoc(debugLoc, MF, OS);
+ }
+
+ OS << '\n';
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ MO.setIsDead();
+ Found = true;
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->getSubRegisters(IncomingReg) &&
+ RegInfo->getSuperRegisters(Reg) &&
+ RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ bool Dead = true;
+ for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
+ E = UsedRegs.end(); I != E; ++I)
+ if (TRI.regsOverlap(*I, Reg)) {
+ Dead = false;
+ break;
+ }
+ // If there are no uses, including partial uses, the def is dead.
+ if (Dead) MO.setIsDead();
+ }
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ unsigned Hash = MI->getOpcode() * 37;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ uint64_t Key = (uint64_t)MO.getType() << 32;
+ switch (MO.getType()) {
+ default: break;
+ case MachineOperand::MO_Register:
+ if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+ Key |= MO.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ Key |= MO.getImm();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ Key |= MO.getIndex();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ break;
+ case MachineOperand::MO_MCSymbol:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+ break;
+ }
+ Key += ~(Key << 32);
+ Key ^= (Key >> 22);
+ Key += ~(Key << 13);
+ Key ^= (Key >> 8);
+ Key += (Key << 3);
+ Key ^= (Key >> 15);
+ Key += ~(Key << 27);
+ Key ^= (Key >> 31);
+ Hash = (unsigned)Key + Hash * 37;
+ }
+ return Hash;
+}
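+// Editorial note: virtual register defs are skipped in the hash above so that
+// two instructions differing only in the vreg they define hash identically;
+// this presumably pairs with isIdenticalTo(IgnoreVRegDefs) when the trait is
+// used as a DenseMap key for machine-level CSE.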
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 000000000000..722ceb202439
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1211 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+ class MachineLICM : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+ const TargetRegisterInfo *TRI;
+ const MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ BitVector AllocatableSet;
+
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() :
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit MachineLICM(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Machine Instruction LICM"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
+ CSEMap.clear();
+ }
+
+ private:
+ /// CandidateInfo - Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+ /// invariants out to the preheader.
+ void HoistRegionPostRA();
+
+ /// HoistPostRA - When an instruction is found to only use loop invariant
+ /// operands and is safe to hoist, this function is called to do the
+ /// dirty work.
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+ /// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+ /// gather register def and frame object update information.
+ void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates);
+
+ /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+ /// current loop.
+ void AddToLiveIns(unsigned Reg);
+
+ /// IsLICMCandidate - Returns true if the instruction may be a suitable
+ /// candidate for LICM; e.g., if the instruction is a call, then it's
+ /// obviously not safe to hoist it.
+ bool IsLICMCandidate(MachineInstr &I);
+
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// HasAnyPHIUse - Return true if the specified register is used by any
+ /// phi node.
+ bool HasAnyPHIUse(unsigned Reg) const;
+
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and a use in the current loop; return true if the target considers
+ /// it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+ /// check if hoisting an instruction with the given per-register-class cost
+ /// can cause high register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
+
+ /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
+ /// the load itself could be hoisted. Return the unfolded and hoistable
+ /// load, or null if the load couldn't be unfolded or if it wouldn't
+ /// be hoistable.
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+ /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
+ /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+ /// the preheader that computes the same value. If one is found, replace all
+ /// uses with the definition of the existing instruction rather than hoisting
+ /// the instruction to the preheader.
+ bool EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+ /// Hoist - When an instruction is found to only use loop invariant operands
+ /// and is safe to hoist, this function is called to do the dirty work.
+ /// It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+ /// InitCSEMap - Initialize the CSE map with instructions that are in the
+ /// current loop preheader that may become duplicates of instructions that
+ /// are hoisted out of the loop.
+ void InitCSEMap(MachineBasicBlock *BB);
+
+ /// getCurPreheader - Get the preheader for the current loop, splitting
+ /// a critical edge if needed.
+ MachineBasicBlock *getCurPreheader();
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+
+FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
+ return new MachineLICM(PreRegAlloc);
+}
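+// Usage sketch (editorial; illustrative pipeline, not from this patch): the
+// common codegen setup typically schedules the pass in both modes, e.g.
+//   PM.add(createMachineLICMPass(true));   // pre-RA: hoist vreg computations
+//   // ... register allocation ...
+//   PM.add(createMachineLICMPass(false));  // post-RA: hoist invariant reloads
+// with the boolean selecting between HoistRegion and HoistRegionPostRA below.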
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPredecessor())
+ return false;
+ // None of them did, so this is the outermost with a unique predecessor.
+ return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
+
+ Changed = FirstInLoop = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
+ TRI = TM->getRegisterInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
+ AllocatableSet = TRI->getAllocatableSet(MF);
+
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ }
+
+ // Get our Loop information...
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = 0;
+
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
+ continue;
+ }
+
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
+ HoistRegion(N, true);
+ CSEMap.clear();
+ }
+ }
+
+ return Changed;
+}
+
+/// InstructionStoresToFI - Return true if instruction stores to the
+/// specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end(); o != oe; ++o) {
+ if (!(*o)->isStore() || !(*o)->getValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+ unsigned *PhysRegDefs,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && PhysRegDefs[Reg])
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ ++PhysRegDefs[Reg];
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ ++PhysRegDefs[*AS];
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+ // register, then this is not safe.
+ if (++PhysRegDefs[Reg] > 1)
+ // The register defined by MI is also defined by another instruction in
+ // the loop, so it cannot be a LICM candidate.
+ RuledOut = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ if (++PhysRegDefs[*AS] > 1)
+ RuledOut = true;
+ }
+
+ // Only consider reloads for now and remats which do not have register
+ // operands. FIXME: Consider unfolding load-folding instructions.
+ if (Def && !RuledOut) {
+ int FI = INT_MIN;
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+ unsigned NumRegs = TRI->getNumRegs();
+ unsigned *PhysRegDefs = new unsigned[NumRegs];
+ std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ // Conservatively treat live-ins as an external def.
+ // FIXME: That means a reload that is reused in successor block(s) will not
+ // be LICM'ed.
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ ++PhysRegDefs[Reg];
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ ++PhysRegDefs[*AS];
+ }
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+ for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+ if (Candidates[i].FI != INT_MIN &&
+ StoredFIs.count(Candidates[i].FI))
+ continue;
+
+ if (PhysRegDefs[Candidates[i].Def] == 1) {
+ bool Safe = true;
+ MachineInstr *MI = Candidates[i].MI;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ continue;
+ if (PhysRegDefs[MO.getReg()]) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidates[i].Def);
+ }
+ }
+
+ delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// HoistPostRA - When an instruction is found to only use loop invariant
+/// operands and is safe to hoist, this function is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader) return;
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (Preheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << Preheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+ // Add register to livein list to all the BBs in the current loop since a
+ // loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
+ assert(N != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ if (IsHeader) {
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() < 25) {
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+ }
+
+ BackTrace.pop_back();
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (MO.isDef())
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ }
+}
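+// Worked example (editorial; a register class cost of 1 per value is assumed):
+// a preheader containing
+//   %vreg1 = LOAD ...                ; new def            -> pressure +1
+//   %vreg2 = ADD %vreg1<kill>, ...   ; def +1, kill -1    -> net +0
+// ends with an estimate of 1 for that class, i.e. only %vreg2 is counted as
+// live out into the loop.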
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] += RCCost;
+ }
+}
+
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM; e.g., if the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+ return false;
+
+ return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I))
+ return false;
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->def_empty(Reg))
+ return false;
+ if (AllocatableSet.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI->def_empty(AliasReg))
+ return false;
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
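+// Example (editorial): a pure vreg computation such as
+//   %vreg3 = ADD %vreg1, %vreg2
+// is loop invariant here when both sources are defined outside CurLoop,
+// whereas using a physical register that has defs in the function (or is
+// allocatable) disqualifies the instruction per the checks above.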
+
+
+/// HasAnyPHIUse - Return true if the specified register is used by any
+/// phi node.
+bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isPHI())
+ return true;
+ // Look past copies as well.
+ if (UseMI->isCopy()) {
+ unsigned Def = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Def) &&
+ HasAnyPHIUse(Def))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and a use in the current loop; return true if the target considers
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from the loop header to the current BB
+/// and check whether hoisting an instruction with the given cost map can
+/// cause high register pressure.
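+/// The cost map is keyed by register class id and holds the pressure delta
+/// the candidate would add; each BackTrace entry records the per-class
+/// register pressure of one block on the path from the loop header to the
+/// current block, so a single block that would exceed its limit is enough to
+/// reject hoisting.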
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + CI->second >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
+ }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return true;
+
+ // If the instruction is cheap, only hoist it if it is re-materializable;
+ // otherwise LICM will increase register pressure for little benefit.
+ // Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
+ // Hoisting these tends to help performance in low register pressure
+ // situations. The trade-off is that it may cause a spill in high pressure
+ // situations, which ends up adding a store in the loop preheader, but the
+ // reload is no more expensive. A side benefit is that these loads are
+ // frequently CSE'ed.
+ if (IsCheapInstruction(MI)) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
+ return false;
+ } else {
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situations, we can be more aggressive about
+ // hoisting. Also, favor hoisting long-latency instructions even in
+ // moderately high pressure situations.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ ++NumHighLatency;
+ return true;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Visit BBs from the header to the current BB; if hoisting this instruction
+ // doesn't cause high register pressure, it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost)) {
+ ++NumLowRP;
+ return true;
+ }
+
+ // In a high register pressure situation, only hoist if the instruction is
+ // going to be rematerialized.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA))
+ return false;
+ }
+
+ // If the result(s) of this instruction are used by PHIs outside of the loop,
+ // don't hoist it, because that would introduce an extra copy.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (HasAnyPHIUse(MO.getReg()))
+ return false;
+ }
+
+ return true;
+}
+
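+/// ExtractHoistableLoad - If MI is not itself a simple load but reads
+/// invariant memory through a folded memory operand, unfold that operand into
+/// a separate load of a new virtual register followed by the register-form
+/// instruction, and return the load as the new hoisting candidate. Returns
+/// null if unfolding is impossible or the unfolded load is not worth hoisting.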
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->getDesc().canFoldAsLoad())
+ return 0;
+
+ // If not, we may be able to unfold a load and hoist that.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->isInvariantLoad(AA))
+ return 0;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return 0;
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ if (MID.getNumDefs() != 1) return 0;
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = MRI->createVirtualRegister(RC);
+
+ MachineFunction &MF = *MI->getParent()->getParent();
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success =
+ TII->unfoldMemoryOperand(MF, MI, Reg,
+ /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+ NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MI, NewMIs[0]);
+ MBB->insert(MI, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return 0;
+ }
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
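+/// InitCSEMap - Record the instructions already present in BB (the loop
+/// preheader), keyed by opcode, so that subsequently hoisted instructions can
+/// be CSE'd against them.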
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
+ return PrevMI;
+ }
+ return 0;
+}
+
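+/// EliminateCSE - Given a hoisted instruction MI and the CSE map entry for
+/// its opcode, look for an existing instruction that produces the same value;
+/// if found, rewrite MI's virtual register defs to use the existing
+/// instruction's and erase MI. Returns true if MI was eliminated.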
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ MRI->clearKillFlags(Dup->getOperand(i).getReg());
+ }
+ }
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// Hoist - When an instruction is found to use only loop-invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return false;
+ }
+
+ // Now move the instruction to the predecessor, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (Preheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << Preheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
+
+ // Look for an opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isDead())
+ MRI->clearKillFlags(MO.getReg());
+ }
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+
+ return true;
+}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return 0;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+ }
+ }
+ return CurPreheader;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 000000000000..189cb2ba5d1d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,83 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace llvm {
+#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLB);
+#undef MLB
+#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLIB);
+#undef MLIB
+}
+
+char MachineLoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB != Begin) {
+ MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB == Begin) break;
+ PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ }
+ }
+ return TopMBB;
+}
+
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB != prior(End)) {
+ MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+ if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+ NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ }
+ }
+ return BotMBB;
+}
+
+void MachineLoop::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp b/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 000000000000..17fe67f65045
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<SlotIndexes>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Indexes = &getAnalysis<SlotIndexes>();
+ return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+ DeleteContainerSeconds(Cache);
+ Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
+ MachineLoopRange *&Range = Cache[Loop];
+ if (!Range)
+ Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+ return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
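+/// Intervals records the [start, stop) SlotIndex range of every basic block
+/// in the loop, and Area accumulates their total distance as a rough measure
+/// of loop size (used by the byAreaDesc comparator).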
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+ MachineLoopRange::Allocator &alloc,
+ SlotIndexes &Indexes)
+ : Loop(loop), Intervals(alloc), Area(0) {
+ // Compute loop coverage.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+ Intervals.insert(Range.first, Range.second, 1u);
+ Area += Range.first.distance(Range.second);
+ }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+ Map::const_iterator I = Intervals.find(Start);
+ return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+ return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ unsigned na = a->getNumber();
+ unsigned nb = b->getNumber();
+ if (na < nb)
+ return -1;
+ if (na > nb)
+ return 1;
+ return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+/// 1. Descending area.
+/// 2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ if (a->getArea() != b->getArea())
+ return a->getArea() > b->getArea() ? -1 : 1;
+ return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+ OS << "Loop#" << getNumber() << " =";
+ for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+ OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+ MLR.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 000000000000..fadc594efcb2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,567 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the pass registration stuff necessary to use MachineModuleInfo.
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+ MMIAddrLabelMap *Map;
+public:
+ MMIAddrLabelMapCallbackPtr() : Map(0) {}
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(MMIAddrLabelMap *map) { Map = map; }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *V2);
+};
+
+class MMIAddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// Symbols - The symbols for the label. This is a pointer union that is
+ /// either one symbol (the common case) or a list of symbols.
+ PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// BBCallbacks - Callbacks for the BasicBlocks that we have entries for. We
+ /// use these so we get notified if a block is deleted or RAUW'd.
+ std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+ /// whose corresponding BasicBlock got deleted. These symbols need to be
+ /// emitted at some point in the file, so AsmPrinter emits them after the
+ /// function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DeletedAddrLabelsNeedingEmission;
+public:
+
+ MMIAddrLabelMap(MCContext &context) : Context(context) {}
+ ~MMIAddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+
+ // Deallocate any of the 'list of symbols' case.
+ for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+ I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+ if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+ delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+ }
+
+ MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+ std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol*> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.isNull()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ if (Entry.Symbols.is<MCSymbol*>())
+ return Entry.Symbols.get<MCSymbol*>();
+ return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.push_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size()-1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Result = Context.CreateTempSymbol();
+ Entry.Symbols = Result;
+ return Result;
+}
+
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ std::vector<MCSymbol*> Result;
+
+ // If we already had an entry for this block, just return it.
+ if (Entry.Symbols.isNull())
+ Result.push_back(getAddrLabelSymbol(BB));
+ else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
+ Result.push_back(Sym);
+ else
+ Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
+ return Result;
+}
+
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
+/// them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = 0; // Clear the callback.
+
+ assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+
+ // Handle both the single and the multiple symbols cases.
+ if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ } else {
+ std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
+ MCSymbol *Sym = (*Syms)[i];
+ if (Sym->isDefined()) continue; // Ignore already emitted labels.
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from
+ // 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+
+ // The entry is deleted, free the memory associated with the symbol list.
+ delete Syms;
+ }
+}
+
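+/// UpdateForRAUWBlock - Old is being replaced by New; transfer Old's
+/// address-taken label symbols to New, merging them with any symbols New
+/// already has.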
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.isNull()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = OldEntry; // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = 0; // Update the callback.
+
+ // Otherwise, we need to add the old symbol to the new block's set. If it is
+ // just a single entry, upgrade it to a symbol list.
+ if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+ SymList->push_back(PrevSym);
+ NewEntry.Symbols = SymList;
+ }
+
+ std::vector<MCSymbol*> *SymList =
+ NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ // If the old entry was a single symbol, add it.
+ if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ SymList->push_back(Sym);
+ return;
+ }
+
+ // Otherwise, concatenate the list.
+ std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+ SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+ delete Syms;
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const TargetAsmInfo *TAI)
+: ImmutablePass(ID), Context(MAI, TAI),
+ ObjFileMMI(0),
+ CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
+ CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
+ AddrLabelSymbols = 0;
+ TheModule = 0;
+}
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
+ assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
+ "should always be explicitly constructed by LLVMTargetMachine");
+ abort();
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+ delete ObjFileMMI;
+
+ // FIXME: Why isn't doFinalization being called??
+ //assert(AddrLabelSymbols == 0 && "doFinalization not called");
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = 0;
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+ assert(AddrLabelSymbols == 0 && "Improperly initialized");
+ return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = 0;
+ return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ CallSiteMap.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for functions listed in llvm.used.
+///
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
+ // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+ // UsedFunctions.
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const Function *F =
+ dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+ UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken. This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well, so return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but those blocks were
+/// deleted, return their symbols now so we can emit them. This prevents
+/// emitting a reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol*> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (AddrLabelSymbols == 0) return;
+ return AddrLabelSymbols->
+ takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ const Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ // If this is the first personality we're adding, go
+ // ahead and add it at the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<const GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<const GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = 0;
+
+ // Special case: we *should* emit LPs with a null LP MBB. This indicates the
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j, --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
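+/// For example, starting from empty tables, a filter {2, 3} gets id -1 and
+/// leaves FilterIds = [2, 3, 0] (0 is the terminator) and FilterEnds = [2];
+/// a later filter {3} matches the tail of that entry and gets id -2 without
+/// growing FilterIds.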
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+ FilterIds.push_back(TyIds[I]);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return a unique index for the current personality
+/// function. The NULL/first personality function should always get index
+/// zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan the landing pads. If there is at least one non-NULL personality, use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This will happen if the current personality function is
+ // at index zero.
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 000000000000..5ab56c09f5f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::Anchor() {}
+void MachineModuleInfoELF::Anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+ const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+ const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+ MachineModuleInfoImpl::StubValueTy>&Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 000000000000..9f4ef1287803
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 000000000000..4b3e64c25f60
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,229 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ UsedPhysRegs.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ assert(!PhysRegUseDefLists[i] &&
+ "PhysRegUseDefLists has entries after all instructions are deleted");
+#endif
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ VRegInfo[Reg].first = RC;
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
+ if (!NewRC)
+ return 0;
+ if (NewRC != OldRC)
+ setRegClass(Reg, NewRC);
+ return NewRC;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
+ // Add a reg, but keep track of whether the vector reallocated or not.
+ const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
+ void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
+
+ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
+ // The vector reallocated, handle this now.
+ HandleVRegListReallocation();
+ return Reg;
+}
+
+/// HandleVRegListReallocation - We just added a virtual register to the
+/// VRegInfo info list and it reallocated. Update the use/def lists info
+/// pointers.
+void MachineRegisterInfo::HandleVRegListReallocation() {
+ // The back pointers for the vreg lists point into the previous vector.
+ // Update them to point to their correct slots.
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ MachineOperand *List = VRegInfo[Reg].second;
+ if (!List) continue;
+ // Update the back-pointer to be accurate once more.
+ List->Contents.Reg.Prev = &VRegInfo[Reg].second;
+ }
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ // Since we are in SSA form, we can use the first definition.
+ if (!def_empty(Reg))
+ return &*def_begin(Reg);
+ return 0;
+}
+
+bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
+ use_iterator UI = use_begin(RegNo);
+ if (UI == use_end())
+ return false;
+ return ++UI == use_end();
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+ use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+ if (UI == use_nodbg_end())
+ return false;
+ return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+ for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+ UI.getOperand().setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == Reg || I->second == Reg)
+ return true;
+ return false;
+}
+
+bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
+ for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+ if (*I == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->second == VReg)
+ return I->first;
+ return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == PReg)
+ return I->second;
+ return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+}
+
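+/// closePhysRegsUsed - For each physical register marked used, also mark its
+/// sub-registers (those with a higher register number) as used.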
+void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
+ for (int i = UsedPhysRegs.find_first(); i >= 0;
+ i = UsedPhysRegs.find_next(i))
+ for (const unsigned *SS = TRI.getSubRegisters(i);
+ unsigned SubReg = *SS; ++SS)
+ if (SubReg > unsigned(i))
+ UsedPhysRegs.set(SubReg);
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 000000000000..84d6df25397c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,372 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on SSAUpdater
+// class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(0), InsertedPHIs(NewPHI) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete &getAvailableVals(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. V is the register whose class is used when creating new PHI
+/// nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
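+/// LookForIdenticalPHI - If one of the PHIs at the start of BB merges exactly
+/// the given (predecessor, register) values, return the register it defines;
+/// otherwise return 0.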
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->front();
+ if (!I->isPHI())
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which
+/// defines a value of the given register class at the start of the specified
+/// basic block. It returns the newly created instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
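+/// findCorrespondingPred - Given a PHI instruction and a pointer to one of its
+/// register operands, return the predecessor basic block paired with that
+/// operand.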
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+ return 0;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+ MRI->replaceRegWith(OldReg, NewReg);
+
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+ I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+ if (I->second == OldReg)
+ I->second = NewReg;
+}
+
+/// MachinePHIiter - Iterator for PHI operands. This is used for the
+/// PHI_iterator in the SSAUpdaterImpl template.
+namespace {
+ class MachinePHIiter {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit MachinePHIiter(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ MachinePHIiter(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ MachinePHIiter &operator++() { idx += 2; return *this; }
+ bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+}
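+
+// For illustration only: walking a PHI's incoming (value, block) pairs with
+// the iterator above might look like this (PHI being some MachineInstr*):
+//
+//   for (MachinePHIiter I(PHI), E(PHI, true); I != E; ++I)
+//     dbgs() << I.getIncomingValue() << " from BB#"
+//            << I.getIncomingBlock()->getNumber() << '\n';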
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ typedef MachineBasicBlock BlkT;
+ typedef unsigned ValT;
+ typedef MachineInstr PhiT;
+
+ typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ typedef MachinePHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ MachineBasicBlock *Pred) {
+ PHI->addOperand(MachineOperand::CreateReg(Val, false));
+ PHI->addOperand(MachineOperand::CreateMBB(Pred));
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return 0;
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static unsigned GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (unsigned V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 000000000000..916dff70a41e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,610 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+
+namespace {
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *LI;
+ AliasAnalysis *AA;
+ BitVector AllocatableSet; // Which physregs are allocatable?
+
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ }
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+
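+/// PerformTrivialForwardCoalescing - If MI is a COPY between virtual registers
+/// of the same register class, whose source has a single non-debug use and is
+/// not itself defined by a copy, replace all uses of the destination register
+/// with the source register and erase the copy. With hypothetical vregs:
+///
+///   %vreg1<def> = ...
+///   %vreg2<def> = COPY %vreg1   <-- erased; uses of %vreg2 now use %vreg1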
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the vreg. Dwarf generator handles this although the
+ // user might not get the right info at runtime.
+
+  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
+ }
+
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+
+ return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<MachineLoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ AllocatableSet = TRI->getAllocatableSet(MF);
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ CEBCandidates.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
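+/// ProcessBlock - Walk MBB bottom-up, first trying to coalesce trivial copies
+/// and then trying to sink each remaining instruction into one of MBB's
+/// successors. Returns true if the block was changed.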
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+  // Can't sink anything out of a block that has fewer than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&MBB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (MI->isDebugValue())
+ continue;
+
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
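+/// isWorthBreakingCriticalEdge - Decide whether splitting the critical edge
+/// From->To in order to sink MI is likely to pay off. An edge already
+/// considered during this pass is always worth splitting again; otherwise only
+/// non-trivial (not as-cheap-as-a-move) instructions, or cheap ones whose
+/// source-operand definitions might then be sunk as well, justify a split.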
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ return true;
+
+  // MI is cheap, so we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
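+/// SplitCriticalEdge - Split the critical edge FromBB->ToBB so that MI can be
+/// sunk into the new block, if doing so is both legal and profitable; return
+/// the new block, or 0 if the edge was not split.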
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
+ // Avoid breaking back edge. From == To means backedge for single BB loop.
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
+ return 0;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+ }
+
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
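+/// AvoidsSinking - Return true if MI is an instruction this pass never sinks:
+/// INSERT_SUBREG, SUBREG_TO_REG and REG_SEQUENCE are kept close to their
+/// sources to make them easier to coalesce.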
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ MachineBasicBlock *ParentBlock = MI->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+
+ bool BreakPHIEdge = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->def_empty(Reg))
+ return false;
+
+ if (AllocatableSet.test(Reg))
+ return false;
+
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI->def_empty(AliasReg))
+ return false;
+
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+ return false;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ BreakPHIEdge, LocalUse))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+ E = ParentBlock->succ_end(); SI != E; ++SI) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ BreakPHIEdge, LocalUse)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return false;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+ }
+ }
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+  // It's not safe to sink instructions into an EH landing pad, since control
+  // flow into a landing pad is implicitly defined.
+ if (SuccToSinkTo->isLandingPad())
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MI->getParent() == SuccToSinkTo)
+ return false;
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+  // If the block has multiple predecessors, sinking would introduce a
+  // computation on a path where it does not already exist. We could split
+  // the critical edge,
+ // but for now we just punt.
+ if (SuccToSinkTo->pred_size() > 1) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool TryBreak = false;
+ bool store = true;
+ if (!MI->isSafeToMove(TII, AA, store)) {
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
+ }
+
+ // Don't sink instructions into a loop.
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ BreakPHIEdge = false;
+ }
+ }
+ }
+
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+    // sunk into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+
+  // Determine where to insert the instruction. Skip over any PHI nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+ ++InsertPos;
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MI->clearKillInfo();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 000000000000..7a55852a1315
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,1218 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
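+//
+// For example, an illustrative invocation (the input file name is arbitrary):
+//   llc -verify-machineinstrs foo.ll
+//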
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, const char *b) :
+ PASS(pass),
+ Banner(b),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const char *const OutFileName;
+ raw_ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegSet regsLiveInButUnused;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ RV.push_back(*R);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
+
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ MF.verify(this, Banner);
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
+}
+
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
+ if (OutFileName) {
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
+ } else {
+ OS = &errs();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LiveVars = NULL;
+ LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+ MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+ }
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ *OS << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getFunction()->getNameStr() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: " << MBB->getName()
+ << " " << (void*)MBB
+ << " (BB#" << MBB->getNumber() << ")";
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = TRI->getReservedRegs(*MF);
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+ regsReserved.set(*Sub);
+ }
+ }
+ markReachable(&MF->front());
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())))
+ report("MBB has more than one landing pad successor", MBB);
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions to
+  // check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+ // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(MBBI)) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(TBB)) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+      } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ regsLive.insert(*R);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ regsLive.insert(*R);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !MCID.mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MCID.mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < MCID.getNumDefs()) {
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the arm back end.
+ if (MO->isReg() &&
+ !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+
+ // Check Live Variables.
+ if (MI->isDebugValue()) {
+ // Liveness checks are not valid for debug values.
+ } else if (MO->isUse() && !MO->isUndef()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg)
+ isKill = true;
+ else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical",
+ MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
+ if (isKill)
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(),
+ VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ } else if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+
+ // Check register classes.
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned sr = Reg;
+ if (SubIdx) {
+ unsigned s = TRI->getSubReg(Reg, SubIdx);
+ if (!s) {
+ report("Invalid subregister index for physical register",
+ MO, MONum);
+ return;
+ }
+ sr = s;
+ }
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (!DRC->contains(sr)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(sr) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ RC = SRC;
+ }
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
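
calcRegsPassed above (and calcRegsRequired below) is an ordinary worklist fixpoint: a block's set is pushed along CFG edges and a neighbor is revisited whenever its set grows. A rough standalone sketch of the forward direction, using plain STL containers instead of the verifier's BBInfo bookkeeping (block ids, container choices, and names here are illustrative only, not the LLVM code):

#include <map>
#include <queue>
#include <set>
#include <vector>

// Propagate each block's live-out register set to every block reachable from
// it; Passed[B] ends up holding every register that can enter B live.
void propagatePassedSets(const std::map<int, std::vector<int>> &Succs,
                         const std::map<int, std::set<unsigned>> &LiveOut,
                         std::map<int, std::set<unsigned>> &Passed) {
  std::queue<int> Todo;
  // Seed: each successor receives its predecessors' live-out registers.
  for (const auto &BS : Succs) {
    auto LO = LiveOut.find(BS.first);
    if (LO == LiveOut.end())
      continue;
    for (int S : BS.second) {
      auto Before = Passed[S].size();
      Passed[S].insert(LO->second.begin(), LO->second.end());
      if (Passed[S].size() != Before)
        Todo.push(S);
    }
  }
  // Iterate: keep pushing grown sets forward until nothing changes.
  while (!Todo.empty()) {
    int B = Todo.front();
    Todo.pop();
    auto SI = Succs.find(B);
    if (SI == Succs.end())
      continue;
    for (int S : SI->second) {
      if (S == B)
        continue; // merging a set into itself cannot change it
      auto Before = Passed[S].size();
      Passed[S].insert(Passed[B].begin(), Passed[B].end());
      if (Passed[S].size() != Before)
        Todo.push(S);
    }
  }
}

The same shape, walking predecessor edges instead of successor edges, gives calcRegsRequired.
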
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ DenseSet<const MachineBasicBlock*> seen;
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+ }
+
+ // Now check liveness info if available
+ if (LiveVars || LiveInts)
+ calcRegsRequired();
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
+ LVE = LiveInts->end(); LVI != LVE; ++LVI) {
+ const LiveInterval &LI = *LVI->second;
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->use_empty(LI.reg))
+ continue;
+
+ // Physical registers have much weirdness going on, mostly from coalescing.
+ // We should probably fix it, but for now just ignore them.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ continue;
+
+ assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I) {
+ VNInfo *VNI = *I;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ if (!VNI->isUnused()) {
+ report("Valno not live at def and not marked unused", MF);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (VNI->isUnused())
+ continue;
+
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+ continue;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ continue;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber()
+ << " in " << LI << '\n';
+ }
+ } else {
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ bool isEarlyClobber = false;
+ if (MI) {
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+ MOI->isEarlyClobber()) {
+ isEarlyClobber = true;
+ break;
+ }
+ }
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isUse()) {
+ report("Early clobber def must be at a USE slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ } else if (!VNI->def.isDef()) {
+ report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ }
+ }
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF);
+ I->print(*OS);
+ *OS << " has a valno not in " << LI << '\n';
+ }
+
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ !MI->readsVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ // FIXME: Should we check for each of these?
+ bool hasDeadDef = false;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
+ hasDeadDef = true;
+ break;
+ }
+ }
+
+ if (!hasDeadDef) {
+ report("Instruction killing live segment neither defines nor reads "
+ "register", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ continue;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
+ const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+
+ if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) {
+ if (PVNI && !PVNI->hasPHIKill()) {
+ report("Value live out of predecessor doesn't have PHIKill", MF);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << " doesn't have PHIKill, but Valno #" << VNI->id
+ << " is PHIDef and defined at the beginning of BB#"
+ << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI)
+ << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << PEnd << " in " << LI << '\n';
+ continue;
+ }
+
+ if (PVNI != VNI) {
+ report("Different value live out of predecessor", *PI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+ }
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ *OS << NumComp << " components in " << LI << '\n';
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
+ }
+ }
+}
+
diff --git a/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp
new file mode 100644
index 000000000000..cf05275d7a31
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp
@@ -0,0 +1,141 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+
+//===----------------------------------------------------------------------===//
+// ObjectCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
+ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
+ObjectCodeEmitter::~ObjectCodeEmitter() {}
+
+/// setBinaryObject - set the BinaryObject we are writing to
+void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
+
+/// emitByte - This callback is invoked when a byte needs to be
+/// written to the data stream, without buffer overflow testing.
+void ObjectCodeEmitter::emitByte(uint8_t B) {
+ BO->emitByte(B);
+}
+
+/// emitWordLE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitWordLE(uint32_t W) {
+ BO->emitWordLE(W);
+}
+
+/// emitWordBE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitWordBE(uint32_t W) {
+ BO->emitWordBE(W);
+}
+
+/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
+ BO->emitDWordLE(W);
+}
+
+/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
+ BO->emitDWordBE(W);
+}
+
+/// emitAlignment - Align 'BO' to the necessary alignment boundary.
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
+ uint8_t fill /* 0 */) {
+ BO->emitAlignment(Alignment, fill);
+}
+
+/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
+ BO->emitULEB128Bytes(Value);
+}
+
+/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
+ BO->emitSLEB128Bytes(Value);
+}
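
emitULEB128Bytes and emitSLEB128Bytes just forward to BinaryObject, so the actual variable-length encoding is not visible here. For reference, a minimal standalone sketch of the standard LEB128 encodings (DWARF-style; this is not the BinaryObject implementation, and the signed variant assumes the usual arithmetic right shift for negative values):

#include <cstdint>
#include <vector>

// Unsigned LEB128: emit 7 bits per byte, high bit set while more bytes follow.
void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (Value != 0);
}

// Signed LEB128: stop once the remaining bits are pure sign extension.
void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // assumed arithmetic shift, preserving the sign
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

For example, encodeULEB128(300, Out) appends 0xAC 0x02, and encodeSLEB128(-2, Out) appends 0x7E.
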
+
+/// emitString - This callback is invoked when a String needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitString(const std::string &String) {
+ BO->emitString(String);
+}
+
+/// getCurrentPCValue - This returns the address that the next emitted byte
+/// will be output to.
+uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// getCurrentPCOffset - Return the offset from the start of the emitted
+/// buffer that we are currently writing to.
+uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// addRelocation - Whenever a relocatable address is needed, it should be
+/// noted with this interface.
+void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
+ BO->addRelocation(relocation);
+}
+
+/// StartMachineBasicBlock - This should be called by the target when a new
+/// basic block is about to be emitted. This way the MCE knows where the
+/// start of the block is, and can implement getMachineBasicBlockAddress.
+void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+}
+
+/// getMachineBasicBlockAddress - Return the address of the specified
+/// MachineBasicBlock, only usable after the label for the MBB has been
+/// emitted.
+uintptr_t
+ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+}
+
+/// getJumpTableEntryAddress - Return the address of the jump table with index
+/// 'Index' in the function that last called initJumpTableInfo.
+uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+}
+
+/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+}
+
+/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
+ assert(CPSections.size() > Index && "CP not emitted!");
+ return CPSections[Index];
+}
+
+} // end namespace llvm
+
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 000000000000..48db200c513c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,37 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+ class OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 000000000000..c05be130ec61
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,190 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+ typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false)
+
+FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getTarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= OptimizeBB(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
+ E = MRI->use_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+ PI != PE; ++PI) {
+ MachineInstr *PhiMI = *PI;
+ if (&*MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 000000000000..af65f13bf065
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,434 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in this
+    /// function. In particular, for each virtual register used by a PHI node
+    /// we count its uses, keyed by the BB the vreg is coming from. This is
+    /// used later to determine when the vreg is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ LiveVariables &LV, MachineLoopInfo *MLI);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Split critical edges to help the coalescer
+ if (!DisableEdgeSplitting) {
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+ }
+ }
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(MF);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(MF, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(DefReg))
+ DefMI->eraseFromParent();
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+ I != E; ++I)
+ MF.DeleteMachineInstr(I->first);
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+
+ while (MBB.front().isPHI())
+ LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ unsigned SrcReg = MPhi->getOperand(i).getReg();
+ const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (!DefMI || !DefMI->isImplicitDef())
+ return false;
+ }
+ return true;
+}
+
+
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
+void PHIElimination::LowerAtomicPHINode(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumAtomic;
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
+ }
+
+ // Update live variable information if there is any.
+ LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ VI.NumUses++;
+ LV->setPHIJoin(IncomingReg);
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+      // for each incoming block), the "def" block and instruction fields of
+      // the VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path for the PHI.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // If source is defined by an implicit def, there is no need to insert a
+ // copy.
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ ImpDefs.insert(DefMI);
+ continue;
+ }
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy, this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ if (!reusedIncoming && IncomingReg)
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
+
+    // Now update live variable information if we have it. Otherwise we're done.
+ if (!LV) continue;
+
+ // We want to be able to insert a kill of the register if this PHI (aka, the
+ // copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value is
+ // live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Also check to see if this register is in use by another PHI node which
+ // has not yet been eliminated. If so, it will be killed at an appropriate
+ // point later.
+
+ // Is it used by any PHI instructions in this block?
+ bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
+
+ // Okay, if we now know that the value is not live out of the block, we can
+ // add a kill marker in this block saying that it kills the incoming value!
+ if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, the first
+ // terminator instruction at the end of the block may also use the value.
+ // In this case, we should mark *it* as being the killing block, not the
+ // copy.
+ MachineBasicBlock::iterator KillInst;
+ MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+ if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
+ KillInst = Term;
+
+ // Check that no other terminators use values.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = llvm::next(Term);
+ TI != opBlock.end(); ++TI) {
+ if (TI->isDebugValue())
+ continue;
+ assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+ "they are the first terminator in a block!");
+ }
+#endif
+ } else if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = Term;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg)
+ MF.DeleteMachineInstr(MPhi);
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, for each virtual register used by a PHI node we count its
+/// uses, keyed by the BB the vreg is coming from. This is used later to
+/// determine when the vreg is killed in the BB.
+///
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+ BBI->getOperand(i).getReg())];
+}
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ bool Changed = false;
+ for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // We break edges when registers are live out from the predecessor block
+ // (not considering PHI nodes). If the register is live in to this block
+ // anyway, we would gain nothing from splitting.
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB != &MBB &&
+ !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+ if (!MLI ||
+ !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+ MLI->isLoopHeader(&MBB))) {
+ if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ }
+ }
+ }
+ return Changed;
+}
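
As a reading aid only, the splitting condition buried in the nested ifs above can be restated as a small predicate; the boolean parameters stand in for the LiveVariables and MachineLoopInfo queries, and the last one approximates the "predecessor is in the same loop and this block is its header" backedge test:

// Illustrative restatement of the SplitPHIEdges heuristic (not LLVM API).
bool shouldSplitPHIEdge(bool IsSelfEdge, bool RegLiveInToPHIBlock,
                        bool RegLiveOutOfPred, bool EdgeIsLoopBackedge) {
  if (IsSelfEdge)
    return false;            // Nothing to gain from splitting a self edge.
  if (RegLiveInToPHIBlock)
    return false;            // Already live in; splitting gains nothing.
  if (!RegLiveOutOfPred)
    return false;            // The value dies in the predecessor anyway.
  if (EdgeIsLoopBackedge)
    return false;            // Avoid out-of-line blocks inside the loop.
  return true;               // Live out of the pred, not live in here: split.
}
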
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..10bfdcce6769
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..9ac47fb4c505
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
new file mode 100644
index 000000000000..315aedddb9ef
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -0,0 +1,73 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ createDefaultRegisterAllocator);
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&createDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+
+ // This forces linking of the linear scan register allocator,
+ // so -regalloc=linearscan still works in clang.
+ if (Ctor == createLinearScanRegisterAllocator)
+ return createLinearScanRegisterAllocator();
+
+ if (Ctor != createDefaultRegisterAllocator)
+ return Ctor();
+
+ // When the 'default' allocator is requested, pick one based on OptLevel.
+ switch (OptLevel) {
+ case CodeGenOpt::None:
+ return createFastRegisterAllocator();
+ default:
+ return createGreedyRegisterAllocator();
+ }
+}
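
For context, a target or tool that wanted its own allocator selectable through -regalloc would register it through the same MachinePassRegistry mechanism shown by defaultRegAlloc above. The "toy" name and factory below are made up for illustration; a real factory would return the actual allocator pass rather than 0:

// Hypothetical extra allocator registration (illustration only).
static FunctionPass *createToyRegisterAllocator() { return 0; }
static RegisterRegAlloc
toyRegAlloc("toy", "toy register allocator (illustration only)",
            createToyRegisterAllocator);

With that in place, -regalloc=toy makes createRegisterAllocator pick the factory above instead of the OptLevel-based default.
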
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 000000000000..c523e39bc258
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,465 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the result.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction all ready sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+// - Optimize Bitcast pairs:
+//
+// v1 = bitcast v0
+// v2 = bitcast v1
+// = v2
+// =>
+// v1 = bitcast v0
+// = v0
+//
+//===----------------------------------------------------------------------===//
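
To make the comparison example above concrete, the sub/cmp/bz pattern typically comes from source like the sketch below; on targets where the decrement already sets condition flags, the explicit compare generated for the == 0 test is the instruction this pass removes (illustrative C++ only, not part of the imported sources):

// Source-level analogue of the sub/cmp/bz sequence described above.
bool decrementAndTest(int &Counter) {
  --Counter;            // lowers to something like: sub r1, 1
  return Counter == 0;  // lowers to: cmp r1, 0  followed by a branch; the cmp
                        // is redundant when the sub already set the flag
}
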
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediates folded");
+
+namespace {
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+ return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
+/// reads a single register and writes a single register and does not modify the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of the
+/// result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+ if (++UI == MRI->use_nodbg_end())
+ // No other uses.
+ return false;
+
+ // The source has other uses. See if we can replace the other uses with use of
+ // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+  // Uses that are in the same BB as uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ UI = MRI->use_nodbg_begin(SrcReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts a
+/// value across register classes), and its source is defined by another
+/// bitcast instruction B, and the register class of B's source matches the
+/// register class of A's definition, then it is legal to replace all uses of
+/// A's def with B's source. e.g.
+/// %vreg0<def> = VMOVSR %vreg1
+/// %vreg3<def> = VMOVRS %vreg0
+/// Replace all uses of vreg3 with vreg1.
+
+bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+
+ unsigned Def = 0;
+ unsigned Src = 0;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Def = Reg;
+ else if (Src)
+ // Multiple sources?
+ return false;
+ else
+ Src = Reg;
+ }
+
+ assert(Def && Src && "Malformed bitcast instruction!");
+
+ MachineInstr *DefMI = MRI->getVRegDef(Src);
+ if (!DefMI || !DefMI->getDesc().isBitcast())
+ return false;
+
+ unsigned SrcDef = 0;
+ unsigned SrcSrc = 0;
+ NumDefs = DefMI->getDesc().getNumDefs();
+ NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ SrcDef = Reg;
+ else if (SrcSrc)
+ // Multiple sources?
+ return false;
+ else
+ SrcSrc = Reg;
+ }
+
+ if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+ return false;
+
+ MRI->replaceRegWith(Def, SrcSrc);
+ MRI->clearKillFlags(SrcSrc);
+ MI->eraseFromParent();
+ ++NumBitcasts;
+ return true;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and
+/// use the flag from the previous instruction.
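+/// For example (illustrative ARM-style pseudo machine code, not taken from
+/// any particular target definition):
+///   SUBS %r0, %r1, %r2    ; already sets the condition flags
+///   CMP  %r0, #0          ; redundant, can be removed
+///   Bcc  ...              ; uses the flags produced by SUBS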
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg;
+ int CmpMask, CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ // Attempt to optimize the comparison instruction.
+ if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+ ++NumCmps;
+ return true;
+ }
+
+ return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
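+/// For example (illustrative pseudo machine code, assuming the target's
+/// TargetInstrInfo::FoldImmediate hook handles this pattern):
+///   %vreg1 = MOVi 42
+///   %vreg2 = ADDrr %vreg0, %vreg1
+/// could be folded by the hook into:
+///   %vreg2 = ADDri %vreg0, 42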
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePeephole)
+ return false;
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
+ LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+
+ bool First = true;
+ MachineBasicBlock::iterator PMII;
+ for (MachineBasicBlock::iterator
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+ LocalMIs.insert(MI);
+
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isBitcast()) {
+ if (OptimizeBitcastInstr(MI, MBB)) {
+ // MI is deleted.
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ } else if (MCID.isCompare()) {
+ if (OptimizeCmpInstr(MI, MBB)) {
+ // MI is deleted.
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
+ } else {
+ Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ if (SeenMoveImm)
+ Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ }
+
+ First = false;
+ PMII = MII;
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 000000000000..c73e87733cb4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,712 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AntiDepBreaker.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
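+// For example, an (illustrative) llc invocation forcing post-RA scheduling
+// with critical-path anti-dependency breaking:
+//   llc -post-RA-scheduler -break-anti-dependencies=critical input.ll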
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ AliasAnalysis *AA;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+ CodeGenOpt::Level OptLevel;
+
+ public:
+ static char ID;
+ PostRAScheduler(CodeGenOpt::Level ol) :
+ MachineFunctionPass(ID), OptLevel(ol) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Post RA top-down list latency scheduler";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// Topo - A topological ordering for SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ public:
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo&,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+
+ ~SchedulePostRATDList();
+
+ /// StartBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void Schedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// FinishBlock - Clean up register live-range state.
+ ///
+ void FinishBlock();
+
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
+
+ private:
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ void StartBlockForKills(MachineBasicBlock *BB);
+
+ // ToggleKillFlag - Toggle a register operand kill flag. Other
+ // adjustments may be made to the instruction if necessary. Return
+ // true if the operand has been deleted, false if not.
+ bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+ };
+}
+
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+ KillIndices(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ RegClassInfo.runOnMachineFunction(Fn);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+ if (EnablePostRAScheduler.getPosition() > 0) {
+ if (!EnablePostRAScheduler)
+ return false;
+ } else {
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+ return false;
+ }
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
+ }
+
+ DEBUG(dbgs() << "PostRAScheduler\n");
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+ ":BB#" << MBB->getNumber() << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.StartBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = llvm::prior(I);
+ if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.Run(MBB, I, Current, CurrentCount);
+ Scheduler.EmitSchedule();
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.FinishBlock();
+
+ // Update register kills
+ Scheduler.FixupKills(MBB);
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::StartBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ if (AntiDepBreak != NULL) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
+ InsertPosIndex, DbgValues);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::FinishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+ // Initialize the indices to indicate that no registers are live.
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+ KillIndices[i] = ~0u;
+
+ // Determine the live-out physregs for this block.
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+ MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (KillIndices[MO.getReg()] != ~0u) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and clear MO's kill flag; only if all subregs are dead does
+ // MO stay marked as the kill.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ MI->addOperand(MachineOperand::CreateReg(*Subreg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ false /*IsDead*/));
+ AllDead = false;
+ }
+ }
+
+ if (AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+/// FixupKills - Fix register kill flags that may have been made incorrect by
+/// instruction reordering.
+///
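+/// For example (illustrative): if scheduling reorders the uses of a register
+/// so that a different instruction becomes its last use, the stale kill flag
+/// must be cleared and the new last use marked as the kill instead.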
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ std::set<unsigned> killedRegs;
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+ StartBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Update liveness. Registers that are defed but not used in this
+ // instruction are now dead. Mark the register and all of its subregs as
+ // no longer live, since they are completely defined here.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ KillIndices[Reg] = ~0u;
+
+ // Repeat for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = ~0u;
+ }
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use.
+ killedRegs.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ bool kill = false;
+ if (killedRegs.find(Reg) == killedRegs.end()) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ kill = false;
+ break;
+ }
+ }
+
+ // If subreg is not live, then register is killed if it became
+ // live in this instruction
+ if (kill)
+ kill = (KillIndices[Reg] == ~0u);
+ }
+
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: ToggleKillFlag may invalidate MO.
+ ToggleKillFlag(MI, MO);
+ DEBUG(MI->dump());
+ }
+
+ killedRegs.insert(Reg);
+ }
+
+ // Mark any used register (that is not using undef) and subregs as
+ // now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ KillIndices[Reg] = Count;
+
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = Count;
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node which causes
+ // all descendants to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ bool available = SUnits[i].Preds.empty();
+ if (available) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+ SUnit *FoundSUnit = 0;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ } else {
+ if (CycleHasInsts) {
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
+ return new PostRAScheduler(OptLevel);
+}
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 000000000000..c04d65637c94
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,295 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+
+using namespace llvm;
+
+char ProcessImplicitDefs::ID = 0;
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool
+ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ SmallSet<unsigned, 8> &ImpDefRegs) {
+ switch(OpIdx) {
+ case 1:
+ return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+ ImpDefRegs.count(MI->getOperand(0).getReg()));
+ case 2:
+ return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+ ImpDefRegs.count(MI->getOperand(0).getReg()));
+ default: return false;
+ }
+}
+
+static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
+ SmallSet<unsigned, 8> &ImpDefRegs) {
+ if (MI->isCopy()) {
+ MachineOperand &MO0 = MI->getOperand(0);
+ MachineOperand &MO1 = MI->getOperand(1);
+ if (MO1.getReg() != Reg)
+ return false;
+ if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+ return true;
+ return false;
+ }
+ return false;
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
+/// there is one implicit_def for each use. Add isUndef marker to
+/// implicit_def defs and their uses.
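+/// For example (illustrative):
+///   %reg1024<def> = IMPLICIT_DEF
+///   %reg1025<def> = COPY %reg1024<kill>
+/// The COPY reads an undefined value, so it is itself turned into an
+/// IMPLICIT_DEF of %reg1025, and remaining uses of such registers are marked
+/// with the <undef> flag.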
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: "
+ << ((Value*)fn.getFunction())->getName() << '\n');
+
+ bool Changed = false;
+
+ TII = fn.getTarget().getInstrInfo();
+ TRI = fn.getTarget().getRegisterInfo();
+ MRI = &fn.getRegInfo();
+ LV = &getAnalysis<LiveVariables>();
+
+ SmallSet<unsigned, 8> ImpDefRegs;
+ SmallVector<MachineInstr*, 8> ImpDefMIs;
+ SmallVector<MachineInstr*, 4> RUses;
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ SmallPtrSet<MachineInstr*, 8> ModInsts;
+
+ MachineBasicBlock *Entry = fn.begin();
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isImplicitDef()) {
+ if (MI->getOperand(0).getSubReg())
+ continue;
+ unsigned Reg = MI->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ ImpDefRegs.insert(*SS);
+ }
+ ImpDefMIs.push_back(MI);
+ continue;
+ }
+
+ // Eliminate %reg1032:sub<def> = COPY undef.
+ if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ MachineOperand &MO = MI->getOperand(1);
+ if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
+ if (MO.isKill()) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
+ vi.removeKill(MI);
+ }
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ }
+
+ bool ChangedToImpDef = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!ImpDefRegs.count(Reg))
+ continue;
+ // If the use is a copy, just turn it into an implicit_def.
+ if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
+ bool isKill = MO.isKill();
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ if (isKill) {
+ ImpDefRegs.erase(Reg);
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
+ ChangedToImpDef = true;
+ Changed = true;
+ break;
+ }
+
+ Changed = true;
+ MO.setIsUndef();
+ // This is a partial register redef of an implicit def.
+ // Make sure the whole register is defined by the instruction.
+ if (MO.isDef()) {
+ MI->addRegisterDefined(Reg);
+ continue;
+ }
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+ // Make sure other uses of this register in this instruction are also
+ // marked undef.
+ for (unsigned j = i+1; j != e; ++j) {
+ MachineOperand &MOJ = MI->getOperand(j);
+ if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ MOJ.setIsUndef();
+ }
+ ImpDefRegs.erase(Reg);
+ }
+ }
+
+ if (ChangedToImpDef) {
+ // Backtrack to process this new implicit_def.
+ --I;
+ } else {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ ImpDefRegs.erase(MO.getReg());
+ }
+ }
+ }
+
+ // Any outstanding liveout implicit_def's?
+ for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = ImpDefMIs[i];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !ImpDefRegs.count(Reg)) {
+ // Delete all "local" implicit_def's. That includes those which define
+ // physical registers, since they cannot be live out.
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If there are multiple defs of the same register and at least one
+ // is not an implicit_def, do not insert implicit_def's before the
+ // uses.
+ bool Skip = false;
+ SmallVector<MachineInstr*, 4> DeadImpDefs;
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DeadImpDef = &*DI;
+ if (!DeadImpDef->isImplicitDef()) {
+ Skip = true;
+ break;
+ }
+ DeadImpDefs.push_back(DeadImpDef);
+ }
+ if (Skip)
+ continue;
+
+ // The only implicit_def's we want to keep are those that are live
+ // out of their block.
+ for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+ DeadImpDefs[j]->eraseFromParent();
+ Changed = true;
+
+ // Process each use instruction once.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI.getOperand().isUndef())
+ continue;
+ MachineInstr *RMI = &*UI;
+ if (ModInsts.insert(RMI))
+ RUses.push_back(RMI);
+ }
+
+ for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+ MachineInstr *RMI = RUses[i];
+
+ // Turn a copy use into an implicit_def.
+ if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
+ RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+
+ bool isKill = false;
+ SmallVector<unsigned, 4> Ops;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ Ops.push_back(j);
+ if (RRMO.isKill())
+ isKill = true;
+ }
+ }
+ // Leave the other operands alone.
+ for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+ unsigned OpIdx = Ops[j];
+ RMI->RemoveOperand(OpIdx-j);
+ }
+
+ // Update LiveVariables varinfo if the instruction is a kill.
+ if (isKill) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(RMI);
+ }
+ continue;
+ }
+
+ // Replace Reg with a new vreg that's marked implicit.
+ const TargetRegisterClass* RC = MRI->getRegClass(Reg);
+ unsigned NewVReg = MRI->createVirtualRegister(RC);
+ bool isKill = true;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ RRMO.setReg(NewVReg);
+ RRMO.setIsUndef();
+ if (isKill) {
+ // Only the first operand of NewVReg is marked kill.
+ RRMO.setIsKill();
+ isKill = false;
+ }
+ }
+ }
+ }
+ RUses.clear();
+ ModInsts.clear();
+ }
+ ImpDefRegs.clear();
+ ImpDefMIs.clear();
+ }
+
+ return Changed;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 000000000000..a901c5fefa3e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,852 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pei"
+#include "PrologEpilogInserter.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallsInformation(Fn);
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill code
+ // for any callee saved registers that are modified.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - With shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used.
+ // - Without shrink wrapping (default), place all spills in the
+ // entry block, all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn);
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers.
+ if (CSRegs == 0 || CSRegs[0] == 0)
+ return;
+
+ // In Naked functions we aren't going to save any registers.
+ if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg));
+ } else {
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+ *AliasSet; ++AliasSet) { // Check alias registers too.
+ if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+ CSI.push_back(CalleeSavedInfo(Reg));
+ break;
+ }
+ }
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ if (! ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MBB, I, Reg,
+ true,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->getDesc().isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
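+ // e.g. with Offset = 13 and Align = 8: (13 + 8 - 1) / 8 * 8 = 16.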
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address of
+ // the object -- which is given by the offset. For a downward-growing stack
+ // the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // FIXME: Once this is working, the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
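+ // Buffers that may overflow (those for which MayNeedStackProtector returns
+ // true) are given offsets immediately after the protector slot, so an
+ // overflow tends to run into the canary before reaching other frame data.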
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
+ !RegInfo->useFPForScavengingIndex(Fn))) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If argument space for call sites has been reserved immediately on entry
+ // to the current function, count it as part of the overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(Offset - LocalAreaOffset);
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ // Add prologue to the function...
+ TFI.emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn())
+ TFI.emitEpilogue(Fn, *I);
+ }
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+#ifndef NDEBUG
+ int SPAdjCount = 0; // frame setup / destroy count.
+#endif
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+#ifndef NDEBUG
+ // Track whether we see even pairs of them
+ SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
+#endif
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
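+ // Setup and destroy adjust SP in opposite directions, so flip the sign
+ // for one of them; a matched pair then contributes zero to SPAdj.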
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = llvm::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isFI()) {
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj,
+ FrameIndexVirtualScavenging ? NULL : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+
+ // If we have evenly matched pairs of frame setup / destroy instructions,
+ // make sure the adjustments come out to zero. If we don't have matched
+ // pairs, we can't be sure the missing bit isn't in another basic block
+ // due to a custom inserter playing tricks, so just asserting SPAdj==0
+ // isn't sufficient. See tMOVCC on Thumb1, for example.
+ assert((SPAdjCount || SPAdj == 0) &&
+ "Unbalanced call frame setup / destroy pairs?");
+ }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(BB);
+
+ unsigned VirtReg = 0;
+ unsigned ScratchReg = 0;
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ ++NumVirtualFrameRegs;
+
+ // Have we already allocated a scratch register for this virtual?
+ if (Reg != VirtReg) {
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ VirtReg = Reg;
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+ ++NumScavengedRegs;
+ }
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert (ScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(ScratchReg);
+
+ }
+ }
+ RS->forward(I);
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 000000000000..e2391591ad06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,177 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const {
+ return "Prolog/Epilog Insertion & Frame Finalization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+ // Conveniences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+ // Propagate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 000000000000..73b66d868f3d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,134 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include <map>
+using namespace llvm;
+
+namespace {
+struct PSVGlobalsTy {
+ // PseudoSourceValues are immutable so don't need locking.
+ const PseudoSourceValue PSVs[4];
+ sys::Mutex Lock; // Guards FSValues, but not the values inside it.
+ std::map<int, const PseudoSourceValue *> FSValues;
+
+ PSVGlobalsTy() : PSVs() {}
+ ~PSVGlobalsTy() {
+ for (std::map<int, const PseudoSourceValue *>::iterator
+ I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
+ delete I->second;
+ }
+ }
+};
+
+static ManagedStatic<PSVGlobalsTy> PSVGlobals;
+
+} // anonymous namespace
+
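+// Accessors for the four statically allocated PseudoSourceValues. Their order
+// in the PSVs array must match PSVNames below; printCustom() relies on that
+// correspondence.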
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &PSVGlobals->PSVs[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &PSVGlobals->PSVs[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &PSVGlobals->PSVs[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &PSVGlobals->PSVs[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+// FIXME: THIS IS A HACK!!!!
+// Eventually these should be uniqued on LLVMContext rather than in a managed
+// static. For now, we can safely use the global context to squeak by.
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
+ Value(Type::getInt8PtrTy(getGlobalContext()),
+ Subclass) {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[this - PSVGlobals->PSVs];
+}
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+ PSVGlobalsTy &PG = *PSVGlobals;
+ sys::ScopedLock locked(PG.Lock);
+ const PseudoSourceValue *&V = PG.FSValues[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+ return false;
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (this == getStack() ||
+ this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+ return true;
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ return true;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ // Negative frame indices are used for special things that don't
+ // appear in LLVM IR. Non-negative indices may be used for things
+ // like static allocas.
+ if (!MFI)
+ return FI >= 0;
+ // Spill slots should not alias others.
+ return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..031642117efc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,193 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "LiveIntervalUnion.h"
+#include "RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class Spiller;
+
+// Forward declare a priority queue of live virtual registers. If an
+// implementation needs to prioritize by anything other than spill weight, then
+// this will become an abstract base class with virtual calls to push/get.
+class LiveVirtRegQueue;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+ LiveIntervalUnion::Allocator UnionAllocator;
+
+ // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual
+ // registers may have changed.
+ unsigned UserTag;
+
+protected:
+ // Array of LiveIntervalUnions indexed by physical register.
+ class LiveUnionArray {
+ unsigned NumRegs;
+ LiveIntervalUnion *Array;
+ public:
+ LiveUnionArray(): NumRegs(0), Array(0) {}
+ ~LiveUnionArray() { clear(); }
+
+ unsigned numRegs() const { return NumRegs; }
+
+ void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned PhysReg) {
+ assert(PhysReg < NumRegs && "physReg out of bounds");
+ return Array[PhysReg];
+ }
+ };
+
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ RegisterClassInfo RegClassInfo;
+ LiveUnionArray PhysReg2LiveUnion;
+
+ // Current queries, one per physreg. They must be reinitialized each time we
+ // query on a new live virtual register.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis);
+
+ // Get an initialized query to check interferences between lvr and preg. Note
+ // that Query::init must be called at least once for each physical register
+ // before querying a new live virtual register. This ties Queries and
+ // PhysReg2LiveUnion together.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
+ Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]);
+ return Queries[PhysReg];
+ }
+
+ // Invalidate all cached information about virtual registers - live ranges may
+ // have changed.
+ void invalidateVirtRegs() { ++UserTag; }
+
+ // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ //
+ // If an implementation wants to override the LiveInterval comparator, we
+ // should modify this interface to allow passing in an instance derived from
+ // LiveVirtRegQueue.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueue(LiveInterval *LI) = 0;
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+ // Each call must guarantee forward progress by returning an available PhysReg
+ // or new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // A RegAlloc pass should call this when PassManager releases its memory.
+ virtual void releaseMemory();
+
+ // Helper for checking interference between a live virtual register and a
+ // physical register, including all its register aliases. If an interference
+ // exists, return the interfering register, which may be preg or an alias.
+ unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
+
+ /// assign - Assign VirtReg to PhysReg.
+ /// This should not be called from selectOrSplit for the current register.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// unassign - Undo a previous assignment of VirtReg to PhysReg.
+ /// This can be invoked from selectOrSplit, but be careful to guarantee that
+ /// allocation is making progress.
+ void unassign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ // Helper for spilling all live virtual registers currently unified under preg
+ // that interfere with the most recently queried lvr. Return true if spilling
+ // was successful, and append any new spilled/split intervals to splitLVRs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// addMBBLiveIns - Add physreg liveins to basic blocks.
+ void addMBBLiveIns(MachineFunction *);
+
+#ifndef NDEBUG
+ // Verify each LiveIntervalUnion.
+ void verify();
+#endif
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..5ea26adc7644
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,587 @@
+//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "LiveDebugVariables.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RenderMachineFunction.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(LiveInterval *A, LiveInterval *B) const {
+ return A->weight < B->weight;
+ }
+ };
+}
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ LiveStacks *LS;
+ RenderMachineFunction *RMF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+ CompSpillWeight> Queue;
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual void enqueue(LiveInterval *LI) {
+ Queue.push(LI);
+ }
+
+ virtual LiveInterval *dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ DEBUG(AU.addRequired<RenderMachineFunction>());
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
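+// Storage is obtained with malloc and each element is constructed with
+// placement new; LiveUnionArray::clear() runs the destructors and frees it.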
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+ PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+ // Cache an interference query for each physical reg
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+ }
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+ PhysReg2LiveUnion[r].clear();
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ enqueue(&VirtReg);
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
+
+// Helper for spillInterferences() that spills all interfering vregs currently
+// assigned to this physical register.
+void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
+ assert(Q.seenAllInterferences() && "need collectInterferences()");
+ const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
+
+ for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
+ E = PendingSpills.end(); I != E; ++I) {
+ LiveInterval &SpilledVReg = **I;
+ DEBUG(dbgs() << "extracting from " <<
+ TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ unassign(SpilledVReg, PhysReg);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+ spiller().spill(LRE);
+ }
+ // After extracting segments, the query's results are invalid. But keep the
+ // contents valid until we're done accessing pendingSpills.
+ Q.clear();
+}
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool
+RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ unsigned NumInterferences = 0;
+ // Collect interferences assigned to any alias of the physical register.
+ for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
+ NumInterferences += QAlias.collectInterferingVRegs();
+ if (QAlias.seenUnspillableVReg()) {
+ return false;
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(NumInterferences > 0 && "expect interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ spillReg(VirtReg, *AliasI, SplitVRegs);
+ return true;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ if (MF->size() <= 1)
+ return;
+
+ LiveIntervalUnion::SegmentIter SI;
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ MachineFunction::iterator MBB = llvm::next(MF->begin());
+ MachineFunction::iterator MFE = MF->end();
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.setMap(LiveUnion.getMap());
+ SI.find(Start);
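+ // Walk the union's segments and the basic blocks in parallel; a segment
+ // covering the start of a block makes PhysReg live into that block.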
+ while (SI.valid()) {
+ if (SI.start() <= Start) {
+ if (!MBB->isLiveIn(PhysReg))
+ MBB->addLiveIn(PhysReg);
+ } else if (SI.start() > Stop)
+ MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+ if (++MBB == MFE)
+ break;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.advanceTo(Start);
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// RABasic Implementation
+//===----------------------------------------------------------------------===//
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we find
+// an available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ ArrayRef<unsigned> Order =
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
+ for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
+ ++I) {
+ unsigned PhysReg = *I;
+
+ // Check interference and, as a side effect, initialize queries for this
+ // VirtReg and its aliases.
+ unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
+ if (interfReg == 0) {
+ // Found an available register.
+ return PhysReg;
+ }
+ LiveInterval *interferingVirtReg =
+ Queries[interfReg].firstInterference().liveUnionPos().value();
+
+ // The current VirtReg must either be spillable, or one of its interferences
+ // must have less spill weight.
+ if (interferingVirtReg->weight < VirtReg.weight ) {
+ PhysRegSpillCands.push_back(PhysReg);
+ }
+ }
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+
+ assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(VirtReg, SplitVRegs);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ addMBBLiveIns(MF);
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ // optional HTML output
+ DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
+
+ // FIXME: Verification currently must run before VirtRegRewriter. We should
+ // make the rewriter a separate pass and override verifyAnalysis instead. When
+ // that happens, verification naturally falls under VerifyMachineCode.
+#ifndef NDEBUG
+ if (VerifyEnabled) {
+ // Verify accuracy of LiveIntervals. The standard machine code verifier
+ // ensures that each LiveInterval covers all uses of the virtual reg.
+
+ // FIXME: MachineVerifier is badly broken when using the standard
+ // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
+ // inline spiller, some tests fail to verify because the coalescer does not
+ // always generate verifiable code.
+ MF->verify(this, "In RABasic::verify");
+
+ // Verify that the LiveIntervals are partitioned into unions and are disjoint
+ // within each union.
+ verify();
+ }
+#endif // !NDEBUG
+
+ // Run rewriter
+ VRM->rewrite(LIS->getSlotIndexes());
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 000000000000..b36a445291b7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1073 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+ class RAFast : public MachineFunctionPass {
+ public:
+ static char ID;
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
+ isBulkSpilling(false) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ // Basic block currently being allocated.
+ MachineBasicBlock *MBB;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse; // Last instr to use reg.
+ unsigned PhysReg; // Currently held here.
+ unsigned short LastOpNum; // OpNum on LastUse.
+ bool Dirty; // Register needs spill.
+
+ LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
+ Dirty(false) {}
+ };
+
+ typedef DenseMap<unsigned, LiveReg> LiveRegMap;
+ typedef LiveRegMap::value_type LiveRegEntry;
+
+ // LiveVirtRegs - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+
+ // RegState - Track the state of a physical register.
+ enum RegState {
+ // A disabled register is not available for allocation, but an alias may
+ // be in use. A register can only be moved out of the disabled state if
+ // all aliases are disabled.
+ regDisabled,
+
+ // A free register is not currently in use and can be allocated
+ // immediately without checking aliases.
+ regFree,
+
+ // A reserved register has been assigned explicitly (e.g., setting up a
+ // call parameter), and it remains reserved until it is used.
+ regReserved
+
+ // A register state may also be a virtual register number, indicating that
+ // the physical register is currently allocated to a virtual register. In
+ // that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ // PhysRegState - One of the RegState enums, or a virtreg.
+ std::vector<unsigned> PhysRegState;
+
+ // UsedInInstr - BitVector of physregs that are used in the current
+ // instruction, and so cannot be allocated.
+ BitVector UsedInInstr;
+
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
+
+ // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+ // completely after spilling all live registers. LiveRegMap entries should
+ // not be erased.
+ bool isBulkSpilling;
+
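+ // Relative costs used by calcSpillCost() when deciding which physical
+ // register is cheapest to take over.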
+ enum {
+ spillClean = 1,
+ spillDirty = 100,
+ spillImpossible = ~0u
+ };
+ public:
+ virtual const char *getPassName() const {
+ return "Fast Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool runOnMachineFunction(MachineFunction &Fn);
+ void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+ bool isLastUseOfLocalReg(MachineOperand&);
+
+ void addKillFlag(const LiveReg&);
+ void killVirtReg(LiveRegMap::iterator);
+ void killVirtReg(unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+ void usePhysReg(MachineOperand&);
+ void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+ unsigned calcSpillCost(unsigned PhysReg) const;
+ void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
+ void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+ LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ void spillAll(MachineInstr *MI);
+ bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ };
+ char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
+///
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+ // Check for non-debug uses or defs following MO.
+ // This is the most likely way to fail - fast path it.
+ MachineOperand *Next = &MO;
+ while ((Next = Next->getNextOperandForReg()))
+ if (!Next->isDebug())
+ return false;
+
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
+
+ // Check that the use/def chain has exactly one operand - MO.
+ return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
+}
+
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ else
+ LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+ }
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+ addKillFlag(LRI->second);
+ const LiveReg &LR = LRI->second;
+ assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+ PhysRegState[LR.PhysReg] = regFree;
+ // Erase from LiveVirtRegs unless we're spilling in bulk.
+ if (!isBulkSpilling)
+ LiveVirtRegs.erase(LRI);
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ if (LRI != LiveVirtRegs.end())
+ killVirtReg(LRI);
+}
+
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+ spillVirtReg(MI, LRI);
+}
+
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
+ LiveReg &LR = LRI->second;
+ assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = LR.LastUse != MI;
+ LR.Dirty = false;
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
+ int FI = getStackSpaceFor(LRI->first, RC);
+ DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ ++NumStores; // Update statistics
+
+ // If this register is used by a DBG_VALUE then insert a new DBG_VALUE that
+ // identifies the spill slot as the place to find the corresponding
+ // variable's value.
+ SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
+ const MDNode *MDPtr =
+ DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+ int64_t Offset = 0;
+ if (DBG->getOperand(1).isImm())
+ Offset = DBG->getOperand(1).getImm();
+ DebugLoc DL;
+ if (MI == MBB->end()) {
+ // If MI is at basic block end then use last instruction's location.
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ }
+ }
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to it; they all point to the spilled value now.
+ LRIDbgValues.clear();
+ if (SpillKill)
+ LR.LastUse = 0; // Don't kill register again
+ }
+ killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineInstr *MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
+
+ // Maybe a superregister is reserved?
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+ RegState NewState) {
+ UsedInInstr.set(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
+ PhysRegState[PhysReg] = NewState;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
+
+
+// calcSpillCost - Return the cost of spilling and clearing out PhysReg and its
+// aliases so that the register is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (UsedInInstr.test(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+ return spillImpossible;
+ }
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default:
+ return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ }
+
+ // This is a disabled register, add up cost of aliases.
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ if (UsedInInstr.test(Alias))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default:
+ Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ break;
+ }
+ }
+ return Cost;
+}
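+// Editor's note (worked example, not part of the original patch): for a
+// disabled PhysReg whose aliases are one free register and one register
+// holding a dirty virtreg, the loop above returns 1 + 100 = 101. If instead
+// any alias is reserved, or is already marked in UsedInInstr, the function
+// returns spillImpossible and allocVirtReg() will never pick PhysReg as its
+// best candidate.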
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
+ PhysRegState[PhysReg] = LRE.first;
+ assert(!LRE.second.PhysReg && "Already assigned a physreg");
+ LRE.second.PhysReg = PhysReg;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
+ const unsigned VirtReg = LRE.first;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
+ return assignVirtToPhysReg(LRE, Hint);
+ }
+ }
+
+ ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC);
+
+ // First try to find a completely free register.
+ for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ unsigned PhysReg = *I;
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
+ return assignVirtToPhysReg(LRE, PhysReg);
+ }
+
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
+
+ unsigned BestReg = 0, BestCost = spillImpossible;
+ for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ unsigned Cost = calcSpillCost(*I);
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
+ // Cost is 0 when all aliases are already disabled.
+ if (Cost == 0)
+ return assignVirtToPhysReg(LRE, *I);
+ if (Cost < BestCost)
+ BestReg = *I, BestCost = Cost;
+ }
+
+ if (BestReg) {
+ definePhysReg(MI, BestReg, regFree);
+ return assignVirtToPhysReg(LRE, BestReg);
+ }
+
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ MI->emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ assignVirtToPhysReg(LRE, *AO.begin());
+}
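+// Editor's note (descriptive comment, not part of the original patch): the
+// selection order above is: (1) a valid hint whose spill cost is below
+// spillDirty, clearing it first if the cost is non-zero; (2) the first
+// completely free register in the RegClassInfo allocation order; (3) the
+// register with the lowest calcSpillCost(), taking a zero-cost register
+// immediately; (4) if every candidate is spillImpossible, report
+// "ran out of registers" and reuse the first register in the order anyway.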
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator
+RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ if (New) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
+ // It's a copy, use the destination register as a hint.
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ }
+ allocVirtReg(MI, *LRI, Hint);
+ } else if (LR.LastUse) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
+ addKillFlag(LR);
+ }
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ LR.Dirty = true;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
+}
+
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ allocVirtReg(MI, *LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LR.PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LR.Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || MO.isDead();
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
+ }
+ return MO.isDead();
+}
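+// Editor's note (descriptive comment, not part of the original patch): for a
+// subregister operand such as a hypothetical %vreg7:sub_lo assigned to
+// physreg R, the operand is rewritten to TRI->getSubReg(R, sub_lo) and the
+// subreg index is cleared. Per the comment above, a kill flag on the narrow
+// operand is promoted to a kill of the full register R.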
+
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ if (ThroughRegs.count(PhysRegState[Reg]))
+ definePhysReg(MI, Reg, regFree);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ UsedInInstr.set(*AS);
+ if (ThroughRegs.count(PhysRegState[*AS]))
+ definePhysReg(MI, *AS, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->second.PhysReg);
+ } else if (MO.isEarlyClobber()) {
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
+ UsedInInstr.set(Reg);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ UsedInInstr.set(PartialDefs[i]);
+}
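+// Editor's note (descriptive comment, not part of the original patch): a
+// "through" register here is a virtual register that must occupy the same
+// physical register both before and after the instruction: tied use/def
+// pairs, early-clobber operands, and partial redefinitions that also read the
+// register. Allocating them in this pre-pass, together with the physdef
+// collision check at the top of the function, keeps later physreg defs from
+// taking the register they were assigned.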
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ // FIXME: This should probably be added by instruction selection instead?
+ // If the last instruction in the block is a return, make sure to mark it as
+ // using all of the live-out values in the function. Things marked both call
+ // and return are tail calls; do not do this for them. The tail callee need
+ // not take the same registers as input that it produces as output, and there
+ // are dependencies for its input registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
+ !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register.");
+
+ // Add live-out registers as implicit uses.
+ Ret->addRegisterKilled(*I, TRI, true);
+ }
+ }
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (RegClassInfo.isAllocatable(*I))
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ while (MII != MBB->end()) {
+ MachineInstr *MI = MII++;
+ const MCInstrDesc &MCID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default:
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
+ if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ dbgs() << "*";
+ assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
+ "Bad inverse map");
+ break;
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->second.PhysReg] == i->first &&
+ "Bad inverse map");
+ }
+ });
+
+ // Debug values are not allowed to change codegen in any way.
+ if (MI->isDebugValue()) {
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ LiveDbgValueMap[Reg].push_back(MI);
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->second.PhysReg);
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1) {
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ int64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ } else {
+ // We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ }
+ }
+ }
+ }
+ // Next instruction.
+ continue;
+ }
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.reset();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned i = 0; i != VirtOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+ unsigned PhysReg = LRI->second.PhysReg;
+ CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, i, PhysReg))
+ killVirtReg(LRI);
+ }
+ }
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.reset();
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
+ }
+
+ unsigned DefOpEnd = MI->getNumOperands();
+ if (MCID.isCall()) {
+ // Spill all virtregs before a call. This serves two purposes: 1. If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
+ DefOpEnd = VirtOpEnd;
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&MCID);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!RegClassInfo.isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ continue;
+ }
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDst = 0; // cancel coalescing;
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+ // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
+ }
+
+ // Spill all physical registers holding virtual registers now.
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)Fn.getFunction())->getName() << '\n');
+ MF = &Fn;
+ MRI = &MF->getRegInfo();
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ RegClassInfo.runOnMachineFunction(Fn);
+ UsedInInstr.resize(TRI->getNumRegs());
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Make sure the set of used physregs is closed under subreg operations.
+ MRI->closePhysRegsUsed(*TRI);
+
+ // Add the clobber lists for all the instructions we skipped earlier.
+ for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const unsigned *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ SkippedInstrs.clear();
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..e235e87b54f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1429 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ SlotIndexes *Indexes;
+ LiveStacks *LS;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ unsigned NextCascade;
+
+ // Live ranges pass through a number of stages as we try to allocate them.
+ // Some of the stages may also create new live ranges:
+ //
+ // - Region splitting.
+ // - Per-block splitting.
+ // - Local splitting.
+ // - Spilling.
+ //
+ // Ranges produced by one of the stages skip the previous stages when they are
+ // dequeued. This improves performance because we can skip interference checks
+ // that are unlikely to give any results. It also guarantees that the live
+ // range splitting algorithm terminates, something that is otherwise hard to
+ // ensure.
+ enum LiveRangeStage {
+ RS_New, ///< Never seen before.
+ RS_First, ///< First time in the queue.
+ RS_Second, ///< Second time in the queue.
+ RS_Global, ///< Produced by global splitting.
+ RS_Local, ///< Produced by local splitting.
+ RS_Spill ///< Produced by spilling.
+ };
+
+ static const char *const StageName[];
+
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ }
+
+ template<typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ for (;Begin != End; ++Begin) {
+ unsigned Reg = (*Begin)->reg;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
+ }
+ }
+
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+ bool operator<(const EvictionCost &O) const {
+ if (BrokenHints != O.BrokenHints)
+ return BrokenHints < O.BrokenHints;
+ return MaxWeight < O.MaxWeight;
+ }
+ };
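+ // Editor's note (descriptive comment, not part of the original patch): the
+ // ordering above compares broken hints before spill weight, so an eviction
+ // that breaks no hints but removes a range of weight 5.0 is considered
+ // cheaper than one that breaks a single hint to remove a range of weight
+ // 0.5. This is what lets canEvictInterference() and tryEvict() prefer
+ // hint-preserving evictions.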
+
+ // splitting state.
+ std::auto_ptr<SplitAnalysis> SA;
+ std::auto_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ unsigned PhysReg;
+ InterferenceCache::Cursor Intf;
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, unsigned Reg) {
+ PhysReg = Reg;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+ };
+
+ /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+ virtual Spiller &spiller() { return *SpillerInstance; }
+ virtual void enqueue(LiveInterval *LI);
+ virtual LiveInterval *dequeue();
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ void LRE_WillEraseInstruction(MachineInstr*);
+ bool LRE_CanEraseVirtReg(unsigned);
+ void LRE_WillShrinkVirtReg(unsigned);
+ void LRE_DidCloneVirtReg(unsigned, unsigned);
+
+ float calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, float&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand);
+ float calcGlobalSplitCost(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
+ SmallVectorImpl<LiveInterval*>&);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<LiveInterval*>&);
+
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_First",
+ "RS_Second",
+ "RS_Global",
+ "RS_Local",
+ "RS_Spill"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // LRE itself will remove from SlotIndexes and parent basic block.
+ VRM->RemoveMachineInstrFromMaps(MI);
+}
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
+ unassign(LIS->getInterval(VirtReg), PhysReg);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ if (!PhysReg)
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ unassign(LI, PhysReg);
+ enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. Ensure that the new register gets the
+ // same stage as the parent.
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ ExtraRegInfo.clear();
+ GlobalCand.clear();
+ RegAllocBase::releaseMemory();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+ // Prioritize live ranges by size, assigning larger ranges first.
+ // The queue holds (size, reg) pairs.
+ const unsigned Size = LI->getSize();
+ const unsigned Reg = LI->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only enqueue virtual registers");
+ unsigned Prio;
+
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_First;
+
+ if (ExtraRegInfo[Reg].Stage == RS_Second)
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated. Long ranges are allocated last so
+ // they are split against realistic interference.
+ Prio = (1u << 31) - Size;
+ else {
+ // Everything else is allocated in long->short order. Long ranges that don't
+ // fit should be spilled ASAP so they don't create interference.
+ Prio = (1u << 31) + Size;
+
+ // Boost ranges that have a physical register hint.
+ if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+ Prio |= (1u << 30);
+ }
+
+ Queue.push(std::make_pair(Prio, Reg));
+}
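+// Editor's note (worked example, not part of the original patch): the queue is
+// a max-heap on Prio, so deferred RS_Second ranges (priority just below 2^31)
+// always come out after everything else (priority of at least 2^31). Within
+// the non-deferred group, a physreg hint sets bit 30 and boosts the range
+// ahead of unhinted ranges; otherwise longer ranges are dequeued first. Among
+// the deferred ranges, longer ranges are dequeued last, matching the comment
+// above about splitting them against realistic interference.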
+
+LiveInterval *RAGreedy::dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+ Queue.pop();
+ return LI;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!checkPhysRegInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint(PhysReg))
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) <= RS_Second;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
+}
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted more cheaply than MaxCost.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ bool IsHint, EvictionCost &MaxCost) {
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ // If there are 10 or more interferences, chances are one is heavier.
+ if (Q.collectInterferingVRegs(10) >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
+ return false;
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Spill)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i];
+ unassign(*Intf, VRM->getPhys(Intf->reg));
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+ }
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs,
+ unsigned CostPerUseLimit) {
+ NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
+ unsigned BestPhys = 0;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1)
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(CSR, TRI) << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (Order.isHint(PhysReg))
+ break;
+ }
+
+ if (!BestPhys)
+ return 0;
+
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ float &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ float StaticCost = 0;
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
+ BC.Entry = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.first() < BI.FirstUse)
+ BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.first() < BI.LastUse)
+ ++Ins;
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
+ BC.Exit = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.last() > BI.LastUse)
+ BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.last() > BI.FirstUse)
+ ++Ins;
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ if (Ins)
+ StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+ // that may add a positive bias, it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
+
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
+
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ B = 0;
+ }
+ }
+
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+}
+
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of the newly positive bundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+ addThroughConstraints(Cand.Intf,
+ ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo));
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+ float Cost = 0;
+ const LiveInterval &LI = SA->getParent();
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(Number);
+ LiveInterval::const_iterator I = LI.find(Start);
+ assert(I != LI.end() && "Expected live-in value");
+ // Is there a different live-out value? If so, we need an extra spill
+ // instruction.
+ if (I->end < Stop)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ }
+ return Cost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+ float GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
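+ // getBundle(N, 0) is the bundle entering block N and getBundle(N, 1) the
+ // one leaving it; a set bit in LiveBundles means the value should be in a
+ // register across that bundle.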
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ unsigned Ins = 0;
+
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+ if (Ins)
+ GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference())
+ GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split VirtReg around the region determined by
+/// LiveBundles. Make an effort to avoid interference from PhysReg.
+///
+/// The 'register' interval is going to contain as many uses as possible while
+/// avoiding interference. The 'stack' interval is the complement constructed by
+/// SplitEditor. It will contain the rest.
+///
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
+ GlobalSplitCandidate &Cand,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ const BitVector &LiveBundles = Cand.LiveBundles;
+
+ DEBUG({
+ dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
+ << " with bundles";
+ for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+
+ InterferenceCache::Cursor &Intf = Cand.Intf;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+
+ // Create the main cross-block interval.
+ const unsigned MainIntv = SE->openIntv();
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ bool RegIn = BI.LiveIn &&
+ LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = BI.LiveOut &&
+ LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!RegIn && !RegOut) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ if (!BI.isOneInstr()) {
+ SE->splitSingleBlock(BI);
+ SE->selectIntv(MainIntv);
+ }
+ continue;
+ }
+
+ Intf.moveToBlock(BI.MBB->getNumber());
+
+ if (RegIn && RegOut)
+ SE->splitLiveThroughBlock(BI.MBB->getNumber(),
+ MainIntv, Intf.first(),
+ MainIntv, Intf.last());
+ else if (RegIn)
+ SE->splitRegInBlock(BI, MainIntv, Intf.first());
+ else
+ SE->splitRegOutBlock(BI, MainIntv, Intf.last());
+ }
+
+ // Handle live-through blocks.
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ Intf.moveToBlock(Number);
+ SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(),
+ RegOut ? MainIntv : 0, Intf.last());
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &Reg = *LREdit.get(i);
+
+ // Ignore old intervals from DCE.
+ if (getStage(Reg) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ setStage(Reg, RS_Global);
+ continue;
+ }
+
+ // Main interval. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] == MainIntv) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safeguard against looping.
+ setStage(Reg, RS_Global);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ float BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ const unsigned NoCand = ~0u;
+ unsigned BestCand = NoCand;
+ unsigned NumCands = 0;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount)
+ Worst = i, WorstCount = Count;
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ growRegion(Cand);
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand);
+ DEBUG({
+ dbgs() << ", total = " << Cost << " with bundles";
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Hysteresis * Cost; // Prevent rounding effects.
+ }
+ ++NumCands;
+ }
+
+ if (BestCand == NoCand)
+ return 0;
+
+ splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
+ SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstUse to LastUse,
+ // so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - a phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case; we simply assume
+ // that the interval is continuous from FirstUse to LastUse. We should make
+ // sure that we don't do anything illegal to such an interval, though.
+
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << '\n';
+ });
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have fewer instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Local. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() >= RS_Local. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Local,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
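+ // For illustration: a range with 3 instructions (NumGaps == 2) can first be
+ // split 2+3 (counting the COPY). If the resulting register interval covers
+ // as many gaps as the original, it is tagged RS_Local below, so its next
+ // split is forced to make progress; strictly smaller results are left
+ // RS_New and may be split again under rule 1.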
+ bool ProgressRequired = getStage(VirtReg) >= RS_Local;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
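+ // The new interval will hold NewGaps+1 instructions (the uses kept plus an
+ // entry/exit copy when the value is live before/after), each weighted by
+ // the block frequency and normalized by the estimated interval size.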
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Local so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ setStage(*LREdit.get(i), RS_Local);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ return tryLocalSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ // Don't iterate global splitting.
+ // Move straight to spilling if this range was produced by a global split.
+ if (getStage(VirtReg) >= RS_Global)
+ return 0;
+
+ SA->analyze(&VirtReg);
+
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+ }
+
+ // First try to split around a region spanning multiple blocks.
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Then isolate blocks with multiple uses.
+ SplitAnalysis::BlockPtrSet Blocks;
+ if (SA->getMultiUseBlocks(Blocks)) {
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+ SE->splitSingleBlocks(Blocks);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ }
+
+ // Don't assign any physregs.
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Second ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Second)
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage == RS_First) {
+ setStage(VirtReg, RS_Second);
+ DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(&VirtReg);
+ return 0;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class will report it.
+ if (Stage >= RS_Spill || !VirtReg.isSpillable())
+ return ~0u;
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
+ IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+
+ allocatePhysRegs();
+ addMBBLiveIns(MF);
+ LIS->addKillFlags();
+
+ // Run rewriter
+ {
+ NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
+ VRM->rewrite(Indexes);
+ }
+
+ // Write out new DBG_VALUE instructions.
+ DebugVars->emitDebugValues(VRM);
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 000000000000..0dd3c598c154
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,1544 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "RegisterClassInfo.h"
+#include "Spiller.h"
+#include "RegisterCoalescer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <queue>
+#include <memory>
+#include <cmath>
+
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+ cl::desc("Attempt trivial coalescing of interval ends"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+AvoidWAWHazard("avoid-waw-hazard",
+ cl::desc("Avoid write-write hazards for some register classes"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ // When we allocate a register, add it to a fixed-size queue of
+ // registers to skip in subsequent allocations. This trades a small
+ // amount of register pressure and increased spills for flexibility in
+ // the post-pass scheduler.
+ //
+ // Note that the number of registers used for reloading spills
+ // will be one greater than the value of this option.
+ //
+ // One big limitation of this is that it doesn't differentiate between
+ // different register classes. So on x86-64, if there is xmm register
+ // pressure, it can cause fewer GPRs to be held in the queue.
+ static cl::opt<unsigned>
+ NumRecentlyUsedRegs("linearscan-skip-count",
+ cl::desc("Number of registers for linearscan to remember"
+ "to skip."),
+ cl::init(0),
+ cl::Hidden);
+
+ struct RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(
+ *PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+
+ // Initialize the queue to record recently-used registers.
+ if (NumRecentlyUsedRegs > 0)
+ RecentRegs.resize(NumRecentlyUsedRegs, 0);
+ RecentNext = RecentRegs.begin();
+ avoidWAW_ = 0;
+ }
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ // NextReloadMap - For each register in the map, it maps to another
+ // register which is defined by a reload from the same stack slot and
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ BitVector reservedRegs_;
+ LiveIntervals* li_;
+ MachineLoopInfo *loopInfo;
+ RegisterClassInfo RegClassInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole in their live range at the current point.
+ IntervalPtrs inactive_;
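+ // Only active_ intervals occupy their assigned physical register in the
+ // regUse_ counters; an interval moved to inactive_ releases the register
+ // and re-claims it if it becomes active again.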
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
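+ /// unhandled_ - Intervals that have not been processed yet, popped in
+ /// order of increasing start point.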
+ IntervalHeap unhandled_;
+
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ // The queue of recently-used registers.
+ SmallVector<unsigned, 4> RecentRegs;
+ SmallVector<unsigned, 4>::iterator RecentNext;
+
+ // Last write-after-write register written.
+ unsigned avoidWAW_;
+
+ // Record that we just picked this register.
+ void recordRecentlyUsed(unsigned reg) {
+ assert(reg != 0 && "Recently used register is NOREG!");
+ if (!RecentRegs.empty()) {
+ *RecentNext++ = reg;
+ if (RecentNext == RecentRegs.end())
+ RecentNext = RecentRegs.begin();
+ }
+ }
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ // Make sure PassManager knows which analyses to make available
+ // to coalescing and which analyses coalescing invalidates.
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequiredID(LiveStacksID);
+ AU.addPreservedID(LiveStacksID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ // Determine if we skip this register due to its being recently used.
+ bool isRecentlyUsed(unsigned reg) const {
+ return reg == avoidWAW_ ||
+ std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
+ }
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(SlotIndex CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(SlotIndex CurPoint);
+
+ /// hasNextReloadInterval - Return the next liveinterval that's being
+ /// defined by a reload from the same SS as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC);
+
+ /// findIntervalsToSpill - Determine the intervals to spill for the
+ /// specified interval. It's passed the physical registers whose spill
+ /// weight is the lowest among all the registers whose live intervals
+ /// conflict with the interval.
+ void findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals);
+
+ /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+ /// try to allocate the definition to the same register as the source,
+ /// if the register is not defined during the lifetime of the interval.
+ /// This eliminates a copy, and is used to coalesce copies which were not
+ /// coalesced away before allocation either due to dest and src being in
+ /// different register classes or because the coalescer was overly
+ /// conservative.
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
+
+ ///
+ /// Register usage / availability tracking helpers.
+ ///
+
+ void initRegUses() {
+ regUse_.resize(tri_->getNumRegs(), 0);
+ regUseBackUp_.resize(tri_->getNumRegs(), 0);
+ }
+
+ void finalizeRegUses() {
+#ifndef NDEBUG
+ // Verify all the registers are "freed".
+ bool Error = false;
+ for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
+ if (regUse_[i] != 0) {
+ dbgs() << tri_->getName(i) << " is still in use!\n";
+ Error = true;
+ }
+ }
+ if (Error)
+ llvm_unreachable(0);
+#endif
+ regUse_.clear();
+ regUseBackUp_.clear();
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+
+ void backUpRegUses() {
+ regUseBackUp_ = regUse_;
+ }
+
+ void restoreRegUses() {
+ regUse_ = regUseBackUp_;
+ }
+
+ ///
+ /// Register handling helpers.
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
+
+ /// getFirstNonReservedPhysReg - return the first non-reserved physical
+ /// register in the register class.
+ unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
+ ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
+ assert(!O.empty() && "All registers reserved?!");
+ return O.front();
+ }
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ DEBUG({
+ if (str)
+ dbgs() << str << " intervals:\n";
+
+ for (; i != e; ++i) {
+ dbgs() << '\t' << *i->first << " -> ";
+
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg))
+ reg = vrm_->getPhys(reg);
+
+ dbgs() << tri_->getName(reg) << '\n';
+ }
+ });
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+
+void RALinScan::ComputeRelatedRegClasses() {
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
+ bool HasAliases = false;
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+ const TargetRegisterClass *AliasClass =
+ OneClassForEachPhysReg.lookup(*AS);
+ if (AliasClass)
+ RelatedRegClasses.unionSets(I->second, AliasClass);
+ }
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try to
+/// allocate the definition to the same register as the source register if that
+/// register is not defined during the lifetime of the interval. If the interval is
+/// killed by a copy, try to use the destination register. This eliminates a
+/// copy. This is used to coalesce copies which were not coalesced away before
+/// allocation either due to dest and src being in different register classes or
+/// because the coalescer was overly conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+ if ((Preference && Preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ // We cannot handle complicated live ranges. Simple linear stuff only.
+ if (cur.ranges.size() != 1)
+ return Reg;
+
+ const LiveRange &range = cur.ranges.front();
+
+ VNInfo *vni = range.valno;
+ if (vni->isUnused() || !vni->def.isValid())
+ return Reg;
+
+ unsigned CandReg;
+ {
+ MachineInstr *CopyMI;
+ if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+ // Defined by a copy, try to extend SrcReg forward
+ CandReg = CopyMI->getOperand(1).getReg();
+ else if (TrivCoalesceEnds &&
+ (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
+ // Only used by a copy, try to extend DstReg backwards
+ CandReg = CopyMI->getOperand(0).getReg();
+ else
+ return Reg;
+
+ // If the target of the copy is a sub-register then don't coalesce.
+ if (CopyMI->getOperand(0).getSubReg())
+ return Reg;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+ if (!vrm_->isAssignedReg(CandReg))
+ return Reg;
+ CandReg = vrm_->getPhys(CandReg);
+ }
+ if (Reg == CandReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(CandReg))
+ return Reg;
+
+ if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+ return Reg;
+
+ // Try to coalesce.
+ DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+ << '\n');
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, CandReg);
+
+ ++NumCoalesce;
+ return CandReg;
+}
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
+ li_ = &getAnalysis<LiveIntervals>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // We don't run the coalescer here because we have no reason to
+ // interact with it. If the coalescer requires interaction, it
+ // won't do anything. If it doesn't require interaction, we assume
+ // it was run as a separate pass.
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ // Also resize register usage trackers.
+ initRegUses();
+
+ vrm_ = &getAnalysis<VirtRegMap>();
+ if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+ spiller_.reset(createSpiller(*this, *mf_, *vrm_));
+
+ initIntervalSets();
+
+ linearScan();
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
+ assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+ finalizeRegUses();
+
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
+ spiller_.reset(0);
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ handled_.reserve(li_->getNumIntervals());
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+ if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ }
+ } else {
+ if (i->second->empty()) {
+ assignRegOrStackSlotAtInterval(i->second);
+ }
+ else
+ unhandled_.push(i->second);
+ }
+ }
+}
+
+void RALinScan::linearScan() {
+ // linear scan algorithm
+ DEBUG({
+ dbgs() << "********** LINEAR SCAN **********\n"
+ << "********** Function: "
+ << mf_->getFunction()->getName() << '\n';
+ printIntervals("fixed", fixed_.begin(), fixed_.end());
+ });
+
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++NumIters;
+ DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
+
+ assert(!cur->empty() && "Empty interval in unhandled set.");
+
+ processActiveIntervals(cur->beginIndex());
+ processInactiveIntervals(cur->beginIndex());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+
+ // Allocating a virtual register: try to find a free
+ // physical register or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG({
+ printIntervals("active", active_.begin(), active_.end());
+ printIntervals("inactive", inactive_.begin(), inactive_.end());
+ });
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG({
+ for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ dbgs() << "\tinterval " << *i->first << " expired\n";
+ });
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+ // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Adding a virtual register to livein set?");
+ LiveInMBBs[i]->addLiveIn(Reg);
+ }
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DEBUG(dbgs() << *vrm_);
+
+ // Look for physical registers that end up not being allocated even though
+ // the register allocator had to spill other registers in its register class.
+ if (!vrm_->FindUnusedRegisters(li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing active intervals:\n");
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to inactive list.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // move re-activated intervals in active list
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases by the given weight.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+ // If the alias is a super-register, and the super-register is in the
+ // register class we are trying to allocate, then add the weight to all
+ // sub-registers of the super-register, even if they are not aliases.
+ // e.g. allocating for GR32, bh is not used, updating bl spill weight.
+ // bl should get the same spill weight otherwise it will be chosen
+ // as a spill candidate since spilling bh doesn't make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+ SlotIndex Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// getConflictWeight - Return the number of conflicts between cur
+/// live interval and the defs and uses of Reg, weighted by loop depth.
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+ MachineRegisterInfo *mri_,
+ MachineLoopInfo *loopInfo) {
+ float Conflicts = 0;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (cur->liveAt(li_->getInstructionIndex(MI))) {
+ unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+ Conflicts += std::pow(10.0f, (float)loopDepth);
+ }
+ }
+ return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals) {
+ // We have figured out the *best* register to spill. But there are other
+ // registers that are pretty good as well (spill weight within 3%). Spill
+ // the one that has the fewest defs and uses that conflict with cur.
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+ SmallVector<LiveInterval*, 8> SLIs[3];
+
+ DEBUG({
+ dbgs() << "\tConsidering " << NumCands << " candidates: ";
+ for (unsigned i = 0; i != NumCands; ++i)
+ dbgs() << tri_->getName(Candidates[i].first) << " ";
+ dbgs() << "\n";
+ });
+
+ // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second-1))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ // Which is the best candidate?
+ unsigned BestCandidate = 0;
+ float MinConflicts = Conflicts[0];
+ for (unsigned i = 1; i != NumCands; ++i) {
+ if (Conflicts[i] < MinConflicts) {
+ BestCandidate = i;
+ MinConflicts = Conflicts[i];
+ }
+ }
+
+ std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
+ std::back_inserter(SpillIntervals));
+}
+
+namespace {
+ struct WeightCompare {
+ private:
+ const RALinScan &Allocator;
+
+ public:
+ WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
+
+ typedef std::pair<unsigned, float> RegWeightPair;
+ bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
+ return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
+ }
+ };
+}
+
+static bool weightsAreClose(float w1, float w2) {
+ if (!NewHeuristic)
+ return false;
+
+ float diff = w1 - w2;
+ if (diff <= 0.02f) // Within 0.02f
+ return true;
+ return (diff / w2) <= 0.05f; // Within 5%.
+}
+
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+ DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+ if (I == NextReloadMap.end())
+ return 0;
+ return &li_->getInterval(I->second);
+}
+
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+ for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+ bool isNew = DowngradedRegs.insert(*AS);
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, *AS));
+ }
+ ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+ if (Reg) {
+ DowngradedRegs.erase(Reg);
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+ DowngradedRegs.erase(*AS);
+ }
+}
+
+namespace {
+ struct LISorter {
+ bool operator()(LiveInterval* A, LiveInterval* B) {
+ return A->beginIndex() < B->beginIndex();
+ }
+ };
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ DEBUG(dbgs() << "\tallocating current interval from "
+ << RC->getName() << ": ");
+
+ // This is an implicitly defined live interval; just assign any register.
+ if (cur->empty()) {
+ unsigned physReg = vrm_->getRegAllocPref(cur->reg);
+ if (!physReg)
+ physReg = getFirstNonReservedPhysReg(RC);
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ // Note the register is not really in use.
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ return;
+ }
+
+ backUpRegUses();
+
+ std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+ SlotIndex StartPosition = cur->beginIndex();
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ // If start of this live interval is defined by a move instruction and its
+ // source is assigned a physical register that is compatible with the target
+ // register class, then we should try to assign it the same register.
+ // This can happen when the move is from a larger register class to a smaller
+ // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
+ VNInfo *vni = cur->begin()->valno;
+ if (!vni->isUnused() && vni->def.isValid()) {
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ if (CopyMI && CopyMI->isCopy()) {
+ unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
+ unsigned SrcReg = CopyMI->getOperand(1).getReg();
+ unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
+ }
+ }
+ }
+ }
+
+ // For every interval in inactive we overlap with, mark the
+ // register as not free and update spill weights.
+ for (IntervalPtrs::const_iterator i = inactive_.begin(),
+ e = inactive_.end(); i != e; ++i) {
+ unsigned Reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only allocate virtual registers!");
+ const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ Reg = vrm_->getPhys(Reg);
+ addRegUse(Reg);
+ SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+ }
+ }
+
+ // Speculatively check to see if we can get a register right now. If not,
+ // we know we won't be able to by adding more constraints. If so, we can
+ // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+ // is very bad (it contains all callee clobbered registers for any functions
+ // with a call), so we want to avoid doing that if possible.
+ unsigned physReg = getFreePhysReg(cur);
+ unsigned BestPhysReg = physReg;
+ if (physReg) {
+ // We got a register. However, if it's in the fixed_ list, we might
+ // conflict with it. Check to see if we conflict with it or any of its
+ // aliases.
+ SmallSet<unsigned, 8> RegAliases;
+ for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+ RegAliases.insert(*AS);
+
+ bool ConflictsWithFixed = false;
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+ // Okay, this reg is on the fixed list. Check to see if we actually
+ // conflict.
+ LiveInterval *I = IP.first;
+ if (I->endIndex() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ ConflictsWithFixed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Okay, the register picked by our speculative getFreePhysReg call turned
+ // out to be in use. Actually add all of the conflicting fixed registers to
+ // regUse_ so we can do an accurate query.
+ if (ConflictsWithFixed) {
+ // For every interval in fixed we overlap with, mark the register as not
+ // free and update spill weights.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+
+ const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ I->endIndex() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ unsigned reg = I->reg;
+ addRegUse(reg);
+ SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+ }
+ }
+ }
+
+ // Using the newly updated regUse_ object, which includes conflicts in the
+ // future, see if there are any registers available.
+ physReg = getFreePhysReg(cur);
+ }
+ }
+
+ // Restore the physical register tracker, removing information about the
+ // future.
+ restoreRegUses();
+
+ // If we find a free register, we are done: assign this virtual to
+ // the free physical register and add this interval to the active
+ // list.
+ if (physReg) {
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ assert(RC->contains(physReg) && "Invalid candidate");
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ addRegUse(physReg);
+ active_.push_back(std::make_pair(cur, cur->begin()));
+ handled_.push_back(cur);
+
+ // Remember physReg for avoiding a write-after-write hazard in the next
+ // instruction.
+ if (AvoidWAWHazard &&
+ tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
+ avoidWAW_ = physReg;
+
+ // "Upgrade" the physical register since it has been allocated.
+ UpgradeRegister(physReg);
+ if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+ // "Downgrade" physReg to try to keep physReg from being allocated until
+ // the next reload from the same SS is allocated.
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
+ DowngradeRegister(cur, physReg);
+ }
+ return;
+ }
+ DEBUG(dbgs() << "no free registers\n");
+
+ // Compile the spill weights into an array that is better for scanning.
+ std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+ for (std::vector<std::pair<unsigned, float> >::iterator
+ I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+ updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+ // for each interval in active, update spill weights.
+ for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+ }
+
+ DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+
+ // Find a register to spill.
+ float minWeight = HUGE_VALF;
+ unsigned minReg = 0;
+
+ bool Found = false;
+ std::vector<std::pair<unsigned,float> > RegsWeights;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
+ float regWeight = SpillWeights[reg];
+ // Skip recently allocated registers and reserved registers.
+ if (minWeight > regWeight && !isRecentlyUsed(reg))
+ Found = true;
+ RegsWeights.push_back(std::make_pair(reg, regWeight));
+ }
+
+ // If we didn't find a register that is spillable, try aliases?
+ if (!Found) {
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
+      // No need to worry about whether the alias register is smaller than
+      // the registers in RC; we are going to spill all registers that alias
+      // it anyway.
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+ RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+ }
+ }
+
+ // Sort all potential spill candidates by weight.
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
+ minReg = RegsWeights[0].first;
+ minWeight = RegsWeights[0].second;
+ if (minWeight == HUGE_VALF) {
+ // All registers must have inf weight. Just grab one!
+ minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
+ if (cur->weight == HUGE_VALF ||
+ li_->getApproximateInstructionCount(*cur) == 0) {
+ // Spill a physical register around defs and uses.
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+        // spillPhysRegAroundRegDefsUses may have invalidated iterators
+        // stored in fixed_. Reset them.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+ if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+ IP.second = I->advanceTo(I->begin(), StartPosition);
+ }
+
+ DowngradedRegs.clear();
+ assignRegOrStackSlotAtInterval(cur);
+ } else {
+ assert(false && "Ran out of registers during register allocation!");
+ report_fatal_error("Ran out of registers during register allocation!");
+ }
+ return;
+ }
+ }
+
+ // Find up to 3 registers to consider as spill candidates.
+ unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+ while (LastCandidate > 1) {
+ if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+ break;
+ --LastCandidate;
+ }
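// Editorial note: an illustrative, self-contained trace of the trimming loop
// above (not part of the original patch), with the closeness rule from
// weightsAreClose inlined and assuming the NewHeuristic path where that rule
// can actually return true. RegsWeights is already sorted by weight, so
// RegsWeights[0].second is minWeight.
#include <utility>
#include <vector>

static unsigned trimSpillCandidatesSketch(
    const std::vector<std::pair<unsigned, float> > &RegsWeights) {
  float minWeight = RegsWeights[0].second;
  unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
  while (LastCandidate > 1) {
    float w = RegsWeights[LastCandidate - 1].second;
    float diff = w - minWeight;
    bool close = diff <= 0.02f || (diff / minWeight) <= 0.05f;
    if (close)
      break;                     // Keep this candidate and everything below.
    --LastCandidate;
  }
  return LastCandidate;
}
// With weights {1.0, 1.015, 3.0} this returns 2: the heaviest candidate is far
// from the minimum (200% heavier) and is dropped, while the second one is
// within 0.02 of the minimum and survives.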
+
+ DEBUG({
+ dbgs() << "\t\tregister(s) with min weight(s): ";
+
+ for (unsigned i = 0; i != LastCandidate; ++i)
+ dbgs() << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n";
+ });
+
+  // If the current interval has the minimum weight, we need to spill it,
+  // add any added intervals back to unhandled, and restart
+  // linear scan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
+ SmallVector<LiveInterval*, 8> added;
+ LiveRangeEdit LRE(*cur, added);
+ spiller_->spill(LRE);
+
+ std::sort(added.begin(), added.end(), LISorter());
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+ // Merge added with unhandled. Note that we have already sorted
+ // intervals returned by addIntervalsForSpills by their starting
+ // point.
+    // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginIndex() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+ return;
+ }
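// Editorial note: an illustrative, self-contained model of the NextReloadMap
// bookkeeping in the merge loop above, not part of the original patch. The
// register numbers, stack slots and block numbers are made up; the point is
// that consecutive reloads of the same stack slot in the same basic block get
// chained so the allocator can try to give them the same physical register.
#include <map>
#include <vector>

struct ReloadSketch {
  unsigned VReg;      // Virtual register defined by the reload.
  int      StackSlot; // Stack slot being reloaded.
  unsigned MBBNum;    // Basic block containing the reload.
};

static std::map<unsigned, unsigned>
buildNextReloadMapSketch(const std::vector<ReloadSketch> &Reloads) {
  // Reloads are assumed to be sorted by position, as the added intervals are.
  std::map<unsigned, unsigned> NextReload;
  for (unsigned i = 1, e = Reloads.size(); i != e; ++i) {
    const ReloadSketch &Prev = Reloads[i - 1];
    const ReloadSketch &Cur  = Reloads[i];
    if (Prev.MBBNum == Cur.MBBNum && Prev.StackSlot == Cur.StackSlot)
      NextReload[Prev.VReg] = Cur.VReg;  // Chain reload i-1 to reload i.
  }
  return NextReload;
}
// For reloads {v10, ss#3, bb0}, {v11, ss#3, bb0}, {v12, ss#5, bb1} this yields
// the single entry v10 -> v11; the reload from ss#5 starts a new chain.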
+
+ ++NumBacktracks;
+
+ // Push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it, it
+  // should go right back to the front of the list.
+ unhandled_.push(cur);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+
+  // We spill all intervals aliasing the register with
+  // minimum weight, roll back to the interval with the earliest
+  // start point and let the linear scan algorithm run again.
+ SmallVector<LiveInterval*, 8> spillIs;
+
+ // Determine which intervals have to be spilled.
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+ // Set of spilled vregs (used later to rollback properly)
+ SmallSet<unsigned, 8> spilled;
+
+  // The earliest start of a spilled interval indicates how far back
+  // in handled we need to roll back.
+ assert(!spillIs.empty() && "No spill intervals?");
+ SlotIndex earliestStart = spillIs[0]->beginIndex();
+
+ // Spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with the
+  // current interval as the rest do not affect its allocation. We also keep
+ // track of the earliest start of all spilled live intervals since this will
+ // mark our rollback point.
+ SmallVector<LiveInterval*, 8> added;
+ while (!spillIs.empty()) {
+ LiveInterval *sli = spillIs.back();
+ spillIs.pop_back();
+ DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
+ if (sli->beginIndex() < earliestStart)
+ earliestStart = sli->beginIndex();
+ LiveRangeEdit LRE(*sli, added, 0, &spillIs);
+ spiller_->spill(LRE);
+ spilled.insert(sli->reg);
+ }
+
+ // Include any added intervals in earliestStart.
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ SlotIndex SI = added[i]->beginIndex();
+ if (SI < earliestStart)
+ earliestStart = SI;
+ }
+
+ DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
+
+ // Scan handled in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+    // If this interval starts before earliestStart we are done.
+ if (!i->empty() && i->beginIndex() < earliestStart)
+ break;
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update regUse_ and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy.
+      // Clear the preference now since the source interval allocation may
+      // have been undone as well.
+ mri_->setRegAllocationHint(i->reg, 0, 0);
+ else {
+ UpgradeRegister(ii->second);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after earliestStart
+  // and insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required).
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginIndex())) {
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+ addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginIndex() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+}
+
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs) {
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ ArrayRef<unsigned> Order;
+ if (Hint.first)
+ Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
+ else
+ Order = RegClassInfo.getOrder(RC);
+
+ assert(!Order.empty() && "No allocatable register in this register class!");
+
+ // Scan for the first available register.
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
+ // Skip recently allocated registers.
+ if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
+ FreeReg = Reg;
+ if (FreeReg < inactiveCounts.size())
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ else
+ FreeRegInactiveCount = 0;
+ break;
+ }
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+ // Remember what register we picked so we can skip it next time.
+ if (FreeReg != 0) recordRecentlyUsed(FreeReg);
+ return FreeReg;
+ }
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
+ if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+ FreeRegInactiveCount < inactiveCounts[Reg] &&
+ (!SkipDGRegs || !isRecentlyUsed(Reg))) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ // Remember what register we picked so we can skip it next time.
+ recordRecentlyUsed(FreeReg);
+
+ return FreeReg;
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+ SmallVector<unsigned, 256> inactiveCounts;
+ unsigned MaxInactiveCount = 0;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+ reg = vrm_->getPhys(reg);
+ if (inactiveCounts.size() <= reg)
+ inactiveCounts.resize(reg+1);
+ ++inactiveCounts[reg];
+ MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+ }
+ }
+
+ // If copy coalescer has assigned a "preferred" register, check if it's
+ // available first.
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
+ }
+
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+ return new RALinScan();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 000000000000..72230d4b0c5c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,723 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "RegisterCoalescer.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+ cl::desc("Pre-split before PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+ typedef std::set<unsigned> RegSet;
+
+
+ std::auto_ptr<PBQPBuilder> builder;
+
+ char *customPassID;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
+
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ RegSet vregsToAlloc, emptyIntervalVRegs;
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
+  void addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri);
+
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+} // End anonymous namespace.
+
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
+}
+
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+
+}
+
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
+}
+
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
+
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
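// Editorial note: an illustrative sketch (not part of the original patch) of
// the option numbering used by getPRegForOption above, with a plain vector
// standing in for the AllowedSet. Option 0 is the spill option; option k with
// k >= 1 selects the k-th register in the allowed set.
#include <cassert>
#include <vector>

static unsigned pregForOptionSketch(const std::vector<unsigned> &allowed,
                                    unsigned option) {
  assert(option >= 1 && option <= allowed.size() && "Not a preg option.");
  return allowed[option - 1];
}
// With an allowed set {R3, R5, R7} (any register numbers), option 1 maps to
// R3 and option 3 maps to R7, while option 0 means "spill" and has no
// physical register at all.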
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ typedef std::vector<const LiveInterval*> LIVector;
+
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
+
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ pregs.insert(itr->first);
+ mri->setPhysRegUsed(itr->first);
+ }
+ }
+
+ BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ for (unsigned i = 0; i != rawOrder.size(); ++i) {
+ unsigned preg = rawOrder[i];
+ if (!reservedRegs.test(preg)) {
+ vrAllowed.push_back(preg);
+ }
+ }
+
+ // Remove any physical registers which overlap.
+ for (RegSet::const_iterator pregItr = pregs.begin(),
+ pregEnd = pregs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
+ const LiveInterval *pregLI = &lis->getInterval(preg);
+
+ if (pregLI->empty()) {
+ continue;
+ }
+
+ if (!vregLI->overlaps(*pregLI)) {
+ continue;
+ }
+
+ // Remove the register from the allowed set.
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), preg);
+
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
+
+ // Also remove any aliases.
+ const unsigned *aliasItr = tri->getAliasSet(preg);
+ if (aliasItr != 0) {
+ for (; *aliasItr != 0; ++aliasItr) {
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
+
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
+ }
+ }
+ }
+
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
+
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
+
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
+}
+
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
+ }
+ }
+}
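// Editorial note: an illustrative, self-contained picture (not part of the
// original patch) of the cost structure built by addSpillCosts and
// addInterferenceCosts above, using nested vectors of doubles instead of the
// PBQP matrix type and plain equality in place of
// TargetRegisterInfo::regsOverlap. Row and column 0 are the spill option.
#include <limits>
#include <vector>

typedef std::vector<std::vector<double> > CostMatrixSketch;

static CostMatrixSketch
interferenceMatrixSketch(const std::vector<unsigned> &vr1Allowed,
                         const std::vector<unsigned> &vr2Allowed) {
  const double inf = std::numeric_limits<double>::infinity();
  CostMatrixSketch m(vr1Allowed.size() + 1,
                     std::vector<double>(vr2Allowed.size() + 1, 0.0));
  for (unsigned i = 0; i != vr1Allowed.size(); ++i)
    for (unsigned j = 0; j != vr2Allowed.size(); ++j)
      if (vr1Allowed[i] == vr2Allowed[j])   // Stand-in for regsOverlap().
        m[i + 1][j + 1] = inf;              // Forbid clashing assignments.
  return m;
}
// If both vregs may use {R1, R2}, the 3x3 matrix is zero everywhere except for
// infinite entries at (1,1) and (2,2): the solver can still spill either vreg
// (row/column 0) or give the two vregs different registers for free.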
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
+
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+  // gives the OK.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
+
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
+
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
+
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
+
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
+
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
+
+ if (cp.isPhys()) {
+ if (!lis->isAllocatable(dst)) {
+ continue;
+ }
+
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
+ }
+ }
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
+
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
+
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
+
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequired<RegisterCoalescer>();
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
+ au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
+
+ LiveInterval *li = itr->second;
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
+ }
+ }
+}
+
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
+ MachineRegisterInfo* mri) {
+ int stackSlot = vrm->getStackSlot(spilled->reg);
+
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
+ return;
+ }
+
+ const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+ LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+ VNInfo *vni;
+ if (stackInterval.getNumValNums() != 0) {
+ vni = stackInterval.getValNumInfo(0);
+ } else {
+ vni = stackInterval.getNextValue(
+ SlotIndex(), 0, lss->getVNInfoAllocator());
+ }
+
+ LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+ stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ const LiveInterval* spillInterval = &lis->getInterval(vreg);
+ double oldWeight = spillInterval->weight;
+ rmf->rememberUseDefs(spillInterval);
+ std::vector<LiveInterval*> newSpills =
+ lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
+ addStackInterval(spillInterval, mri);
+ rmf->rememberSpills(spillInterval, newSpills);
+
+ (void) oldWeight;
+ DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ << oldWeight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (std::vector<LiveInterval*>::const_iterator
+ itr = newSpills.begin(), end = newSpills.end();
+ itr != end; ++itr) {
+ assert(!(*itr)->empty() && "Empty spill range.");
+ DEBUG(dbgs() << (*itr)->reg << " ");
+ vregsToAlloc.insert((*itr)->reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !newSpills.empty();
+ } else {
+ assert(false && "Unknown allocation option.");
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
+
+
+void RegAllocPBQP::finalizeAlloc() const {
+ typedef LiveIntervals::iterator LIIterator;
+ typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+ // First allocate registers for the empty intervals.
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
+ itr != end; ++itr) {
+ LiveInterval *li = &lis->getInterval(*itr);
+
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = liRC->getRawAllocationOrder(*mf).front();
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+
+ // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+ MachineBasicBlock *entryMBB = &*mf->begin();
+
+ for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = 0;
+
+ // Get the physical register for this interval
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ reg = li->reg;
+ } else if (vrm->isAssignedReg(li->reg)) {
+ reg = vrm->getPhys(li->reg);
+ } else {
+ // Ranges which are assigned a stack slot only are ignored.
+ continue;
+ }
+
+ if (reg == 0) {
+ // Filter out zero regs - they're for intervals that were spilled.
+ continue;
+ }
+
+ // Iterate over the ranges of the current interval...
+ for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+
+ // Find the set of basic blocks which this range is live into...
+ if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+ // And add the physreg for this interval to their live-in sets.
+ for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
+ if (liveInMBBs[i] != entryMBB) {
+ if (!liveInMBBs[i]->isLiveIn(reg)) {
+ liveInMBBs[i]->addLiveIn(reg);
+ }
+ }
+ }
+ liveInMBBs.clear();
+ }
+ }
+ }
+
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+ rmf = &getAnalysis<RenderMachineFunction>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+
+
+ DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is continued till no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!vregsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
+ PBQP::Solution solution =
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
+
+ ++round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc();
+
+ rmf->renderMachineFunction("After PBQP register allocation.", vrm);
+
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+ // Run rewriter
+ std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+ rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID) {
+ return new RegAllocPBQP(builder, customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
+
+#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 000000000000..5a77e47bc591
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,112 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee saved and reserved
+// registers depend on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
+{}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ // Allocate new array the first time we see a new target.
+ if (MF->getTarget().getRegisterInfo() != TRI) {
+ TRI = MF->getTarget().getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ Update = true;
+ }
+
+ // Does this MF have different CSRs?
+ const unsigned *CSR = TRI->getCalleeSavedRegs(MF);
+ if (Update || CSR != CalleeSaved) {
+ // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+ // overlapping CSR.
+ CSRNum.clear();
+ CSRNum.resize(TRI->getNumRegs(), 0);
+ for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+ for (const unsigned *AS = TRI->getOverlaps(Reg);
+ unsigned Alias = *AS; ++AS)
+ CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ Update = true;
+ }
+ CalleeSaved = CSR;
+
+ // Different reserved registers?
+ BitVector RR = TRI->getReservedRegs(*MF);
+ if (RR != Reserved)
+ Update = true;
+ Reserved = RR;
+
+ // Invalidate cached information from previous function.
+ if (Update)
+ ++Tag;
+}
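// Editorial note: an illustrative, self-contained model (not part of the
// original patch) of the CSRNum table built above. The register numbers and
// the overlap table are made up; each overlap list is assumed to contain the
// register itself, as TRI->getOverlaps() does. Entry 0 means "does not alias
// any CSR"; entry k means "aliases CalleeSaved[k-1]".
#include <vector>

static std::vector<unsigned char>
buildCSRNumSketch(unsigned NumRegs, const std::vector<unsigned> &CSR,
                  const std::vector<std::vector<unsigned> > &Overlaps) {
  std::vector<unsigned char> CSRNum(NumRegs, 0);
  for (unsigned N = 0; N != CSR.size(); ++N) {
    const std::vector<unsigned> &AS = Overlaps[CSR[N]];
    for (unsigned i = 0; i != AS.size(); ++i)
      CSRNum[AS[i]] = (unsigned char)(N + 1); // 1-based index into CSR.
  }
  return CSRNum;
}
// If CSR = {7, 9} and register 8 overlaps register 7, the result has
// CSRNum[7] = 1, CSRNum[8] = 1 and CSRNum[9] = 2; every other entry stays 0,
// matching the "last overlapping CSR wins" behaviour of the loop above.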
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ RCInfo &RCI = RegClass[RC->getID()];
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new unsigned[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<unsigned, 16> CSRAlias;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ if (CSRNum[PhysReg])
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else
+ RCI.Order[N++] = PhysReg;
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+ assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+ // CSR aliases go after the volatile registers, preserve the target's order.
+ std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+
+ DEBUG({
+ dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << " ]\n";
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
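// Editorial note: an illustrative, self-contained version (not part of the
// original patch) of the ordering policy implemented by compute() above, with
// plain vectors standing in for the target queries. Reserved registers are
// dropped, non-CSR ("volatile") registers keep their raw order, and CSR
// aliases are appended at the end in that same order.
#include <vector>

static std::vector<unsigned>
allocationOrderSketch(const std::vector<unsigned> &RawOrder,
                      const std::vector<bool> &Reserved,
                      const std::vector<unsigned char> &CSRNum) {
  std::vector<unsigned> Order, CSRAlias;
  for (unsigned i = 0; i != RawOrder.size(); ++i) {
    unsigned PhysReg = RawOrder[i];
    if (Reserved[PhysReg])
      continue;                    // Never hand out reserved registers.
    if (CSRNum[PhysReg])
      CSRAlias.push_back(PhysReg); // Aliases a callee-saved reg: goes last.
    else
      Order.push_back(PhysReg);    // Volatile register: keep raw position.
  }
  Order.insert(Order.end(), CSRAlias.begin(), CSRAlias.end());
  return Order;
}
// With a raw order of {1, 2, 3, 4}, register 2 reserved and register 4
// aliasing a CSR, the preferred order becomes {1, 3, 4}.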
+
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.h b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h
new file mode 100644
index 000000000000..d21fd67efe8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h
@@ -0,0 +1,121 @@
+//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee saved and reserved
+// registers depend on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H
+#define LLVM_CODEGEN_REGISTERCLASSINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+class RegisterClassInfo {
+ struct RCInfo {
+ unsigned Tag;
+ unsigned NumRegs;
+ OwningArrayPtr<unsigned> Order;
+
+ RCInfo() : Tag(0), NumRegs(0) {}
+ operator ArrayRef<unsigned>() const {
+ return ArrayRef<unsigned>(Order.get(), NumRegs);
+ }
+ };
+
+ // Brief cached information for each register class.
+ OwningArrayPtr<RCInfo> RegClass;
+
+ // Tag changes whenever cached information needs to be recomputed. An RCInfo
+ // entry is valid when its tag matches.
+ unsigned Tag;
+
+ const MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ // Callee saved registers of last MF. Assumed to be valid until the next
+  // runOnMachineFunction() call.
+ const unsigned *CalleeSaved;
+
+ // Map register number to CalleeSaved index + 1;
+ SmallVector<uint8_t, 4> CSRNum;
+
+ // Reserved registers in the current MF.
+ BitVector Reserved;
+
+ // Compute all information about RC.
+ void compute(const TargetRegisterClass *RC) const;
+
+ // Return an up-to-date RCInfo for RC.
+ const RCInfo &get(const TargetRegisterClass *RC) const {
+ const RCInfo &RCI = RegClass[RC->getID()];
+ if (Tag != RCI.Tag)
+ compute(RC);
+ return RCI;
+ }
+
+public:
+ RegisterClassInfo();
+
+  /// runOnMachineFunction - Prepare to answer questions about MF. This must
+  /// be called before any other methods are used.
+ void runOnMachineFunction(const MachineFunction &MF);
+
+ /// getNumAllocatableRegs - Returns the number of actually allocatable
+ /// registers in RC in the current function.
+ unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const {
+ return get(RC).NumRegs;
+ }
+
+ /// getOrder - Returns the preferred allocation order for RC. The order
+ /// contains no reserved registers, and registers that alias callee saved
+ /// registers come last.
+ ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
+ return get(RC);
+ }
+
+ /// getLastCalleeSavedAlias - Returns the last callee saved register that
+  /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR.
+ unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ if (unsigned N = CSRNum[PhysReg])
+ return CalleeSaved[N-1];
+ return 0;
+ }
+
+ /// isReserved - Returns true when PhysReg is a reserved register.
+ ///
+ /// Reserved registers may belong to an allocatable register class, but the
+ /// target has explicitly requested that they are not used.
+ ///
+ bool isReserved(unsigned PhysReg) const {
+ return Reserved.test(PhysReg);
+ }
+
+ /// isAllocatable - Returns true when PhysReg belongs to an allocatable
+ /// register class and it hasn't been reserved.
+ ///
+ /// Allocatable registers may show up in the allocation order of some virtual
+ /// register, so a register allocator needs to track its liveness and
+ /// availability.
+ bool isAllocatable(unsigned PhysReg) const {
+ return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
+ }
+};
+} // end namespace llvm
+
+#endif
+
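// Editorial note: a hedged usage sketch for this interface, not taken from an
// in-tree client. The MachineFunction and register class are assumed to come
// from the surrounding pass; only methods declared above are used.
#include "RegisterClassInfo.h"

namespace {
unsigned firstAllocatableRegSketch(llvm::RegisterClassInfo &RCI,
                                   const llvm::MachineFunction &MF,
                                   const llvm::TargetRegisterClass *RC) {
  RCI.runOnMachineFunction(MF);                 // Refresh the cached tables.
  llvm::ArrayRef<unsigned> Order = RCI.getOrder(RC);
  for (unsigned i = 0; i != Order.size(); ++i)
    if (RCI.isAllocatable(Order[i]))            // Reserved registers are
      return Order[i];                          // already filtered out.
  return 0;                                     // Nothing allocatable.
}
} // end anonymous namespace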
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 000000000000..b91f92c6aa5a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,1797 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface by all clients and implementations of
+// register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regcoalescing"
+#include "RegisterCoalescer.h"
+#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
+
+#include "llvm/Pass.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableCrossClassJoin("disable-cross-class-join",
+ cl::desc("Avoid coalescing cross register class copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnablePhysicalJoin("join-physregs",
+ cl::desc("Join physical register copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
+ if (!a) return b;
+ if (!b) return a;
+ return tri.composeSubRegIndices(a, b);
+}
+
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = compose(tri, MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ srcReg_ = dstReg_ = subIdx_ = 0;
+ newRC_ = 0;
+ flipped_ = crossClass_ = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ partial_ = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ flipped_ = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = tri_.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ SrcSub = 0;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // For now we only handle the case of identical indices in commensurate
+ // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg
+ // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg.
+ if (SrcSub != DstSub)
+ return false;
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (!getCommonSubClass(DstRC, SrcRC))
+ return false;
+ SrcSub = DstSub = 0;
+ }
+
+ // There can be no SrcSub left; if the source still carries a sub-register
+ // index, flip the pair so the index ends up on the destination side.
+ if (SrcSub) {
+ std::swap(Src, Dst);
+ DstSub = SrcSub;
+ SrcSub = 0;
+ assert(!flipped_ && "Unexpected flip");
+ flipped_ = true;
+ }
+
+ // Find the new register class.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (DstSub)
+ newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ else
+ newRC_ = getCommonSubClass(DstRC, SrcRC);
+ if (!newRC_)
+ return false;
+ crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ srcReg_ = Src;
+ dstReg_ = Dst;
+ subIdx_ = DstSub;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ return false;
+ std::swap(srcReg_, dstReg_);
+ flipped_ = !flipped_;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is srcReg_.
+ if (Dst == srcReg_) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != srcReg_) {
+ return false;
+ }
+
+ // Now check that Dst matches dstReg_.
+ if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = tri_.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return dstReg_ == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ } else {
+ // dstReg_ is virtual.
+ if (dstReg_ != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return compose(tri_, subIdx_, SrcSub) == DstSub;
+ }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
+ /// Joined copies are not deleted immediately, but kept in JoinedCopies.
+ JoinedCopies.insert(CopyMI);
+
+ /// Mark all register operands of CopyMI as <undef> so they won't affect dead
+ /// code elimination.
+ for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
+ E = CopyMI->operands_end(); I != E; ++I)
+ if (I->isReg())
+ I->setIsUndef(true);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // Bail if there is no dst interval - can happen when merging physical subreg
+ // operations.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ if (BLR == IntB.end()) return false;
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->isDefByCopy()) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+ // %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->hasRedefByEC())
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ if (!CP.isCoalescable(AValNo->getCopy()))
+ return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ if (ValLR == IntB.end())
+ return false;
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its aliases is overlapping the live interval of the virtual register.
+ // If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
+ DEBUG({
+ dbgs() << "\t\tInterfere with alias ";
+ li_->getInterval(*AS).print(dbgs(), tri_);
+ });
+ return false;
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Extending: ";
+ IntB.print(dbgs(), tri_);
+ });
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->setCopy(0);
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0,
+ li_->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ // If B1 is killed by a PHI, then the merged live range must also be killed
+ // by the same PHI, as B0 and B1 can not overlap.
+ bool HasPHIKill = BValNo->hasPHIKill();
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ if (HasPHIKill)
+ ValLR->valno->setHasPHIKill(true);
+ }
+ DEBUG({
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // If the copy instruction was killing the destination register before the
+ // merge, find the last use and trim the live range. That will also add the
+ // isKill marker.
+ if (ALR->end == CopyIdx)
+ li_->shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValno val# of IntA.
+bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
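+ // Walk the IntB ranges that could overlap [AI->start, AI->end); any
+ // overlapping range that belongs to a value number other than BValNo is a
+ // definition of IntB that can reach uses of AValNo.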
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+ // where the copy is a back edge copy and commuting the def lengthens the
+ // live interval of the source register to the entire loop.
+ if (CP.isPhys() && CP.isFlipped())
+ return false;
+
+ // Bail if there is no dst interval.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || !BValNo->isDefByCopy())
+ return false;
+
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ assert(AValNo && "COPY source not live");
+
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
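+ // After commuting, the operand currently at NewDstIdx moves into the slot
+ // tied to the def, so the def is effectively rewritten in terms of it. The
+ // checks below require that operand to be IntB.reg and a kill, matching the
+ // pattern described in the comment above.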
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // Abort if the aliases of IntB.reg have values that are not simply the
+ // clobbers from the superreg.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
+ return false;
+
+ // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ mri_->use_nodbg_begin(IntA.reg),
+ UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *tri_);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ markAsJoined(UseMI);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->setCopy(0);
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ }
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ IntA.removeValNo(AValNo);
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerialize the definition.
+bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
+ unsigned DstReg,
+ unsigned DstSubIdx,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ assert(DefMI && "Defining instruction disappeared");
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isAsCheapAsAMove())
+ return false;
+ if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(tii_, AA, SawStore))
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (mri_->getRegClass(DstReg) != RC)
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ // If destination register has a sub-register index on it, make sure it
+ // matches the instruction register class.
+ if (DstSubIdx) {
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC =
+ DstRC->getSubRegisterRegClass(DstSubIdx);
+ const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_);
+ if (DefRC == DstRC)
+ DstSubIdx = 0;
+ else if (DefRC != DstSubRC)
+ return false;
+ }
+
+ RemoveCopyFlag(DstReg, CopyMI);
+
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
+ MachineInstr *NewMI = prior(MII);
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef())
+ RemoveCopyFlag(MO.getReg(), CopyMI);
+ }
+
+ NewMI->copyImplicitOps(CopyMI);
+ li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ CopyMI->eraseFromParent();
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ li_->shrinkToUses(&SrcInt);
+
+ return true;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
+ bool DstIsPhys = CP.isPhys();
+ unsigned SrcReg = CP.getSrcReg();
+ unsigned DstReg = CP.getDstReg();
+ unsigned SubIdx = CP.getSubIdx();
+
+ // Update LiveDebugVariables.
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+ // instead.
+ if (DstIsPhys) {
+ if (UseMI->isCopy() &&
+ !UseMI->getOperand(1).getSubReg() &&
+ !UseMI->getOperand(0).getSubReg() &&
+ UseMI->getOperand(1).getReg() == SrcReg &&
+ UseMI->getOperand(0).getReg() != SrcReg &&
+ UseMI->getOperand(0).getReg() != DstReg &&
+ !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
+ UseMI->getOperand(0).getReg(), 0, UseMI))
+ continue;
+ }
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ bool Kills = false, Deads = false;
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ Kills |= MO.isKill();
+ Deads |= MO.isDead();
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *tri_);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *tri_);
+ }
+
+ // This instruction is a copy that will be removed.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ if (SubIdx) {
+ // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+ // read-modify-write of DstReg.
+ if (Deads)
+ UseMI->addRegisterDead(DstReg, tri_);
+ else if (!Reads && Writes)
+ UseMI->addRegisterDefined(DstReg, tri_);
+
+ // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills)
+ UseMI->addRegisterKilled(DstReg, tri_);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it and also remove the empty intervals of its
+/// sub-registers. Return true if the live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+ const TargetRegisterInfo *tri_) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ if (sli.empty())
+ li_->removeInterval(*SR);
+ }
+ li_->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
+ const MachineInstr *CopyMI) {
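+ // CopyMI is going away (deleted or replaced by a rematerialized def), so any
+ // value number it defines in DstReg, or in an aliased physreg, must no
+ // longer claim to be defined by a copy.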
+ SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ if (li_->hasInterval(DstReg)) {
+ LiveInterval &LI = li_->getInterval(DstReg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+ if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return;
+ for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
+ continue;
+ LiveInterval &LI = li_->getInterval(*AS);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+}
+
+/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
+/// We need to be careful about coalescing a source physical register with a
+/// virtual register. Once the coalescing is done, it cannot be broken and these
+/// are not spillable! If the destination interval uses are far away, think
+/// twice about coalescing them!
+bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
+ bool Allocatable = li_->isAllocatable(CP.getDstReg());
+ LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+
+ /// Always join simple intervals that are defined by a single copy from a
+ /// reserved register. This doesn't increase register pressure, so it is
+ /// always beneficial.
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
+ return true;
+
+ if (!EnablePhysicalJoin) {
+ DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+ return false;
+ }
+
+ // Only coalesce to allocatable physreg, we don't want to risk modifying
+ // reserved registers.
+ if (!Allocatable) {
+ DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+ return false; // Not coalescable.
+ }
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (li_->hasInterval(CP.getDstReg()) &&
+ li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
+
+ // FIXME: Why are we skipping this test for partial copies?
+ // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+ if (!CP.isPartial()) {
+ const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+ unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ if (Length > Threshold) {
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC) {
+ unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
+ // This heuristic is good enough in practice, but it's obviously not *right*.
+ // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+ // out all but the most restrictive register classes.
+ if (NewRCCount > 4 ||
+ // Early exit if the function is fairly small, coalesce aggressively if
+ // that's the case. For really special register classes with 3 or
+ // fewer registers, be a bit more careful.
+ (li_->getFuncInstructionCount() / NewRCCount) < 8)
+ return true;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+
+ // Coalesce aggressively if the intervals are small compared to the number of
+ // registers in the new class. The number 4 is fairly arbitrary, chosen to be
+ // less aggressive than the 8 used for the whole function size.
+ const unsigned ThresSize = 4 * NewRCCount;
+ if (SrcSize <= ThresSize && DstSize <= ThresSize)
+ return true;
+
+ // Estimate *register use density*. If it doubles or more, abort.
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+ mri_->use_nodbg_end());
+ unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+ mri_->use_nodbg_end());
+ unsigned NewUses = SrcUses + DstUses;
+ unsigned NewSize = SrcSize + DstSize;
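+ // The checks below compare use density normalized by the number of
+ // allocatable registers: abort if (NewUses/NewSize)/NewRCCount is more than
+ // twice (SrcUses/SrcSize)/SrcRCCount, and likewise for the Dst side.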
+ if (SrcRC != NewRC && SrcSize > ThresSize) {
+ unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
+ if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+ return false;
+ }
+ if (DstRC != NewRC && DstSize > ThresSize) {
+ unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
+ if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
+ return false;
+ }
+ return true;
+}
+
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*tii_, *tri_);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ // If they are already joined, there is nothing left to do.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tCopy already coalesced.\n");
+ return false; // Not coalescable.
+ }
+
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
+ << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+ << "\n");
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ if (!shouldJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), 0, CopyMI))
+ return true;
+ return false;
+ }
+ } else {
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
+ return false;
+ }
+ if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+ mri_->getRegClass(CP.getSrcReg()),
+ mri_->getRegClass(CP.getDstReg()),
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // When possible, let DstReg be the larger interval.
+ if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
+ li_->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!JoinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), 0, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // We may be coalescing to a virtual register in a sub-register class of the
+ // other; make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Remember to delete the copy instruction.
+ markAsJoined(CopyMI);
+
+ UpdateRegDefsUses(CP);
+
+ // If we have extended the live range of a physical register, make sure we
+ // update live-in lists as well.
+ if (CP.isPhys()) {
+ SmallVector<MachineBasicBlock*, 16> BlockSeq;
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
+ li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+ for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+ MachineBasicBlock &block = *BlockSeq[idx];
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
+ }
+ BlockSeq.clear();
+ }
+ }
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ li_->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+
+ DEBUG({
+ LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
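+/// A sketch of the recursion with illustrative value numbers: if B's val#1 is
+/// a copy of A's val#2, and A's val#2 is in turn a copy of B's val#0, then
+/// resolving B#1 first marks it as in progress (-2), recurses into A#2, which
+/// recurses into B#0; B#0 is not a copy from A, so it gets a fresh slot in
+/// NewVNInfo, and that slot is propagated back to A#2 and B#1.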
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+ assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+
+// Find out if we have something like
+// A = X
+// B = X
+// if so, we can pretend this is actually
+// A = X
+// B = A
+// which allows us to coalesce A and B.
+// VNI is the definition of B. LR is the live range of A that includes
+// the slot just before B. If we return true, we add "B = X" to DupCopies.
+static bool RegistersDefinedFromSameValue(LiveIntervals &li,
+ const TargetRegisterInfo &tri,
+ CoalescerPair &CP,
+ VNInfo *VNI,
+ LiveRange *LR,
+ SmallVector<MachineInstr*, 8> &DupCopies) {
+ // FIXME: This is very conservative. For example, we don't handle
+ // physical registers.
+
+ MachineInstr *MI = VNI->getCopy();
+
+ if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ return false;
+
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(Src) ||
+ !TargetRegisterInfo::isVirtualRegister(Dst))
+ return false;
+
+ unsigned A = CP.getDstReg();
+ unsigned B = CP.getSrcReg();
+
+ if (B == Dst)
+ std::swap(A, B);
+ assert(Dst == A);
+
+ VNInfo *Other = LR->valno;
+ if (!Other->isDefByCopy())
+ return false;
+ const MachineInstr *OtherMI = Other->getCopy();
+
+ if (!OtherMI->isFullCopy())
+ return false;
+
+ unsigned OtherDst = OtherMI->getOperand(0).getReg();
+ unsigned OtherSrc = OtherMI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) ||
+ !TargetRegisterInfo::isVirtualRegister(OtherDst))
+ return false;
+
+ assert(OtherDst == B);
+
+ if (Src != OtherSrc)
+ return false;
+
+ // If the copies use two different value numbers of X, we cannot merge
+ // A and B.
+ LiveInterval &SrcInt = li.getInterval(Src);
+ if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def))
+ return false;
+
+ DupCopies.push_back(MI);
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+ // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!li_->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = li_->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
+ return false;
+ }
+
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ SmallVector<MachineInstr*, 8> DupCopies;
+
+ LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ continue;
+
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ continue;
+
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+ }
+
+ while (I != IE && J != JE) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+ return false;
+ }
+
+ if (I->end < J->end)
+ ++I;
+ else
+ ++J;
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
+ }
+
+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
+ SmallVector<unsigned, 8> SourceRegisters;
+ for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
+ E = DupCopies.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+
+ // We have pretended that the assignment to B in
+ // A = X
+ // B = X
+ // was actually a copy from A. Now that we decided to coalesce A and B,
+ // transform the code into
+ // A = X
+ // X = X
+ // and mark the X as coalesced to keep the illusion.
+ unsigned Src = MI->getOperand(1).getReg();
+ SourceRegisters.push_back(Src);
+ MI->getOperand(0).substVirtReg(Src, 0, *tri_);
+
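+ // MI is now an identity copy of Src ('X = X'); mark it as joined so the
+ // cleanup pass in runOnMachineFunction deletes it.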
+ markAsJoined(MI);
+ }
+
+ // If B = X was the last use of X in a liverange, we have to shrink it now
+ // that B = X is gone.
+ for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
+ E = SourceRegisters.end(); I != E; ++I) {
+ li_->shrinkToUses(&li_->getInterval(*I));
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
+ return true;
+}
+
+namespace {
+ // DepthMBBCompare - Comparison predicate that sort first based on the loop
+ // depth of the basic block (the unsigned), and then on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ // Deeper loops first
+ if (LHS.first != RHS.first)
+ return LHS.first > RHS.first;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+ unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+ if (cl != cr)
+ return cl > cr;
+
+ // As a last resort, sort by block number.
+ return LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
+
+void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ SmallVector<MachineInstr*, 8> VirtCopies;
+ SmallVector<MachineInstr*, 8> PhysCopies;
+ SmallVector<MachineInstr*, 8> ImpDefCopies;
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+ // If this isn't a copy or a subreg_to_reg, we can't join intervals.
+ unsigned SrcReg, DstReg;
+ if (Inst->isCopy()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->isSubregToReg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(Inst);
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(Inst);
+ else
+ VirtCopies.push_back(Inst);
+ }
+
+ // Try coalescing implicit copies and insert_subreg <undef> first,
+ // followed by copies to / from physical registers, then finally copies
+ // from virtual registers to virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void RegisterCoalescer::joinIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+ std::vector<MachineInstr*> TryAgainList;
+ if (loopInfo->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+ // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ MachineInstr *&TheCopy = TryAgainList[i];
+ if (!TheCopy)
+ continue;
+
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy= 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+}
+
+void RegisterCoalescer::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ ldv_ = &getAnalysis<LiveDebugVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ if (VerifyCoalescing)
+ mf_->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ dbgs() << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+ I != E; ++I){
+ I->second->print(dbgs(), tri_);
+ dbgs() << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ bool DoDelete = true;
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
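+ // A SUBREG_TO_REG-style copy reads its source from operand 2; a plain
+ // COPY reads it from operand 1.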
+ unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ MI->getNumOperands() > 2)
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+ // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+
+ if (MI->allDefsAreDead()) {
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ li_->hasInterval(SrcReg))
+ li_->shrinkToUses(&li_->getInterval(SrcReg));
+ DoDelete = true;
+ }
+ if (!DoDelete) {
+ // We need the instruction to adjust liveness, so make it a KILL.
+ if (MI->isSubregToReg()) {
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(1);
+ }
+ MI->setDesc(tii_->get(TargetOpcode::KILL));
+ mii = llvm::next(mii);
+ } else {
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ }
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ DeadDefs.push_back(Reg);
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !mri_->use_nodbg_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+ ++mii;
+
+ // Check for now unnecessary kill flags.
+ if (li_->isNotInMIMap(MI)) continue;
+ SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+ unsigned reg = MO.getReg();
+ if (!reg || !li_->hasInterval(reg)) continue;
+ if (!li_->getInterval(reg).killedAt(DefIdx)) {
+ MO.setIsKill(false);
+ continue;
+ }
+ // When leaving a kill flag on a physreg, check if any subregs should
+ // remain alive.
+ if (!TargetRegisterInfo::isPhysicalRegister(reg))
+ continue;
+ for (const unsigned *SR = tri_->getSubRegisters(reg);
+ unsigned S = *SR; ++SR)
+ if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, tri_);
+ }
+ }
+ }
+
+ DEBUG(dump());
+ DEBUG(ldv_->dump());
+ if (VerifyCoalescing)
+ mf_->verify(this, "After register coalescing");
+ return true;
+}
+
+/// print - Implement the dump method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
+
+RegisterCoalescer *llvm::createRegisterCoalescer() {
+ return new RegisterCoalescer();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 000000000000..4131d91c00e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,243 @@
+//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegisterClassInfo.h"
+#include "llvm/Support/IncludeFile.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
+
+namespace llvm {
+
+ class MachineFunction;
+ class RegallocQuery;
+ class AnalysisUsage;
+ class MachineInstr;
+ class TargetRegisterInfo;
+ class TargetRegisterClass;
+ class TargetInstrInfo;
+ class LiveDebugVariables;
+ class VirtRegMap;
+ class MachineLoopInfo;
+
+ class CoalescerPair;
+
+ /// An abstract interface for register coalescers. Coalescers must
+ /// implement this interface to be part of the coalescer analysis
+ /// group.
+ class RegisterCoalescer : public MachineFunctionPass {
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ LiveIntervals *li_;
+ LiveDebugVariables *ldv_;
+ const MachineLoopInfo* loopInfo;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns true
+ /// if the copy was successfully coalesced away. If it is not currently
+ /// possible to coalesce this interval, but it may be possible if other
+ /// things get coalesced, then it returns true by reference in 'Again'.
+ bool JoinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we can
+ /// use this information below to update aliases.
+ bool JoinIntervals(CoalescerPair &CP);
+
+ /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// the source value number is defined by a copy from the destination reg,
+ /// see if we can merge the two destination register value numbers into a
+ /// single value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+ /// computation, replace the copy by rematerializing the definition.
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, unsigned DstSubIdx,
+ MachineInstr *CopyMI);
+
+ /// shouldJoinPhys - Return true if a physreg copy should be joined.
+ bool shouldJoinPhys(CoalescerPair &CP);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC);
+
+ /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(const CoalescerPair &CP);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+ /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+ /// VNInfo copy flag for DstReg and all aliases.
+ void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
+ /// markAsJoined - Remember that CopyMI has already been joined.
+ void markAsJoined(MachineInstr *CopyMI);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Register allocators must call this from their own
+ /// getAnalysisUsage to cover the case where the coalescer is not
+ /// a Pass in the proper sense and isn't managed by PassManager.
+ /// PassManager needs to know which analyses to make available and
+ /// which to invalidate when running the register allocator or any
+ /// pass that might call coalescing. The long-term solution is to
+ /// allow hierarchies of PassManagers.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
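+ // Usage sketch (illustrative, not part of the original interface): a
+ // register allocator typically requests coalescing like this, where
+ // MyRegAlloc is a hypothetical pass used only for the example.
+ //
+ // void MyRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const {
+ // AU.addRequired<RegisterCoalescer>(); // run coalescing first
+ // MachineFunctionPass::getAnalysisUsage(AU);
+ // }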
+
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetInstrInfo &tii_;
+ const TargetRegisterInfo &tri_;
+
+ /// dstReg_ - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned dstReg_;
+
+ /// srcReg_ - The virtual register that will be coalesced into dstReg_.
+ unsigned srcReg_;
+
+ /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to
+ /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+ /// virtual register.
+ unsigned subIdx_;
+
+ /// partial_ - True when the original copy was a partial subregister copy.
+ bool partial_;
+
+ /// crossClass_ - True when both regs are virtual, and newRC is constrained.
+ bool crossClass_;
+
+ /// flipped_ - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool flipped_;
+
+ /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+ /// is a physreg.
+ const TargetRegisterClass *newRC_;
+
+ public:
+ CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+ : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+ partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+ /// because dstReg_ is a physical register, or subIdx_ is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !newRC_; }
+
+ /// isPartial - Return true if the original copy instruction did not copy the
+ /// full register, but was a subreg operation.
+ bool isPartial() const { return partial_; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return crossClass_; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return flipped_; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return dstReg_; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return srcReg_; }
+
+ /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+ /// coalesced into, or 0.
+ unsigned getSubIdx() const { return subIdx_; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return newRC_; }
+ };
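+ // Usage sketch (illustrative): the coalescer interrogates a CoalescerPair
+ // roughly as follows; CopyMI is some copy-like MachineInstr and
+ // attemptJoin is a hypothetical helper.
+ //
+ // CoalescerPair CP(*tii_, *tri_);
+ // if (CP.setRegisters(CopyMI) && !CP.isPhys())
+ // // CP.getSrcReg() may be merged into CP.getDstReg():CP.getSubIdx()
+ // attemptJoin(CP);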
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 000000000000..9e9a145b0aa4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,395 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg) {
+ RegsAvailable.reset(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ RegsAvailable.reset(SubReg);
+}
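+// Illustrative note: on a target where D0 overlaps S0 and S1 (ARM-style
+// names, used here only as an example), setUsed(D0) also clears the
+// availability bits of S0 and S1 through the sub-register loop above.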
+
+bool RegScavenger::isAliasUsed(unsigned Reg) const {
+ if (isUsed(Reg))
+ return true;
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ if (isUsed(*R))
+ return true;
+ return false;
+}
+
+void RegScavenger::initRegState() {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+
+ // All registers start out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
+
+ if (!MBB)
+ return;
+
+ // Live-in registers are in use.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ // Pristine CSRs are also unavailable.
+ BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setUsed(I);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ // Self-initialize.
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+
+ // Create reserved registers bitvector.
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ initRegState();
+
+ Tracking = false;
+}
+
+void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ BV.set(*R);
+}
+
+void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ BV.set(*R);
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ if (MI == ScavengeRestore) {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly, ignore "dead" markers.
+ bool isPred = TII->isPredicated(MI);
+ BitVector EarlyClobberRegs(NumPhysRegs);
+ BitVector KillRegs(NumPhysRegs);
+ BitVector DefRegs(NumPhysRegs);
+ BitVector DeadRegs(NumPhysRegs);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || isReserved(Reg))
+ continue;
+
+ if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
+ // Two-address operands implicitly kill.
+ if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (!isPred && MO.isDead())
+ addRegWithSubRegs(DeadRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ if (MO.isEarlyClobber())
+ addRegWithAliases(EarlyClobberRegs, Reg);
+ }
+ }
+
+ // Verify uses and defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isUsed(Reg)) {
+ // Check if it's partially live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 then reads a partially undef value. This is not *incorrect* since
+ // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ if (isUsed(SubReg)) {
+ SubUsed = true;
+ break;
+ }
+ assert(SubUsed && "Using an undefined register!");
+ }
+ assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
+ "Using an early clobbered register!");
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUnused(DeadRegs);
+ setUsed(DefRegs);
+}
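+// Usage sketch (illustrative): a pass typically tracks liveness through a
+// block by entering it once and then stepping per instruction, e.g.
+//
+// RS->enterBasicBlock(&MBB);
+// for (unsigned n = 0, e = MBB.size(); n != e; ++n)
+// RS->forward(); // advance the state one instruction at a time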
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ if (includeReserved)
+ used = ~RegsAvailable;
+ else
+ used = ~RegsAvailable & ~ReservedRegs;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
+ return *I;
+ }
+ return 0;
+}
+
+/// getRegsAvailable - Return a bit mask of the registers in the register
+/// class RC that are currently available (no alias in use).
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+ return Mask;
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after StartMI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ Candidates.reset(MO.getReg());
+ for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ Candidates.reset(*R);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill. Search explicitly rather than masking out based on
+ // RegsAvailable, as RegsAvailable does not take aliases into account.
+ // That's what getRegsAvailable() is for.
+ BitVector Available = getRegsAvailable(RC);
+
+ if ((Candidates & Available).any())
+ Candidates &= Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ assert(ScavengedReg == 0 &&
+ "Scavenger slot is live, unable to scavenge another register!");
+
+ // Avoid infinite regress.
+ ScavengedReg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
+ MachineBasicBlock::iterator II = prior(I);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+ II = prior(UseMI);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
+ }
+
+ ScavengeRestore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // ScavengedReg = SReg;
+ ScavengedRC = RC;
+
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
+ return SReg;
+}
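+// Usage sketch (illustrative): targets commonly reach for this from frame
+// index elimination when no register is free, e.g.
+//
+// unsigned Tmp = RS->scavengeRegister(RC, II, SPAdj);
+//
+// where RC is the required register class, II the instruction that needs
+// the temporary and SPAdj the current stack pointer adjustment.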
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 000000000000..8b02ec44273a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1012 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+ cl::desc("Appended to function name to get output file name "
+ "(default: \".html\")"),
+ cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+ cl::desc("Comma separated list of functions to render"
+ ", or \"*\"."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+ cl::desc("Register classes to render pressure for."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+ cl::desc("Live intervals to show alongside code."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+ cl::desc("Don't display empty intervals."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+ cl::desc("Render indexes not associated with instructions or "
+ "MBB starts."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+ cl::desc("Use SVG for vertical text."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+ cl::desc("Pretty print HTML. For debugging the renderer only.."),
+ cl::init(false), cl::Hidden);
+
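+// Example (illustrative): assuming a register allocator that schedules this
+// pass, the hidden flags above combine on the llc command line roughly as
+//
+// llc -rmf-funcs=main -rmf-classes=GR32 -rmf-intervals=virt* input.ll
+//
+// which would render main() to main.html with GR32 pressure and all virtual
+// intervals shown. The function, class and file names are examples only.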
+
+namespace llvm {
+
+ bool MFRenderingOptions::renderingOptionsProcessed;
+ std::set<std::string> MFRenderingOptions::mfNamesToRender;
+ bool MFRenderingOptions::renderAllMFs = false;
+
+ std::set<std::string> MFRenderingOptions::classNamesToRender;
+ bool MFRenderingOptions::renderAllClasses = false;
+
+ std::set<std::pair<unsigned, unsigned> >
+ MFRenderingOptions::intervalNumsToRender;
+ unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
+
+ template <typename OutputItr>
+ void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
+ OutputItr outItr) {
+ std::string::const_iterator curPos = s.begin();
+ std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
+ while (nextComa != s.end()) {
+ std::string elem;
+ std::copy(curPos, nextComa, std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ curPos = llvm::next(nextComa);
+ nextComa = std::find(curPos, s.end(), ',');
+ }
+
+ if (curPos != s.end()) {
+ std::string elem;
+ std::copy(curPos, s.end(), std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ }
+ }
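+ // Example (illustrative): splitComaSeperatedList("a,b,c", outItr) emits the
+ // elements "a", "b" and "c"; a trailing comma adds no empty element because
+ // the final copy only runs when characters remain after the last comma.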
+
+ void MFRenderingOptions::processOptions() {
+ if (!renderingOptionsProcessed) {
+ processFuncNames();
+ processRegClassNames();
+ processIntervalNumbers();
+ renderingOptionsProcessed = true;
+ }
+ }
+
+ void MFRenderingOptions::processFuncNames() {
+ if (machineFuncsToRender == "*") {
+ renderAllMFs = true;
+ } else {
+ splitComaSeperatedList(machineFuncsToRender,
+ std::inserter(mfNamesToRender,
+ mfNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processRegClassNames() {
+ if (pressureClasses == "*") {
+ renderAllClasses = true;
+ } else {
+ splitComaSeperatedList(pressureClasses,
+ std::inserter(classNamesToRender,
+ classNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processIntervalNumbers() {
+ std::set<std::string> intervalRanges;
+ splitComaSeperatedList(showIntervals,
+ std::inserter(intervalRanges,
+ intervalRanges.begin()));
+ std::for_each(intervalRanges.begin(), intervalRanges.end(),
+ processIntervalRange);
+ }
+
+ void MFRenderingOptions::processIntervalRange(
+ const std::string &intervalRangeStr) {
+ if (intervalRangeStr == "*") {
+ intervalTypesToRender |= All;
+ } else if (intervalRangeStr == "virt-nospills*") {
+ intervalTypesToRender |= VirtNoSpills;
+ } else if (intervalRangeStr == "spills*") {
+ intervalTypesToRender |= VirtSpills;
+ } else if (intervalRangeStr == "virt*") {
+ intervalTypesToRender |= AllVirt;
+ } else if (intervalRangeStr == "phys*") {
+ intervalTypesToRender |= AllPhys;
+ } else {
+ std::istringstream iss(intervalRangeStr);
+ unsigned reg1, reg2;
+ if ((iss >> reg1 >> std::ws)) {
+ if (iss.eof()) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
+ } else {
+ char c;
+ iss >> c;
+ if (c == '-' && (iss >> reg2)) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
+ } else {
+ dbgs() << "Warning: Invalid interval range \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ } else {
+ dbgs() << "Warning: Invalid interval number \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ }
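+ // Accepted forms, as parsed above (values are illustrative):
+ // "*", "virt*", "phys*", "virt-nospills*", "spills*" - category selectors
+ // "42" - render the interval for register 42 only
+ // "42-48" - render registers 42 through 48 inclusive
+ // so -rmf-intervals=virt-nospills*,100-110 mixes both styles.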
+
+ void MFRenderingOptions::setup(MachineFunction *mf,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis,
+ const RenderMachineFunction *rmf) {
+ this->mf = mf;
+ this->tri = tri;
+ this->lis = lis;
+ this->rmf = rmf;
+
+ clear();
+ }
+
+ void MFRenderingOptions::clear() {
+ regClassesTranslatedToCurrentFunction = false;
+ regClassSet.clear();
+
+ intervalsTranslatedToCurrentFunction = false;
+ intervalSet.clear();
+ }
+
+ void MFRenderingOptions::resetRenderSpecificOptions() {
+ intervalSet.clear();
+ intervalsTranslatedToCurrentFunction = false;
+ }
+
+ bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
+ processOptions();
+
+ return (renderAllMFs ||
+ mfNamesToRender.find(mf->getFunction()->getName()) !=
+ mfNamesToRender.end());
+ }
+
+ const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
+ translateRegClassNamesToCurrentFunction();
+ return regClassSet;
+ }
+
+ const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
+ translateIntervalNumbersToCurrentFunction();
+ return intervalSet;
+ }
+
+ bool MFRenderingOptions::renderEmptyIndexes() const {
+ return showEmptyIndexes;
+ }
+
+ bool MFRenderingOptions::fancyVerticals() const {
+ return useFancyVerticals;
+ }
+
+ void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
+ if (!regClassesTranslatedToCurrentFunction) {
+ processOptions();
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ if (renderAllClasses ||
+ classNamesToRender.find(trc->getName()) !=
+ classNamesToRender.end()) {
+ regClassSet.insert(trc);
+ }
+ }
+ regClassesTranslatedToCurrentFunction = true;
+ }
+ }
+
+ void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
+ if (!intervalsTranslatedToCurrentFunction) {
+ processOptions();
+
+ // If we're not restricted to the explicit list, copy over all intervals of
+ // the matching types.
+ if (intervalTypesToRender != ExplicitOnly) {
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (filterEmpty && li->empty())
+ continue;
+
+ if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
+ (intervalTypesToRender & AllPhys))) {
+ intervalSet.insert(li);
+ } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
+ if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
+ ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
+ intervalSet.insert(li);
+ }
+ }
+ }
+ }
+
+ // If we need to process the explicit list...
+ if (intervalTypesToRender != All) {
+ for (std::set<std::pair<unsigned, unsigned> >::const_iterator
+ regRangeItr = intervalNumsToRender.begin(),
+ regRangeEnd = intervalNumsToRender.end();
+ regRangeItr != regRangeEnd; ++regRangeItr) {
+ const std::pair<unsigned, unsigned> &range = *regRangeItr;
+ for (unsigned reg = range.first; reg != range.second; ++reg) {
+ if (lis->hasInterval(reg)) {
+ intervalSet.insert(&lis->getInterval(reg));
+ }
+ }
+ }
+ }
+
+ intervalsTranslatedToCurrentFunction = true;
+ }
+ }
+
+ // ---------- TargetRegisterExtraInfo implementation ----------
+
+ TargetRegisterExtraInfo::TargetRegisterExtraInfo()
+ : mapsPopulated(false) {
+ }
+
+ void TargetRegisterExtraInfo::setup(MachineFunction *mf,
+ MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis) {
+ this->mf = mf;
+ this->mri = mri;
+ this->tri = tri;
+ this->lis = lis;
+ }
+
+ void TargetRegisterExtraInfo::reset() {
+ if (!mapsPopulated) {
+ initWorst();
+ //initBounds();
+ initCapacity();
+ mapsPopulated = true;
+ }
+
+ resetPressureAndLiveStates();
+ }
+
+ void TargetRegisterExtraInfo::clear() {
+ prWorst.clear();
+ vrWorst.clear();
+ capacityMap.clear();
+ pressureMap.clear();
+ //liveStatesMap.clear();
+ mapsPopulated = false;
+ }
+
+ void TargetRegisterExtraInfo::initWorst() {
+ assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
+ "Worst map already initialised?");
+
+ // Start with the physical registers.
+ for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
+ WorstMapLine &pregLine = prWorst[preg];
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ unsigned numOverlaps = 0;
+ for (TargetRegisterClass::iterator rItr = trc->begin(),
+ rEnd = trc->end();
+ rItr != rEnd; ++rItr) {
+ unsigned trcPReg = *rItr;
+ if (tri->regsOverlap(preg, trcPReg))
+ ++numOverlaps;
+ }
+
+ pregLine[trc] = numOverlaps;
+ }
+ }
+
+ // Now the register classes.
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rc1Itr != rcEnd; ++rc1Itr) {
+ const TargetRegisterClass *trc1 = *rc1Itr;
+ WorstMapLine &classLine = vrWorst[trc1];
+
+ for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+ rc2Itr != rcEnd; ++rc2Itr) {
+ const TargetRegisterClass *trc2 = *rc2Itr;
+
+ unsigned worst = 0;
+
+ for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+ trc1End = trc1->end();
+ trc1Itr != trc1End; ++trc1Itr) {
+ unsigned trc1Reg = *trc1Itr;
+ unsigned trc1RegWorst = 0;
+
+ for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+ trc2End = trc2->end();
+ trc2Itr != trc2End; ++trc2Itr) {
+ unsigned trc2Reg = *trc2Itr;
+ if (tri->regsOverlap(trc1Reg, trc2Reg))
+ ++trc1RegWorst;
+ }
+ if (trc1RegWorst > worst) {
+ worst = trc1RegWorst;
+ }
+ }
+
+ if (worst != 0) {
+ classLine[trc2] = worst;
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getWorst(
+ unsigned reg,
+ const TargetRegisterClass *trc) const {
+ const WorstMapLine *wml = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+ assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+ wml = &prwItr->second;
+ } else {
+ const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+ VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+ assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+ wml = &vrwItr->second;
+ }
+
+ WorstMapLine::const_iterator wmlItr = wml->find(trc);
+ if (wmlItr == wml->end())
+ return 0;
+
+ return wmlItr->second;
+ }
+
+ void TargetRegisterExtraInfo::initCapacity() {
+ assert(!mapsPopulated && capacityMap.empty() &&
+ "Capacity map already initialised?");
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ unsigned capacity = trc->getRawAllocationOrder(*mf).size();
+
+ if (capacity != 0)
+ capacityMap[trc] = capacity;
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getCapacity(
+ const TargetRegisterClass *trc) const {
+ CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+ assert(cmItr != capacityMap.end() &&
+ "vreg with unallocable register class");
+ return cmItr->second;
+ }
+
+ void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+ pressureMap.clear();
+ //liveStatesMap.clear();
+
+ // Iterate over all slots.
+
+
+ // Iterate over all live intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+ continue;
+
+ // For all ranges in the current interval.
+ for (LiveInterval::iterator lrItr = li->begin(),
+ lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+ LiveRange *lr = &*lrItr;
+
+ // For all slots in the current range.
+ for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+ // Record increased pressure at index for all overlapping classes.
+ for (TargetRegisterInfo::regclass_iterator
+ rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ if (trc->getRawAllocationOrder(*mf).empty())
+ continue;
+
+ unsigned worstAtI = getWorst(li->reg, trc);
+
+ if (worstAtI != 0) {
+ pressureMap[i][trc] += worstAtI;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ PressureMap::const_iterator pmItr = pressureMap.find(i);
+ if (pmItr == pressureMap.end())
+ return 0;
+ const PressureMapLine &pmLine = pmItr->second;
+ PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+ if (pmlItr == pmLine.end())
+ return 0;
+ return pmlItr->second;
+ }
+
+ bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ return (getPressureAtSlot(trc, i) > getCapacity(trc));
+ }
+
+ // ---------- MachineFunctionRenderer implementation ----------
+
+ void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+ if (!prettyHTML)
+ return;
+ for (unsigned i = 0; i < ns; ++i) {
+ os << " ";
+ }
+ }
+
+ RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+ return Spacer(ns);
+ }
+
+ raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
+ s.print(os);
+ return os;
+ }
+
+ template <typename Iterator>
+ std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
+ std::string r;
+
+ for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+ char c = *sItr;
+
+ switch (c) {
+ case '<': r.append("&lt;"); break;
+ case '>': r.append("&gt;"); break;
+ case '&': r.append("&amp;"); break;
+ case ' ': r.append("&nbsp;"); break;
+ case '\"': r.append("&quot;"); break;
+ default: r.push_back(c); break;
+ }
+ }
+
+ return r;
+ }
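+ // Example (illustrative): escapeChars applied to "<a b>" yields
+ // "&lt;a&nbsp;b&gt;", which is safe to embed in the generated HTML.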
+
+ RenderMachineFunction::LiveState
+ RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+ SlotIndex i) const {
+ const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+ // For uses/defs, recorded use/def indexes override current liveness and
+ // instruction operands (only for the interval which records the indexes).
+ if (i.isUse() || i.isDef()) {
+ UseDefs::const_iterator udItr = useDefs.find(li);
+ if (udItr != useDefs.end()) {
+ const SlotSet &slotSet = udItr->second;
+ if (slotSet.count(i)) {
+ if (i.isUse()) {
+ return Used;
+ }
+ // else
+ return Defined;
+ }
+ }
+ }
+
+ // If the slot is a load/store, or there's no info in the use/def set then
+ // use liveness and instruction operand info.
+ if (li->liveAt(i)) {
+
+ if (mi == 0) {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ } else {
+ if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ return Defined;
+ } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ return Used;
+ } else {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ }
+ }
+ }
+ return Dead;
+ }
+
+ RenderMachineFunction::PressureState
+ RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ if (trei.getPressureAtSlot(trc, i) == 0) {
+ return Zero;
+ } else if (trei.classOverCapacityAtSlot(trc, i)){
+ return High;
+ }
+ return Low;
+ }
+
+ /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ oss << *mi;
+
+ os << escapeChars(oss.str());
+ }
+
+ template <typename T>
+ void RenderMachineFunction::renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const {
+ if (ro.fancyVerticals()) {
+ os << indent << "<object\n"
+ << indent + s(2) << "class=\"obj\"\n"
+ << indent + s(2) << "type=\"image/svg+xml\"\n"
+ << indent + s(2) << "width=\"14px\"\n"
+ << indent + s(2) << "height=\"55px\"\n"
+ << indent + s(2) << "data=\"data:image/svg+xml,\n"
+ << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
+ << indent + s(6) << "<text x='-55' y='10' "
+ "font-family='Courier' font-size='12' "
+ "transform='rotate(-90)' "
+ "text-rendering='optimizeSpeed' "
+ "fill='#000'>" << t << "</text>\n"
+ << indent + s(4) << "</svg>\">\n"
+ << indent << "</object>\n";
+ } else {
+ std::ostringstream oss;
+ oss << t;
+ std::string tStr(oss.str());
+
+ os << indent;
+ for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
+ tStrItr != tStrEnd; ++tStrItr) {
+ os << *tStrItr << "<br/>";
+ }
+ os << "\n";
+ }
+ }
+
+ void RenderMachineFunction::insertCSS(const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<style type=\"text/css\">\n"
+ << indent + s(2) << "body { font-color: black; }\n"
+ << indent + s(2) << "table.code td { font-family: monospace; "
+ "border-width: 0px; border-style: solid; "
+ "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
+ << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
+ << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
+ << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
+ << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
+ << indent + s(2) << "table.code th { border-width: 0px; "
+ "border-style: solid; }\n"
+ << indent << "</style>\n";
+ }
+
+ void RenderMachineFunction::renderFunctionSummary(
+ const Spacer &indent, raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << indent << "<h1>Function: " << mf->getFunction()->getName()
+ << "</h1>\n"
+ << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
+ }
+
+
+ void RenderMachineFunction::renderPressureTableLegend(
+ const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
+ << indent << "<table class=\"code\">\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<th>Pressure</th><th>Description</th>"
+ "<th>Appearance</th>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>No Pressure</td>"
+ "<td>No physical registers of this class requested.</td>"
+ "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>Low Pressure</td>"
+ "<td>Sufficient physical registers to meet demand.</td>"
+ "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>High Pressure</td>"
+ "<td>Potentially insufficient physical registers to meet demand.</td>"
+ "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent << "</table>\n";
+ }
+
+ template <typename CellType>
+ void RenderMachineFunction::renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const {
+
+ if (rleAccumulator.second == 0)
+ return;
+
+ typename std::map<CellType, std::string>::const_iterator ctsItr =
+ cellTypeStrs.find(rleAccumulator.first);
+
+ assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
+
+ os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
+ if (rleAccumulator.second > 1)
+ os << " colspan=" << rleAccumulator.second;
+ os << "></td>\n";
+ }
+
+
+ void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const {
+
+ std::map<LiveState, std::string> lsStrs;
+ lsStrs[Dead] = "l-n";
+ lsStrs[Defined] = "l-d";
+ lsStrs[Used] = "l-u";
+ lsStrs[AliveReg] = "l-r";
+ lsStrs[AliveStack] = "l-s";
+
+ std::map<PressureState, std::string> psStrs;
+ psStrs[Zero] = "p-z";
+ psStrs[Low] = "p-l";
+ psStrs[High] = "p-h";
+
+ // Open the table...
+
+ os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
+ << indent + s(2) << "<tr>\n";
+
+ // Render the header row...
+
+ os << indent + s(4) << "<th>index</th>\n"
+ << indent + s(4) << "<th>instr</th>\n";
+
+ // Render class names if necessary...
+ if (!ro.regClasses().empty()) {
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, trc->getName());
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<th>&nbsp;&nbsp;</th>\n";
+
+ // Render interval numbers if necessary...
+ if (!ro.intervals().empty()) {
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = *liItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, li->reg);
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ os << indent + s(2) << "</tr>\n";
+
+ // End header row, start with the data rows...
+
+ MachineInstr *mi = 0;
+
+ // Data rows:
+ for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
+ i = i.getNextSlot()) {
+
+ // Render the slot column.
+ os << indent + s(2) << "<tr height=6ex>\n";
+
+ // Render the code column.
+ if (i.isLoad()) {
+ MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
+ mi = sis->getInstructionFromIndex(i);
+
+ if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
+ ro.renderEmptyIndexes()) {
+ os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
+ << indent + s(4) << "<td rowspan=4>\n";
+
+ if (i == sis->getMBBStartIdx(mbb)) {
+ os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
+ } else if (mi != 0) {
+ os << indent + s(6) << "&nbsp;&nbsp;";
+ renderMachineInstr(os, mi);
+ } else {
+ // Empty interval - leave blank.
+ }
+ os << indent + s(4) << "</td>\n";
+ } else {
+ i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ continue;
+ }
+ }
+
+ // Render the class columns.
+ if (!ro.regClasses().empty()) {
+ std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ PressureState newPressure = getPressureStateAt(trc, i);
+
+ if (newPressure == psRLEAccumulator.first) {
+ ++psRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ psRLEAccumulator.first = newPressure;
+ psRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<td width=2em></td>\n";
+
+ if (!ro.intervals().empty()) {
+ std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+ const LiveInterval *li = *liItr;
+ LiveState newLiveness = getLiveStateAt(li, i);
+
+ if (newLiveness == lsRLEAccumulator.first) {
+ ++lsRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ lsRLEAccumulator.first = newLiveness;
+ lsRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ }
+ os << indent + s(2) << "</tr>\n";
+ }
+
+ os << indent << "</table>\n";
+
+ if (!ro.regClasses().empty())
+ renderPressureTableLegend(indent, os);
+ }
+
+ void RenderMachineFunction::renderFunctionPage(
+ raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << "<html>\n"
+ << s(2) << "<head>\n"
+ << s(4) << "<title>" << fqn << "</title>\n";
+
+ insertCSS(s(4), os);
+
+ os << s(2) << "<head>\n"
+ << s(2) << "<body >\n";
+
+ renderFunctionSummary(s(4), os, renderContextStr);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ //renderLiveIntervalInfoTable(" ", os);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ renderCodeTablePlusPI(s(4), os);
+
+ os << s(2) << "</body>\n"
+ << "</html>\n";
+ }
+
+ void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ lis = &getAnalysis<LiveIntervals>();
+ sis = &getAnalysis<SlotIndexes>();
+
+ trei.setup(mf, mri, tri, lis);
+ ro.setup(mf, tri, lis, this);
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ return false;
+ }
+
+ void RenderMachineFunction::releaseMemory() {
+ trei.clear();
+ ro.clear();
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+ }
+
+ void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ const MachineInstr *mi = &*rItr;
+ if (mi->readsRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ }
+ if (mi->definesRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ }
+ }
+ }
+
+ void RenderMachineFunction::rememberSpills(
+ const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
+ siEnd = spills.end();
+ siItr != siEnd; ++siItr) {
+ const LiveInterval *spill = *siItr;
+ spillIntervals[li].insert(spill);
+ spillFor[spill] = li;
+ }
+ }
+
+ bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
+ SpillForMap::const_iterator sfItr = spillFor.find(li);
+ if (sfItr == spillFor.end())
+ return false;
+ return true;
+ }
+
+ void RenderMachineFunction::renderMachineFunction(
+ const char *renderContextStr,
+ const VirtRegMap *vrm,
+ const char *renderSuffix) {
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ this->vrm = vrm;
+ trei.reset();
+
+ std::string rpFileName(mf->getFunction()->getName().str() +
+ (renderSuffix ? renderSuffix : "") +
+ outputFileSuffix);
+
+ std::string errMsg;
+ raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
+
+ renderFunctionPage(outFile, renderContextStr);
+
+ ro.resetRenderSpecificOptions();
+ }
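+ // Usage sketch (illustrative): an allocator that wants one page per stage
+ // might call, for example,
+ //
+ // rmf->renderMachineFunction("Pre register allocation", 0, ".preRA");
+ // rmf->renderMachineFunction("Post register allocation", vrm, ".postRA");
+ //
+ // The context strings and suffixes here are examples only.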
+
+ std::string RenderMachineFunction::escapeChars(const std::string &s) const {
+ return escapeChars(s.begin(), s.end());
+ }
+
+}
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.h b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
new file mode 100644
index 000000000000..85719923c0c6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
@@ -0,0 +1,338 @@
+//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineRegisterInfo;
+ class RenderMachineFunction;
+ class TargetRegisterClass;
+ class TargetRegisterInfo;
+ class VirtRegMap;
+ class raw_ostream;
+
+ /// \brief Helper class to process rendering options. Tries to be as lazy as
+ /// possible.
+ class MFRenderingOptions {
+ public:
+
+ struct RegClassComp {
+ bool operator()(const TargetRegisterClass *trc1,
+ const TargetRegisterClass *trc2) const {
+ std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
+ return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
+ trc2Name.begin(), trc2Name.end());
+ }
+ };
+
+ typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
+
+ struct IntervalComp {
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
+
+ /// Initialise the rendering options.
+ void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
+ LiveIntervals *lis, const RenderMachineFunction *rmf);
+
+ /// Clear translations of options to the current function.
+ void clear();
+
+ /// Reset any options computed for this specific rendering.
+ void resetRenderSpecificOptions();
+
+ /// Should we render the current function.
+ bool shouldRenderCurrentMachineFunction() const;
+
+ /// Return the set of register classes to render pressure for.
+ const RegClassSet& regClasses() const;
+
+ /// Return the set of live intervals to render liveness for.
+ const IntervalSet& intervals() const;
+
+ /// Render indexes which are not associated with instructions / MBB starts.
+ bool renderEmptyIndexes() const;
+
+ /// Return whether or not to render using SVG for fancy vertical text.
+ bool fancyVerticals() const;
+
+ private:
+
+ static bool renderingOptionsProcessed;
+ static std::set<std::string> mfNamesToRender;
+ static bool renderAllMFs;
+
+ static std::set<std::string> classNamesToRender;
+ static bool renderAllClasses;
+
+
+ static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
+ typedef enum { ExplicitOnly = 0,
+ AllPhys = 1,
+ VirtNoSpills = 2,
+ VirtSpills = 4,
+ AllVirt = 6,
+ All = 7 }
+ IntervalTypesToRender;
+ static unsigned intervalTypesToRender;
+
+ template <typename OutputItr>
+ static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
+
+ static void processOptions();
+
+ static void processFuncNames();
+ static void processRegClassNames();
+ static void processIntervalNumbers();
+
+ static void processIntervalRange(const std::string &intervalRangeStr);
+
+ MachineFunction *mf;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ const RenderMachineFunction *rmf;
+
+ mutable bool regClassesTranslatedToCurrentFunction;
+ mutable RegClassSet regClassSet;
+
+ mutable bool intervalsTranslatedToCurrentFunction;
+ mutable IntervalSet intervalSet;
+
+ void translateRegClassNamesToCurrentFunction() const;
+
+ void translateIntervalNumbersToCurrentFunction() const;
+ };
+
+ /// \brief Provide extra information about the physical and virtual registers
+ /// in the function being compiled.
+ class TargetRegisterExtraInfo {
+ public:
+ TargetRegisterExtraInfo();
+
+ /// \brief Set up TargetRegisterExtraInfo with pointers to necessary
+ /// sources of information.
+ void setup(MachineFunction *mf, MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri, LiveIntervals *lis);
+
+ /// \brief Recompute tables for changed function.
+ void reset();
+
+ /// \brief Free all tables in TargetRegisterExtraInfo.
+ void clear();
+
+ /// \brief Maximum number of registers from trc which alias reg.
+ unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const;
+
+ /// \brief Returns the number of allocable registers in trc.
+ unsigned getCapacity(const TargetRegisterClass *trc) const;
+
+ /// \brief Return the number of registers of class trc that may be
+ /// needed at slot i.
+ unsigned getPressureAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ /// \brief Return true if the number of registers of type trc that may be
+ /// needed at slot i is greater than the capacity of trc.
+ bool classOverCapacityAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ private:
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine;
+ typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap;
+ VRWorstMap vrWorst;
+
+ typedef std::map<unsigned, WorstMapLine> PRWorstMap;
+ PRWorstMap prWorst;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap;
+ CapacityMap capacityMap;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine;
+ typedef std::map<SlotIndex, PressureMapLine> PressureMap;
+ PressureMap pressureMap;
+
+ bool mapsPopulated;
+
+ /// \brief Initialise the 'worst' table.
+ void initWorst();
+
+ /// \brief Initialise the 'capacity' table.
+ void initCapacity();
+
+ /// \brief Initialise/Reset the 'pressure' and live states tables.
+ void resetPressureAndLiveStates();
+ };
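
The interface above reduces to two tables: a per-class capacity and a per-slot, per-class pressure count, with classOverCapacityAtSlot() being a single comparison between the two. A minimal standalone sketch of that shape, using plain std::map with string-keyed register classes purely for illustration (none of these names are the LLVM API):

#include <map>
#include <string>

// Hypothetical, simplified stand-in for the tables kept by TargetRegisterExtraInfo:
// capacity per register class, pressure per (slot, register class).
struct PressureTables {
  std::map<std::string, unsigned> capacity;                       // class -> allocatable regs
  std::map<unsigned, std::map<std::string, unsigned> > pressure;  // slot  -> class -> pressure

  bool classOverCapacityAtSlot(const std::string &trc, unsigned slot) const {
    std::map<unsigned, std::map<std::string, unsigned> >::const_iterator
      s = pressure.find(slot);
    if (s == pressure.end())
      return false;
    std::map<std::string, unsigned>::const_iterator p = s->second.find(trc);
    std::map<std::string, unsigned>::const_iterator c = capacity.find(trc);
    if (p == s->second.end() || c == capacity.end())
      return false;
    return p->second > c->second;                                 // over capacity?
  }
};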
+
+ /// \brief Render MachineFunction objects and related information to a HTML
+ /// page.
+ class RenderMachineFunction : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ RenderMachineFunction() : MachineFunctionPass(ID) {
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+ void rememberUseDefs(const LiveInterval *li);
+
+ void rememberSpills(const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills);
+
+ bool isSpill(const LiveInterval *li) const;
+
+ /// \brief Render this machine function to HTML.
+ ///
+ /// @param renderContextStr This parameter will be included at the top of
+ /// the HTML file to explain where (in the
+ /// codegen pipeline) this function was rendered
+ /// from. Set it to something like
+ /// "Pre-register-allocation".
+ /// @param vrm If non-null the VRM will be queried to determine
+ /// whether a virtual register was allocated to a
+ /// physical register or spilled.
+ /// @param renderSuffix This string will be appended to the function
+ /// name (before the output file suffix) to enable
+ /// multiple renderings from the same function.
+ void renderMachineFunction(const char *renderContextStr,
+ const VirtRegMap *vrm = 0,
+ const char *renderSuffix = 0);
+
+ private:
+ class Spacer;
+ friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
+
+ std::string fqn;
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ SlotIndexes *sis;
+ const VirtRegMap *vrm;
+
+ TargetRegisterExtraInfo trei;
+ MFRenderingOptions ro;
+
+
+
+ // Utilities.
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
+ LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
+
+ typedef enum { Zero, Low, High } PressureState;
+ PressureState getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
+ SpillIntervals;
+ SpillIntervals spillIntervals;
+
+ typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
+ SpillForMap spillFor;
+
+ typedef std::set<SlotIndex> SlotSet;
+ typedef std::map<const LiveInterval*, SlotSet> UseDefs;
+ UseDefs useDefs;
+
+ // ---------- Rendering methods ----------
+
+ /// For inserting spaces when pretty printing.
+ class Spacer {
+ public:
+ explicit Spacer(unsigned numSpaces) : ns(numSpaces) {}
+ Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); }
+ void print(raw_ostream &os) const;
+ private:
+ unsigned ns;
+ };
+
+ Spacer s(unsigned ns) const;
+
+ template <typename Iterator>
+ std::string escapeChars(Iterator sBegin, Iterator sEnd) const;
+
+ /// \brief Render a machine instruction.
+ void renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const;
+
+ /// \brief Render vertical text.
+ template <typename T>
+ void renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const;
+
+ /// \brief Insert CSS layout info.
+ void insertCSS(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a brief summary of the function (including rendering
+ /// context).
+ void renderFunctionSummary(const Spacer &indent,
+ raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ /// \brief Render a legend for the pressure table.
+ void renderPressureTableLegend(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a consecutive set of HTML cells of the same class using
+ /// the colspan attribute for run-length encoding.
+ template <typename CellType>
+ void renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const;
+
+ /// \brief Render code listing, potentially with register pressure
+ /// and live intervals shown alongside.
+ void renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render the HTML page representing the MachineFunction.
+ void renderFunctionPage(raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ std::string escapeChars(const std::string &s) const;
+ };
+}
+
+#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 000000000000..21375b286c99
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,607 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+#ifndef NDEBUG
+cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ MF(mf), MRI(mf.getRegInfo()),
+ EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ BB = bb;
+ InsertPos = insertPos;
+
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+
+ Schedule();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+bool SUnit::addPred(const SDep &D) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D)
+ return false;
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
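
addPred() and its counterpart removePred() below keep Preds and Succs as mirror images: every SDep stored in a node's Preds has a twin, with the SUnit pointer swapped, in the predecessor's Succs, and the Data-edge counters are bumped on both sides. A minimal sketch of that invariant with deliberately simplified node and edge types (not the SUnit/SDep classes):

#include <cstddef>
#include <vector>

// Simplified sketch: mirrored edge lists, as maintained by addPred/removePred.
struct Node {
  std::vector<Node*> Preds, Succs;

  bool addPred(Node *P) {
    for (std::size_t i = 0; i < Preds.size(); ++i)
      if (Preds[i] == P)
        return false;           // edge already present; keep the lists redundancy-free
    Preds.push_back(P);         // edge P -> this, as seen from this node
    P->Succs.push_back(this);   // the same edge, as seen from the predecessor
    return true;
  }
};

// After a.addPred(&b), a.Preds contains &b and b.Succs contains &a.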
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ bool FoundSucc = false;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+ EE = N->Succs.end(); II != EE; ++II)
+ if (*II == P) {
+ FoundSucc = true;
+ N->Succs.erase(II);
+ break;
+ }
+ assert(FoundSucc && "Mismatching preds / succs lists!");
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
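
Both routines above replace the natural recursion with an explicit worklist: a node stays on the stack until every neighbour on the relevant side has a current value, and only then is its own longest-path value fixed. A standalone sketch of the same pattern for depth (longest distance from any root), assuming plain predecessor lists and unit edge latencies:

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch only: depth[] must be initialised to -1 ("not yet computed") by the caller.
static int longestPathFromRoots(const std::vector<std::vector<int> > &preds,
                                std::vector<int> &depth, int start) {
  std::vector<int> work(1, start);
  while (!work.empty()) {
    int cur = work.back();
    bool done = true;
    int best = 0;
    for (std::size_t i = 0; i < preds[cur].size(); ++i) {
      int p = preds[cur][i];
      if (depth[p] >= 0)
        best = std::max(best, depth[p] + 1);   // unit latency per edge
      else {
        done = false;
        work.push_back(p);                     // compute the predecessor first
      }
    }
    if (done) {                                // all preds known: fix this node
      work.pop_back();
      depth[cur] = best;
    }
  }
  return depth[start];
}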
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << Depth << "\n";
+ dbgs() << " Height : " << Height << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+ for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << G->TRI->getName(I->getReg());
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ dbgs() << "\n";
+ }
+ }
+ dbgs() << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks the nodes
+/// reachable from Y by calling DFS, and then uses Shift to move them so that
+/// they lie immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *SU = I->getSUnit();
+ if (!--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
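
InitDAGTopologicalSorting() is a Kahn-style pass run from the leaves: Node2Index temporarily holds each node's count of unprocessed successors, and indices are handed out downward from DAGSize - 1, so every edge ends up going from a smaller index to a larger one. A minimal sketch of the same numbering on plain successor/predecessor lists (illustrative only, not SUnits):

#include <cassert>
#include <cstddef>
#include <vector>

// Number a DAG so that order[u] < order[v] for every edge u -> v, working
// backwards from the leaves exactly as InitDAGTopologicalSorting does.
static std::vector<int> topoNumber(const std::vector<std::vector<int> > &succs,
                                   const std::vector<std::vector<int> > &preds) {
  const int n = (int)succs.size();
  std::vector<int> order(n), degree(n);
  std::vector<int> work;
  for (int u = 0; u < n; ++u) {
    degree[u] = (int)succs[u].size();       // unprocessed successor count
    if (degree[u] == 0)
      work.push_back(u);                    // leaves get the largest indices
  }
  int id = n;
  while (!work.empty()) {
    int u = work.back();
    work.pop_back();
    order[u] = --id;
    for (std::size_t i = 0; i < preds[u].size(); ++i)
      if (--degree[preds[u][i]] == 0)       // all successors of the pred are done
        work.push_back(preds[u][i]);
  }
  assert(id == 0 && "graph was not acyclic");
  return order;
}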
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate the removal
+/// of the specified node N from the predecessors of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ int s = SU->Succs[I].getSUnit()->NodeNum;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already visited and within the affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
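
Shift() compacts the untouched nodes in [LowerBound, UpperBound] toward the lower end and re-appends the visited (reachable-from-Y) nodes immediately after them, keeping the relative order inside each group. A small sketch of that reindexing with plain vectors, assuming the caller passes the visited set as a bool-per-node vector:

#include <cstddef>
#include <vector>

// Sketch of the reindexing performed by Shift(): nodes flagged in `moved` end up
// immediately after the unflagged ones within [lb, ub].
static void shiftRange(std::vector<int> &indexToNode, std::vector<int> &nodeToIndex,
                       const std::vector<bool> &moved, int lb, int ub) {
  std::vector<int> deferred;
  int write = lb;
  for (int i = lb; i <= ub; ++i) {
    int node = indexToNode[i];
    if (moved[node])
      deferred.push_back(node);      // reinsert after the untouched nodes
    else {
      nodeToIndex[node] = write;
      indexToNode[write] = node;
      ++write;
    }
  }
  for (std::size_t j = 0; j < deferred.size(); ++j, ++write) {
    nodeToIndex[deferred[j]] = write;
    indexToNode[write] = deferred[j];
  }
}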
+
+
+/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+/// create a cycle.
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ if (IsReachable(TargetSU, SU))
+ return true;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(TargetSU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
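
IsReachable() leans on the maintained order: a path from TargetSU to SU can exist only if Ord(TargetSU) < Ord(SU), and no intermediate node on such a path can have an index at or beyond Ord(SU), which is what lets DFS() both detect the would-be cycle and prune aggressively. A standalone sketch of the same bounded search on plain successor lists:

#include <cstddef>
#include <vector>

// Is `to` reachable from `from`, given a numbering `order` in which every edge
// goes from a smaller to a larger index?  Nodes whose index is already at or
// past order[to] cannot lead anywhere useful, so they are pruned.
static bool reachable(const std::vector<std::vector<int> > &succs,
                      const std::vector<int> &order, int from, int to) {
  if (order[from] >= order[to])
    return from == to;                        // the order forbids any other path
  std::vector<bool> seen(succs.size(), false);
  std::vector<int> work(1, from);
  while (!work.empty()) {
    int u = work.back();
    work.pop_back();
    seen[u] = true;
    for (std::size_t i = 0; i < succs[u].size(); ++i) {
      int v = succs[u][i];
      if (v == to)
        return true;
      if (!seen[v] && order[v] < order[to])   // prune using the topological order
        work.push_back(v);
    }
  }
  return false;
}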
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 000000000000..f8b1bc76eb8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,68 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::EmitNoop() {
+ TII->insertNoop(*BB, InsertPos);
+}
+
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
+ DenseMap<SUnit*, unsigned> &VRBaseMap) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain preds
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 000000000000..446adfc2b626
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,697 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+ InstrItins(mf.getTarget().getInstrItineraryData()),
+ Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
+ LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ DbgValues.clear();
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ Begin = begin;
+ InsertPosIndex = endcount;
+
+ ScheduleDAG::Run(bb, end);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant or a multiplied value, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+ // First just call Value::getUnderlyingObject to let it do what it does.
+ do {
+ V = GetUnderlyingObject(V);
+ // If it found an inttoptr, use special code to continue climbing.
+ if (Operator::getOpcode(V) != Instruction::IntToPtr)
+ break;
+ const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ // If that succeeded in finding a pointer, continue the search.
+ if (!O->getType()->isPointerTy())
+ break;
+ V = O;
+ } while (1);
+ return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ bool &MayAlias) {
+ MayAlias = true;
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return 0;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return 0;
+
+ V = getUnderlyingObject(V);
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return 0;
+
+ MayAlias = PSV->mayAlias(MFI);
+ return V;
+ }
+
+ if (isIdentifiedObject(V))
+ return V;
+
+ return 0;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ if (MachineLoop *ML = MLI.getLoopFor(BB))
+ if (BB == ML->getLoopLatch()) {
+ MachineBasicBlock *Header = ML->getHeader();
+ for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+ E = Header->livein_end(); I != E; ++I)
+ LoopLiveInRegs.insert(*I);
+ LoopRegs.VisitLoop(ML);
+ }
+}
+
+/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and uses lists. This is because we want
+/// to make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is especially
+/// important when the definition latency of the return value(s) is too high
+/// to be hidden by the branch or when the liveout registers are used by
+/// instructions in the fallthrough block.
+void ScheduleDAGInstrs::AddSchedBarrierDeps() {
+ MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // the instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ Uses[Reg].push_back(&ExitSU);
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ SmallSet<unsigned, 8> Seen;
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (Seen.insert(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ }
+ }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
+ // We'll be allocating one SUnit for each instruction, plus one for
+ // the region exit node.
+ SUnits.reserve(BB->size());
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
+
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias.
+ std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+
+ // Remove any stale debug info; sometimes BuildSchedGraph is called again
+ // without emitting the info from the previous call.
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ AddSchedBarrierDeps();
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
+ }
+
+ // Walk the list of instructions, from bottom moving up.
+ MachineInstr *PrevMI = NULL;
+ for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ if (MI && PrevMI) {
+ DbgValues.push_back(std::make_pair(PrevMI, MI));
+ PrevMI = NULL;
+ }
+
+ if (MI->isDebugValue()) {
+ PrevMI = MI;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ assert(!MCID.isTerminator() && !MI->isLabel() &&
+ "Cannot schedule terminators or labels!");
+ // Create the SUnit for this MI.
+ SUnit *SU = NewSUnit(MI);
+ SU->isCall = MCID.isCall();
+ SU->isCommutable = MCID.isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ ComputeLatency(SU);
+
+ // Add register-based dependencies (data, anti, and output).
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+
+ std::vector<SUnit *> &UseList = Uses[Reg];
+ // Defs are pushed in the order they are visited and never reordered.
+ std::vector<SUnit *> &DefList = Defs[Reg];
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &MemDefList = Defs[*Alias];
+ for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
+ SUnit *DefSU = MemDefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias)))
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
+ }
+ }
+
+ if (MO.isDef()) {
+ // Add any data dependencies.
+ unsigned DataLatency = SU->Latency;
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if (RegUseIndex >= 0 &&
+ (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ }
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseMCID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ UseList.clear();
+ if (!MO.isDead())
+ DefList.clear();
+
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ if (SU->isCall) {
+ while (!DefList.empty() && DefList.back()->isCall)
+ DefList.pop_back();
+ }
+ DefList.push_back(SU);
+ } else {
+ UseList.push_back(SU);
+ }
+ }
+
+ // Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for true dependency of Store followed by
+ // aliased Load... we estimate that with a single cycle of latency
+ // assuming the hardware will bypass)
+ // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+#define STORE_LOAD_LATENCY 1
+ unsigned TrueMemOrderLatency = 0;
+ if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain = SU;
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references through SU.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ AliasChain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
+ } else if (MCID.mayStore()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = STORE_LOAD_LATENCY;
+ if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ I->second = SU;
+ } else {
+ if (MayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ std::map<const Value *, std::vector<SUnit *> >::iterator J =
+ ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
+ /*Reg=*/0, /*isNormalMemory=*/true));
+ J->second.clear();
+ }
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ // Add dependence on barrier chain, if needed.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ } else {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+
+ if (!ExitSU.isPred(SU))
+ // Push stores up a bit to avoid them getting in between cmp
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Order, 0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (MCID.mayLoad()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = 0;
+ if (MI->isInvariantLoad(AA)) {
+ // Invariant load, no chain dependencies needed!
+ } else {
+ if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ if (MayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ } else {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
+ }
+
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ }
+ }
+ if (PrevMI)
+ FirstDbgValue = PrevMI;
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ Defs[i].clear();
+ Uses[i].clear();
+ }
+ PendingLoads.clear();
+}
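
For the memory edges, the walk above effectively sorts each instruction into one of three buckets: a full barrier (calls, unmodeled side effects, non-invariant volatile references) that starts a new chain, an access with a known underlying object that gets precise per-object edges, and an unknown access that is hung off the generic alias chain. A small sketch of just that classification, with assumed, simplified predicates standing in for the MachineInstr/MCInstrDesc queries:

// Hypothetical flags summarising the queries made in BuildSchedGraph; this is an
// illustration of the decision structure, not the LLVM API.
struct MemInfo {
  bool isCall, hasSideEffects, isVolatileRef;
  bool mayStore, mayLoad, isInvariantLoad;
  bool hasKnownUnderlyingObject;
};

enum MemDepKind { FullBarrier, PreciseObject, UnknownAlias, NotMemory };

static MemDepKind classify(const MemInfo &mi) {
  if (mi.isCall || mi.hasSideEffects ||
      (mi.isVolatileRef && !(mi.mayLoad && mi.isInvariantLoad)))
    return FullBarrier;                    // depend on everything; start new chains
  if (mi.mayStore)
    return mi.hasKnownUnderlyingObject ? PreciseObject : UnknownAlias;
  if (mi.mayLoad)
    return mi.isInvariantLoad ? NotMemory  // invariant load: no chain edges needed
         : mi.hasKnownUnderlyingObject ? PreciseObject
                                       : UnknownAlias;
  return NotMemory;
}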
+
+void ScheduleDAGInstrs::FinishBlock() {
+ // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+ // Compute the latency for the node.
+ if (!InstrItins || InstrItins->isEmpty()) {
+ SU->Latency = 1;
+
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
+ if (SU->getInstr()->getDesc().mayLoad())
+ SU->Latency += 2;
+ } else {
+ SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+ }
+}
+
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const {
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+
+ // For a data dependency with a known register...
+ if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+ return;
+
+ const unsigned Reg = dep.getReg();
+
+ // ... find the definition of the register in the defining
+ // instruction
+ MachineInstr *DefMI = Def->getInstr();
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ }
+ MachineInstr *UseMI = Use->getInstr();
+ // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ if (UseMI) {
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
+ UseMI, i);
+ Latency = std::max(Latency, UseCycle);
+ }
+ } else {
+ // If UseMI is null, then it must be a scheduling barrier.
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
+ }
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss);
+ return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+ // For MachineInstr-based scheduling, we're rescheduling the instructions in
+ // the block, so start by removing them from the block.
+ while (Begin != InsertPos) {
+ MachineBasicBlock::iterator I = Begin;
+ ++Begin;
+ BB->remove(I);
+ }
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->insert(InsertPos, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->insert(InsertPos, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ EmitNoop();
+ }
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (!Sequence.empty())
+ Begin = Sequence[0]->getInstr();
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineInstr *OrigPrivMI = P.second;
+ BB->insertAfter(OrigPrivMI, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+ return BB;
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 000000000000..8a4ea855235c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,211 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+
+ /// LoopDependencies - This class analyzes loop-oriented register
+ /// dependencies, which are used to guide scheduling decisions.
+ /// For example, loop induction variable increments should be
+ /// scheduled as soon as possible after the variable's last use.
+ ///
+ class LLVM_LIBRARY_VISIBILITY LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ /// VisitLoop - Clear out any previous state and analyze the given loop.
+ ///
+ void VisitLoop(const MachineLoop *Loop) {
+ Deps.clear();
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ const MachineDomTreeNode *Node = MDT.getNode(Header);
+ const MachineBasicBlock *MBB = Node->getBlock();
+ assert(Loop->contains(MBB) &&
+ "Loop does not contain header!");
+ VisitRegion(Node, MBB, Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineBasicBlock *MBB,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ unsigned Count = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ ++Count; // Not every iteration due to dbg_value above.
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (std::vector<MachineDomTreeNode*>::const_iterator I =
+ Children.begin(), E = Children.end(); I != E; ++I) {
+ const MachineDomTreeNode *ChildNode = *I;
+ MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+ if (Loop->contains(ChildBlock))
+ VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+ }
+ }
+ };
+
+ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+ /// MachineInstrs.
+ class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+ const MachineFrameInfo *MFI;
+ const InstrItineraryData *InstrItins;
+
+ /// Defs, Uses - Remember where defs and uses of each physical register
+ /// are as we iterate upward through the instructions. This is allocated
+ /// here instead of inside BuildSchedGraph to avoid the need for it to be
+ /// initialized and destructed for each block.
+ std::vector<std::vector<SUnit *> > Defs;
+ std::vector<std::vector<SUnit *> > Uses;
+
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
+
+ /// LoopRegs - Track which registers are used for loop-carried dependencies.
+ ///
+ LoopDependencies LoopRegs;
+
+ /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
+ /// back-edge-aware scheduling.
+ ///
+ SmallSet<unsigned, 8> LoopLiveInRegs;
+
+ protected:
+
+ /// DbgValues - Remember the instruction that precedes each DBG_VALUE.
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+ DbgValueVector DbgValues;
+ MachineInstr *FirstDbgValue;
+
+ public:
+ MachineBasicBlock::iterator Begin; // The beginning of the range to
+ // be scheduled. The range extends
+ // to InsertPos.
+ unsigned InsertPosIndex; // The index in BB of InsertPos.
+
+ explicit ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt);
+
+ virtual ~ScheduleDAGInstrs() {}
+
+ /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+ const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// Run - perform scheduling.
+ ///
+ void Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endindex);
+
+ /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that is our
+ /// input.
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+ /// list of instructions being scheduled to the scheduling barrier. We want
+ /// to make sure instructions which define registers that are either used by
+ /// the terminator or are live-out are properly scheduled. This is especially
+ /// important when the definition latency of the return value(s) is too high
+ /// to be hidden by the branch or when the liveout registers are used by
+ /// instructions in the fallthrough block.
+ void AddSchedBarrierDeps();
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const;
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// StartBlock - Prepare to perform scheduling in the given block.
+ ///
+ virtual void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// FinishBlock - Clean up after scheduling in the given block.
+ ///
+ virtual void FinishBlock();
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ };
+}
+
+#endif
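
The NDEBUG guard in NewSUnit above exists because SUnit pointers are handed out while SUnits keeps growing, which is only safe if the vector never reallocates. A minimal standalone C++ sketch of that pattern (not taken from this patch; NodePool and its names are made up for illustration):

    #include <cassert>
    #include <vector>

    struct Node { unsigned Id; };

    class NodePool {
      std::vector<Node> Nodes;   // reserve()d up front so it never reallocates
    public:
      explicit NodePool(unsigned Max) { Nodes.reserve(Max); }

      // Hands out a pointer into Nodes; this is only safe because the debug
      // assertion below checks that push_back did not reallocate the vector.
      Node *newNode() {
    #ifndef NDEBUG
        const Node *Addr = Nodes.empty() ? 0 : &Nodes[0];
    #endif
        Nodes.push_back(Node());
        Nodes.back().Id = (unsigned)Nodes.size() - 1;
        assert((Addr == 0 || Addr == &Nodes[0]) &&
               "Node vector reallocated on the fly!");
        return &Nodes.back();
      }
    };

    int main() {
      NodePool Pool(16);
      Node *A = Pool.newNode();
      Node *B = Pool.newNode();
      assert(A->Id == 0 && B->Id == 1);
      (void)A; (void)B;
      return 0;
    }
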
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 000000000000..4b55a2284f85
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,99 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+ if (BB->getBasicBlock())
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
+ ":" + BB->getBasicBlock()->getNameStr());
+ else
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
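
The DOTGraphTraits specialization above only supplies callbacks; llvm::GraphWriter turns them into DOT text. A rough standalone sketch of what the edge-attribute callbacks amount to in the emitted DOT (plain C++, no LLVM types; the DepKind enum is invented for the example):

    #include <cstdio>

    // Stand-in for an SUnit dependence kind: data, control or artificial.
    enum DepKind { DataDep, CtrlDep, ArtificialDep };

    static const char *edgeAttrs(DepKind K) {
      if (K == ArtificialDep) return "color=cyan,style=dashed";
      if (K == CtrlDep)       return "color=blue,style=dashed";
      return "";                      // data dependences keep the default style
    }

    int main() {
      struct Edge { int From, To; DepKind Kind; };
      const Edge Edges[] = { {0, 1, DataDep}, {1, 2, CtrlDep}, {0, 2, ArtificialDep} };

      std::printf("digraph \"Scheduling-Units Graph\" {\n");
      std::printf("  node [shape=Mrecord];\n");
      for (unsigned i = 0; i < sizeof(Edges) / sizeof(Edges[0]); ++i)
        std::printf("  SU%d -> SU%d [%s];\n",
                    Edges[i].From, Edges[i].To, edgeAttrs(Edges[i].Kind));
      std::printf("}\n");
      return 0;
    }
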
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 000000000000..0e005d35189d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,243 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
+ // Determine the maximum depth of any itinerary. This determines the
+ // depth of the scoreboard. We always make the scoreboard at least 1
+ // cycle deep to avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ IssueWidth = ItinData->IssueWidth;
+
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ }
+ }
+ MaxLookAhead = ScoreboardDepth;
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+}
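
The constructor above computes each itinerary's depth as the furthest cycle any stage occupies, then rounds the running scoreboard depth up to a power of two. A standalone sketch of just that calculation, with made-up stage data:

    #include <cstdio>
    #include <vector>

    struct Stage { unsigned Cycles, NextCycles; };  // occupancy / issue advance

    // Depth of one itinerary: the furthest cycle any of its stages occupies.
    static unsigned itinDepth(const std::vector<Stage> &Itin) {
      unsigned CurCycle = 0, Depth = 0;
      for (size_t i = 0; i != Itin.size(); ++i) {
        unsigned StageDepth = CurCycle + Itin[i].Cycles;
        if (Depth < StageDepth) Depth = StageDepth;
        CurCycle += Itin[i].NextCycles;
      }
      return Depth;
    }

    int main() {
      std::vector<Stage> Itin;
      Stage S1 = {2, 1}, S2 = {3, 0};
      Itin.push_back(S1); Itin.push_back(S2);

      unsigned ScoreboardDepth = 1;        // always at least one cycle deep
      unsigned D = itinDepth(Itin);        // here: max(2, 1 + 3) = 4
      while (D > ScoreboardDepth)
        ScoreboardDepth *= 2;              // round up to the next power of two

      std::printf("itinerary depth = %u, scoreboard depth = %u\n",
                  D, ScoreboardDepth);
      return 0;
    }
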
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME it would be more accurate to reserve
+    // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+      // Reduce freeUnits to a single unit (its highest set bit).
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
+ assert(freeUnit && "No function unit available!");
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
+}
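
The "reduce to a single unit" loop in EmitInstruction repeatedly clears the lowest set bit (x & (x - 1)), so the value left behind is the highest set bit of the original free-unit mask. A standalone check of that behaviour (assuming, as dump() above does, that unit masks fit in 32 bits):

    #include <cassert>
    #include <cstdint>

    // Strips the lowest set bit until only one remains, i.e. returns the
    // highest set bit of Mask. Mask must be non-zero.
    static uint32_t pickOneUnit(uint32_t Mask) {
      uint32_t Unit = 0, Rest = Mask;
      do {
        Unit = Rest;
        Rest = Unit & (Unit - 1);   // clear the lowest set bit
      } while (Rest);
      return Unit;
    }

    int main() {
      assert(pickOneUnit(0x01) == 0x01);
      assert(pickOneUnit(0x06) == 0x04);   // 110      -> 100
      assert(pickOneUnit(0xB0) == 0x80);   // 10110000 -> 10000000
      return 0;
    }
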
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
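
AdvanceCycle and RecedeCycle clear one slot and then shift the scoreboard window by a cycle in opposite directions, which suggests the Scoreboard type (declared in ScoreboardHazardRecognizer.h, not shown in this hunk) behaves like a fixed-depth ring buffer indexed from the current cycle. A hedged standalone model of that assumed behaviour, with invented names:

    #include <cassert>
    #include <vector>

    // Assumed ring-buffer model of the Scoreboard: index 0 is the current
    // cycle, advance() moves the window one cycle forward, recede() one back.
    class RingScoreboard {
      std::vector<unsigned> Data;
      unsigned Head;
    public:
      explicit RingScoreboard(unsigned Depth) : Data(Depth, 0), Head(0) {}
      unsigned getDepth() const { return (unsigned)Data.size(); }
      unsigned &operator[](unsigned i) { return Data[(Head + i) % Data.size()]; }
      void advance() { Head = (Head + 1) % Data.size(); }
      void recede()  { Head = (Head + (unsigned)Data.size() - 1) % Data.size(); }
    };

    int main() {
      RingScoreboard SB(4);
      SB[1] = 0xF;                  // reserve units one cycle in the future
      SB[0] = 0; SB.advance();      // AdvanceCycle: recycle the slot just left
      assert(SB[0] == 0xF);         // the future reservation is now "current"
      SB[SB.getDepth() - 1] = 0; SB.recede();   // RecedeCycle mirrors this
      assert(SB[1] == 0xF);
      return 0;
    }
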
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 000000000000..4f0d2caca22b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,7976 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ std::vector<SDNode*> WorkList;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the work list because they might now be simplified
+    /// further.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
+ removeFromWorkList(N);
+ WorkList.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+ WorkList.end());
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitMEMBARRIER(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for a better chain (aliasing node).
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
+ }
+
+ /// isTypeLegal - This method returns true if we are running before type
+ /// legalization or if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+ };
+}
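
AddToWorkList above keeps at most one copy of a node on the work list: it first erases any existing instances with the erase/remove idiom, then pushes the node to the back, where it becomes the next one popped. The same discipline with plain integers (names invented for the sketch):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    static void removeFromWorkList(std::vector<int> &WL, int N) {
      WL.erase(std::remove(WL.begin(), WL.end(), N), WL.end());
    }

    static void addToWorkList(std::vector<int> &WL, int N) {
      removeFromWorkList(WL, N);   // keep a single instance...
      WL.push_back(N);             // ...and make it the next one popped
    }

    int main() {
      std::vector<int> WL;
      addToWorkList(WL, 1);
      addToWorkList(WL, 2);
      addToWorkList(WL, 1);        // re-adding moves 1 behind 2
      assert(WL.size() == 2 && WL.front() == 2 && WL.back() == 1);
      return 0;
    }
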
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, 2 if we
+/// can compute the negated form more cheaply than the expression itself, or
+/// 0 if it cannot be negated for free.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ unsigned Depth = 0) {
+ // No compile time optimizations on this type.
+ if (Op.getValueType() == MVT::ppcf128)
+ return 0;
+
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+  default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns true, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
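
isNegatibleForFree and GetNegatedExpression above push an fneg into a cheaper position using the identities -(a+b) = (-a)-b, -(a-b) = b-a and -(a*b) = (-a)*b = a*(-b), which is why they are guarded by the UnsafeFPMath / sign-dependent-rounding checks (signed zeros and rounding modes can break them in general). A quick standalone spot check on values where the identities hold exactly (not a proof):

    #include <cassert>

    int main() {
      // These particular values are exactly representable, so the identities
      // hold bit-for-bit; in general they need the unsafe-FP-math guards above.
      double a = 3.5, b = -1.25;
      assert(-(a + b) == (-a) - b);   // fneg(fadd A, B) -> fsub(fneg A), B
      assert(-(a - b) == b - a);      // fneg(fsub A, B) -> fsub B, A
      assert(-(a * b) == (-a) * b);   // fneg(fmul X, Y) -> fmul(fneg X), Y
      assert(-(a * b) == a * (-b));   // ...or fmul X, (fneg Y)
      assert(-(a / b) == (-a) / b);   // and likewise for fdiv
      return 0;
    }
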
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ }
+ if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ }
+ if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
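
ReassociateOps above rewrites (op (op x, c1), c2) into (op x, (op c1, c2)) so the two constants can be folded into one. A standalone integer illustration that the rewrite is value-preserving for the associative, commutative ops the combiner handles (values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu;
      const uint32_t c1 = 12, c2 = 30;

      // reassoc. (add (add x, c1), c2) -> (add x, (c1 + c2))
      assert((x + c1) + c2 == x + (c1 + c2));

      // The same holds for the other associative/commutative integer ops,
      // e.g. AND and OR, letting the two constants fold into one.
      assert(((x & 0xFF00FF00u) & 0x0F0F0F0Fu) == (x & (0xFF00FF00u & 0x0F0F0F0Fu)));
      assert(((x | 0x1u) | 0x8000u) == (x | (0x1u | 0x8000u)));
      return 0;
    }
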
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ DebugLoc dl = Load->getDebugLoc();
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+ DEBUG(dbgs() << "\nReplacing.9 ";
+ Load->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
+ &DeadNodes);
+ removeFromWorkList(Load);
+ DAG.DeleteNode(Load);
+ AddToWorkList(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ DebugLoc dl = Op.getDebugLoc();
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ return DAG.getNode(ISD::AssertSext, dl, PVT,
+ SExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::AssertZext:
+ return DAG.getNode(ISD::AssertZext, dl, PVT,
+ ZExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, dl, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// PromoteIntBinOp - Promote the specified integer binary operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+ if (NN0.getNode() == 0)
+ return SDValue();
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+ }
+
+ AddToWorkList(NN0.getNode());
+ if (NN1.getNode())
+ AddToWorkList(NN1.getNode());
+
+ if (Replace0)
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ if (Replace1)
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ }
+ return SDValue();
+}
+
+/// PromoteIntShiftOp - Promote the specified integer shift operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+ if (N0.getNode() == 0)
+ return SDValue();
+
+ AddToWorkList(N0.getNode());
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+ DEBUG(dbgs() << "\nPromoting ";
+ N->dump(&DAG);
+ dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ AddToWorkList(Result.getNode());
+ return true;
+ }
+ return false;
+}
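
The Promote* routines above rely on the fact that, for the wrap-around integer ops promoted here (add, sub, mul, and, or, xor, shl), computing in a wider type and truncating yields the same low bits as computing in the narrow type, so an 'undesirable' i16 op can be carried out in i32. A standalone spot check of that arithmetic fact (illustrative values only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t a = 0xFFF0, b = 0x1234;

      // Widen to 32 bits, operate, truncate back: same low 16 bits as the
      // narrow operation, so the op can be carried out in the wider type.
      assert((uint16_t)((uint32_t)a + (uint32_t)b) == (uint16_t)(a + b));
      assert((uint16_t)((uint32_t)a * (uint32_t)b) == (uint16_t)(a * b));
      assert((uint16_t)((uint32_t)a ^ (uint32_t)b) == (uint16_t)(a ^ b));
      assert((uint16_t)((uint32_t)a << 3)          == (uint16_t)(a << 3));

      // Right shifts look at the high bits, which is why the code above uses
      // SExtPromoteOperand for SRA and ZExtPromoteOperand for SRL rather than
      // an any-extend.
      return 0;
    }
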
+
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables so the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= NoIllegalOperations;
+ LegalTypes = Level >= NoIllegalTypes;
+
+ // Add all the dag nodes to the worklist.
+ WorkList.reserve(DAG.allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may dangle to deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+  // While the worklist isn't empty, inspect the node at the end of it and
+  // try to combine it.
+ while (!WorkList.empty()) {
+ SDNode *N = WorkList.back();
+ WorkList.pop_back();
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DEBUG(dbgs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ // Transfer debug value.
+ DAG.TransferDbgValues(SDValue(N, 0), RV);
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+    // Add the operands of the old node to the worklist in case this node was
+    // the last one that used them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::MEMBARRIER: return visitMEMBARRIER(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+  // If still nothing happened, try promoting the operation.
+ if (RV.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
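
The last block of combine() above swaps the operands of a commutative node when the constant sits on the left, because constants are canonicalized to the RHS and the swapped form may already exist in the DAG and be CSE'd. A standalone sketch of the idea using canonicalized string keys instead of SelectionDAG nodes (everything here is invented for illustration):

    #include <cassert>
    #include <set>
    #include <string>
    #include <utility>

    // Canonical key for a commutative node: if the left operand looks like a
    // constant (digits only, for this toy), swap it onto the right-hand side.
    static std::string key(const std::string &Op, std::string L, std::string R) {
      bool LIsConst =
          !L.empty() && L.find_first_not_of("0123456789") == std::string::npos;
      if (LIsConst) std::swap(L, R);
      return Op + "(" + L + "," + R + ")";
    }

    int main() {
      std::set<std::string> CSE;
      CSE.insert(key("add", "x", "5"));           // existing node: add(x, 5)
      // A later add(5, x) canonicalizes to the same key, so it can be CSE'd.
      assert(CSE.count(key("add", "5", "x")) == 1);
      return 0;
    }
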
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+  // Iterate through the token factors. The TFs list grows as new token
+  // factors are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDValue Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+        // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall thru
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+  // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
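
visitTokenFactor above flattens nested single-use token factors into one operand list, dropping entry tokens and deduplicating via SeenOps. A standalone sketch of the same worklist-style flattening over a tiny node tree (the hasOneUse check is omitted; the Node type is made up):

    #include <cassert>
    #include <set>
    #include <vector>

    struct Node {
      bool IsTokenFactor, IsEntry;
      std::vector<Node*> Ops;
    };

    // Flatten nested token factors into one operand list, skipping entry
    // tokens and duplicate operands (mirrors the TFs/SeenOps/Ops loop above).
    static std::vector<Node*> flatten(Node *Root) {
      std::vector<Node*> TFs(1, Root), Ops;
      std::set<Node*> Seen;
      for (size_t i = 0; i != TFs.size(); ++i)
        for (size_t j = 0; j != TFs[i]->Ops.size(); ++j) {
          Node *Op = TFs[i]->Ops[j];
          if (Op->IsEntry) continue;                       // redundant
          if (Op->IsTokenFactor) { TFs.push_back(Op); continue; }
          if (Seen.insert(Op).second) Ops.push_back(Op);   // dedupe
        }
      return Ops;
    }

    int main() {
      Node Entry = {false, true}, A = {false, false}, B = {false, false};
      Node Inner = {true, false}, Outer = {true, false};
      Inner.Ops.push_back(&A); Inner.Ops.push_back(&Entry);
      Outer.Ops.push_back(&Inner); Outer.Ops.push_back(&B); Outer.Ops.push_back(&A);
      std::vector<Node*> Ops = flatten(&Outer);
      assert(Ops.size() == 2 && Ops[0] == &B && Ops[1] == &A);
      return 0;
    }
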
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ } while (!N->use_empty());
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ EVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+    // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
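
combineShlAddConstant above uses the wrap-around identity ((x + c1) << c2) == ((x << c2) + (c1 << c2)), which lets the inner constant be shifted and then folded. A one-assertion standalone check with arbitrary values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678u;
      const uint32_t c1 = 7, c2 = 5;
      // Distribute the shift over the add; c1 << c2 then folds to a constant.
      assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
      return 0;
    }
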
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
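+ // With no bit position set in both operands, no carries can be generated,
+ // so the add and the or compute the same value,
+ // e.g. 0xF0 + 0x0F == 0xF0 | 0x0F == 0xFF.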
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL &&
+ N1.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
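+ // This works because the AND operand is known to be all-sign-bits, i.e. 0
+ // or -1, and for such a value v we have (v & 1) == -v, so z + (v & 1) is
+ // the same as z - v.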
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
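+ // (sext i1 b) is 0 or -1 while (zext i1 b) is 0 or 1, so for any boolean b
+ // X + (sext i1 b) == X - (zext i1 b).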
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (N->hasNUsesOfValue(0, 1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // If both operands are null we know that carry out will always be false.
+ if (N0C && N0C->isNullValue() && N0 == N1)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(),
+ MVT::Glue));
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ return SDValue();
+}
+
+ // Since it may not be valid to emit a fold to zero for vector initializers,
+ // check whether we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (N0C && N0C->isAllOnesValue())
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val,
+ getShiftAmountTy(N0.getValueType()))));
+ }
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSExtValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSExtValue()) ||
+ isPowerOf2_64(-N1C->getSExtValue()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
+ int64_t pow2 = N1C->getSExtValue();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ unsigned lg2 = Log2_64(abs2);
+
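+ // Illustrative i32 example with N0 = -7 and pow2 = 4 (so lg2 = 2):
+ //   SGN = -7 >>s 31 = -1 (all ones)
+ //   SRL = SGN >>u 30 = 3 (abs2 - 1 when N0 is negative, 0 otherwise)
+ //   ADD = -7 + 3 = -4
+ //   SRA = -4 >>s 2 = -1, which matches -7 / 4 with truncation toward zero.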
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy(SGN.getValueType())));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
+
+ // If integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
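+ // e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2)), since 4 << y == 1 << (y+2).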
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
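+ // e.g. (urem x, 8) -> (and x, 7), since the remainder modulo a power of two
+ // is just the low bits.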
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
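+ // e.g. an i16 mulhs where i32 MUL is legal becomes
+ //   (trunc (srl (mul (sext x), (sext y)), 16)).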
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+ /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+ /// compute two values. LoOp and HiOp give the opcodes for the two computations
+ /// that are being performed. Return the simplified value, or a null SDValue
+ /// if no simplification was made.
+ ///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return the node as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the smul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the umul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ // Avoid infinite looping with PromoteIntBinOp.
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, C), D) -> D if (C & D) == D
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+ /// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16 &&
+ (!LookPassAnd0 || !LookPassAnd1) &&
+ !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+ if (OpSizeInBits > 16)
+ Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+ DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap. i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
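+ /// For example, for x = 0xAABBCCDD the pattern above produces 0xBBAADDCC,
+ /// and (rotl (bswap x), 16) = (rotl 0xDDCCBBAA, 16) = 0xBBAADDCC as well.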
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // The result of the bswap should be rotated by 16. If that's not legal,
+ // do (x << 16) | (x >> 16) instead.
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+ else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+ BSwap = MatchBSwapHWordLow(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) == 0.
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+ // Must be a legal type. Expanded and promoted types won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
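+ // e.g. on i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8), since 8 + 24 == 32.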
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot;
+ if (HasROTL)
+ Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+ else
+ Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
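+ // An AND on the SHL half cannot affect the low LShVal bits of the rotate
+ // (those come from the SRL half), and an AND on the SRL half cannot affect
+ // the high RShVal bits, so OR those bit positions into each mask before
+ // combining them.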
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTR)
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign/zext/any-extended or truncate cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
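+ // For example, (shl (add x, c1), c2) -> (add (shl x, c2), (shl c1, c2)), which
+ // exposes the scaled constant to address-mode matching.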
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The highBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
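+ // e.g. for i32 zero-extended to i64: (shl (zext (shl x, 4)), 40)
+ // -> (shl (zext x), 44); since 40 >= 64 - 32, every bit the inner i32 shift
+ // would have dropped is also shifted out of the 64-bit result.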
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
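+ // e.g. on i32: (shl (srl x, 8), 4) -> (and (srl x, 4), 0x0FFFFFF0).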
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2-c1);
+ Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1-c2);
+ Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+ DAG.getConstant(Mask, VT));
+ }
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ HiBitsMask);
+ }
+
+ if (N1C) {
+ SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSHL.getNode())
+ return NewSHL;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (sra c1, c2) -> c1 >>s c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for the (size(x) - c1)-bit type,
+ // if the target supports sext_inreg on it.
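+ // e.g. on i32: (sra (shl x, 24), 24) -> (sext_inreg x, i8).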
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target, the sext(trunc) form is likely to
+ // result in better code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the type truncated to is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold (sra (trunc (sr[al] x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
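+ // e.g. with x : i64: (sra (trunc (srl x, 32)), 5) -> (trunc (sra x, 37)).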
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
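+ // e.g. on i32: (srl (shl x, 8), 8) -> (and x, 0x00FFFFFF).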
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> (xor x, 1) iff at most the low bit of x can be set.
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
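+ // e.g. if only bit 3 of x can possibly be set (i32), the pair becomes
+ // (xor (srl x, 3), 1).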
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of the SRL is optimized into an AND, the
+ // SRL itself may not be optimized further. Look for it and add the BRCOND to
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal for all value types.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
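+ // Each recorded SETCC compared the (now truncated) narrow value against a
+ // constant; rewrite it to use ExtLoad directly, extending the constant
+ // operand to match, so the narrow value is no longer needed.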
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already the result we want.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ // None of the supported targets knows how to perform load and sign extend
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # of elements of the result is the same as the
+ // # of elements of the compare (and the # of elements of the compare
+ // result, for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
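+ // e.g. for x : i32: (zext (trunc x to i16) to i32) -> (and x, 0xFFFF).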
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ // None of the supported targets knows how to perform load and vector_zext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // We know that the # of elements of the result is the same as the
+ // # of elements of the compare (and the # of elements of the compare
+ // result, for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the extended result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
+ return SDValue();
+ }
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+      // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
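+/// For example, if Mask covers only the low 8 bits, then (or X, (shl Y, 8))
+/// simplifies to X, because the shifted operand contributes no demanded bits.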
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ case ISD::XOR:
+    // If the LHS or RHS doesn't contribute bits to the result, drop it.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, and N is a multiple of the
+/// number of bits of the narrower type, transform it to a narrower load
+/// from address + N / (number of bits of the new type). If the result is to
+/// be extended, also fold the extension to form an extending load.
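+/// For example, on a little-endian target,
+/// (i32 (truncate (srl (i64 (load P)), 32))) can be turned into
+/// (i32 (load P+4)).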
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // extended to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special-case: SRL is basically zero-extending a narrower value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
+ }
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+  // If we haven't found a load, we can't narrow it. Don't transform one with
+  // multiple uses, since that would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
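+  // (Bit EVTBits-1 is the sign bit of the narrow value; if it is known zero,
+  // sign- and zero-extension agree, and the zero-extend-in-reg form is just
+  // an AND with a low-bit mask.)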
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode() != 0)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ BSwap, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" // -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+ }
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
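+/// For example, (build_pair (i32 load [P]), (i32 load [P+4])) can become a
+/// single i64 load from P, provided the wider load's required alignment is
+/// not stricter than the original and (after legalization) i64 loads are
+/// legal.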
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+ // If one is volatile it might be ok, but play conservative and bail out.
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
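+  // For example, for f32 the sign-bit mask is 0x80000000, so fneg becomes an
+  // integer XOR with that constant and fabs an AND with its complement.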
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
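+/// For example, bitcasting a constant v2i16 build_vector to v4i8 splits each
+/// i16 element into two i8 constants, while the reverse direction packs
+/// adjacent i8 pairs into i16 constants; pieces are ordered according to the
+/// target's endianness, and FP elements are round-tripped through integers.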
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+  // Now we know the input is an integer vector. If the output is an FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ // Okay, we know the src/dst types are both integers of differing types.
+  // Handle the growing case first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = OpVal.trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+  // fold (fadd c1, c2) -> c1+c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if (isNegatibleForFree(N1, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if (isNegatibleForFree(N0, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free; check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free; check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
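+    // (A value whose sign bit is zero denotes the same number whether it is
+    //  read as signed or unsigned, so both conversions yield the same FP
+    //  result.)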
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value-preserving truncation if both rounds are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && isTypeLegal(EVT)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+  // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (isNegatibleForFree(N0, LegalOperations))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BITCAST &&
+ !VT.isVector() &&
+ N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ VT, Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
+ // Replace the uses of SRL with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
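+  // (For i1 operands, (xor x, y) is true exactly when x != y, so the branch
+  //  can test a setcc instead; the extra xor with 1 negates the condition,
+  //  giving the x == y form.)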
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode() && Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
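+/// For example, a load whose address is (add Base, Offs), where the add also
+/// has other users, can become a pre-incremented load that produces both the
+/// loaded value and the updated pointer; the add's other users are then
+/// rewritten to use that pointer result.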
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+  // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (N->hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+ if (!((Use->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+ (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all of the add / subtract's uses are redirected
+/// to the new load / store.
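+/// For example, a load from Base followed elsewhere by (add Base, Offs) can
+/// become a post-incremented load that yields both the loaded value and
+/// Base + Offs.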
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr.
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!((UseUse->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+ (UseUse->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (N->hasNUsesOfValue(0, 0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DEBUG(dbgs() << "\nReplacing.7 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getAlignment())
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), Align);
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+      // Replace the chain to avoid dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->isNonTemporal(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load is having specific bytes cleared out. If so, return the byte size
+/// being masked out and the shift amount.
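+///
+/// For example, for an i32 load, (and (load p), 0xFFFF00FF) clears byte 1,
+/// so this returns {1, 1}: one byte masked out, starting at byte offset 1.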
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // The store should be chained directly to the load or be an operand of a
+ // tokenfactor.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() != ISD::TokenFactor)
+ return Result; // Fail.
+ else {
+ bool isOk = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+ if (Chain->getOperand(i).getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk) return Result;
+ }
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a continuous run of bits. If so, we have 0*1+0*
+ if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+  // Verify that the masked bytes start at a byte offset that is a multiple of
+  // the access width, so the access stays aligned the same as its width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo. If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, this is not a replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization.
+ MVT VT = MVT::getIntegerVT(NumBytes*8);
+ if (!DC->isTypeLegal(VT))
+ return 0;
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift)
+ IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8,
+ DC->getShiftAmountTy(IVal.getValueType())));
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getTargetLoweringInfo().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ false, false, NewAlign).getNode();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
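+///
+/// For example, on a target where i8 operations are legal and narrowing is
+/// considered profitable, the sequence
+///   x = load i32, p;  x = or x, 0x80;  store x, p
+/// may be rewritten as an i8 load / or / store of just the low byte.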
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+  // load + modify + store sequence with a single (narrower) store, which makes
+ // the load dead.
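+  // For example, on a little-endian target,
+  //   store (or (and (load i32 p), 0xFFFF00FF), X), p
+  // where X is known to be zero outside of byte 1 can be rewritten as a
+  // single i8 store of (trunc (srl X, 8)) at p+1.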
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return SDValue(NewST, 0);
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return SDValue(NewST, 0);
+ }
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+ // Find the type to narrow it the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+    // If the lowest changed bit does not start at a boundary of the narrower
+    // type's bit width, start at the previous boundary.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff),
+ LD->isVolatile(), LD->isNonTemporal(),
+ NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff),
+ false, false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
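+///
+/// For example, an f64 value that is loaded and immediately stored elsewhere
+/// (with no other uses) may be moved through the integer unit as an i64
+/// load / store on targets that report this as profitable, avoiding an FP
+/// register round trip.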
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ EVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign);
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
+ case MVT::f80: // We don't do this for these yet.
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(), Align);
+ }
+ }
+
+  // Try transforming a pair of floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+ // vector with the inserted element.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // BUILD_VECTOR with undef elements and the inserted element.
+ if (InVec.getOpcode() == ISD::UNDEF &&
+ isa<ConstantSDNode>(EltNo)) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+    // EXTRACT_VECTOR_ELT may widen the extracted element.
+ SDValue InOp = InVec.getOperand(0);
+ EVT NVT = N->getValueType(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ SDValue EltNo = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(EltNo)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ EVT VT = InVec.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+      // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BITCAST)
+ InVec = InVec.getOperand(0);
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ }
+ }
+
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getTargetData()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
+ if (Elt) {
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
+ EVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // If the input vector type disagrees with the result of the build_vector,
+ // we can't make a shuffle.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec.getValueType() != VT) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // Otherwise, remember this. We allow up to two distinct input vectors.
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex > VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+    // We can't make a shuffle if the result type is not legal.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+
+ assert(N0.getValueType().getVectorNumElements() == NumElts &&
+ "Vector shuffle must be normalized in DAG");
+
+ // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+ if (!TLI.getShouldFoldAtomicFences())
+ return SDValue();
+
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3)), atomic.getResNo());
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2)),
+ atomic.getResNo());
+ default:
+ return SDValue();
+ }
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+ else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ EVT VT = LHSOp.getValueType();
+ assert(RHSOp.getValueType() == VT &&
+ "SimplifyVBinOp with different BUILD_VECTOR element types");
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
+ LHSOp, RHSOp);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::Constant &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2){
+ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0)
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+      if ((LLD->hasAnyUseOfValue(1) &&
+           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+          (RLD->hasAnyUseOfValue(1) &&
+           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+  // types and we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float) and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ const Type *FPTy = Elts[0]->getType();
+ const TargetData &TD = *TLI.getTargetData();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false,
+ false, Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ N0.getValueType().isInteger() &&
+ N2.getValueType().isInteger() &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+      // (and (sra X, size(X)-1), A) -> "(and (srl X, C2), A)" iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV,
+ getShiftAmountTy(N0.getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+  // fold (select_cc seteq (and x, y), 0, 0, A)
+  //   -> (and (sra (shl x, C1), size(x)-1), A)
+  // where y has a single bit set.
+  // In plain terms, we can turn the SELECT_CC into an AND when the condition
+  // can be materialized as an all-ones register.  Any single bit-test can be
+  // materialized as an all-ones register with a shift-left and a
+  // shift-right-arithmetic.
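+  // For example, for i32 x, (select_cc seteq (and x, 4), 0, 0, A) becomes
+  // (and (sra (shl x, 29), 31), A): the shl moves bit 2 into the sign bit and
+  // the sra smears it across the register, giving all-ones exactly when the
+  // bit was set.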
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1,
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+
+ // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+ // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ EVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+ // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy(Ctlz.getValueType())));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(XType)));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
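+  // For example, for i32 and X = -7: Y = sra(X, 31) = -1, add(X, Y) = -8, and
+  // xor(-8, -1) = 7. For X >= 0, Y is 0 and the add and xor are no-ops.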
+ if (N1C) {
+ ConstantSDNode *SubC = NULL;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+ EVT XType = N0.getValueType();
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
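+/// For example, a signed i32 divide by 7 is typically selected as a MULHS by
+/// the magic constant 0x92492493 plus a few fix-up adds and shifts (shown for
+/// illustration; the exact constants depend on the divisor and bit width).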
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
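+/// For example, an unsigned i32 divide by 7 is typically selected as a MULHU
+/// by 0x24924925 followed by a fix-up add and shifts (illustration only).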
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ const GlobalValue *&GV, void *&CV) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's adding a simple constant, then integrate the offset.
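+  // For example, an (add X, 8) pointer yields Base = X and Offset = 8.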
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+ : (void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
+  // If the base is a frame index then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ const GlobalValue *GV1, *GV2;
+ void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address, then check to see if they overlap.
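+  // The accesses [Offset, Offset+Size) overlap unless one ends at or before
+  // the other begins.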
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+
+  // If SrcValue1 and SrcValue2 are known to have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove they
+ // do not alias. This check is conservative for now to catch cases created by
+ // splitting vector types.
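+  // For example, two 4-byte accesses at offsets 0 and 4 of 8-byte-aligned
+  // values cannot overlap: their offsets modulo the alignment differ by at
+  // least the access size.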
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+ // There is no overlap between these relatively aligned accesses of similar
+    // size; return no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = LD->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ SrcValueAlign = LD->getOriginalAlignment();
+ TBAAInfo = LD->getTBAAInfo();
+ return true;
+ }
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = ST->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ SrcValueAlign = ST->getOriginalAlignment();
+ TBAAInfo = ST->getTBAAInfo();
+ return false;
+ }
+ llvm_unreachable("FindAliasInfo expected a memory operand");
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+    // find more and revert to the original chain since the xform is unlikely
+    // to be
+ // profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+    // chain we found before we hit a TokenFactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
+
+    // Don't bother if we've been here before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+      // The entry token is the ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
+
+      // If the chain is an alias, then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+  // If there are no operands, then chain to the entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+  // If there is a single operand, then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ /// run - This is the main entry point to this class.
+ ///
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 000000000000..54a7d43f46d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1373 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+  // Start out as null, meaning no local-value instructions have
+ // been emitted.
+ LastLocalValue = 0;
+
+ // Advance the last local value past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ LastLocalValue = I;
+ ++I;
+ }
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
+ }
+ }
+ } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DL;
+ recomputeInsertPt();
+ DL = DebugLoc();
+ SavePoint SP = { OldInsertPt, OldDL };
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DL = OldInsertPt.DL;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ const Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+ E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+ // FIXME: This can be optimized by combining the add with a
+ // subsequent one.
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ uint64_t Offs =
+ TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ const Function *F = Call->getCalledFunction();
+ if (!F) return false;
+
+ // Handle selected intrinsic function calls.
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ if (!DIVariable(DI->getVariable()).Verify() ||
+ !FuncInfo.MF->getMMI().hasDebugInfo())
+ return true;
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ return true;
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ if (Arg->hasByValAttr()) {
+        // A byval argument's frame index is recorded during argument lowering.
+ // Use this info directly.
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ }
+ if (!Reg)
+ Reg = getRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addCImm(CI).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ return true;
+ }
+ case Intrinsic::eh_exception: {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+ break;
+
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ case Intrinsic::eh_selector: {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+ break;
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(Call);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
+
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+
+ bool ResultRegIsKill = hasTrivialKill(Call);
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg, ResultRegIsKill);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ UpdateValueMap(Call, ResultReg);
+
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ }
+
+ // An arbitrary call. Bail.
+ return false;
+}
+
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple() ||
+ !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(const Instruction *I) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I))
+ if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ return false;
+
+ DL = I->getDebugLoc();
+
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode())) {
+ DL = DebugLoc();
+ return true;
+ }
+
+ // Next, try calling the target to attempt to handle the instruction.
+ if (TargetSelectInstruction(I)) {
+ DL = DebugLoc();
+ return true;
+ }
+
+ DL = DebugLoc();
+ return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+ if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
+ }
+ FuncInfo.MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(const User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ bool OpRegIsKill = hasTrivialKill(I);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg, OpRegIsKill);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
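+  // For f64 this XORs bit 63 (the IEEE-754 sign bit) of the integer image; for
+  // f32 it XORs bit 31.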
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ const Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ UpdateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+    // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return SelectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ TRI(*TM.getRegisterInfo()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+ unsigned) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+ unsigned, const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+  // Horrible hack (to be removed): check to make sure shift amounts are
+  // in range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ }
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBB's for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+ // Ignore dead PHI nodes.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+ else {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (Reg == 0) {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
+ }
+ }
+
+ return true;
+}
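A minimal illustrative sketch of how a target FastISel subclass might call the
FastEmitInst_* helpers above; MyTarget, MOVi, and GPRRegisterClass are
placeholder names rather than real target definitions.

// Materialize an immediate: emits "ResultReg = MOVi Imm" at FuncInfo.InsertPt
// via FastEmitInst_i and returns the new virtual register.
unsigned MyTargetFastISel::materializeImm(uint64_t Imm) {
  return FastEmitInst_i(MyTarget::MOVi,              // placeholder opcode
                        MyTarget::GPRRegisterClass,  // placeholder reg class
                        Imm);
}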
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 000000000000..d518b5d346a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,456 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+ }
+ return false;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(),
+ Fn->getAttributes().getRetAttributes(), Outs, TLI);
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(),
+ Outs, Fn->getContext());
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ // The object may need to be placed onto the stack near the stack
+ // protector if one exists. Determine here if this object is a suitable
+ // candidate, i.e., one that would trigger the creation of a stack protector.
+ bool MayNeedSP =
+ (AI->isArrayAllocation() ||
+ (TySize > 8 && isa<ArrayType>(Ty) &&
+ cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (BasicBlock::const_iterator I = BB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN->getDebugLoc();
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ // Mark landing pad blocks.
+ for (BB = Fn->begin(); BB != EB; ++BB)
+ if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = CreateReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return NULL;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return NULL;
+
+ if (BitWidth > LOI->KnownZero.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+ LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ }
+
+ return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ const Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.KnownZero = ~Val;
+ DestLOI.KnownOne = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+ DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.KnownZero &= ~Val;
+ DestLOI.KnownOne &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.KnownZero &= SrcLOI->KnownZero;
+ DestLOI.KnownOne &= SrcLOI->KnownOne;
+ }
+}
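// For illustration: given an i8 PHI whose incoming values are the constants
// 2 (0b00000010) and 6 (0b00000110), the loop above converges to
//   KnownOne    = 0b00000010   // bit 1 is set in every incoming value
//   KnownZero   = 0b11111001   // bits 0 and 3-7 are clear in every value
//   NumSignBits = min(6, 5) = 5
// so bit 2 is the only bit left unknown for the PHI's destination register.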
+
+/// setByValArgumentFrameIndex - Record frame index for the byval
+/// argument. This overrides previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+ int FI) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getByValArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then 0 is
+/// returned.
+int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ return 0;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<const GlobalVariable *> TyInfo;
+ unsigned N = I.getNumArgOperands();
+
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert(FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
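// For illustration: the operand layout decoded above is
// (exception, personality, clauses...), where a ConstantInt K begins a filter
// holding the next K-1 type infos and K == 0 marks a cleanup. For a
// hypothetical selector call (the type-info names are placeholders)
//   llvm.eh.selector(%exn, @personality,
//                    @typeid.A,          ; catch A
//                    i32 2, @typeid.B,   ; filter of one type info (2 == 1+1)
//                    i32 0)              ; cleanup
// the right-to-left scan records the cleanup, then the one-entry filter, and
// the final "if (N > 2)" block adds @typeid.A as a catch.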
+
+void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ SmallPtrSet<const BasicBlock*, 4> Visited;
+
+ // The 'eh.selector' call may not be in the direct successor of a basic block,
+ // but could be several successors deeper. If we don't find it, try going one
+ // level further. <rdar://problem/8824861>
+ while (Visited.insert(SuccBB)) {
+ for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
+ I != E; ++I)
+ if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to LPad.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SuccBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ return;
+ }
+
+ const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
+ if (Br && Br->isUnconditional())
+ SuccBB = Br->getSuccessor(0);
+ else
+ break;
+ }
+}
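A rough usage sketch of the set()/clear() lifecycle above, as a hypothetical
driver might invoke it; Fn, MF, SomeInst, and SomeBB stand in for whatever the
caller already has in hand.

FunctionLoweringInfo FuncInfo(TLI);  // TLI: the target's TargetLowering
FuncInfo.set(Fn, MF);                // builds MBBMap, ValueMap, static allocas

// During lowering, cross-block values and blocks are looked up by table:
unsigned VReg = FuncInfo.ValueMap[SomeInst];       // vreg for an exported value
MachineBasicBlock *MBB = FuncInfo.MBBMap[SomeBB];  // MBB for an IR block

// ... instruction selection for each block runs here ...

FuncInfo.clear();                    // reset all per-function state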
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 000000000000..f0f4743298e7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,900 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
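// For illustration: a machine node whose result list is (i32, f64, Other, Glue)
// has CountResults == 2; the trailing glue and chain results are peeled off the
// end. CountOperands trims the operand list the same way, so only the node's
// real inputs are counted.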
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ EVT VT = Node->getValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ EVT VT = Node->getValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = 0;
+ if (i+II.getNumDefs() < II.getNumOperands())
+ RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number of
+/// node is a CopyToReg, return its destination register. Return 0 otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it.
+ if (II) {
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *DstRC = 0;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+ assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ "Don't have operand info for this instruction!");
+ if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on Schedule cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MI->getNumOperands();
+ while (Idx > 0 &&
+ MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
+ false/*isImp*/, isKill,
+ false/*isDead*/, false/*isUndef*/,
+ false/*isEarlyClobber*/,
+ 0/*SubReg*/, IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ const ConstantFP *CFP = F->getConstantFPValue();
+ MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags()));
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+ JT->getTargetFlags()));
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM->getTargetData()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+ CP->getTargetFlags()));
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+ ES->getTargetFlags()));
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+ BA->getTargetFlags()));
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+ unsigned SubIdx, EVT VT) {
+ // Pick the register class of the superregister for this type.
+ for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+ E = TRC->superregclasses_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+ // Figure out the register class to create for the destreg.
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ } else {
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+
+ // Add source, and subreg index
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
+ MBB->insert(InsertPos, MI);
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubReg = getVR(N1, VRBaseMap);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate, otherwise it's a register
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ } else
+ AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregister being inserted.
+ AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ MBB->insert(InsertPos, MI);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(RC);
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MI);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ uint64_t Offset = SD->getOffset();
+ MDNode* MDPtr = SD->getMDPtr();
+ DebugLoc DL = SD->getDebugLoc();
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ unsigned FrameIx = SD->getFrameIx();
+ return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ MIB.addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // The MachineInstr constructor adds implicit-def operands. Scan through
+ // these to determine which are dead.
+ if (MI->getNumOperands() != 0 &&
+ Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ // First, collect all used registers.
+ SmallVector<unsigned, 8> UsedRegs;
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
+ if (F->getOpcode() == ISD::CopyFromReg)
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ else {
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ // Then mark unused registers as dead.
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ }
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MI);
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ // If there are no uses, mark the register as dead now, so that
+ // MachineLICM/Sink can see that it's dead. Don't do this if the
+ // node has a Glue value, for the benefit of targets still using
+ // Glue for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
+ MI->addRegisterDead(Reg, TRI);
+ }
+ }
+
+ // If the instruction has implicit defs and the node doesn't, mark the
+ // implicit def as dead. If the node has any glue outputs, we don't do this
+ // because we don't know what implicit defs are being used by glued nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
+ if (const unsigned *IDList = II.getImplicitDefs()) {
+ for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
+ i != e; ++i)
+ MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ }
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ break;
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL: {
+ MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ break;
+ }
+
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add the HasSideEffect and isAlignStack bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MI->addOperand(MachineOperand::CreateReg(Reg, true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
+ /*isKill=*/ false,
+ /*isDead=*/ false,
+ /*isUndef=*/false,
+ /*isEarlyClobber=*/ true));
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+ break;
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MI->addOperand(MachineOperand::CreateMetadata(MD));
+
+ MBB->insert(InsertPos, MI);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()),
+ MRI(&MF->getRegInfo()),
+ TM(&MF->getTarget()),
+ TII(TM->getInstrInfo()),
+ TRI(TM->getRegisterInfo()),
+ TLI(TM->getTargetLowering()),
+ MBB(mbb), InsertPos(insertpos) {
+}
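A simplified sketch of how a scheduler's emit loop might drive this class;
Sequence stands in for whatever scheduled node order the caller produced.

InstrEmitter Emitter(MBB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;  // maps each emitted SDValue to its vreg
for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
  Emitter.EmitNode(Sequence[i], /*IsClone=*/false, /*IsCloned=*/false,
                   VRBaseMap);
InsertPos = Emitter.getInsertPos();     // continue emitting after the new code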
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 000000000000..19fc0445b166
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,142 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class MCInstrDesc;
+class SDDbgValue;
+
+class InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfOnlyCopyToRegUse - If the only use of the specified result number of
+ /// node is a CopyToReg, return its destination register. Return 0 otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional glue operands
+ /// (which do not go into the resulting MachineInstr).
+ static unsigned CountResults(SDNode *Node);
+
+ /// CountOperands - The inputs to target nodes have any actual inputs first,
+ /// followed by an optional chain operand, then an optional glue operand. Compute
+ /// the number of actual operands that will go into the resulting
+ /// MachineInstr.
+ static unsigned CountOperands(SDNode *Node);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 000000000000..d06e2bdce065
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3893 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support a 'brcc' instruction, this
+/// will attempt to merge the setcc and branch instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ - This stack keeps track of the CALLSEQ_END nodes that have
+ /// been legalized. We use it to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SmallVector<SDValue, 8> LastCALLSEQ;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what legalized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return it unchanged.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
+ }
+
+public:
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - Return a legal replacement for the given operation, with
+ /// all legal operands.
+ SDValue LegalizeOp(SDValue O);
+
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
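+ // Accessors for the LastCALLSEQ stack used to track nested call sequences.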
+ SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); }
+ void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+ void pushLastCALLSEQ(SDValue s) {
+ LastCALLSEQ.push_back(s);
+ }
+ void popLastCALLSEQ() {
+ LastCALLSEQ.pop_back();
+ }
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
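+ // Expand each mask element into NumEltsGrowth consecutive indices of the
+ // narrower element type; undef (-1) entries stay undef.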
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+ : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag) {
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
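+ // Seed the call-sequence stack with the entry token so getLastCALLSEQ()
+ // always has a value to return.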
+ pushLastCALLSEQ(DAG.getEntryNode());
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
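+ // 'depth' tracks how deeply we are nested in CALLSEQ_START/CALLSEQ_END
+ // pairs; the CALLSEQ_END reached at depth 1 is the one that terminates the
+ // original call sequence.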
+ int next_depth = depth;
+ if (Node->getOpcode() == ISD::CALLSEQ_START)
+ next_depth = depth + 1;
+ if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ assert(depth > 0 && "negative depth!");
+ if (depth == 1)
+ return Node;
+ else
+ next_depth = depth - 1;
+ }
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDValue TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDValue(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDValue(Node, i);
+ break;
+ }
+
+ // If we still couldn't find a chain result, this node has no chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
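+ // 'nested' counts CALLSEQ_END nodes seen while walking up the chain; each
+ // matching CALLSEQ_START cancels one out, so we stop at the CALLSEQ_START
+ // that begins this call sequence.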
+ int nested = 0;
+ assert(Node && "Didn't find callseq_start for a call??");
+ while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+ Node = Node->getOperand(0).getNode();
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ case ISD::CALLSEQ_START:
+ if (!nested)
+ return Node;
+ Node = Node->getOperand(0).getNode();
+ nested--;
+ break;
+ case ISD::CALLSEQ_END:
+ nested++;
+ break;
+ }
+ }
+ return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0;
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them can reach Dest. If no operands lead to Dest, legalize
+/// them and ourself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+ // If the first result of this node has already been legalized, then it
+ // cannot reach Dest.
+ if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+ // Okay, this node has not already been legalized. Check and legalize all
+ // operands. If none lead to Dest, then we can legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+ NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ LegalizeOp(SDValue(N, 0));
+ return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If an FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ EVT OrigVT = VT;
+ EVT SVT = VT;
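+ // Walk down through progressively narrower FP types, keeping the narrowest
+ // one that still represents the constant exactly and that the target is
+ // willing and able to extend-load from.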
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from the
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend)
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false,
+ Alignment);
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
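+ // For example, an unaligned i32 store becomes two i16 truncating stores:
+ // the low half (Val) at Ptr and the high half (Val >> 16) at Ptr + 2, with
+ // the two halves swapped on big-endian targets.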
+ // Divide the stored value into two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Val.getValueType()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getPointerInfo(), NewStoredVT,
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+ Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (VT.isFloatingPoint() && LoadedVT != VT)
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
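+ // For example, an unaligned i32 load becomes a zero-extending i16 load of
+ // the low half and an i16 load of the high half, combined below as
+ // (Hi << 16) | Lo; the halves' addresses are swapped on big-endian targets.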
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Hi.getValueType()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ SDValue Ops[] = { Result, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction), then
+ // permute it into place, if the idx is a constant and the permute is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
+ false, false, 0);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers, in which case the inserted value can be wider than the
+ // element type.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3;
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
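+ // For example, a store of float 1.0 becomes a store of the i32 bit
+ // pattern 0x3F800000.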
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use two 32-bit
+ // stores. If the target supports neither 32- nor 64-bit registers, this
+ // transform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue(0, 0);
+}
+
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return Op;
+
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we must always cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ SDValue Result = Op;
+ bool isCustom = false;
+
+ // Figure out the correct action; the way to query this varies by opcode
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
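+ // Most opcodes take the SimpleFinishLegalizing path below; LOAD, STORE,
+ // CALLSEQ_START/CALLSEQ_END and BUILD_VECTOR clear the flag and are handled
+ // by the dedicated code further down.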
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::VAARG:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ EVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::BUILD_VECTOR:
+ // A weird case: legalization for BUILD_VECTOR never legalizes the
+ // operands!
+ // FIXME: This really sucks... changing it wouldn't be semantically
+ // incorrect, but it massively pessimizes the code for floating-point
+ // BUILD_VECTORs because ConstantFP operands get legalized into constant
+ // pool loads before the BUILD_VECTOR code can see them. It doesn't usually
+ // bite, though, because BUILD_VECTORs usually get lowered into other nodes
+ // which get legalized properly.
+ SimpleFinishLegalizing = false;
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SmallVector<SDValue, 8> Ops, ResultVals;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::BR:
+ case ISD::BRIND:
+ case ISD::BR_JT:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ // Branches tweak the chain to include LastCALLSEQ
+ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+ getLastCALLSEQ());
+ Ops[0] = LegalizeOp(Ops[0]);
+ setLastCALLSEQ(DAG.getEntryNode());
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[1].getValueType().isVector())
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[1]));
+ break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[2].getValueType().isVector())
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[2]));
+ break;
+ }
+
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
+ Ops.size()), 0);
+ switch (Action) {
+ case TargetLowering::Legal:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Result.getValue(i));
+ break;
+ case TargetLowering::Custom:
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) {
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Tmp1);
+ else
+ ResultVals.push_back(Tmp1.getValue(i));
+ }
+ break;
+ }
+
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Result.getNode(), ResultVals);
+ break;
+ case TargetLowering::Promote:
+ PromoteNode(Result.getNode(), ResultVals);
+ break;
+ }
+ if (!ResultVals.empty()) {
+ for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+ if (ResultVals[i] != SDValue(Node, i))
+ ResultVals[i] = LegalizeOp(ResultVals[i]);
+ AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+ }
+ return ResultVals[Op.getResNo()];
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+ Node->dump( &DAG);
+ dbgs() << "\n";
+#endif
+ assert(0 && "Do not know how to legalize this operator!");
+
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.getNode());
+ break;
+ }
+ break;
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+ assert(CallEnd && "didn't find CALLSEQ_END!");
+
+ // Recursively legalize all of the inputs of the call end that do not lead
+ // to this call start. This ensures that any libcalls that need to be
+ // inserted are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we have legalized all of the inputs (which may have inserted
+ // libcalls), create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Merge in the last call to ensure that this call starts after the last
+ // call ended.
+ if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Tmp1, getLastCALLSEQ());
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
+ Ops.size()), Result.getResNo());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ // Note that we are selecting this call!
+ setLastCALLSEQ(SDValue(CallEnd, 0));
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(getLastCALLSEQ());
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+ {
+ SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
+
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+ // This will cause this node to be legalized as well and handle libcalls
+ // correctly.
+ if (getLastCALLSEQ().getNode() != Node) {
+ LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
+ }
+
+ // Otherwise, the call start has been legalized and everything is going
+ // according to plan. Just legalize ourselves normally here.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Do not try to legalize the target-specific arguments (#1+), except for
+ // an optional flag input.
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
+ }
+ } else {
+ Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+ if (Tmp1 != Node->getOperand(0) ||
+ Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Ops.back() = Tmp2;
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
+ }
+ }
+ // This finishes up call legalization.
+ popLastCALLSEQ();
+
+ // If the CALLSEQ_END node has a flag, remember that we legalized it.
+ AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+ if (Node->getNumValues() == 2)
+ AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.getResNo());
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ EVT VT = Node->getValueType(0);
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp3 = Result.getOperand(0);
+ Tmp4 = Result.getOperand(1);
+ Tmp3 = LegalizeOp(Tmp3);
+ Tmp4 = LegalizeOp(Tmp4);
+ }
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.getNode()) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+ return Op.getResNo() ? Tmp4 : Tmp3;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ }
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // legal, then scalarize it.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+ TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0).getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // illegal, create the promoted vector from bitcasted segments.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+ EVT MemElemTy = Node->getValueType(0).getScalarType();
+ EVT SrcSclrTy = SrcVT.getScalarType();
+ unsigned SizeRatio =
+ (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ SrcVT.getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ if (TLI.isBigEndian()) {
+ // MSB (which is garbage, comes first)
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ } else {
+ // LSB (which is data, comes first)
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ }
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NumElem*SizeRatio);
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ TempWideVector, &LoadVals[0], LoadVals.size());
+
+ // Cast to the correct type
+ ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+
+ }
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ Result = SDValue(OptStore, 0);
+ break;
+ }
+
+ {
+ Tmp3 = LegalizeOp(ST->getValue());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
+
+ EVT VT = Tmp3.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ break;
+ }
+ break;
+ }
+ } else {
+ Tmp3 = LegalizeOp(ST->getValue());
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr())
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
+
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ break;
+ case TargetLowering::Expand:
+
+ EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+ EVT NarrowScalarVT = StVT.getScalarType();
+
+ // The store type is illegal; scalarize the vector store.
+ SmallVector<SDValue, 8> Stores;
+ bool ScalarLegal = TLI.isTypeLegal(WideScalarVT);
+ if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+ // Types smaller than 8 bits are promoted to 8 bits.
+ ScalarSize = std::max<unsigned>(ScalarSize, 8);
+ // Store stride
+ unsigned Stride = ScalarSize/8;
+ assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
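+ // Emit one scalar store per element: extract the element, truncate it to
+ // the (possibly widened) scalar type, and store it at offset Idx*Stride.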
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+ Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ Stores.push_back(Store);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ // The store type is illegal and so is its scalar type; bitcast the
+ // value to a vector with narrower elements and store it in smaller
+ // parts.
+ if (!TLI.isTypeLegal(StVT) && StVT.isVector()) {
+ unsigned WideNumElem = StVT.getVectorNumElements();
+ unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+ SizeRatio*WideNumElem);
+
+ // Cast the wide elem vector to wider vec with smaller elem type.
+ // Example <2 x i64> -> <4 x i32>
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
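+ // Walk the narrow pieces of the bitcast value; only the piece that
+ // carries the original element's data (which piece that is depends on
+ // endianness) is actually stored.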
+ for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+ // Extract element Idx.
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+ // bump pointer.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+ // Store this element if it is:
+ // - First element on big endian, or
+ // - Last element on little endian
+ if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+ ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ if (Op.getValueType().isVector())
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
+ false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Store the whole vector to a temporary stack slot, store the inserted
+ // part into it, then load the updated vector back out.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector, chained after the store of the whole vector so
+ // that both stores are ordered before the final load.
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ EltVT, false, false, 0));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+
+ // Get the sign bit of the RHS. First obtain a value that has the same
+ // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
+ SDValue SignBit;
+ EVT FloatVT = Tmp2.getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+ if (TLI.isTypeLegal(IVT)) {
+ // Convert to an integer with the same sign bit.
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ } else {
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getPointerTy();
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ // Then store the float to it.
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ if (TLI.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ } else { // Little endian
+ SDValue LoadPtr = StackPtr;
+ // The float may be wider than the integer we are going to load. Advance
+ // the pointer so that the loaded integer will contain the sign bit.
+ unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+ unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+ LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+ // Load a legal integer containing the sign bit.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, 0);
+ // Move the sign bit to the top bit of the loaded integer.
+ unsigned BitShift = LoadTy.getSizeInBits() -
+ (FloatVT.getSizeInBits() - 8 * ByteOffset);
+ assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+ if (BitShift)
+ SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+ DAG.getConstant(BitShift,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+ }
+ // Now get the sign bit proper, by seeing whether the value is negative.
+ SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
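+ // If the requested alignment is larger than the stack alignment, round
+ // the stack pointer down to the requested alignment first.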
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ EVT OpVT = LHS.getValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: assert(0 && "Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
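+ // An unsupported FP comparison is split into two SETCC nodes: CC1 tests
+ // the ordered relation and CC2 tests for (un)orderedness; Opc (AND or OR)
+ // combines the two results.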
+ switch (CCCode) {
+ default: assert(0 && "Don't know how to expand this condition!");
+ case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ // FIXME: Implement more expansions.
+ }
+
+ SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ EVT SlotVT,
+ EVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // wider than the slot type.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, SlotVT, false, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, false, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ Node->getValueType(0).getVectorElementType(),
+ false, false, 0);
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
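+ // Scan the operands once to find out whether only element 0 is defined,
+ // whether every defined element is a constant, and whether more than two
+ // distinct values appear.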
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment);
+ }
+
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument. If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
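+ // Marshal the node's operands into the argument list, marking each one as
+ // sign- or zero-extended according to isSigned.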
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check whether the call is in tail position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
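+ // Look through the other users of the first operand for the complementary
+ // operation on the same operands; if one exists, a combined divrem libcall
+ // pays off.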
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if (User->getOpcode() == OtherOpcode &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the stack slot that receives the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ DebugLoc dl = Node->getDebugLoc();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
+ MachinePointerInfo(), false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ EVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, MachinePointerInfo(),
+ false, false, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ SDValue TwoP52 =
+ DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+ SDValue TwoP84PlusTwoP52 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+ SDValue TwoP84 =
+ DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
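+ // LoOr is the bit pattern of the double 2^52 + lo32 and HiOr the pattern
+ // of 2^84 + hi32*2^32; subtracting 2^84 + 2^52 from HiFlt and adding
+ // LoFlt reconstructs hi32*2^32 + lo32 with a single, correctly rounded
+ // FADD.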
+ SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+ DAG.getConstant(32, MVT::i64));
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
+ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ }
+
+ // Implementation of unsigned i64 to f32.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst =
+ DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
+
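+ // For inputs of 2^53 or more, fold any set low 11 bits into bit 11
+ // (round-to-odd) so the two exact i32->f64 conversions, the recombination
+ // as hi*2^32 + lo, and the final round to f32 cannot double-round.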
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0));
+ }
+
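+ // Generic fallback: convert as signed, then add a power-of-two fudge
+ // factor, selected from the constant pool by the input's sign bit, to
+ // compensate when the value was interpreted as negative.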
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment);
+ else {
+ FudgeInReg =
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+ /// ExpandBSWAP - Open code the operations for BSWAP of the specified value.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
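+ // Shift each byte into its mirrored position, mask the two middle bytes,
+ // and OR the four pieces back together.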
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
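+ // Keep doubling the replicated pattern until it covers all NumBits bits.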
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: assert(0 && "Cannot expand this yet!");
+ case ISD::CTPOP: {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+ SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+ SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+ SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
+ return Op;
+ }
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ EVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
+std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
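+ // Map the atomic opcode and its memory width onto the matching __sync_*
+ // libcall.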
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::MEMBARRIER: {
+ // If the target didn't lower this, lower it to '__sync_synchronize()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, VT));
+ }
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
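+ // Sign-extend in register with a shift pair: shift the value left so the
+ // ExtraVT sign bit becomes the MSB, then arithmetic-shift right by the same
+ // amount (e.g. i8-in-i32: shl by 24 followed by sra by 24).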
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
+ if (VT.isVector())
+ ShiftAmountTy = VT;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
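+ // Expand fp-to-unsigned as a select of two fp-to-signed conversions, where
+ // N is the width of the integer result: if X < 2^(N-1), FP_TO_SINT(X) is
+ // used directly; otherwise X - 2^(N-1) is converted and the sign bit of the
+ // result is set back with an XOR.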
+ SDValue True, False;
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
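+ // Round the va_list pointer up to the requested alignment:
+ // VAList = (VAList + Align - 1) & ~(Align - 1).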
+ VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(Align - 1,
+ TLI.getPointerTy()));
+
+ VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(-(int64_t)Align,
+ TLI.getPointerTy()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getTargetData()->
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ if (!TLI.isTypeLegal(EltVT))
+ EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(0),
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(1),
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FP16_TO_FP32:
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ case ISD::FP32_TO_FP16:
+ Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal for the target.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(SDValue(Node, 0));
+ else
+ Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
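+ // Prefer a combined [SU]DIVREM node, then a plain [SU]DIV plus mul/sub,
+ // and fall back to a runtime library call otherwise.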
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, false))) {
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, true)))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
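+ // MULH[SU] wants the high half of the product, which is result #1 of the
+ // two-result [SU]MUL_LOHI node.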
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if the multiply can be lowered using a two-result operation.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ EVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
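+ // For example, on i8 the signed addition 100 + 100 wraps to -56: both
+ // operands are non-negative but the sum is negative, so overflow is set.
+ //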
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
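+ // Unsigned overflow is just wraparound: for ADD the sum is less than LHS,
+ // for SUB the difference is greater than LHS.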
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+ Node->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT));
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ // The high part of each sign-extended operand is obtained by SRA'ing all
+ // but one of the bits of the low part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture, so each wide argument has to
+ // be split into two.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
+ }
+
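+ // Overflow occurred iff the high half is not what a plain multiply would
+ // leave there: the sign-extension of the low half for signed multiplies,
+ // or zero for unsigned ones.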
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ TLI.getShiftAmountTy(BottomHalf.getValueType()));
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT PairTy = Node->getValueType(0);
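+ // Pair = (anyext(Hi) << (PairBits / 2)) | zext(Lo).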
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(PairTy)));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
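+ // Lower SELECT into SELECT_CC: reuse the comparison operands if the
+ // condition is itself a SETCC, otherwise compare the condition against
+ // zero with SETNE.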
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ EVT PTy = TLI.getPointerTy();
+
+ const TargetData &TD = *TLI.getTargetData();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
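+ // Compute the address of the selected entry, Table + Index * EntrySize,
+ // then load the entry and sign-extend it to pointer width.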
+ Index = DAG.getNode(ISD::MUL, dl, PTy,
+ Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
+ false, false, 0);
+ Addr = LD;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ setLastCALLSEQ(DAG.getEntryNode());
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(SDValue(Node, i));
+ break;
+ }
+}
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT OVT = Node->getValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC) {
+ OVT = Node->getOperand(0).getValueType();
+ }
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
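+ // CTPOP is unaffected by zero extension, but CTTZ and CTLZ are adjusted
+ // below: CTTZ of a zero input must report the original bit width, and
+ // CTLZ must not count the extra high zero bits.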
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Perform the larger operation.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // If Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT).
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
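+ // After byte-swapping in the wider type, the original bytes end up in the
+ // high end of the result, so shift them back down by the size difference.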
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+ // Construct a SelectionDAGLegalize and run it over the entire DAG.
+ SelectionDAGLegalize(*this).LegalizeDAG();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 000000000000..e6835d87f82c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1456 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy(RVT)));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Move the sign bit into the first operand's sign-bit position if the two
+ // operands have different widths.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy(LVT)));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+ NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT,
+ L->isVolatile(), L->isNonTemporal(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
+ L->getMemoryVT(), dl, L->getChain(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
+ L->getMemoryVT(), L->isVolatile(),
+ L->isNonTemporal(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+ // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+ // Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl) {
+ SDValue LHSInt = GetSoftenedFloat(NewLHS);
+ SDValue RHSInt = GetSoftenedFloat(NewRHS);
+ EVT VT = NewLHS.getValueType();
+
+ assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
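+ // Predicates without a direct libcall (the unordered ones and SETONE) are
+ // built from two libcalls whose comparison results are OR'd together below.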
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ default: assert(false && "Do not know how to soften this setcc!");
+ }
+ }
+
+  // Use the target-specific return type for comparison libcalls.
+ EVT RetVT = TLI.getCmpLibcallReturnType();
+ SDValue Ops[2] = { LHSInt, RHSInt };
+ NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = TLI.getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
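+
+// Worked example: an f32 SETUEQ (unordered or equal) has no single libcall,
+// so it is split into LC1 = UO_F32 and LC2 = OEQ_F32.  With the default
+// libcall configuration this computes roughly
+//   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)
+// where each comparison against zero comes from TLI.getCmpLibcallCC and the
+// two setcc results are OR'd together above.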
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[1])), NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ const uint64_t *Parts = 0;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
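+
+// A note on the 2^N tables above: each encodes the ppcf128 (double-double)
+// value 2^N, i.e. a high double of 2^N and a low double of 0.  An IEEE-754
+// double equal to 2^N has an all-zero mantissa and a biased exponent of
+// 1023 + N, so 2^32 is 0x41f0000000000000 (exponent 0x41F = 1055), and 2^64
+// and 2^128 use exponents 0x43F and 0x47F in the same way.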
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+ == TargetLowering::Custom)
+ Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ EVT VT = NewLHS.getValueType();
+ assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
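+
+// The combined predicate built above is
+//   (LHSHi == RHSHi && LHSLo CC RHSLo) || (LHSHi != RHSHi && LHSHi CC RHSHi)
+// i.e. when the high doubles compare (ordered) equal the low doubles decide,
+// otherwise the high doubles alone decide.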
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
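+
+// The constant 0x41e0000000000000 is the double 2^31 (biased exponent
+// 1023 + 31 = 0x41E, zero mantissa).  Inputs below 2^31 convert directly
+// with FP_TO_SINT; inputs in [2^31, 2^32) are first reduced by 2^31 so the
+// signed conversion cannot overflow, and adding 0x80000000 afterwards
+// restores the missing top bit of the unsigned result.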
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 000000000000..e7c77dd10cb6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2887 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on PowerPC).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getMemOperand());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT))
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ case TargetLowering::TypeWidenVector:
+ if (OutVT.bitsEq(NInVT))
+ // The input is widened to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
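+
+// For example, bswap of an i16 promoted to i32: BSWAP on the promoted value
+// moves the two interesting bytes into the top half of the register, so the
+// result is shifted right by DiffBits (32 - 16 = 16) to return them to the
+// low 16 bits of the promoted result.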
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = N->getDebugLoc();
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but this tends to give better code?
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
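+
+// For example, ctlz of an i8 promoted to i32: the zero-extended operand has
+// 24 extra leading zero bits, so the i32 CTLZ over-counts by exactly
+// NVT bits - OVT bits = 24, which the SUB above removes.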
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
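+
+// For example, cttz of an i8 promoted to i32: OR'ing in bit 8, just above the
+// original type, does not change the trailing-zero count of any non-zero i8,
+// but makes an all-zero i8 produce 8 (the original bit width) rather than 32.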
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ return DAG.getNode(ISD::AssertZext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
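+
+// For example, sext from i8 to i16 where both types promote to i32: the
+// promoted i8 operand already lives in an i32 with unspecified high bits, so
+// a SIGN_EXTEND_INREG from i8 is emitted instead of a fresh extension.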
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->isNonTemporal(), N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
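+
+// For example, an i8 SADDO promoted to i32: 100 + 100 yields 200 in i32, but
+// sign extending 200 from bit 7 gives -56 != 200, so the overflow flag is
+// set, matching the signed overflow of the original i8 add.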
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+ assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ // Convert to the expected type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+ Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ EVT EltVT = InVT.getScalarType();
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(0));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(NumElts/2));
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply, then check the high bits of
+ // the result to see if the overflow happened.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+ // Overflow occurred iff the high part of the result does not
+ // zero/sign-extend the low part.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred iff the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred iff the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
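+
+// For example, an i8 UMULO promoted to i32: both inputs are zero extended,
+// so the i32 product is exact; shifting it right by SmallVT's width (8)
+// isolates everything above the low i8 result, and any non-zero bits there
+// mean the original i8 multiply overflowed.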
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::FP16_TO_FP32:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand promotion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (Op#1) and basic block destination (Op#4) are
+ // always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
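+ // Combine the pair: the low operand supplies the low bits, and the high
+ // operand, shifted up by the original operand width, supplies the rest.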
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (e.g. it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+ // Promote the operands. Their type does not need to match the vector
+ // element type exactly; check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
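+ // Operands 1-5 are boolean flags; after promotion keep only their low bit.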
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond,
+ N->getOperand(1), N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
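+ // Only the shift amount (operand 1) needs promotion; the shifted value is
+ // already legal. Zero extension is safe because an in-range shift amount
+ // has no bits above the original width.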
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
+ isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
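+ // The promoted value has at least as many bits as the original operand, so
+ // truncating it straight to the result type keeps exactly the bits we need.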
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
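+ // The high bits of the promoted value are not guaranteed to be zero, so
+ // any-extend to the result type and then clear everything above the
+ // original width.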
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion, we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc DL = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ // Emit this X << 1 as X+X.
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
+ } else {
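+ // General case: the high part is the shifted high input OR'd with the bits
+ // that spill over from the low input.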
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
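+ // General case: the low part combines the shifted low input with the bits
+ // that spill down from the high input; the high part is simply shifted.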
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that it is >= 32 or < 32, without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+#if 0
+ // FIXME: This code is broken for shifts with a zero amount!
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Compute 32-amt.
+ SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
+ DAG.getConstant(NVTBits, ShTy),
+ Amt);
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(Op1, NVT, InH, Amt),
+ DAG.getNode(Op2, NVT, InL, Amt2));
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
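+ // Precompute both forms of the shift amount: AmtExcess = Amt - NVTBits for
+ // shifts that cross into the other half, AmtLack = NVTBits - Amt for
+ // combining the two halves, and isShort to select between the two cases.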
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+
+ return false;
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
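+ // No carry-setting add is available, so detect the carry out of the low
+ // half by hand: an unsigned add wrapped iff the result is less than one of
+ // its operands.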
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo; make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero; make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ MemVT, isVolatile, isNonTemporal, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
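+ // Fold in the cross terms LL*RH and LH*RL; the LH*RH term only contributes
+ // above the double-width result, so it is ignored.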
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this _PARTS operation or if it
+ // will custom expand it.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+ SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+ EVT VT = LHSL.getValueType();
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
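+ // The low half is a plain truncation of the input; the high half comes from
+ // the bits just above it, shifted down and then truncated.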
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+ DebugLoc dl = N->getDebugLoc();
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+ // Check the multiplication by dividing the product back out. Substitute a
+ // divisor of 1 when RHS is zero so the divide cannot trap, and force the
+ // overflow flag to false in that case, since multiplying by zero never
+ // overflows.
+ SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+ DAG.getConstant(1, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+ Overflow = DAG.getNode(ISD::SELECT, DL, N->getValueType(1), isZero,
+ DAG.getConstant(0, N->getValueType(1)), Overflow);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ true, Func, Args, DAG, dl);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
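+
+// Editorial note (not part of the original change): the UMULO fast path above
+// detects overflow by dividing the truncated product back out: if LHS * RHS
+// did not wrap, then (LHS * RHS) / RHS == LHS exactly, otherwise the quotient
+// differs. For example, with 8-bit values 20 * 30 wraps to 88, and
+// 88 / 30 == 2 != 20, so overflow is reported. RHS is replaced by 1 when it
+// is zero so that the UDIV cannot divide by zero.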
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+  // If this is a comparison of the sign bit (X < 0 or X > -1), just look at
+  // the top part.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+  // FIXME: The code generated by this expansion is poor.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, the result is just the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
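+
+// Editorial note (not part of the original change): a worked instance of the
+// lo/hi comparison scheme above, assuming an i64 SETULT expanded into i32
+// halves. Comparing 0x00000001FFFFFFFF u< 0x0000000200000000:
+//   Tmp1 = (lo1 = 0xFFFFFFFF) u< (lo2 = 0x00000000) = false
+//   Tmp2 = (hi1 = 0x00000001) u< (hi2 = 0x00000002) = true
+// The high parts differ, so the select picks Tmp2 and the result is true,
+// as expected.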
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
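+
+// Editorial note (not part of the original change): for example, when an i64
+// shift amount is expanded into two i32 halves, any amount whose upper half is
+// non-zero requests a shift of at least 2^32 bits, which exceeds the width of
+// every legal type and therefore yields an undefined result anyway, so only
+// the lower half of the amount matters.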
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which is awkward on 8/16-bit targets. Just truncate the constant to a
+  // legal type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), isVolatile, isNonTemporal,
+ Alignment);
+ }
+
+ if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
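+
+// Editorial note (not part of the original change): a worked instance of the
+// big-endian path above for an i48 store expanded with NVT = i32. Here
+// EBytes = 6, IncrementSize = 4, ExcessBits = 16 and HiVT = i32. The first,
+// aligned i32 written at Ptr holds bits [47:16] of the value, assembled as
+// (Hi << 16) | (Lo >> 16), and the trailing i16 written at Ptr + 4 holds
+// bits [15:0] taken from the low part.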
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus one.
+ const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ assert(false && "Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
+ false, false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
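+
+// Editorial note (not part of the original change): the fudge constants above
+// are IEEE-754 single-precision bit patterns; 0x4F800000 encodes 2^32 and
+// 0x5F800000 encodes 2^64. If the source, reinterpreted as signed, has its
+// sign bit set, the signed conversion produced x - 2^N for an iN source, so
+// adding back the matching power of two recovers the unsigned value. For
+// example, the i32 value 0xFFFFFFFF converts signed to -1.0, and
+// -1.0 + 2^32 = 4294967295.0, the correct unsigned result.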
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue BaseIdx = N->getOperand(1);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getIntPtrConstant(i));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ NewMask.push_back(SV->getMaskElt(i));
+ }
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ ConvertedVector, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = N->getOperand(1);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+  return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+
+ // For each incoming vector
+  for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx != E; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+    for (unsigned i = 0; i < NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
+ Incoming, DAG.getIntPtrConstant(i));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+                     &NewOps[0], NewOps.size());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 000000000000..ba658b022053
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1153 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((I->getNodeId() == NewNode && Mapped > 1) ||
+ (I->getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ }
+
+  // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case TargetLowering::TypeLegal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case TargetLowering::TypeLegal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+#endif
+
+ return Changed;
+}
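+
+// Editorial note (not part of the original change): the NodeId bookkeeping in
+// the loop above uses positive IDs as a countdown of not-yet-processed
+// operands. For example, a two-operand user is first noticed when one of its
+// operands finishes, at which point its ID is set to NumOperands - 1 = 1; when
+// the second operand finishes the ID is decremented to 0 (ReadyToProcess) and
+// the user is pushed onto the worklist.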
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
+ if (M != N) {
+      // The node morphed into a different node. Normally for this to happen
+      // the original node would have to be marked NewNode. However this can
+      // momentarily fail to hold while ReplaceValueWith is in progress. Mark
+      // the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
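+
+// Editorial note (not part of the original change): with chained replacements
+// A -> B and B -> C recorded in ReplacedValues, remapping A first recursively
+// resolves B to C, rewrites the stored A entry to point directly at C, and
+// then returns C, so later lookups of A finish in a single step.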
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in which
+      // case this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW update!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+    // When recursively updating nodes with new nodes, it is possible for CSE
+    // to create new uses of From. If this happens, replace the new uses of
+    // From with To as well.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+  // Lo/Hi may have been newly allocated; if so, assign node IDs as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+  // Lo/Hi may have been newly allocated; if so, assign node IDs as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+  // Lo/Hi may have been newly allocated; if so, assign node IDs as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
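+/// For example (illustrative): a v4f32 operand becomes a v4i32 with each
+/// lane reinterpreted bit-for-bit.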
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
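+/// CreateStackStoreLoad - Reinterpret Op's bits as DestVT by storing Op to a
+/// stack temporary aligned for both types and reloading the slot as DestVT.
+/// For example (illustrative), this can turn an i64 into the bits of an f64
+/// when no direct bitcast is legal.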
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is false, the node has legal result types but an illegal
+/// operand, and VT is the type of that illegal operand.
+/// If LegalizeResult is true, the node has an illegal result type, and VT is
+/// the type of that illegal result.
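+/// Illustrative sketch (the opcode is an assumption; the hooks are the
+/// TargetLowering ones used below): a target that marks ISD::FP_TO_SINT with
+/// an illegal result type as Custom has TLI.ReplaceNodeResults called to
+/// produce replacement values, which are then wired in via ReplaceValueWith.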
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
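+/// For example (illustrative): on a target where i32 is legal, an i64 input
+/// yields LoVT = HiVT = i32, and a v8i32 input yields LoVT = HiVT = v4i32.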
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+ // Currently all types are split in half.
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ } else {
+ unsigned NumElements = InVT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
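+/// For example (illustrative): for an i64 Pair on a target whose transformed
+/// type is i32, Lo receives bits [31:0] and Hi bits [63:32], each as an i32.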
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
+
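+/// GetVectorElementPointer - Illustrative sketch of the address computation
+/// below (the concrete types are assumptions): for a pointer VecPtr to an
+/// in-memory <4 x i32> and an index Idx, the result is VecPtr + Idx * 4, the
+/// address of element Idx.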
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
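+/// For example (illustrative): joining an i32 Lo with an i32 Hi yields the
+/// i64 value (Hi << 32) | zext(Lo).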
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
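+/// For example (an illustrative assumption, not taken from this file): an
+/// FREM node with two f64 operands and an f64 result could be lowered with
+/// LibCallify(RTLIB::REM_F64, N, false), producing a call to an fmod-style
+/// routine with the same prototype as the node.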
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
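+/// Illustrative sketch (the libcall choice and the LHS/RHS/dl names are
+/// assumptions): a 64-bit signed divide on a 32-bit target could be emitted
+/// as
+///   SDValue Ops[2] = { LHS, RHS };
+///   SDValue Res = MakeLibCall(RTLIB::SDIV_I64, MVT::i64, Ops, 2,
+///                             /*isSigned=*/true, dl);
+/// which builds a call to the target's __divdi3-style runtime routine.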
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ return CallInfo;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
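+/// For example (illustrative): with ZeroOrNegativeOneBooleanContent, an i1
+/// "true" promoted to i32 becomes all ones (sign extension), whereas with
+/// ZeroOrOneBooleanContent it becomes 1 (zero extension).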
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode;
+ switch (TLI.getBooleanContents()) {
+ default:
+ assert(false && "Unknown BooleanContent!");
+ case TargetLowering::UndefinedBooleanContent:
+ // Extend to VT by adding rubbish bits.
+ ExtendCode = ISD::ANY_EXTEND;
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // Extend to VT by adding zero bits.
+ ExtendCode = ISD::ZERO_EXTEND;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+ // Extend to VT by copying the sign bit.
+ ExtendCode = ISD::SIGN_EXTEND;
+ break;
+ }
+ }
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
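+/// For example (illustrative): splitting an i64 Op with LoVT = HiVT = i32
+/// gives Lo = Op[31:0] and Hi = Op[63:32].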
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
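+// Usage note (illustrative; the exact call site is an assumption): the
+// instruction selector invokes this during DAG lowering, roughly as
+//   bool Changed = CurDAG->LegalizeTypes();
+// and may re-run DAG combining afterwards if any types were changed.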
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 000000000000..952797dc75b8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,720 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// used by the code that implements the SelectionDAG::LegalizeTypes method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+ // about the state of the node. The enum below lists the possible states.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not seen before, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
+private:
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ DenseMap<SDValue, SDValue> PromotedIntegers;
+
+ /// ExpandedIntegers - For integer nodes that need to be expanded, this map
+ /// records the Lo/Hi pair that makes up the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ DenseMap<SDValue, SDValue> SoftenedFloats;
+
+ /// ExpandedFloats - For float nodes that need to be expanded, this map
+ /// records the Lo/Hi pair that makes up the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+ /// SplitVectors - For vector nodes that need to be split, this map records
+ /// the Lo/Hi pair that makes up the split version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ DenseMap<SDValue, SDValue> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ DenseMap<SDValue, SDValue> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+ SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ DebugLoc dl);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ }
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into vectors
+ /// of half the size, this method returns the halves. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads to load
+ /// a vector with a resulting wider type. It takes:
+ ///   LdChain: list of chains for the loads to be generated.
+ ///   LD: load to widen.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+ /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+ /// loads to load a vector with a resulting wider type. It takes:
+ ///   LdChain: list of chains for the loads to be generated.
+ ///   LD: load to widen.
+ ///   ExtType: extension element type.
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores to
+ /// store a widened vector into non-widened memory. It takes:
+ ///   StChain: list of chains for the stores we have generated.
+ ///   ST: store of a widened value.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+ /// GenWidenVectorTruncStores - Helper function to generate a set of
+ /// truncating stores to store a widened vector into non-widened memory.
+ /// It takes:
+ ///   StChain: list of chains for the stores we have generated.
+ ///   ST: store of a widened value.
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
+
+ /// ModifyToType - Modifies a vector input (widens or narrows it) to a vector
+ /// of WidenVT. The input vector must have the same element type as WidenVT.
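+ /// For example (illustrative): widening a v2i32 input to a v4i32 WidenVT
+ /// yields a v4i32 whose first two lanes are the input elements and whose
+ /// last two are undef; narrowing extracts the leading subvector.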
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 000000000000..85ea6b662044
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,480 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+ DAG.getIntPtrConstant(1));
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+ getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
+ false, MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
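+  // The expanded halves occupy elements 2*Idx and 2*Idx+1 of the new vector.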
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
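+  // Turn the node into a BUILD_VECTOR with the scalar as element 0 and undef
+  // for the remaining elements.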
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+ bool isNonTemporal = St->isNonTemporal();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
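+  // Store the two halves; both stores hang off the original chain, with the
+  // second offset by IncrementSize bytes from the first.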
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // A MERGE_VALUES node can produce any number of values. We know that the
+ // first illegal one needs to be expanded into Lo/Hi.
+ unsigned i;
+
+  // Each leading legal result is replaced by the corresponding input operand,
+  // which has the same type.
+ for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+ // The first illegal result must be the one that needs to be expanded.
+ GetSplitOp(N->getOperand(i), Lo, Hi);
+
+ // Legalize the rest of the results into the input operands whether they are
+ // legal or not.
+ unsigned e = N->getNumValues();
+ for (++i; i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
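+  // Split the two value operands; the condition operand is used unsplit for
+  // both halves.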
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
+ Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 000000000000..ffff10ce2948
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,337 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for UINT_TO_FP; falls back to UnrollVectorOp if
+  // SINT_TO_FP and SRL on vectors aren't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+  // The legalize process is inherently a bottom-up recursive process (nodes
+  // legalize their operands before themselves). Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph. In
+  // practice, however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
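+  // Pick the type used to query the target's action for this operation:
+  // usually the result type, but int-to-FP conversions are keyed on the
+  // source type and FP_ROUND_INREG on the rounded-to type.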
+ EVT QueryType;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ case ISD::VSETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::SIGN_EXTEND_INREG:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ case TargetLowering::Promote:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::UINT_TO_FP)
+ Result = ExpandUINT_TO_FLOAT(Op);
+ else if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::VSETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = DAG.UnrollVectorOp(Op.getNode());
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ EVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
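+  // Split each unsigned element x into halves, x = HI * 2^(BW/2) + LO.  Both
+  // halves are non-negative when reinterpreted as signed BW-bit values, so
+  // each can be converted with SINT_TO_FP and the results recombined as
+  // float(x) = float(HI) * 2^(BW/2) + float(LO).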
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFFULL : 0x0000FFFFULL;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+  // Two to the power of half-word-size.
+  SDValue TWOHW = DAG.getConstantFP(double(1ULL << (BW / 2)),
+                                    Op.getValueType());
+
+  // Split the input: HI gets the value shifted down by half a word, LO gets
+  // the value with its upper half masked off.
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+  // Convert HI and LO to floats, then scale the HI part back up to its
+  // original weight of 2^(BW/2).
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
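+  // Expand FNEG as (-0.0 - x); subtracting from -0.0 rather than +0.0 also
+  // flips the sign of a zero operand (0.0 - 0.0 yields +0.0).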
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
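+    // A vector SETCC yields an all-ones value for true in each lane, so
+    // widen the scalar SETCC result to all-ones/zero of the element type.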
+ Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), EltVT),
+ DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 000000000000..b5698f9c6738
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2568 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize the result of this operator!");
+
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ NewVT, Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED,
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getDebugLoc(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
+
+ // VSETCC always returns a sign-extended value, while SETCC may not. The
+ // SETCC result type may not match the vector element type. Correct these.
+ if (NVT.bitsLE(SVT)) {
+ // The SETCC result type is bigger than the vector element type.
+ // Ensure the SETCC result is sign-extended.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
+ DAG.getValueType(MVT::i1));
+ // Truncate to the final type.
+ return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
+ }
+
+ // The SETCC result type is smaller than the vector element type.
+ // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
+ // Sign extend to the final type.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
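+  // EXTRACT_VECTOR_ELT may have a result type wider than the vector element
+  // type; any-extend the scalarized value in that case.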
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+ Res);
+ return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ case ISD::VSETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::CONVERT_RNDSAT:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(VecType);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ SDValue LL, LH, RL, RH;
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(0));
+ RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: llvm_unreachable("Unexpected type action!");
+ case TargetLowering::TypeLegal: {
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case TargetLowering::TypePromoteInteger: {
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
+ InOp.getValueType().getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ break;
+ case TargetLowering::TypeWidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+      Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                           &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split this operator's operand!");
+
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FTRUNC:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType())), 0);
+ }
+
+ // Store the vector to the stack.
+ EVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc DL = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ bool isVol = N->isVolatile();
+ bool isNT = N->isNonTemporal();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ LoMemVT, isVol, isNT, Alignment);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ isVol, isNT, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVol, isNT, Alignment);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ isVol, isNT, Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands must all have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-of-two vectors, we could convert
+ // this to a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::VSETCC:
+ Res = WidenVecRes_VSETCC(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ Res = WidenVecRes_Unary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ unsigned Opcode = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
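+ // Find the largest legal vector type (with the widened element type) that
+ // is no wider than WidenVT.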
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take chunks of size NumElts from the beginning and add them to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type: build a vector of type NextVT with INSERT_VECTOR_ELT nodes.
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
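+ // If the input is widened to the same element count as the result, the
+ // conversion can be applied directly to the widened input.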
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
+ // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widen value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
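+ // The bitcast was not handled above; try to express it as a bitcast from a
+ // wider vector, built from the input padded with undefs, that has the same
+ // size as WidenVT.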
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ // Determine new input vector type. The new input vector type will use
+ // the same element type (if it's a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector, padding the extra elements with undef.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts);
+ for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
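+ // Pad the mask with undef (-1) entries for the extra widened lanes.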
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecOp_Convert(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // Since the result is legal and the input is illegal, it is unlikely
+ // that we can fix the input to a legal type so unroll the convert
+ // into some scalar code and create a nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenate vector with a
+ // nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value but we want only to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+ MVT::Other,&StChain[0],StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for load/store.
+// TLI: Target lowering used to determine legal types.
+// Width: Width left to load/store.
+// WidenVT: The widened vector type to load to / store from.
+// Align: If 0, don't allow use of a wider type.
+// WidenEx: If Align is not 0, the additional amount we can load/store from.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is a larger legal integer type than the element type to load/store.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
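+ // Integer candidates are visited from widest to narrowest; once they are no
+ // wider than the element type, stop looking.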
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger legal vector type to load/store that has the same
+ // element type and whose width evenly divides the width of WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Load operators to build the vector from.
+// [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
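+ // Seed the vector with the first load via SCALAR_TO_VECTOR, then insert the
+ // remaining loads one at a time, re-bitcasting whenever the load type
+ // changes.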
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust the insert position within the vector based on the new load type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the same
+ // element type, or scalar loads, and then recombines them into the widened
+ // vector type.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference between widened and loaded widths.
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads unless the access is volatile.
+
+ // Find the vector type that we can load from.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ }
+
+ // Load the vector using multiple loads, from the largest legal vector type
+ // down to scalars.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
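+ // Keep loading with progressively smaller legal types until the whole
+ // vector width has been covered, advancing the pointer past each piece.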
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (LdWidth < NewVTWidth) {
+ // The type we are currently using is too large; find a better-sized type.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ }
+
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ isVolatile,
+ isNonTemporal, MinAlign(Align, Increment));
+ LdChain.push_back(LdOp.getValue(1));
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the loads contain vectors, build the result with CONCAT_VECTORS.
+ // All of the vectors used for the loads are power-of-2 sized, and the
+ // scalar loads can be combined to make a power-of-2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
+ LdEltVT, isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the
+ // same element type, or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
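+ // Repeatedly store the widest legal piece that fits in the remaining
+ // width, advancing the base pointer after each piece.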
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore the index to be relative to the original widened element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For truncating stores, it may not be more efficient to chop up the
+ // widened vector and store the pieces. Instead, we extract each element
+ // and then store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be the case that the widened vector type is bigger than the
+ // memory type we are storing to.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we cannot play the trick of chopping legal vector
+ // types and bitcasting to the right type. Instead, we unroll the store.
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
+ isVolatile, isNonTemporal, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ // Extract element i of the widened value for this store slot.
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+ // Note that InOp might have been widened so it might already have
+ // the right width or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
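+ // Widening by an integral factor can be done with a single CONCAT_VECTORS,
+ // padding with undef operands.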
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 000000000000..2dcb22957325
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,114 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+public:
+ enum DbgValueKind {
+ SDNODE = 0, // value is the result of an expression
+ CONST = 1, // value is a constant
+ FRAMEIX = 2 // value is contents of a stack location
+ };
+private:
+ enum DbgValueKind kind;
+ union {
+ struct {
+ SDNode *Node; // valid for expressions
+ unsigned ResNo; // valid for expressions
+ } s;
+ const Value *Const; // valid for constants
+ unsigned FrameIx; // valid for stack objects
+ } u;
+ MDNode *mdPtr;
+ uint64_t Offset;
+ DebugLoc DL;
+ unsigned Order;
+ bool Invalid;
+public:
+ // Constructor for non-constants.
+ SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
+ unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
+ kind = SDNODE;
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+
+ // Constructor for constants.
+ SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+ unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = CONST;
+ u.Const = C;
+ }
+
+ // Constructor for frame indices.
+ SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = FRAMEIX;
+ u.FrameIx = FI;
+ }
+
+ // Returns the kind.
+ DbgValueKind getKind() { return kind; }
+
+ // Returns the MDNode pointer.
+ MDNode *getMDPtr() { return mdPtr; }
+
+ // Returns the SDNode* for a register ref
+ SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+
+ // Returns the ResNo for a register ref
+ unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+
+ // Returns the Value* for a constant
+ const Value *getConst() { assert (kind==CONST); return u.Const; }
+
+ // Returns the FrameIx for a stack object
+ unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+
+ // Returns the offset.
+ uint64_t getOffset() { return Offset; }
+
+ // Returns the DebugLoc.
+ DebugLoc getDebugLoc() { return DL; }
+
+ // Returns the SDNodeOrder. This is the order of the preceding node in the
+ // input.
+ unsigned getOrder() { return Order; }
+
+ // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ // property. A SDDbgValue is invalid if the SDNode that produces the value is
+ // deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() { return Invalid; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
new file mode 100644
index 000000000000..f88b26d5c422
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNodeOrdering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEORDERING_H
+#define LLVM_CODEGEN_SDNODEORDERING_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class SDNode;
+
+/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
+/// SDNode that roughly corresponds to the ordering of the original LLVM
+/// instruction. This is used when scheduling is turned off, because we then
+/// forgo the normal scheduling algorithms and output the instructions
+/// according to this ordering.
+class SDNodeOrdering {
+ DenseMap<const SDNode*, unsigned> OrderMap;
+
+ void operator=(const SDNodeOrdering&); // Do not implement.
+ SDNodeOrdering(const SDNodeOrdering&); // Do not implement.
+public:
+ SDNodeOrdering() {}
+
+ void add(const SDNode *Node, unsigned O) {
+ OrderMap[Node] = O;
+ }
+ void remove(const SDNode *Node) {
+ DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+ if (Itr != OrderMap.end())
+ OrderMap.erase(Itr);
+ }
+ void clear() {
+ OrderMap.clear();
+ }
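+  // Note: for a node that was never added, operator[] default-constructs an
+  // entry, so getOrder returns 0.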
+ unsigned getOrder(const SDNode *Node) {
+ return OrderMap[Node];
+ }
+};
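+
+// Illustrative use only (N and CurOrder are hypothetical placeholders): the
+// DAG builder would assign a monotonically increasing order as it visits IR
+// instructions and query it later:
+//   SDNodeOrdering Ord;
+//   Ord.add(N, ++CurOrder);
+//   unsigned O = Ord.getOrder(N);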
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 000000000000..b275c6321ae4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,644 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their defining nodes
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
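+  // Nodes that produce or consume glue cannot be duplicated. A chain result
+  // (MVT::Other) may indicate a folded load, which we try to unfold instead
+  // of cloning.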
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+    NewSU = NewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = NewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
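+  // Implicit defs follow the explicit defs in the node's result list, so the
+  // result index for Reg is the number of explicit defs plus Reg's position
+  // in the implicit-def list.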
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+        // Try duplicating the nodes that produce these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is the same as RC, then it must
+        // be possible to copy the value directly. Do not try to duplicate
+        // the def.
+        // If the cross-copy register class is not the same as RC, then it
+        // is possible to copy the value, but it requires cross register
+        // class copies and it is expensive.
+        // If the cross-copy register class is null, then it is not possible
+        // to copy the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+          // Issue copies; these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 000000000000..430283d5eff9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,265 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ tdListDAGScheduler("list-td", "Top-down list scheduler",
+ createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation. This supports
+/// top-down scheduling.
+///
+class ScheduleDAGList : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+public:
+ ScheduleDAGList(MachineFunction &mf,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleaseSucc(SUnit *SU, const SDep &D);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ AvailableQueue->initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, *I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
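+        // Remove the entry by swapping in the last element (pending-queue
+        // order is irrelevant) and re-examine index i.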
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall but no other problem; just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 000000000000..12b183804c28
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,2994 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ tdrListrDAGScheduler("list-tdrr",
+ "Top-down register reduction list scheduling",
+ createTDRRListDAGScheduler);
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+#ifndef NDEBUG
+namespace {
+  // For sched=list-ilp, count the number of times each factor comes into play.
+ enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
+ FactStatic, FactOther, NumFactors };
+}
+static const char *FactorName[NumFactors] =
+{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+static int FactorCount[NumFactors];
+#endif //!NDEBUG
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+ /// it is top-down.
+ bool isBottomUp;
+
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+  /// IssueCount - Count of instructions issued in this cycle.
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their defining nodes
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<SUnit*> LiveRegGens;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits) {
+
+ const TargetMachine &tm = mf.getTarget();
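+    // The default ScheduleHazardRecognizer does nothing (it reports itself
+    // as disabled); a target-specific recognizer is only created when the
+    // scheduler actually needs latency/hazard information.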
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGRRList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ void ScheduleNodeTopDown(SUnit*);
+ void ListScheduleTopDown();
+
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = NewSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool ForceUnitLatencies() const {
+ return !NeedLatency;
+ }
+};
+} // end anonymous namespace
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost) {
+ EVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+ unsigned Opcode = Node->getMachineOpcode();
+
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
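+      // Operand 0 of a REG_SEQUENCE node is the ID of the destination
+      // register class, encoded as a constant.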
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ FactorCount[i] = 0;
+ }
+#endif //!NDEBUG
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegGens.resize(TRI->getNumRegs(), NULL);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+ if (isBottomUp)
+ ListScheduleBottomUp();
+ else
+ ListScheduleTopDown();
+
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
+ }
+#endif // !NDEBUG
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!ForceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+    // CapturePred and others may have left the node in the pending queue; avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[I->getReg()] = SU;
+ }
+ }
+ }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle =
+ isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ if (isBottomUp)
+ HazardRec->RecedeCycle();
+ else
+ HazardRec->AdvanceCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency though such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (isBottomUp && SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (isBottomUp && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+
+ if (!isBottomUp && SU->isCall) {
+ HazardRec->Reset();
+ }
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->ScheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(SU->dump(this));
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node. Check before overwriting so
+      // that NumLiveRegs stays consistent.
+      if (!LiveRegDefs[I->getReg()])
+        ++NumLiveRegs;
+      LiveRegDefs[I->getReg()] = SU;
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->UnscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+/// BacktrackBottomUp - Backtrack the schedule, unscheduling nodes up to and
+/// including BtSU, so that SU can be scheduled without the live physical
+/// register interference.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ if (SU->isSucc(OldSU))
+ // Don't try to remove SU from AvailableQueue.
+ SU->isAvailable = false;
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ ComputeLatency(LoadSU);
+ }
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ ComputeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (isOperandOf(I->getSUnit(), LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
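+
+// Illustration of the unfolding path above, assuming a target that folds
+// loads into arithmetic (the mnemonics are hypothetical): a single SUnit for
+// "r1 = add [mem], r2" may be split by unfoldMemoryOperand into a LoadSU
+// ("r3 = load [mem]") and a new arithmetic SUnit ("r1 = add r3, r2"). Chain
+// predecessors, chain successors, and predecessors feeding the load address
+// move to LoadSU; value predecessors and value successors move to the new
+// SUnit; and a data edge from LoadSU carries the loaded value into it.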
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true);
+ AddPred(SuccSU, D);
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
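+
+// Example of the lookup above: if N has two explicit defs (result values 0
+// and 1) and an implicit def list of { R0, R1 } (hypothetical registers),
+// then a query for R1 skips past R0 and returns N->getValueType(3), since
+// implicit physreg defs are modeled as result values that follow the
+// explicit defs, in list order.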
+
+/// CheckForLiveRegDef - Add to LRegs (and RegAdded) any currently live
+/// registers that the specified register def of the specified SUnit clobbers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+    // Check if Reg is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI)) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
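+
+// Note that getOverlaps(Reg) yields Reg itself plus every register aliasing
+// it, so a def of a sub- or super-register also counts as interference. For
+// example, on a target with nested registers such as x86, a def of EAX
+// interferes with a live AX or AL definition recorded for a different SUnit.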
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SmallVector<SUnit*, 4> Interferences;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ assert(Interferences[i]->isAvailable && "must still be available");
+ AvailableQueue->push(Interferences[i]);
+ }
+ return CurSU;
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+
+ // If one or more successors has been unscheduled, then the current
+      // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable) {
+ CurSU = AvailableQueue->pop();
+ }
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ Interferences.erase(Interferences.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try to
+    // duplicate the nodes that produce these "too expensive to copy"
+    // values to break the dependency. If even that doesn't work, insert
+    // cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is the same as RC, then it must be
+    // possible to copy the value directly. Do not try to duplicate the def.
+    // If the cross copy register class is not the same as RC, then it's
+    // possible to copy the value, but it requires cross register class copies
+    // and it is expensive.
+    // If the cross copy register class is null, then it's not possible to
+    // copy the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (Interferences[i]->isAvailable) {
+ AvailableQueue->push(Interferences[i]);
+ }
+ }
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ SuccSU->isAvailable = true;
+ AvailableQueue->push(SuccSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+ AvailableQueue->setCurCycle(CurCycle);
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // All leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSU = AvailableQueue->pop();
+
+ if (CurSU)
+ ScheduleNodeTopDown(CurSU);
+ ++CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = false,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void ScheduledNode(SUnit *SU);
+
+ void UnscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ if (isBottomUp())
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ else
+ dbgs() << "Depth " << SU->getDepth() << ": ";
+ SU->dump(DAG);
+ }
+ }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
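+
+// Worked example of the numbering above: a leaf with no non-chain
+// predecessors gets 1. A node whose non-chain predecessors carry numbers
+// {3, 1} gets 3 (the maximum), while one whose predecessors carry {2, 2}
+// gets 3 (the maximum plus one for each additional predecessor that ties
+// it), reflecting that evaluating equally "deep" subtrees needs an extra
+// register to hold an intermediate result.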
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+    // computation. Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors and does not lengthen
+    // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
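+
+// For example, with RegLimit[RCId] == 8 and RegPressure[RCId] == 7, a
+// predecessor register def in that class with Cost 1 makes (7 + 1) >= 8,
+// so the node is treated as high register pressure.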
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution of this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
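+
+// Sketch of the accounting above, assuming a single register class already at
+// its pressure limit: a predecessor whose defs are not yet fully covered by
+// scheduled uses contributes +1 per def in that class (scheduling SU would
+// extend their liveness upward), while each used def of SU itself in that
+// class contributes -1 (scheduling SU produces that value and ends its
+// bottom-up liveness). An SU with one of each nets a PDiff of 0.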
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+  // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ dumpRegPressure();
+}
+
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ I->getSUnit()->isVRegCycle = 0;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling both of them would cause pipeline stalls, sort them
+  // according to their height.
+ if (LStall) {
+ if (!RStall) {
+ DEBUG(++FactorCount[FactStall]);
+ return 1;
+ }
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactStall]);
+ return LHeight > RHeight ? 1 : -1;
+ }
+ } else if (RStall) {
+ DEBUG(++FactorCount[FactStall]);
+ return -1;
+ }
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency)) {
+ if (DisableSchedCycles) {
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactHeight]);
+ return LHeight > RHeight ? 1 : -1;
+ }
+ }
+ else {
+      // If neither instruction stalls (!LStall && !RStall) then their height
+      // is already covered, so only their depth matters. We also reach
+ // this if both stall but have the same height.
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(++FactorCount[FactDepth]);
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ }
+ if (left->Latency != right->Latency) {
+ DEBUG(++FactorCount[FactOther]);
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ DEBUG(++FactorCount[FactRegUses]);
+ #ifndef NDEBUG
+ const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority) {
+ DEBUG(++FactorCount[FactStatic]);
+ return LPriority > RPriority;
+ }
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+  // the same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist) {
+ DEBUG(++FactorCount[FactOther]);
+ return LDist < RDist;
+ }
+
+  // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch) {
+ DEBUG(++FactorCount[FactOther]);
+ return LScratch > RScratch;
+ }
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight()) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (left->getDepth() != right->getDepth()) {
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ DEBUG(++FactorCount[FactOther]);
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
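+    // When exactly one side can enable coalescing, that difference alone
+    // decides the comparison.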
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ DEBUG(++FactorCount[FactRegUses]);
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
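+    // A block that is its own successor forms a single-block loop.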
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const unsigned *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
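+        // Removing the old edge shrank PredSU->Succs, so revisit this index
+        // on the next iteration.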
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address but only one is commutable, favor the one that's
+/// not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if ((!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ }
+ }
+ }
+ }
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+ unsigned Limit) {
+ unsigned Sum = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ const SUnit *SuccSU = I->getSUnit();
+ for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+ EE = SuccSU->Preds.end(); II != EE; ++II) {
+ SUnit *PredSU = II->getSUnit();
+ if (!PredSU->isScheduled)
+ if (++Sum > Limit)
+ return Sum;
+ }
+ }
+ return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res < 0;
+
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
+ bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
+ bool LIsFloater = LIsTarget && left->NumPreds == 0;
+ bool RIsFloater = RIsTarget && right->NumPreds == 0;
+ unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
+ unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
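+  // The helper above stops counting once the limit is exceeded, so "== 1"
+  // distinguishes exactly one unscheduled predecessor among all successors
+  // from zero or more than one.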
+
+ if (left->NumSuccs == 0 && right->NumSuccs != 0)
+ return false;
+ else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+ return true;
+
+ if (LIsFloater)
+ LBonus -= 2;
+ if (RIsFloater)
+ RBonus -= 2;
+ if (left->NumSuccs == 1)
+ LBonus += 2;
+ if (right->NumSuccs == 1)
+ RBonus += 2;
+
+ if (LPriority+LBonus != RPriority+RBonus)
+ return LPriority+LBonus < RPriority+RBonus;
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ if (left->NumSuccsLeft != right->NumSuccsLeft)
+ return left->NumSuccsLeft > right->NumSuccsLeft;
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ TDRegReductionPriorityQueue *PQ =
+ new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 000000000000..71f07d6fa47a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,802 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "SDNodeDbgValue.h"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high-latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency'"
+ "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ DAG = dag;
+ ScheduleDAG::Run(bb, insertPos);
+}
+
+/// NewSUnit - Creates a new SUnit and returns a pointer to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = NewSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
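+/// Only operand 2 of a CopyToReg user (the value being copied; the operands
+/// are chain, register, value) can carry such a dependency.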
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
+
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SmallVector<EVT, 4> VTs;
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return;
+
+ // Don't add glue to something which already has glue.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (GlueDestNode)
+ Ops.push_back(Glue);
+
+ SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target-specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
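+/// For example, loads from Base, Base+4 and Base+8 on the same chain may be
+/// glued into a single cluster and emitted back to back.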
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+      // FIXME: Should be ok if the addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ AddGlue(Lead, SDValue(0, 0), true, DAG);
+
+ SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ AddGlue(Load, InGlue, OutGlue, DAG);
+
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
+  // get invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is glued to this node; if so, add it to the glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ ComputeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
+        // scheduler emits a copy from the physical register to a virtual
+        // register unless it requires a cross-class copy (cost < 0). That
+        // means we only treat an "expensive to copy" register dependency as
+        // a physical register dependency. This may change in the future
+        // though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if(isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpLatency, PhysReg);
+ if (!isChain && !UnitLatencies) {
+ ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
+ }
+
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+/// given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued-together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getValueType(DefIdx);
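+      // Leave DefIdx one past the value just found; GetIdx() compensates by
+      // returning DefIdx-1.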
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so as not to penalize the def.
+      // FIXME: need a target-specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ if (!SU->getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+}
+
+namespace {
+ struct OrderSorter {
+ bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+ const std::pair<unsigned, MachineInstr*> &B) {
+ return A.first < B.first;
+ }
+ };
+}
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+  // order number right after N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (!Order || DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+    // Process any valid SDDbgValues even if the node does not have an order
+    // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N;
+ N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ GluedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
+ Seen);
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values.
+ std::sort(Orders.begin(), Orders.end(), OrderSorter());
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ if (!MI)
+ continue;
+ for (; DI != DE &&
+ (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ while (DI != DE) {
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+      MachineBasicBlock::iterator Pos = Emitter.getBlock()->getFirstTerminator();
+ if (!(*DI)->isInvalidated()) {
+        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI)
+ InsertBB->insert(Pos, DbgMI);
+ }
+ ++DI;
+ }
+ }
+
+ BB = Emitter.getBlock();
+ InsertPos = Emitter.getInsertPos();
+ return BB;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 000000000000..9c27b2ea02ec
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,165 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Glue operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *N);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+    /// given as input. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// flagged-together nodes with a single SUnit.
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+ /// CopyToReg and its only active data operands are CopyFromReg within a
+ /// single block loop.
+ ///
+ void InitVRegCycleFlag(SUnit *SU);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const { }
+
+ virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 000000000000..35ea0bb940b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,6655 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeOrdering.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // PPC long double cannot be converted to any other type.
+ if (VT == MVT::ppcf128 ||
+ &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements.
+ SDValue NotZero = N->getOperand(i);
+ if (isa<ConstantSDNode>(NotZero)) {
+ if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(NotZero)) {
+ if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+ bitcastToAPInt().isAllOnesValue())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements.
+ SDValue Zero = N->getOperand(i);
+ if (isa<ConstantSDNode>(Zero)) {
+ if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(Zero)) {
+ if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one 0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is an
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
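+/// For example, SETOGT becomes SETOLT and SETULE becomes SETUGE.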
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
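+/// For example, the integer inverse of SETULT is SETUGE, and the FP inverse
+/// of SETOLT is SETUGE (the unordered case becomes true).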
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
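+/// For example, ORing SETGT with SETLT yields SETNE for integers, and ORing
+/// SETOGT with SETOLT yields SETONE for floating point.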
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
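+/// For example, ANDing SETUGE with SETULE yields SETUEQ, which is then
+/// canonicalized to SETEQ for integer operands.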
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+ ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+ // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+///
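+/// For example, with ConvType and AM both zero, a volatile non-temporal
+/// access encodes as (1 << 5) | (1 << 6) = 0x60.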
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ (isNonTemporal << 6);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+ DAGUpdateListener *UpdateListener) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+ RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Remove the ordering of this node.
+ Ordering->remove(N);
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+ ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
+ for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+ DbgVals[i]->setIsInvalidated();
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(
+ std::pair<std::string,unsigned char>(ESN->getSymbol(),
+ ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+ DAGUpdateListener *UpdateListener) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+ // N is now dead. Inform the listener if it exists and delete it.
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If an identical node didn't already exist, N itself was updated in place.
+ // Inform the listener, if one exists.
+ if (UpdateListener)
+ UpdateListener->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ break;
+ }
+ }
+}
+
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ const Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm)
+ : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+ EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ Root(getEntryNode()), Ordering(0) {
+ AllNodes.push_back(&EntryNode);
+ Ordering = new SDNodeOrdering();
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &mf) {
+ MF = &mf;
+ Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+ allnodes_clear();
+ delete Ordering;
+ delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+ Ordering->clear();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
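+/// getZeroExtendInReg - A brief summary (derived from the code below): return
+/// Op with the bits above VT's width masked to zero; e.g. for an i32 Op and
+/// VT == MVT::i8 this amounts to (and Op, 255).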
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType() == VT) return Op;
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&Val);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
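+ // For vector types, splat the scalar constant into a BUILD_VECTOR; e.g.
+ // getConstant(1, MVT::v4i32) returns a BUILD_VECTOR with four i32 1 operands.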
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else if (EltVT==MVT::f64)
+ return getConstantFP(APFloat(Val), VT, isTarget);
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ bool ignored;
+ APFloat apf = APFloat(Val);
+ apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(apf, VT, isTarget);
+ } else {
+ assert(0 && "Unsupported type in getConstantFP");
+ return SDValue();
+ }
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
+ EVT VT, int64_t Offset,
+ bool isTargetGA,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ EVT PTy = TLI.getPointerTy();
+ unsigned BitWidth = PTy.getSizeInBits();
+ if (BitWidth < 64)
+ Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
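+ // (e.g. with 32-bit pointers an offset of 0xFFFFFFFF becomes -1).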
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ unsigned Opc;
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT,
+ Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->AddSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned char TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+ TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and changes all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
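+// For example, with 4-element vectors the mask <0, 5, 2, 7> becomes
+// <4, 1, 6, 3> once N1 and N2 have been exchanged.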
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Vector Shuffle VTs must be a vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
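+ // (e.g. with 4 elements, mask <0, 4, 1, 5> becomes <0, 0, 1, 1>).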
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+ // Canonicalize all indices into lhs -> shuffle lhs, undef
+ // Canonicalize all indices into rhs -> shuffle rhs, undef
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+
+ // If this is an identity shuffle return the input vector; if every element
+ // shuffles to undef, return an UNDEF node.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the NodeAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N =
+ new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5,
+ Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddPointer(Label);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(BA);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || V->getType()->isPointerTy()) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MD);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ MVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ const Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ const Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ const Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const TargetData *TD = TLI.getTargetData();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ // No compile time operations on this type yet.
+ if (N1C->getValueType(0) == MVT::ppcf128)
+ return SDValue();
+
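+ // For example, if either constant is a NaN the compare is unordered:
+ // SETLT then folds to undef, SETULT to 1, and SETOLT to 0.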
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) const {
+ unsigned BitWidth = Mask.getBitWidth();
+ assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
+ "Mask size mismatches value type size!");
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6 || Mask == 0)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
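+ // For example, two operands each known to be a multiple of 4 yield a
+ // product with at least four known trailing zero bits.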
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(Op.getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ APInt InDemandedMask = (Mask << ShAmt);
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ if (HighBits.getBoolValue())
+ InDemandedMask |= APInt::getSignBit(BitWidth);
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit = InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clearAllBits();
+ return;
+ }
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask.trunc(InBits);
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded. Temporarily set this bit in the mask for our callee.
+ if (NewBits.getBoolValue())
+ InMask |= InSignBit;
+
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ // If the sign bit wasn't actually demanded by our caller, we don't
+ // want it set in the KnownZero and KnownOne result values. Reset the
+ // mask and reapply it to the result values.
+ InMask = Mask.trunc(InBits);
+ KnownZero &= InMask;
+ KnownOne &= InMask;
+
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = Mask.zext(InBits);
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
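+ // Continuing that example in an i32: C+1 == 21 has NLZ == 27 leading
+ // zeros, so MaskV below asks whether the top 28 bits of X are known zero
+ // (i.e. X < 16); if so, 20-X lies in [5, 20] and its top 27 bits are
+ // known zero.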
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth here, since C is non-negative and so C+1 is non-zero.
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+ Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD:
+ case ISD::ADDE: {
+ // The low bits of the output are known zero out to the shorter of the two
+ // operands' runs of known-zero low bits (no carry can enter that range).
+ // For example, 8+(X<<3) is known to have the low 3 bits clear.
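+ // For instance, if one operand has its low four bits known zero but the
+ // other only its low two, just the low two bits of the sum are known zero.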
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
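+ // For example, if both operands have their low four bits known zero, the
+ // incoming carry can only affect bit 0, so bits 1..3 of the result are
+ // still known zero.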
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+ return;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
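+ // For example, for X srem 8 (LowBits == 7) the low three result bits are
+ // exactly the low three bits of X.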
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
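+ // For example, X urem 16 can only produce values 0..15, so everything
+ // above the low four bits is known zero.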
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+ Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ return;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
+ default:
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Allow the target to implement this method for its nodes.
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+ Depth);
+ return;
+ }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
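+ // For example, an AssertSext from i8 inside an i32 guarantees
+ // 32-8+1 == 25 copies of the sign bit.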
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents() ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by VTBits-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
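+ // For example, adding two operands that each have at least four sign bits
+ // yields a result with at least three sign bits.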
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16 sextload to i32: 17 sign bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // e.g. i16 zextload to i32: 16 sign bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
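+ // For example, if the top 12 bits are known zero, ~Mask has at least 12
+ // leading zeros after the shift below, so at least 12 sign bits are
+ // reported.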
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return the number of leading zeros. We use 'min' here in case the value
+ // was zero before shifting; we don't want to return 64 for an i32 '0'.
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst == 0.
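+/// For example, (or X, 3) computes the same value as (add X, 3) whenever the
+/// low two bits of X are known to be zero.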
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ EVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ // No compile time operations on ppcf128.
+ if (VT == MVT::ppcf128) break;
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(Val.bitsToFloat(), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(Val.bitsToDouble(), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), 2, x);
+ return getConstant(api, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid sext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid zext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else
+ return Operand.getNode()->getOperand(0);
+ }
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+ EVT VT,
+ ConstantSDNode *Cst1,
+ ConstantSDNode *Cst2) {
+ const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD: return getConstant(C1 + C2, VT);
+ case ISD::SUB: return getConstant(C1 - C2, VT);
+ case ISD::MUL: return getConstant(C1 * C2, VT);
+ case ISD::UDIV:
+ if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+ break;
+ case ISD::UREM:
+ if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+ break;
+ case ISD::SDIV:
+ if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+ break;
+ case ISD::SREM:
+ if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+ break;
+ case ISD::AND: return getConstant(C1 & C2, VT);
+ case ISD::OR: return getConstant(C1 | C2, VT);
+ case ISD::XOR: return getConstant(C1 ^ C2, VT);
+ case ISD::SHL: return getConstant(C1 << C2, VT);
+ case ISD::SRL: return getConstant(C1.lshr(C2), VT);
+ case ISD::SRA: return getConstant(C1.ashr(C2), VT);
+ case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+ case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+ default: break;
+ }
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmountTy().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ EVT VEltTy = N1.getValueType().getVectorElementType();
+ if (Elt.getValueType() != VEltTy) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
+ }
+ if (VT != VEltTy) {
+ // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
+ // result is implicitly extended.
+ Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
+ }
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element else
+ // if the indices are known different, extract the element from
+ // the original vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
+ break;
+ }
+ }
+
+ if (N1C) {
+ if (N2C) {
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+ if (SV.getNode()) return SV;
+ } else { // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP && VT != MVT::ppcf128) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all-zeros vector; just return
+ // the other (non-undef) operand.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ else
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ break;
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+/// SplatByte - Distribute ByteVal over NumBits bits.
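+/// For example, SplatByte(32, 0xAB) produces 0xABABABAB.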
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(Value.getOpcode() != ISD::UNDEF);
+
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(Val), VT);
+ }
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
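+ // For example, for an i32 memset value the zero-extended byte 0xAB is
+ // multiplied by 0x01010101, yielding 0xABABABAB.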
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ }
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ else if (VT == MVT::f32 || VT == MVT::f64)
+ return DAG.getConstantFP(0.0, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumBits = VT.getSizeInBits();
+ unsigned MSB = NumBits / 8;
+ uint64_t Val = 0;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns an ADD node computing Base plus a constant
+/// Offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ EVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+ return true;
+
+ return false;
+}
+
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ NonScalarIntSafe, MemcpyStrSrc,
+ DAG.getMachineFunction());
+
+ if (VT == MVT::Other) {
+ if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+ TLI.allowsUnalignedMemoryAccesses(VT)) {
+ VT = TLI.getPointerTy();
+ } else {
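+ // Pick the widest type whose natural alignment the destination satisfies;
+ // e.g. DstAlign == 4 selects i32 below and DstAlign == 2 selects i16.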
+ switch (DstAlign & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ VT = MVT::i64;
+ while (!TLI.isTypeLegal(VT))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ VTSize = VT.getSizeInBits() / 8;
+ } else {
+ // This can result in a type that is not legal on the target, e.g.
+ // 1 or 2 bytes on PPC.
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ VTSize >>= 1;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memcpy of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than maybe a humongous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ std::string Str;
+ bool CopyFromStr = isMemSrcFromString(Src, Str);
+ bool isZeroStr = CopyFromStr && Str.empty();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ (isZeroStr ? 0 : SrcAlign),
+ true, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (CopyFromStr &&
+ (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constantpool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ } else {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+ // FIXME does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.bitsGE(VT));
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
+ MinAlign(SrcAlign, SrcOff));
+ Store = DAG.getTruncStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memmove of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ SrcAlign, true, false, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, SrcAlign);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ // Turn a memset of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+  // Expand memset to a series of store ops if the size operand falls below
+  // a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool NonScalarIntSafe =
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ Size, (DstAlignCanChange ? 0 : Align), 0,
+ NonScalarIntSafe, false, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+
+    // If this store is smaller than the largest store, see whether we can get
+    // the smaller value for free with a truncate.
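+    // For example (illustrative): with a fill byte of 0xAB the i32 pattern is
+    // 0xABABABAB, and when the target's i32->i16 truncate is free the i16
+    // store simply reuses 0xABAB instead of rematerializing the splat.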
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),Align,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ true, DstPtrInfo, SrcPtrInfo);
+ }
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
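+// Illustrative use of getMemcpy (hypothetical values DAG, Ch, DstPtr, SrcPtr,
+// DstV, SrcV and a DebugLoc dl; not part of the interface above):
+//   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+//   SDValue Len = DAG.getConstant(32, PtrVT);
+//   SDValue Copy = DAG.getMemcpy(Ch, dl, DstPtr, SrcPtr, Len, /*Align=*/4,
+//                                /*isVol=*/false, /*AlwaysInline=*/false,
+//                                MachinePointerInfo(DstV),
+//                                MachinePointerInfo(SrcV));
+// A constant, in-limit size is expanded inline; otherwise this falls back to
+// target-specific code or a libc call, as above.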
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
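+// Illustrative use of getMemset (hypothetical values DAG, Ch, DstPtr, DstV
+// and a DebugLoc dl; not part of the interface above): zeroing a 64-byte
+// object:
+//   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+//   SDValue Zero = DAG.getConstant(0, MVT::i8);
+//   Ch = DAG.getMemset(Ch, dl, DstPtr, Zero, DAG.getConstant(64, PtrVT),
+//                      /*Align=*/16, /*isVol=*/false,
+//                      MachinePointerInfo(DstV));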
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ EVT VT = Cmp.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Cmp, Swp, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
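+// Illustrative use (hypothetical values DAG, Chain, Ptr, Cmp, Swp, PtrV and a
+// DebugLoc dl): an i32 compare-and-swap; result 0 is the loaded value and
+// result 1 the output chain:
+//   SDValue CAS = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MVT::i32, Chain,
+//                               Ptr, Cmp, Swp, MachinePointerInfo(PtrV),
+//                               /*Alignment=*/4);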
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Val, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, PtrInfo, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = 0;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+  // Memoize the node unless it produces a glue result.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
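+// Illustrative use (hypothetical operands DAG, Chain, IntID, Ptr, PtrV and a
+// DebugLoc dl): a chained target intrinsic that only reads 16 bytes:
+//   EVT RetVTs[] = { MVT::v4i32, MVT::Other };
+//   SDValue Ops[] = { Chain, IntID, Ptr };
+//   SDValue R =
+//     DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, RetVTs, 2, Ops, 3,
+//                             MVT::v4i32, MachinePointerInfo(PtrV),
+//                             /*Align=*/16, /*Vol=*/false, /*ReadMem=*/true,
+//                             /*WriteMem=*/false);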
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
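+// For example (illustrative): a pointer built as FrameIndex#3 plus a constant
+// offset of 8 yields MachinePointerInfo::getFixedStack(3, 8), so later passes
+// can still reason about the spill slot even without an IR Value.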
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+  // If the 'Offset' value is a constant, fold it in; an undef offset is
+  // treated as zero. Anything else we can't handle.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(VT);
+
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use an extending load to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use an extending load to change the number of vector "
+           "elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+}
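+// Illustrative use (hypothetical values DAG, Chain, Ptr, SV and a DebugLoc
+// dl):
+//   SDValue L = DAG.getLoad(MVT::i32, dl, Chain, Ptr,
+//                           MachinePointerInfo(SV), /*isVolatile=*/false,
+//                           /*isNonTemporal=*/false, /*Alignment=*/4,
+//                           /*TBAAInfo=*/0);
+//   Chain = L.getValue(1);  // the new memory chain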
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
+}
+
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+ "Load is already a indexed load!");
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
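+// Illustrative use (hypothetical values DAG, Chain, Val, Ptr, SV and a
+// DebugLoc dl):
+//   Chain = DAG.getStore(Chain, dl, Val, Ptr, MachinePointerInfo(SV),
+//                        /*isVolatile=*/false, /*isNonTemporal=*/false,
+//                        /*Alignment=*/4, /*TBAAInfo=*/0);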
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ false, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT,bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
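+// Illustrative use (hypothetical values DAG, Chain, Val, Ptr, SV and a
+// DebugLoc dl): storing only the low 16 bits of an i32 Val:
+//   Chain = DAG.getTruncStore(Chain, dl, Val, Ptr, MachinePointerInfo(SV),
+//                             MVT::i16, /*isVolatile=*/false,
+//                             /*isNonTemporal=*/false, /*Alignment=*/2,
+//                             /*TBAAInfo=*/0);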
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ true, SVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+ "Store is already a indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(),
+ ST->getMemoryVT(),
+ ST->getMemOperand());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV,
+ unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+  // Memoize the node unless it produces a glue result.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: llvm_unreachable("Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
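+// Illustrative use (hypothetical load node Ld and chain NewChain): re-chaining
+// a load in place without rebuilding it:
+//   SDValue NewOps[] = { NewChain, Ld->getOperand(1), Ld->getOperand(2) };
+//   SDNode *Res = DAG.UpdateNodeOperands(Ld, NewOps, 3);
+//   // Res may be an existing CSE'd node rather than Ld itself.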
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ // Reset the NodeID to -1.
+ N->setNodeId(-1);
+ return N;
+}
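+
+// Note that the machine opcode is handed to MorphNodeTo in complemented form
+// (~MachineOpc), matching the ~Opcode encoding used by getMachineNode below.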
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+ // Initialize the memory references information.
+ MN->setMemRefs(0, 0);
+ // If NumOps is larger than the # of operands we can have in a
+ // MachineSDNode, reallocate the operand list.
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+ if (MN->OperandsNeedDelete)
+ delete[] MN->OperandList;
+ if (NumOps > array_lengthof(MN->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->OperandsNeedDelete = false;
+ } else
+ MN->InitOperands(MN->OperandList, Ops, NumOps);
+ } else {
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+ N->OperandsNeedDelete = true;
+ } else
+ N->InitOperands(N->OperandList, Ops, NumOps);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = 0;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+ IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return cast<MachineSDNode>(E);
+ }
+
+ // Allocate a new MachineSDNode.
+ N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs);
+
+ // Initialize the operands list.
+ if (NumOps > array_lengthof(N->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ N->InitOperands(N->LocalOperands, Ops, NumOps);
+ N->OperandsNeedDelete = false;
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyMachineNode(N);
+#endif
+ return N;
+}
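+// Illustrative use (hypothetical register-class id RCID, value V, a DebugLoc
+// dl and a SelectionDAG DAG):
+//   MachineSDNode *MN =
+//     DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, MVT::i32, V,
+//                        DAG.getTargetConstant(RCID, MVT::i32));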
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
+
+/// getDbgValue - Creates a SDDbgValue node.
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+/// This class also manages a "downlink" DAGUpdateListener, to forward
+/// messages to ReplaceAllUsesWith's callers.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::DAGUpdateListener *DownLink;
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+
+ // Then forward the message.
+ if (DownLink) DownLink->NodeDeleted(N, E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Just forward the message.
+ if (DownLink) DownLink->NodeUpdated(N);
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : DownLink(dl), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+ DAGUpdateListener *UpdateListener) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+  assert(From != To.getNode() &&
+         "Cannot replace uses of a value with itself");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
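+
+// Illustrative usage (a sketch; OldNode, A, B, DL and VT are hypothetical):
+// after building a simplified replacement, a caller redirects every user of
+// the old single-result node to it:
+//
+//   SDValue New = DAG.getNode(ISD::ADD, DL, VT, A, B);
+//   DAG.ReplaceAllUsesWith(SDValue(OldNode, 0), New, /*UpdateListener=*/0);
+//
+// Users that become identical to existing nodes are merged through the CSE
+// maps, and uses created by that merging are deliberately not revisited (see
+// the PR3018 note above).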
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+ DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+ const SDValue *To,
+ DAGUpdateListener *UpdateListener) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The Deleted
+/// vector is handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+ DAGUpdateListener *UpdateListener){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, UpdateListener);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num,
+ DAGUpdateListener *UpdateListener){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+ // Read up all the uses and make records of them. This helps
+ // processing new uses that are introduced during the
+ // replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
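+
+// Illustrative usage (a sketch; From0/From1 and To0/To1 are hypothetical
+// SDValues): several values, possibly from different nodes, can be rewritten
+// in one pass:
+//
+//   SDValue FromVals[] = { From0, From1 };
+//   SDValue ToVals[]   = { To0, To1 };
+//   DAG.ReplaceAllUsesOfValuesWith(FromVals, ToVals, 2, 0);
+//
+// Recording the uses up front and sorting them by user keeps each user's
+// CSE-map removal and re-insertion down to a single round trip.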
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. The nodes in the AllNodes list are
+/// reordered to match, and the number of nodes (one past the largest id) is
+/// returned.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ checkForCycles(N);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands, add it to the result array immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ checkForCycles(N);
+ // N is in sorted position, so all its uses have one less operand
+ // that needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (I == SortedPos) {
+#ifndef NDEBUG
+ SDNode *S = ++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topological sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
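+
+// Worked example (a sketch of the algorithm above): for a DAG containing the
+// EntryToken, two Constants and an ADD of those constants, the first pass
+// moves the three operand-less nodes to the front (ids 0, 1, 2) and stashes
+// the ADD's operand count (2) in its NodeId. The second pass visits the
+// sorted nodes in order; each Constant decrements the ADD's outstanding
+// count, and once it reaches zero the ADD is spliced in at SortedPos with
+// id 3.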
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+ assert(SD && "Trying to assign an order to a null node!");
+ Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+ assert(SD && "Trying to get the order of a null node!");
+ return Ordering->getOrder(SD);
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ DbgInfo->add(DB, SD, isParameter);
+ if (SD)
+ SD->setHasDebugValue(true);
+}
+
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+ const GlobalValue *GA,
+ EVT VT, int64_t o, unsigned char TF)
+ : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this node is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
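+
+// Illustrative usage (a sketch; Chain and Dest are hypothetical chain
+// values): callers typically use this before folding or reordering across a
+// chain, e.g.
+//
+//   bool Safe = Chain.reachesChainWithoutSideEffects(Dest, 2);
+//
+// Because the walk is depth-limited, a false result only means the property
+// could not be proven within the limit, not that a side effect necessarily
+// exists on the path.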
+
+/// hasPredecessor - Return true if N is a predecessor of this node.
+/// N is either an operand of this node, or can be reached by recursively
+/// traversing up the operands.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const {
+ if (Visited.empty()) {
+ Worklist.push_back(this);
+ } else {
+ // Take a look in the visited set. If we've already encountered this node
+ // we needn't search further.
+ if (Visited.count(N))
+ return true;
+ }
+
+ // Haven't visited N yet. Continue the search.
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SDNode *Op = M->getOperand(i).getNode();
+ if (Visited.insert(Op))
+ Worklist.push_back(Op);
+ if (Op == N)
+ return true;
+ }
+ }
+
+ return false;
+}
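+
+// Illustrative usage (a sketch; N, A and B are hypothetical SDNode pointers):
+// because Visited and Worklist are passed by reference, a caller testing one
+// node against several candidate predecessors can reuse the partially
+// explored state instead of restarting the walk each time:
+//
+//   SmallPtrSet<const SDNode *, 32> Visited;
+//   SmallVector<const SDNode *, 16> Worklist;
+//   bool HasA = N->hasPredecessorHelper(A, Visited, Worklist);
+//   bool HasB = N->hasPredecessorHelper(B, Visited, Worklist);
+//
+// hasPredecessor above is the single-query convenience wrapper.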
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->get(getMachineOpcode()).getName();
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE:
+ return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP:return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::VSETCC: return "vsetcc";
+ case ISD::SELECT: return "select";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTLZ: return "ctlz";
+
+ // Trampolines
+ case ISD::TRAMPOLINE: return "trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default:
+ return "";
+ case ISD::PRE_INC:
+ return "<pre-inc>";
+ case ISD::PRE_DEC:
+ return "<pre-dec>";
+ case ISD::POST_INC:
+ return "<post-inc>";
+ case ISD::POST_DEC:
+ return "<post-dec>";
+ }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+ std::string S = "< ";
+
+ if (isZExt())
+ S += "zext ";
+ if (isSExt())
+ S += "sext ";
+ if (isInReg())
+ S += "inreg ";
+ if (isSRet())
+ S += "sret ";
+ if (isByVal())
+ S += "byval ";
+ if (isNest())
+ S += "nest ";
+ if (getByValAlign())
+ S += "byval-align:" + utostr(getByValAlign()) + " ";
+ if (getOrigAlign())
+ S += "orig-align:" + utostr(getOrigAlign()) + " ";
+ if (getByValSize())
+ S += "byval-size:" + utostr(getByValSize()) + " ";
+ return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent)
+{
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+
+ dbgs() << "\n";
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i= 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ getConstant(i, TLI.getPointerTy()));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE),
+ &Scalars[0], Scalars.size());
+}
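+
+// Illustrative behaviour (a sketch): unrolling a <4 x f32> FADD with
+// ResNE == 0 produces four scalar FADDs, each fed by EXTRACT_VECTOR_ELT of
+// the original operands, reassembled with a BUILD_VECTOR of four elements:
+//
+//   SDValue Unrolled = DAG.UnrollVectorOp(N, 0);   // N is the vector FADD
+//
+// Passing a ResNE larger than the source width pads the tail with UNDEF
+// elements; a smaller ResNE unrolls only the first ResNE lanes.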
+
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
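+
+// Worked example (a sketch): if Base loads 4 bytes from GV+8 and LD loads
+// 4 bytes from GV+12 on the same chain, isConsecutiveLoad(LD, Base, 4, 1)
+// returns true because Offset1 (12) == Offset2 (8) + Dist (1) * Bytes (4).
+// A negative Dist asks whether LD sits that many slots before Base.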
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ const GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ // If GV has specified alignment, then use it. Otherwise, use the preferred
+ // alignment.
+ unsigned Align = GV->getAlignment();
+ if (!Align) {
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->hasInitializer()) {
+ const TargetData *TD = TLI.getTargetData();
+ Align = TD->getPreferredAlignment(GVar);
+ }
+ }
+ }
+ return MinAlign(Align, GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
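+
+// Worked example (a sketch): for a pointer of the form GV+12 where GV has
+// (or prefers) 16-byte alignment, MinAlign(16, 12) yields 4, so only 4-byte
+// alignment can be inferred. A plain FrameIndex returns the stack object's
+// alignment, FI+Cst folds the constant offset in the same way, and anything
+// else returns 0 ("unknown").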
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS << std::string(indent, ' ');
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool isBigEndian) {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
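+
+// Worked example (a sketch): a <4 x i8> build_vector of four constant 0xAB
+// elements starts with sz == 32 and SplatValue == 0xABABABAB. Each halving
+// step finds matching halves, so the loop stops at sz == 8 and reports
+// SplatBitSize == 8 with SplatValue == 0xAB. If one element were undef, its
+// bits would land in SplatUndef, be ignored by the comparison, and set
+// HasAnyUndefs.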
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+#ifdef XDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSet<const SDNode*, 32> &Visited,
+ SmallPtrSet<const SDNode*, 32> &Checked) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N)) {
+ dbgs() << "Offending node:\n";
+ N->dumprFull();
+ errs() << "Detected cycle in SelectionDAG\n";
+ abort();
+ }
+
+ for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+ checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 000000000000..81b03ee76a5c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6602 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SDNodeDbgValue.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts,
+ unsigned NumParts, EVT PartVT, EVT ValueVT,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL,
+ Parts + RoundParts, OddParts, PartVT, OddVT);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, DL, PartVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(1));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ llvm_unreachable("Unknown mismatch!");
+ return SDValue();
+}
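+
+// Worked example (a sketch): assembling an i64 from two i32 parts bitcasts
+// each part to the i32 half type, swaps them on big-endian targets and joins
+// them with BUILD_PAIR. For a non-power-of-2 split such as three i32 parts
+// forming an i96 on a little-endian target, the first two parts become the
+// 64-bit "round" half and the odd third part is any-extended, shifted left
+// by 64 and ORed in.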
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
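+    // For example, on a target where a <8 x i16> value is lowered as two
+    // legal <4 x i16> registers, the breakdown gives NumIntermediates = 2,
+    // IntermediateVT = <4 x i16>, RegisterVT = <4 x i16>, and NumRegs = 2.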
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
+
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartVT) {
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+    // If the parts cover fewer bits than the value has, truncate the value.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
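+  // For example, with four parts the first pass splits the value into two
+  // halves at Parts[0] and Parts[2]; the second pass splits each half into
+  // the final quarter-sized parts.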
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
+
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (NumParts == 1) {
+ if (PartVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+      // Vector widening case, e.g. <2 x float> -> <4 x float>. Pad the
+      // value out with undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ bool Smaller = PartVT.bitsLE(ValueVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+    } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ IntermediateVT, Val,
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate value
+    // into legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ }
+}
+
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
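+  ///
+  /// For example, an i128 value on a target whose widest legal integer
+  /// register type is i32 would have ValueVTs = {i128}, RegVTs = {i32},
+  /// and four entries in Regs.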
+ ///
+ struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+    /// With virtual registers, the contents of RegVTs are redundant with
+    /// TLI's getRegisterType member function; with physical registers,
+    /// however, a separate record of the types is necessary.
+ ///
+ SmallVector<EVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ EVT regvt, EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal(const TargetLowering &TLI) {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT RegisterVT = RegVTs[Value];
+ if (!TLI.isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
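+      // For example, if the defining block proved that only the low 8 bits
+      // of an i32 virtual register can be nonzero, an AssertZext from i8 is
+      // attached so later combines can rely on the known zero bits.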
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
+
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Get the list of the values' legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker, matching input operand index
+/// (if applicable), and includes the number of values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
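+  // The flag word encodes the operand kind and the number of registers in
+  // this operand; for a tied operand it also records the index of the
+  // matching inline asm operand.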
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ EVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+ AA = &aa;
+ GFI = gfi;
+ TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ CurDebugLoc = DebugLoc();
+ HasTailCall = false;
+}
+
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from clear() so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
+ if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
+ DAG.AssignOrdering(Node, SDNodeOrder);
+
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
+ AssignOrderingToNode(Node->getOperand(I).getNode());
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I))
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+
+ CurDebugLoc = I.getDebugLoc();
+
+ visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ CopyToExportRegsIfNeeded(&I);
+
+ CurDebugLoc = DebugLoc();
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+#include "llvm/Instruction.def"
+ }
+
+ // Assign the ordering to the freshly created DAG nodes.
+ if (NodeMap.count(&I)) {
+ ++SDNodeOrder;
+ AssignOrderingToNode(getValue(&I).getNode());
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
+// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ resolveDanglingDebugInfo(V, N);
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return DAG.getConstant(0, TLI.getPointerTy());
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
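+      // For example, a constant of type {i32, [2 x float]} is flattened
+      // into three leaf SDValues here.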
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+
+ return DAG.getMergeValues(&Constants[0], NumElts,
+ getCurDebugLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ const VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CP->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ }
+
+ llvm_unreachable("Can't get register for value!");
+ return SDValue();
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
+ Chains[i] =
+ DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
+
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ /*isfixed=*/true));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+ Outs, OutVals, getCurDebugLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return the branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock());
+ BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock());
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ uint32_t weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, weight);
+}
+
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ llvm_unreachable("Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression whose top-level opcode
+/// matches Opc (a one-use Or or And of exportable subconditions), recursively
+/// emit branches for its operands; otherwise emit Cond itself as a leaf
+/// branch via EmitBranchForMergedCondition.
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = BrMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0], BrMBB);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, Succ0MBB, Succ1MBB, BrMBB);
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB);
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+  // Insert the false branch. Do this even if it's a fall-through branch;
+  // this makes it easier to do DAG optimizations that require inverting
+  // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce
+/// an index into the jump table from the value being switched on.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the lowest switch case value from the value being switched on and
+  // conditionally branch to the default MBB if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+  // therefore require extension or truncation.
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()), Sub,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits the code necessary to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+        // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
+
+ B.RegVT = VT;
+ B.Reg = FuncInfo.CreateReg(VT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, Sub);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB != NextBlock)
+ BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
+ DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ EVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = CountPopulation_64(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+ ISD::SETNE);
+ } else {
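+    // General case: materialize (1 << ShiftOp) and AND it with the mask; the
+    // branch to the target block is taken whenever the selected bit is set,
+    // e.g. for mask 0b1010 when the adjusted switch value is 1 or 3.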
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
+ ISD::SETNE);
+ }
+
+ addSuccessorWithWeight(SwitchBB, B.TargetBB);
+ addSuccessorWithWeight(SwitchBB, NextMBB);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (NextMBB != NextBlock)
+ BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ InvokeMBB->addSuccessor(Return);
+ InvokeMBB->addSuccessor(LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If any two of the cases have the same destination, and one case value is
+ // the same as the other except for a single bit that the other has set, use
+ // bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
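+ // For the example above: Small == 4 (0b100), Big == 6 (0b110), so the
+ // popcounts are 1 and 2, (4 | 6) == 6, and CommonBit == 6 & ~4 == 2,
+ // which yields the test ((X | 2) == 6).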
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ SwitchBB->addSuccessor(Small.BB);
+ SwitchBB->addSuccessor(Default);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
+
+ // Rearrange the case blocks so that the last one falls through if possible.
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+ // This is just a small case range containing exactly one case.
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETLE;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+ CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return !DisableJumpTables &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
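+ // Work at BitWidth+1 bits so the subtraction below cannot overflow: e.g. for
+ // i8 cases spanning -128..127 the range is 256, which needs 9 bits.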
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize.ult(4))
+ return false;
+
+ APInt Range = ComputeRange(First, Last);
+ double Density = TSize.roundToDouble() / Range.roundToDouble();
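+ // For example, 5 cases covering the values 10..40 give TSize = 5 and
+ // Range = 31, so Density is roughly 0.16 and no jump table is emitted;
+ // 10 contiguous cases give Density = 1.0 and pass this check.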
+ if (Density < 0.4)
+ return false;
+
+ DEBUG(dbgs() << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << '\n'
+ << "Range: " << Range
+ << ". Size: " << TSize << ". Density: " << Density << "\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.sle(TEI) && TEI.sle(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
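+ // For example, with clusters [1,4] -> A and [6,6] -> B (First = 1, Last = 6),
+ // this builds DestBBs = { A, A, A, A, Default, B }.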
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ addSuccessorWithWeight(JumpTableBB, *I);
+ }
+ }
+
+ // Create a jump table index for this jump table.
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+ ->createJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+ if (CR.CaseBB == SwitchBB)
+ visitJumpTableHeader(JT, JTH, SwitchBB);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+
+ return true;
+}
+
+/// handleBTSplitSwitchCase - emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+ // Select the optimal pivot, maximizing the summed density of the LHS and RHS.
+ // This will (heuristically) allow us to emit jump tables later.
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
+ DEBUG(dbgs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. that use 80-bit X87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
+ // Should always split in some non-trivial place
+ DEBUG(dbgs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
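+ // For example, for the single-value cases {0,1,2,3,100} the split between 3
+ // and 100 has LDensity == RDensity == 1.0 and Range == 98, giving a Metric of
+ // about 12, far larger than any split inside the dense 0..3 run, so the pivot
+ // lands on 100 and the left subtree stays jump-table friendly.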
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the LHS node if the value being switched on (SV) is less than C.
+ // Otherwise, branch to the RHS node.
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - if the current case range has few destinations
+/// and spans less than the machine word bitwidth, encode the case range into a
+/// series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB){
+ EVT PTy = TLI.getPointerTy();
+ unsigned IntPtrBits = PTy.getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+ // A single case counts as one comparison, a case range as two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+ // Don't bother with the code below if there are too many unique destinations.
+ return false;
+ }
+ DEBUG(dbgs() << "Total number of unique destinations: "
+ << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(IntPtrBits) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(dbgs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+ // If all the case values are already in the range [0, IntPtrBits), the bit
+ // masks can be built without subtracting minValue, so skip the subtraction.
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+ assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0));
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
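+ // For example, if the values 1, 3, 5, 7 and 9 branch to one block and 4 to
+ // another (with lowBound 0), the masks built above are 0x2AA and 0x010.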
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(dbgs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == SwitchBB)
+ visitBitTestHeader(BTB, SwitchBB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+ size_t numCmps = 0;
+
+ // Start with "simple" cases
+ for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ Cases.push_back(Case(SI.getSuccessorValue(i),
+ SI.getSuccessorValue(i),
+ SMBB));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters.
+ if (Cases.size() >= 2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
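+ // For example, cases 1 -> A, 2 -> A, 3 -> A, 7 -> B are merged into the
+ // clusters [1,3] -> A and [7,7] -> B, for a total of 3 comparisons below.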
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (SI.getNumOperands() == 2) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ SwitchMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ numCmps = 0;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ const Value *SV = SI.getOperand(0);
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the range has few cases (three or fewer) emit a series of specific
+ // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the switch has at least four cases, is at least 40% dense, and the
+ // target supports indirect branches, then emit a jump table rather than
+ // lowering the switch to a binary tree of conditional branches.
+ if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // Emit a binary tree. We need to pick a pivot, and push left and right ranges
+ // onto the worklist. Leaves are handled via handleSmallSwitchRange() calls.
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+ }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallVector<BasicBlock*, 32> succs;
+ succs.reserve(I.getNumSuccessors());
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+ succs.push_back(I.getSuccessor(i));
+ array_pod_sort(succs.begin(), succs.end());
+ succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
+ for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+ // -0.0 - X --> fneg
+ const Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
+ // If the operand is smaller than the shift count type, promote it.
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
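+ // On a target whose shift-amount type is i8 (x86, for example), an i32 shift
+ // amount is truncated here, since 8 >= Log2_32_Ceil(32) == 5 means i8 can
+ // still represent every legal shift of an i32 value.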
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ }
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ // Turn exact SDivs into multiplications.
+ // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+ // exact bit.
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+ !isa<ConstantSDNode>(Op1) &&
+ isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+ setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+ else
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+ Cond,
+ SDValue(TrueVal.getNode(),
+ TrueVal.getResNo() + i),
+ SDValue(FalseVal.getNode(),
+ FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // ZExt also can't be a cast to bool for the same reason, so there is nothing
+ // much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // SExt also can't be a cast to bool for the same reason, so there is nothing
+ // much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I){
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+
+ // BitCast assures us that source and destination are the same size so this is
+ // either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ DestVT, N)); // convert types.
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Returns true if every defined element of
+// the mask equals its position plus SIndx (undefs are allowed), i.e. the mask
+// selects a sequential run of elements starting at SIndx.
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+ unsigned MaskNumElts = Mask.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+ return false;
+ return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SmallVector<int, 8> Mask;
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ // Convert the ConstantVector mask operand into an array of ints, with -1
+ // representing undef values.
+ SmallVector<Constant*, 8> MaskElts;
+ cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+ unsigned MaskNumElts = MaskElts.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (isa<UndefValue>(MaskElts[i]))
+ Mask.push_back(-1);
+ else
+ Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+ }
+
+ EVT VT = TLI.getValueType(I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // The mask is longer than the source vectors, and its length is a multiple
+ // of the source vector length. We can concatenate vectors to make the mask
+ // and vector lengths match.
+ if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx);
+ else
+ MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle. The analysis is done by calculating
+ // the range of elements the mask accesses in each source vector.
+ int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ int Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+ // Check whether the accessed range is smaller than the vector size and
+ // whether we can find a reasonable extract index.
+ int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
+ // Extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (int Input=0; Input < 2; ++Input) {
+ if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+ // Fits within range but we should see if we can find a good
+ // start index that is a multiple of the mask length.
+ if (MaxRange[Input] < (int)MaskNumElts) {
+ RangeUse[Input] = 1; // Extract from beginning of the vector
+ StartIdx[Input] = 0;
+ } else {
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+ }
+ }
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (int Input=0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0)
+ Src = DAG.getUNDEF(VT);
+ else
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ MappedOps.push_back(Idx);
+ else if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx - StartIdx[0]);
+ else
+ MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+ // We can't use either concat vectors or extract subvectors, so fall back to
+ // replacing the shuffle with per-element extracts and a build vector.
+ EVT EltVT = VT.getVectorElementType();
+ EVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ } else {
+ int Idx = Mask[i];
+ SDValue Res;
+
+ if (Idx < (int)SrcNumElts)
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src1, DAG.getConstant(Idx, PtrVT));
+ else
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src2,
+ DAG.getConstant(Idx - SrcNumElts, PtrVT));
+
+ Ops.push_back(Res);
+ }
+ }
+
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ const Type *AggTy = I.getType();
+ const Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Type *AggTy = Op0->getType();
+ const Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+ // Ignore an extractvalue that produces an empty object.
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ const Type *Ty = I.getOperand(0)->getType();
+
+ for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getIntPtrConstant(Offset));
+ }
+
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ uint64_t Offs =
+ TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ SDValue OffsVal;
+ EVT PTy = TLI.getPointerTy();
+ unsigned PtrBits = PTy.getSizeInBits();
+ if (PtrBits < 64)
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
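+ // For example, indexing an array of i32 with the constant index 3 folds to
+ // adding Offs = 4 * 3 = 12 bytes to the pointer.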
+
+ // N = N + Idx * ElementSize;
+ APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+ TD->getTypeAllocSize(Ty));
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getPointerTy()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ const Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy();
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, IntPtr));
+
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If it is greater than the stack
+ // alignment, we note this in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+ // by adding StackAlign-1 to the size.
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
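+ // For example, with a 16-byte stack alignment an AllocSize of 40 becomes
+ // (40 + 15) & ~15 == 48.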
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ const Type *Ty = I.getType();
+
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile() || NumValues > MaxParallelChains)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, Alignment, TBAAInfo);
+
+ Values[i] = L;
+ Chains[ChainI] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ // Get the lowered operands. Note that we do this after
+ // checking if NumValues is zero, because with zero values
+ // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+ // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand unless it's a target intrinsic
+ // that lowers to a custom opcode (target memory intrinsics using the generic
+ // INTRINSIC_VOID/INTRINSIC_W_CHAIN opcodes still get the ID).
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
+ assert(TLI.isTypeLegal(Op.getValueType()) &&
+ "Intrinsic uses a non-legal type?");
+ Ops.push_back(Op);
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+ for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+ assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+ "Intrinsic uses a non-legal type?");
+ }
+#endif // NDEBUG
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+ // This is a target intrinsic that touches memory.
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+ }
+
+ setValue(&I, Result);
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number in the range [1.0, 2.0):
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
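+///
+/// For example, for 6.0f (bit pattern 0x40C00000) this produces 0x3FC00000,
+/// i.e. 1.5f, since 6.0 == 1.5 * 2^2.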
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
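+///
+/// For example, for 6.0f (bit pattern 0x40C00000) the biased exponent field is
+/// 129, so this returns (float)(129 - 127) == 2.0f.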
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant with the given bit
+/// pattern.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// implVisitBinaryAtomic - Inlined utility function to implement binary-input
+/// atomic intrinsics for visitIntrinsicCall. I is the call instruction and Op
+/// is the associated ISD node type.
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
+ ISD::NodeType Op) {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(Op, getCurDebugLoc(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ I.getArgOperand(0));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
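+ // Adding IntegerPartOfX into the bit pattern of 2^FractionalPartOfX below
+ // multiplies the result by 2^IntegerPartOfX. For example, exp(1.5):
+ // 1.5 * LOG2OFe is about 2.164, so we approximate 2^0.164 (about 1.120) and
+ // the +2 in the exponent field scales it to roughly 4.482 == exp(1.5).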
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
+ MVT::i32, t13);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
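+
+// A scalar C model of the limited-precision expansion above (an illustrative
+// sketch only; the helper name is invented and only the 6-bit polynomial is
+// shown):
+//
+//   float fast_expf(float x) {
+//     float t0 = x * 1.4426950f;            // x * log2(e)
+//     int ipart = (int)t0;                  // IntegerPartOfX
+//     float f = t0 - (float)ipart;          // FractionalPartOfX
+//     // 6-bit-accurate polynomial for 2^f:
+//     float p = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;
+//     int bits;
+//     memcpy(&bits, &p, sizeof bits);       // ISD::BITCAST to i32
+//     bits += ipart << 23;                  // add exponent in integer domain
+//     memcpy(&p, &bits, sizeof p);          // ISD::BITCAST back to f32
+//     return p;                             // ~= exp(x)
+//   }
+//
+// visitExp2 and visitPow (base 10.0f) below follow the same scheme, with no
+// initial multiply and with a multiply by log2(10) = 3.3219281f, respectively.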
+
+/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
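+
+// A scalar C model of the expansion above (an illustrative sketch only; the
+// helper name is invented and only the 6-bit polynomial is shown). visitLog2
+// and visitLog10 below differ only in the scale applied to the exponent and
+// in the polynomial coefficients:
+//
+//   float fast_logf(float x) {             // assumes x is positive and finite
+//     int bits;
+//     memcpy(&bits, &x, sizeof bits);
+//     float e = (float)((bits >> 23) - 127);         // GetExponent
+//     int mbits = (bits & 0x007fffff) | 0x3f800000;  // GetSignificand:
+//     float m;                                       //   mantissa rebuilt so
+//     memcpy(&m, &mbits, sizeof m);                  //   that m is in [1,2)
+//     // 6-bit-accurate polynomial for ln(m):
+//     float lm = -1.1609546f + (1.4034025f - 0.23903021f * m) * m;
+//     return e * 0.69314718f + lm;                   // e*ln(2) + ln(mantissa)
+//   }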
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog2(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ // Different possible minimax approximations of the significand in
+ // floating point, for various degrees of accuracy over [1,2].
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG2, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog10(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG10, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp2(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP2, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode when the base is 10.0f.
+void
+SelectionDAGBuilder::visitPow(const CallInst &I) {
+ SDValue result;
+ const Value *Val = I.getArgOperand(0);
+ DebugLoc dl = getCurDebugLoc();
+ bool IsExp10 = false;
+
+ if (getValue(Val).getValueType() == MVT::f32 &&
+ getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ APFloat Ten(10.0f);
+ IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+ }
+ }
+ }
+
+ if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(1));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // twoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FPOW, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree;
+ // otherwise we end up lowering to a call to __powidf2 (for example). When
+ // optimizing for size, we only want to do this if the expansion would
+ // produce a small number of multiplies; when not optimizing for size, we
+ // always do the full expansion.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, LHS.getValueType());
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ // If optimizing for size, don't insert too many multiplies. This
+ // inserts up to 5 multiplies.
+ CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
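+
+// A scalar model of the binary decomposition used above (an illustrative
+// sketch; the helper name is invented):
+//
+//   double powi_model(double x, int n) {
+//     unsigned e = (n < 0) ? -(unsigned)n : (unsigned)n;
+//     double res = 1.0, square = x;
+//     while (e) {
+//       if (e & 1) res *= square;      // Res = Res * CurSquare
+//       square *= square;              // CurSquare = CurSquare * CurSquare
+//       e >>= 1;
+//     }
+//     return (n < 0) ? 1.0 / res : res;
+//   }
+//
+// For example, powi(x, 13) with 13 = 0b1101 multiplies in x, x^4 and x^8.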
+
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+ if (N.getOpcode() != ISD::TRUNCATE)
+ return 0;
+
+ const SDValue &Ext = N.getOperand(0);
+ if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
+ const SDValue &CFR = Ext.getOperand(0);
+ if (CFR.getOpcode() == ISD::CopyFromReg)
+ return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+ else
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
+ }
+ return 0;
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, these will be inserted into the entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
+ const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
+ // Ignore inlined function arguments here.
+ DIVariable DV(Variable);
+ if (DV.isInlinedFnArgument(MF.getFunction()))
+ return false;
+
+ unsigned Reg = 0;
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ // If the byval argument offset is not recorded then ignore this.
+ if (!Offset)
+ Reg = 0;
+ }
+
+ if (N.getNode()) {
+ if (N.getOpcode() == ISD::CopyFromReg)
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ else
+ Reg = getTruncatedArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ }
+
+ if (!Reg) {
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
+ }
+
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+ FuncInfo.ArgDbgValues.push_back(&*MIB);
+ return true;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise, lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::setjmp:
+ return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ case Intrinsic::longjmp:
+ return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ case Intrinsic::memcpy: {
+ // Assert that the address space is < 256, since we only support
+ // user-defined address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::memset: {
+ // Assert that the address space is < 256, since we only support
+ // user-defined address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+ // Assert that the address space is < 256, since we only support
+ // user-defined address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ MDNode *Variable = DI.getVariable();
+ const Value *Address = DI.getAddress();
+ if (!Address || !DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built, so the absolute
+ // (though not the relative) SDNodeOrder values differ depending on whether
+ // debug info exists.
+ ++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return 0;
+ }
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ // Parameters are handled specially.
+ bool isParameter =
+ DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+ if (isParameter && !AI) {
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FINode)
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, 0, N);
+ return 0;
+ }
+ } else if (AI)
+ SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return 0;
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ // If the variable is pinned by an alloca in a dominating basic block,
+ // use the StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ if (!DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ uint64_t Offset = DI.getOffset();
+ const Value *V = DI.getValue();
+ if (!V)
+ return 0;
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built, so the absolute
+ // (though not the relative) SDNodeOrder values differ depending on whether
+ // debug info exists.
+ ++SDNodeOrder;
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+ SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ } else {
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else if (!V->use_empty() ) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
+ // We may expand this to cover more cases. One case where we have no
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ }
+
+ // Build a debug info table entry.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI)
+ return 0;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+ MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+ return 0;
+ }
+ case Intrinsic::eh_exception: {
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::eh_selector: {
+ MachineBasicBlock *CallMBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (CallMBB->isLandingPad())
+ AddCatchInfo(I, &MMI, CallMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getArgOperand(0));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+ DAG.setRoot(Op.getValue(1));
+ setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, MVT::i32);
+ setValue(&I, Res);
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ return 0;
+ case Intrinsic::eh_dwarf_cfa: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
+ TLI.getPointerTy());
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
+ setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ FA, Offset));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_dispatch_setup: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+ // The vector shift intrinsics with scalar shift amounts use 32-bit values,
+ // but the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+ // to zero.
+ // We must do this early because v2i32 is not a legal type.
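+ // Conceptually (an illustrative sketch of the nodes built below, not exact
+ // DAG dumper output):
+ //   ShAmt                                   ; original i32 shift amount
+ //   vec    = BUILD_VECTOR v2i32 ShAmt, 0    ; upper 32 bits forced to zero
+ //   ShAmt' = BITCAST DestVT vec             ; DestVT is the 64-bit MMX type
+ //   Res    = INTRINSIC_WO_CHAIN NewIntrinsic, op0, ShAmt'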
+ DebugLoc dl = getCurDebugLoc();
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ EVT DestVT = TLI.getValueType(I.getType());
+ const Value *Op1 = I.getArgOperand(0);
+ Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ Code);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::sqrt:
+ setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return 0;
+ case Intrinsic::sin:
+ setValue(&I, DAG.getNode(ISD::FSIN, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::cos:
+ setValue(&I, DAG.getNode(ISD::FCOS, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::log:
+ visitLog(I);
+ return 0;
+ case Intrinsic::log2:
+ visitLog2(I);
+ return 0;
+ case Intrinsic::log10:
+ visitLog10(I);
+ return 0;
+ case Intrinsic::exp:
+ visitExp(I);
+ return 0;
+ case Intrinsic::exp2:
+ visitExp2(I);
+ return 0;
+ case Intrinsic::pow:
+ visitPow(I);
+ return 0;
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return 0;
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+ MVT::i16, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+ MVT::f32, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getCalledValue());
+ EVT Ty = Arg.getValueType();
+
+ if (CI->isZero())
+ Res = DAG.getConstant(-1ULL, Ty);
+ else
+ Res = DAG.getConstant(0, Ty);
+
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::TRAMPOLINE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+ Ops, 6);
+
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::gcroot:
+ if (GFI) {
+ const Value *Alloca = I.getArgOperand(0);
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ return 0;
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return 0;
+ }
+
+ case Intrinsic::trap: {
+ StringRef TrapFuncName = getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, getCurDebugLoc());
+ DAG.setRoot(Result.second);
+ return 0;
+ }
+ case Intrinsic::uadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::UADDO);
+ case Intrinsic::sadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::SADDO);
+ case Intrinsic::usub_with_overflow:
+ return implVisitAluOverflow(I, ISD::USUBO);
+ case Intrinsic::ssub_with_overflow:
+ return implVisitAluOverflow(I, ISD::SSUBO);
+ case Intrinsic::umul_with_overflow:
+ return implVisitAluOverflow(I, ISD::UMULO);
+ case Intrinsic::smul_with_overflow:
+ return implVisitAluOverflow(I, ISD::SMULO);
+
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 5,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
+ return 0;
+ }
+ case Intrinsic::memory_barrier: {
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = getValue(I.getArgOperand(x - 1));
+
+ DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+ return 0;
+ }
+ case Intrinsic::atomic_cmp_swap: {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ MachinePointerInfo(I.getArgOperand(0)));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+ }
+ case Intrinsic::atomic_load_add:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+ case Intrinsic::atomic_load_sub:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+ case Intrinsic::atomic_load_or:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+ case Intrinsic::atomic_load_xor:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+ case Intrinsic::atomic_load_and:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+ case Intrinsic::atomic_load_nand:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+ case Intrinsic::atomic_load_max:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+ case Intrinsic::atomic_load_min:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+ case Intrinsic::atomic_load_umin:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+ case Intrinsic::atomic_load_umax:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+ case Intrinsic::atomic_swap:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ return 0;
+ }
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const Type *RetTy = FTy->getReturnType();
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MCSymbol *BeginLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().CreateTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI.LowerCallTo.
+ if (isTailCall &&
+ !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
+ isTailCall = false;
+
+ // If there's a possibility that fast-isel has already selected some amount
+ // of the current basic block, don't emit a tail call.
+ if (isTailCall && EnableFastISel)
+ isTailCall = false;
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), RetTy,
+ CS.paramHasAttr(0, Attribute::SExt),
+ CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+ CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
+ CS.getCallingConv(),
+ isTailCall,
+ !CS.getInstruction()->use_empty(),
+ Callee, Args, DAG, getCurDebugLoc());
+ assert((isTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+ if (Result.first.getNode()) {
+ setValue(CS.getInstruction(), Result.first);
+ } else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+ unsigned NumValues = Outs.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+ DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
+ Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ SmallVector<EVT, 4> RetTys;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
+
+ SDValue ReturnValue =
+ getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
+ RegisterVT, VT, AssertOp);
+ ReturnValues.push_back(ReturnValue);
+ CurReg += NumRegs;
+ }
+
+ setValue(CS.getInstruction(),
+ DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size()));
+ }
+
+  // Assign order to nodes here. If the call does not produce a result, it won't
+  // be mapped to an SDNode and visit() will not assign it an order number.
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
+ HasTailCall = true;
+ ++SDNodeOrder;
+ AssignOrderingToNode(DAG.getRoot().getNode());
+ } else {
+ DAG.setRoot(Result.second);
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.second.getNode());
+ }
+
+ if (LandingPad) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ const Type *LoadTy,
+ SelectionDAGBuilder &Builder) {
+
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ Builder.TD))
+ return Builder.getValue(LoadCst);
+ }
+
+  // Otherwise, we have to emit the load. If the pointer points to memory that
+  // is constant but cannot be folded, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+ Ptr, MachinePointerInfo(PtrVal),
+ false /*volatile*/,
+ false /*nontemporal*/, 1 /* align=1 */);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ const Type *LoadTy;
+ switch (Size->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = 0;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(Size->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(Size->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+      if (!TLI.isTypeLegal(LoadVT) || !TLI.allowsUnalignedMemoryAccesses(LoadVT))
+ ActuallyDoIt = false;
+ }
+
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ EVT CallVT = TLI.getValueType(I.getType(), true);
+ setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+ return true;
+ }
+ }
+
+
+ return false;
+}
+
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ // See if any floating point values are being passed to this function. This is
+ // used to emit an undefined reference to fltused on Windows.
+ const FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (FT->isVarArg() &&
+ !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ const Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (!i->isFloatingPointTy()) continue;
+ MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+ break;
+ }
+ }
+ }
+
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
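+      // Not a target-specific intrinsic; check whether it is a generic LLVM
+      // intrinsic.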
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ if (!F->hasLocalLinkage() && F->hasName()) {
+ StringRef Name = F->getName();
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "memcmp") {
+ if (visitMemCmpCall(I))
+ return;
+ }
+ }
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getCalledValue());
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call. More detailed checking is
+  // done within LowerCallTo, after more information about the call is known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+ /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs,
+ const TargetRegisterInfo &TRI) const {
+ if (isOutReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+ }
+ if (isInReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+ }
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context,
+ const TargetLowering &TLI,
+ const TargetData *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ const llvm::Type *OpTy = CallOperandVal->getType();
+
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+
+private:
+ /// MarkRegAndAliases - Mark the specified register and all aliases in the
+ /// specified set.
+ static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+ Regs.insert(Reg);
+ if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+ for (; *Aliases; ++Aliases)
+ Regs.insert(*Aliases);
+ }
+};
+
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
+} // end anonymous namespace
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+/// InputRegs and OutputRegs are the sets of already allocated physical registers.
+///
+static void GetRegistersForValue(SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ DebugLoc DL,
+ SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ LLVMContext &Context = *DAG.getContext();
+
+ // Compute whether this value requires an input register, an output register,
+ // or both.
+ bool isOutReg = false;
+ bool isInReg = false;
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ isOutReg = true;
+
+ // If there is an input constraint that matches this, we need to reserve
+ // the input register so no other inputs allocate to it.
+ isInReg = OpInfo.hasMatchingInput();
+ break;
+ case InlineAsm::isInput:
+ isInReg = true;
+ isOutReg = false;
+ break;
+ case InlineAsm::isClobber:
+ isOutReg = true;
+ isInReg = true;
+ break;
+ }
+
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert a bit
+ // cast of the input value. More generally, handle any case where the input
+ // value disagrees with the register class we plan to stick this in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ EVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is a FP value and we want it in FP registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = EVT::getIntegerVT(Context,
+ OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ }
+
+ EVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ std::set<unsigned> OutputRegs, InputRegs;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI.ParseConstraints(CS);
+
+ bool hasMemory = false;
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
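+  // First pass over the constraints: compute the value type of each operand
+  // and note whether the asm touches memory.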
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ EVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+ // BasicBlocks are labels, currently appearing only in asm's.
+ if (OpInfo.CallOperandVal) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+
+    // Indirect operands access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
+ }
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what we
+    // need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ const Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
+ }
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect and AlignStack bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate output reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ InlineAsm::Kind_RegDefEarlyClobber :
+ InlineAsm::Kind_RegDef,
+ false,
+ 0,
+ DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+        // Use the produced MatchedRegs object to copy the input value into the
+        // matched virtual registers and record them as INLINEASM node operands.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty())
+ report_fatal_error("Invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag);
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ MachinePointerInfo(StoresToEmit[i].second),
+ false, false, 0);
+ OutChains.push_back(Val);
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetData &TD = *TLI.getTargetData();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned NumFixedArgs,
+ CallingConv::ID CallConv, bool isTailCall,
+ bool isReturnValueUsed,
+ SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ // Handle all of the outgoing arguments.
+ SmallVector<ISD::OutputArg, 32> Outs;
+ SmallVector<SDValue, 32> OutVals;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ const Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
+        // For ByVal, alignment should come from the frontend (FE); the backend
+        // (BE) will guess if this info is not there, but there are cases it
+        // cannot get right.
+ unsigned FrameAlign;
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
+ PartVT, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+        // If it isn't the first piece, the alignment must be 1.
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ i < NumFixedArgs);
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0)
+ MyFlags.Flags.setOrigAlign(1);
+
+ Outs.push_back(MyFlags);
+ OutVals.push_back(Parts[j]);
+ }
+ }
+ }
+
+ // Handle the incoming return values from the call.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
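+    // A single EVT may be legalized into several registers, so emit one
+    // InputArg per register part.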
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = isReturnValueUsed;
+ if (RetSExt)
+ MyFlags.Flags.setSExt();
+ if (RetZExt)
+ MyFlags.Flags.setZExt();
+ if (isInreg)
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!isTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((isTailCall || InVals.size() == Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (isTailCall) {
+ DAG.setRoot(Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerCall emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ });
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (RetZExt)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
+ NumRegs, RegisterVT, VT,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+  // For a function returning void, there is no return value. We can't create
+  // such a node, so we just return a null return value in that case; nothing
+  // will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), Chain);
+
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+ return SDValue();
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Arguments used by a switch are excluded, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (EnableFastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+ }
+ return true;
+}
+
+void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
+ // If this is the entry block, emit arguments.
+ const Function &F = *LLVMBB->getParent();
+ SelectionDAG &DAG = SDB->DAG;
+ DebugLoc dl = SDB->getCurDebugLoc();
+ const TargetData *TD = TLI.getTargetData();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true);
+ Ins.push_back(RetArg);
+ }
+
+ // Set up the incoming argument description vector.
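+  // Attribute indices are 1-based; index 0 refers to the function's return value.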
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ TD->getABITypeAlignment(ArgTy);
+
+ if (F.paramHasAttr(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.paramHasAttr(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.paramHasAttr(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(I->getType());
+ const Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+        // For ByVal, alignment should be passed from the frontend (FE); the
+        // backend (BE) will guess if this info is not there, but there are
+        // cases it cannot get right.
+ unsigned FrameAlign;
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (F.paramHasAttr(Idx, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+        // If it isn't the first piece, the alignment must be 1.
+ else if (i > 0)
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+ F.isVarArg(), Ins,
+ dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ EVT VT = ValueVTs[0];
+ EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+ RegVT, VT, AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+ SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
+
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+
+ // If this argument is unused then remember its value. It is used to generate
+ // debugging information.
+ if (I->use_empty() && NumValues)
+ SDB->setUnusedArgValue(I, InVals[i]);
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+ NumParts, PartVT, VT,
+ AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index for byval arguments.
+ if (I->hasByValAttr())
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+ SDB->setValue(I, Res);
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(I)) {
+ FuncInfo->InitializeRegForValue(I);
+ SDB->CopyToExportRegsIfNeeded(I);
+ }
+ }
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
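+        // ConstantsOut caches one virtual register per constant so that
+        // repeated PHI uses of the same constant share a single copy in this
+        // block.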
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C->getType());
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI node as
+ // the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ ConstantsOut.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 000000000000..a0884ebf5d56
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,548 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+  /// UnusedArgNodeMap - Maps argument values for unused arguments. This is used
+ /// to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
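+ /// For instance (an illustrative scenario): a dbg_value whose operand has not
+ /// yet been lowered to an SDNode is parked here and resolved later through
+ /// resolveDanglingDebugInfo, once the operand's definition has been seen.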
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+ /// up and then emit a single tokenfactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// SDNodeOrder - A unique monotonically increasing number used to order the
+ /// SDNodes we create.
+ unsigned SDNodeOrder;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ Constant* Low;
+ Constant* High;
+ MachineBasicBlock* BB;
+
+ Case() : Low(0), High(0), BB(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
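+ // size() returns the number of case values covered by [Low, High] as an
+ // APInt; for example (illustrative), Low == 5 and High == 9 yields 5.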
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+ Mask(mask), BB(bb), Bits(bits) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ const Constant *LT;
+ const Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator()(const Case &C1, const Case &C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
+ struct CaseBitsCmp {
+ bool operator()(const CaseBits &C1, const CaseBits &C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+ // CmpLHS/CmpMHS/CmpRHS - The LHS/MHS/RHS of the comparison to emit.
+ // By default, LHS op RHS is emitted. MHS is used for range comparisons:
+ // if MHS is not null, emit (LHS <= MHS) and (MHS <= RHS).
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+ };
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+ /// to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+ /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+ Mask(M), ThisBB(T), TargetBB(Tr) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
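+ /// BitTestBlock - Describes the lowering of part of a switch as a sequence of
+ /// bit tests: the header block range-checks the switch value against
+ /// [First, First + Range], and each BitTestCase tests the corresponding bit
+ /// of a per-destination mask. A rough sketch of the emitted control flow
+ /// (illustrative only; the exact nodes are built by visitBitTestHeader and
+ /// visitBitTestCase):
+ ///   Tmp = SValue - First;
+ ///   if (Tmp >u Range) goto Default;
+ ///   if ((1 << Tmp) & Cases[0].Mask) goto Cases[0].TargetBB;
+ ///   ...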
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, const Value* SV,
+ unsigned Rg, EVT RgVT, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ EVT RegVT;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const TargetData *TD;
+ AliasAnalysis *AA;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ HasTailCall(false), Context(dag.getContext()) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+ /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGBuilder object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from clear() so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
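+ /// A typical use, as in SelectionDAGISel::SelectBasicBlock, is
+ ///   CurDAG->setRoot(SDB->getControlRoot());
+ /// right before the lowered DAG for the block is code-generated.
+ ///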
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+ /// AssignOrderingToNode - Assign an ordering to the node. The ordering is
+ /// derived from the order in which the code appeared in the source, and is used by the
+ /// scheduler to effectively turn off scheduling.
+ void AssignOrderingToNode(const SDNode *Node);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB, unsigned Opc);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+public:
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitUnwind(const UnwindInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitPow(const CallInst &I);
+ void visitExp2(const CallInst &I);
+ void visitExp(const CallInst &I);
+ void visitLog(const CallInst &I);
+ void visitLog2(const CallInst &I);
+ void visitLog10(const CallInst &I);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
+ const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If V is a function argument then create a
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, these instructions will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 000000000000..87bb296b8c79
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,2805 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction fails"));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+
+ if (OptLevel == CodeGenOpt::None)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::Latency)
+ return createTDListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(0);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(tm)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI whose
+/// incoming value may trap. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
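+/// As an illustrative example (hypothetical IR, not taken from this file):
+/// if %pred has two successors and supplies a PHI in %merge with a constant
+/// expression that can trap, e.g.
+///   %p = phi i32 [ sdiv (i32 1, i32 ptrtoint (i32* @G to i32)), %pred ], ...
+/// then the %pred -> %merge edge is split so that the division is evaluated
+/// only on the path that actually reaches the PHI.
+///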
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop for blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+ const Function &Fn = *mf.getFunction();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
+ CurDAG->init(*MF);
+ FuncInfo->set(Fn, *MF);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
+ SDB->init(GFI, *AA);
+
+ SelectAllBasicBlocks(Fn);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ MachineBasicBlock *EntryMBB = MF->begin();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ }
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ unsigned Offset = MI->getOperand(1).getImm();
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(LDI->second, RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instruction also needs a DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ EntryMBB->insertAfter(CopyUseMI, NewMI);
+ }
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (!MFI->hasCalls()) {
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator
+ II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ goto done;
+ }
+ }
+ }
+ done:;
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J =
+ FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ return true;
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Lower all of the non-terminator instructions. If a call is emitted
+ // as a tail call, cease emitting nodes for this block. Terminators
+ // are handled below.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+ SDB->visit(*I);
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
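+/// ComputeLiveOutVRegInfo - Walk the lowered DAG from its root along chain
+/// (MVT::Other) operands and, for each CopyToReg into a virtual register whose
+/// source is a scalar integer, record the number of known sign bits and the
+/// known-zero/known-one bits via FuncInfo->AddLiveOutRegInfo.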
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+ CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+ } while (!Worklist.empty());
+}
+
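+/// CodeGenAndEmitDAG - Run the per-block DAG pipeline: combine before
+/// legalization, legalize types and vectors (re-running the combiner after any
+/// phase that changed the DAG), legalize operations, combine once more,
+/// instruction-select the nodes, schedule them, and emit the schedule into the
+/// current MachineBasicBlock, notifying the builder via UpdateSplitBlock if
+/// emission split the block.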
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ int BlockNumber = -1;
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
+ BlockName = MF->getFunction()->getNameStr() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getNameStr();
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
+ FuncInfo->InsertPt = Scheduler->InsertPos;
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
+
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(errs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // The AllNodes list is now topologically sorted. Visit the
+ // nodes by starting at the end of the list (the root of the
+ // graph) and proceeding back toward the beginning (the entry
+ // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = --ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ SDNode *ResNode = Select(Node);
+
+ // FIXME: This is pretty gross. 'Select' should be changed to not return
+ // anything at all and this code should be nuked with a tactical strike.
+
+ // If node should not be replaced, continue with the next one.
+ if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+ continue;
+ // Replace node.
+ if (ResNode)
+ ReplaceUses(Node, ResNode);
+
+ // If after the replacement this node is not used any more,
+ // remove this dead node.
+ if (Node->use_empty()) { // Don't delete EntryToken, etc.
+ ISelUpdater ISU(ISelPosition);
+ CurDAG->RemoveDeadNode(Node, &ISU);
+ }
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
+
+ const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results
+ // in exceptions not being caught because no typeids are associated with
+ // the invoke. This may not be the only way things can go wrong, but it
+ // is the only one we try to work around for the moment.
+ const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
+ const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::const_iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+ }
+}
+
+
+
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
+ FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result; in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (EnableFastISel)
+ FastIS = TLI.createFastISel(*FuncInfo);
+
+ // Iterate over all basic blocks in the function.
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+ for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = *I;
+
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+ } else {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ // Set up an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock())
+ LowerArguments(LLVMBB);
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ FastIS->startNewBlock();
+
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
+ }
+
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ continue;
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->SelectInstruction(Inst)) {
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ continue;
+ }
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(Inst)) {
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ Inst->dump();
+ }
+
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst->getType());
+ }
+
+ bool HadTailCall = false;
+ SelectBasicBlock(Inst, BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ if (HadTailCall) {
+ --BI;
+ break;
+ }
+
+ continue;
+ }
+
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
+ // Don't abort, and use a different message for terminator misses.
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed terminator: ";
+ Inst->dump();
+ }
+ } else {
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ Inst->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't select the entire block");
+ }
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
+
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ }
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+}
+
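+/// FinishBasicBlock - After the main DAG for a block has been emitted, add the
+/// recorded register/block operands to machine PHI nodes in successor blocks
+/// and emit any auxiliary blocks produced by switch lowering (bit-test blocks,
+/// jump tables, and binary-tree case blocks), running CodeGenAndEmitDAG for
+/// each of them.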
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ dbgs() << "Node " << i << " : ("
+ << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know what the last MBB the LLVM BB expanded is, update
+ // PHI nodes in successors.
+ if (SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty()) {
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI->addOperand(
+ MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ }
+ return;
+ }
+
+ for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->BitTestCases[i].Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ if (j+1 != ej)
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+ else
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB.
+ if (PHIBB == SDB->BitTestCases[i].Default) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+ PHI->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
+ back().ThisBB));
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB)) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(cBB));
+ }
+ }
+ }
+ }
+ SDB->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+ FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->JTCases[i].second.Default) {
+ PHI->addOperand
+ (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand
+ (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
+ }
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB)) {
+ PHI->addOperand
+ (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ }
+ }
+ }
+ SDB->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
+ PHI->addOperand(
+ MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ }
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SwitchCases[i].TrueBB);
+ if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
+ Phi != FuncInfo->MBB->end() && Phi->isPHI();
+ ++Phi) {
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
+ false));
+ Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SwitchCases.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it; otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
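+///
+/// For example, if the pattern asks for (and X, 255) but the mask in the DAG
+/// has been simplified to 15, the NeededMask computed below is 240, and the
+/// match still succeeds if MaskedValueIsZero can prove that bits 4-7 of X are
+/// already zero.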
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual AND mask allows bits that are not in the desired mask, this
+  // doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual OR mask sets bits that are not in the desired mask, this
+  // doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
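+    // Each operand group begins with a flag word that encodes the operand
+    // kind and how many value operands follow it (see InlineAsm::getFlagWord
+    // and InlineAsm::getNumOperandRegisters).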
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return the user of the MVT::Glue value produced by the
+/// specified SDNode, or null if there is none.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ bool IgnoreChains) {
+  // Node IDs are assigned such that a node's ID is guaranteed to be greater
+  // than those of all of its (recursive) operands.  If we scan to a point
+  // where 'Use' has a smaller ID than the node we're scanning for, then we
+  // know we will never find it.
+  //
+  // Use's node ID may be -1 (unassigned) if it is a newly allocated node.
+  // This can happen because we scan down to newly selected nodes in the case
+  // of glue uses.
+ if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+ return false;
+
+  // Don't revisit a node if we already scanned it and didn't fail; we know we
+  // won't fail if we scan it again.
+ if (!Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
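+  // If N has more than one use, it will have to be emitted for its other
+  // users anyway, so folding it into U here would generally just duplicate
+  // the work.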
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle.  E.g., in the following diagram,
+  // Root can reach N through X.  If N is folded into Root, then X is both a
+  // predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+
+ SmallPtrSet<SDNode*, 16> Visited;
+ return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
+
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::Other);
+ VTs.push_back(MVT::Glue);
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+/// GetVBR - Decode a VBR-encoded value whose first byte (passed in as Val)
+/// has the continuation bit (128) set.
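+///
+/// For example, the value 300 is stored as the two bytes 0xAC 0x02: the first
+/// byte holds the low seven bits (44) with the continuation bit set, and the
+/// second byte holds the remaining bits (2) with it clear.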
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
+void SelectionDAGISel::
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ ISelUpdater ISU(ISelPosition);
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() != 0 &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
+ // Handle any interior nodes explicitly marked.
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (FRN->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+ InputGlue, &ISU);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (FRN->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+ NowDeadNodes.push_back(FRN);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+
+ DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(SDNode *ChainedNode,
+ SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+ SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ if (User->isMachineOpcode() ||
+ User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ if (User->getOpcode() == ISD::CopyToReg ||
+ User->getOpcode() == ISD::CopyFromReg ||
+ User->getOpcode() == ISD::INLINEASM ||
+ User->getOpcode() == ISD::EH_LABEL) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ // If we found a TokenFactor, there are two cases to consider: first if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ N->getOperand(op).getNode()))
+ InputChains.push_back(N->getOperand(op));
+ }
+ }
+
+ SDValue Res;
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+ MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ // Otherwise, no replacement happened because the node already exists. Replace
+ // Uses of the old node with the new one.
+ if (Res != Node)
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C != 0 && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current predicate.  If the predicate is known to fail,
+/// set Result=true and return anything.  If the predicate is known to pass,
+/// or if it cannot be evaluated here, set Result=false and return the
+/// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result, SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
+};
+
+}
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ //case ISD::VALUETYPE:
+ //case ISD::CONDCODE:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return 0;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+  // MatchScopes - Scopes used when matching; if a match failure happens, this
+  // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
+ DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ errs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
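+  // (In the table, each SwitchOpcode case is laid out as a VBR-encoded case
+  // size, a two-byte node opcode, and then that many bytes of matcher code,
+  // which is what the population loop below assumes.)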
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate
+ << ", continuing at " << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+        // Otherwise, we know that this case of the Scope is guaranteed to
+        // fail; move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI.getPointerTy();
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ Opcode-OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
+ Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
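+      // RecNo is 0 for the EmitMergeInputChains1_0 form and 1 for the
+      // EmitMergeInputChains1_1 form.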
+ unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (InputChain.getNode() == 0)
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
+ continue;
+ }
+
+ case OPC_EmitNode:
+ case OPC_MorphNodeTo: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
+ // Create the node.
+ SDNode *Res = 0;
+ if (Opcode != OPC_MorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+ VTList, Ops.data(), Ops.size());
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
+ }
+
+ } else {
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+ EmitNodeInfo);
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
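+      // Glue, when present, is always the last result; when both chain and
+      // glue are produced, the chain is second-to-last (matching the VT list
+      // built above).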
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+      // If the OPFL_MemRefs flag is set on this node, slap all of the
+      // accumulated memrefs onto it.
+      //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ }
+
+ DEBUG(errs() << " "
+ << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (Opcode == OPC_MorphNodeTo) {
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
+ return Res;
+ }
+
+ continue;
+ }
+
+ case OPC_MarkGlueResults: {
+ unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+ // Read and remember all the glue-result nodes.
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and any new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+ assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+
+ // FIXME: We just return here, which interacts correctly with SelectRoot
+ // above. We should fix this to not return an SDNode* anymore.
+ return 0;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return 0;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
+
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 000000000000..cd1647b17b9b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,302 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
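+
+// Illustrative usage (not part of the API): in a debug build this can be
+// invoked directly from a debugger, e.g.
+//   (gdb) call CurDAG->viewGraph()
+// assuming 'CurDAG' refers to the SelectionDAG of interest and Graphviz/gv
+// are installed.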
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node (e.g. "color=red").
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node (e.g. "color=red").
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited,
+ int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+ for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N);
+ i != iend;
+ ++i) {
+ hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 000000000000..2626ac3bbb2a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,3370 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+using namespace llvm;
+
+/// We are in the process of implementing a new TypeLegalization action:
+/// the promotion of vector elements. This feature is disabled by default
+/// and can only be enabled with this flag.
+static cl::opt<bool>
+AllowPromoteIntElem("promote-elements", cl::Hidden,
+ cl::desc("Allow promotion of integer vector element types"));
+
+namespace llvm {
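+// A rough illustration of the mapping below (not exhaustive): in a PIC
+// compilation, a thread-local global with default visibility that may be
+// defined in another module gets the GeneralDynamic model, while a local or
+// hidden one gets LocalDynamic; without PIC, a definition in the current
+// module (or a hidden declaration) gets LocalExec and any other declaration
+// gets InitialExec.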
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
+ if (reloc == Reloc::PIC_) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
+}
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
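+/// Each soft-float comparison libcall returns an integer; the condition code
+/// recorded here describes how that integer relates to zero when the original
+/// comparison holds.  For example (illustrative, following the usual libgcc
+/// convention), OEQ_F32 maps to SETEQ because __eqsf2(a, b) returns zero
+/// exactly when neither argument is NaN and a == b.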
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof),
+ mayPromoteElements(AllowPromoteIntElem) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
+ benefitFromCodePlacementOpt = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::Latency;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
+
+ InitLibcallNames(LibcallRoutineNames);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLowering::~TargetLowering() {
+ delete &TLOF;
+}
+
+MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize());
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
+
+
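+// Illustrative examples of the breakdown below (for a hypothetical target):
+// if v4i32 is legal but v8i32 is not, v8i32 breaks into NumIntermediates = 2
+// intermediate v4i32 values; a non-power-of-two type such as v3i32, on a
+// target with no legal vector types, becomes 3 scalar i32 values.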
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT,
+ TargetLowering *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ EVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// hasLegalSuperRegRegClasses - Return true if the specified register class
+/// has one or more super-reg register classes that are legal.
+bool
+TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
+ if (*RC->superregclasses_begin() == 0)
+ return false;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (isLegalRC(RRC))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+ const TargetRegisterClass *BestRC = RC;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (RRC->isASubClass() || !isLegalRC(RRC))
+ continue;
+ if (!hasLegalSuperRegRegClasses(RRC))
+ return std::make_pair(RRC, 1);
+ BestRC = RRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
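+///
+/// As a sketch of the effect, for a hypothetical 32-bit target whose largest
+/// integer register is i32 and which lacks hardware floating point: i64 is
+/// marked TypeExpandInteger and needs two registers, i16/i8/i1 are promoted
+/// to i32, f64 is softened to i64, and f32 is softened to i32.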
+void TargetLowering::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
+ EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
+ if (!ExpandedVT.isInteger())
+ break;
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
+ }
+
+  // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ EVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+      // If the promotion of vector elements is allowed (via the flag above),
+      // first try to promote the elements of integer vectors, marking them
+      // TypePromoteInteger. If no legal promotion is found, fall back to the
+      // widen-vector method.
+ if (mayPromoteElements)
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ EVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+      // The element count is already a power of 2. The default action is
+      // to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+  // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest legal register class
+  // (meaning one which is not a sub-register class) for a group of value
+  // types. For example, on i386 the representative class for i8, i16, and
+  // i32 is GR32; on x86_64 it is GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+
+MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+ return PointerTy.SimpleTy;
+}
+
+MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // we should widen to that legal vector type. This handles things like
+ // <2 x float> -> <4 x float>.
+ if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) {
+ RegisterVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterVT)) {
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ EVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
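+///
+/// Illustrative example (assuming a 32-bit target): a function returning i64
+/// with the signext attribute yields two i32 output parts, both carrying the
+/// SExt flag, at offsets 0 and 4 when Offsets is supplied.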
+void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+ unsigned Offset = 0;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+ PartVT.getTypeForEVT(ReturnType->getContext()));
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
+ if (Offsets) {
+ Offsets->push_back(Offset);
+ Offset += PartSize;
+ }
+ }
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,MCContext &Ctx) const{
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
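+///
+/// Illustrative example: for (or x, 0xFF) where only the low four bits are
+/// demanded, the constant is shrunk to 0xF, since the remaining set bits can
+/// only affect bits of the result that are never used.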
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+    // If the constant has bits set outside the demanded region, shrink it.
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
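+///
+/// Illustrative example (assuming a target where i16 <-> i32 casts are free):
+/// if only the low 16 bits of an i32 add are demanded, the add is rewritten
+/// as zext(trunc(x) + trunc(y)) performed in i16.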
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller). The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+ // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+ KnownZero = ~KnownOne & NewMask;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+ LHSZero, LHSOne, Depth);
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS or RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS or RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in only one of the LHS or RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+ if ((KnownOne & KnownOne2) == KnownOne) {
+ EVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // For XOR, we prefer to force bits to 1 if they will make a -1.
+ // If we can't force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ EVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ if ((APInt::getHighBitsSet(BitWidth,
+ BitWidth - InnerVT.getSizeInBits()) &
+ DemandedMask) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy(InnerVT);
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ }
+
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getScalarType().getSizeInBits());
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if ((NewBits & NewMask) == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit =
+ APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ EVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() &&
+ !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (!ShAmt)
+ break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // Demand all the bits of the input that are demanded in the output.
+ // The low bits are obvious; the high bits are demanded because we're
+ // asserting that they're zero here.
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BITCAST:
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
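+
+ // Editorial note (not part of the upstream change), two concrete folds the
+ // routine above performs: if only bit 0 of (sra x, s) is demanded, the node
+ // is rewritten as (srl x, s), since the sign bit cannot reach bit 0 for any
+ // defined shift amount; and once every demanded bit lands in KnownZero or
+ // KnownOne, the whole expression is replaced by the constant KnownOne.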
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
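+
+ // Editorial example (not upstream code): (shl 1, x) and, for i32,
+ // (srl 0x80000000, x) both satisfy this predicate without reaching
+ // ComputeMaskedBits. A value like (and y, 8) does not: bit 3 is only
+ // possibly set, so KnownOne stays empty and the fallback returns false,
+ // which matters for the x&y == y fold further below.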
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
+ // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!TD->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ false, false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT)==Legal))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(C1.trunc(InSize), newVT),
+ Cond);
+ break;
+ }
+ default:
+ break; // TODO: be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents() == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ } else if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
+ }
+ }
+ }
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger())
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(UOF, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond)
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+ N1,
+ DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
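+
+ // Editorial note (not part of the upstream change): typical rewrites made
+ // above include range canonicalizations such as (setult X, 1) ->
+ // (seteq X, 0) and algebraic ones such as (seteq (add X, 10), 12) ->
+ // (seteq X, 2), the latter only when the add has no other users.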
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
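+
+ // Worked example (editorial, using a hypothetical global @g): for the node
+ // (add (GlobalAddress @g, 8), (Constant 16)) and an incoming Offset of 0,
+ // this returns true with GA = @g and Offset accumulated to 24, i.e. 8 from
+ // the address node plus 16 from the constant operand.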
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
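+
+ // Editorial example (not upstream code): under this default classification
+ // "r" maps to C_RegisterClass, "m" to C_Memory, "i" to C_Other, and a
+ // brace-enclosed name such as "{eax}" to C_Register; anything else falls
+ // back to C_Unknown.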
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C ? C->getDebugLoc() : DebugLoc(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend the value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
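+
+ // Editorial note (not part of the upstream change): for the constraint 'i'
+ // with an operand of the form (add GV, 4), the code above pulls the pair
+ // apart and emits a TargetGlobalAddress whose offset is GV's own offset
+ // plus 4. Under 'n', global addresses are rejected and a plain integer is
+ // pushed as a sign-extended i64 target constant.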
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I)))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
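+///
+/// For example (illustrative), for a call such as
+/// call i32 asm "add $0,$1,$2", "=r,r,r"(i32 %a, i32 %b)
+/// this returns three AsmOperandInfos: an isOutput operand whose ConstraintVT
+/// comes from the call's result type, and two isInput operands taken from the
+/// call arguments.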
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (isa<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else {
+ OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, 0 = so-so match, ..., 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag this alternative as one that cannot match.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: llvm_unreachable("Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float, if in the host's format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then using the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory.
+/// this the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
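+/// For illustration, with the multi-letter constraint "imr": a constant
+/// operand such as 42 is valid for 'i', so rule (1) selects 'i'; for a
+/// non-constant operand rule (2) selects 'm', since memory is more general
+/// than a register class.
+///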
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
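+///
+/// For example, under the default implementation below "r", "r+i" (with a
+/// small offset), "r+r" and "2*r" are accepted, while "GV+r", "r+r+i" and
+/// "2*r+i" are rejected.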
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation accepts only a conservative, RISC-like set of
+ // r+r and r+i addressing modes.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r and r+i forms (2*r is allowed as r+r).
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
+
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
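+///
+/// Worked example (i8, illustrative): the multiplicative inverse of 3 modulo
+/// 2^8 is 171, since 3 * 171 = 513 = 2 * 256 + 1. An exact sdiv by 3 thus
+/// becomes a mul by 171, e.g. 15 * 171 = 2565 = 5 (mod 256). For an even
+/// divisor such as 6, the value is first shifted right by one and then
+/// multiplied by the inverse of 3.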
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
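+///
+/// For example (i32, illustrative): sdiv by 5 uses the magic value 0x66666667
+/// with s = 1, i.e.
+/// Q = sra(mulhs(N0, 0x66666667), 1); result = add(Q, srl(Q, 31))
+/// which yields 1 for N0 = 7 and -1 for N0 = -7.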
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
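+///
+/// For example (i32, illustrative): udiv by 7 uses the magic value 0x24924925
+/// with s = 3 and the "add" fixup, i.e.
+/// Q = mulhu(N0, 0x24924925); NPQ = srl(sub(N0, Q), 1);
+/// result = srl(add(NPQ, Q), 2)
+/// which yields 14 for N0 = 100.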
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the dividend upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a081e3cd493f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getTargetData()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 000000000000..5a253a4d97e4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,441 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// A prototype runtime compatible with ShadowStackGC can be found in
+// runtime/GC/SemiSpace.cpp.
+//
+// In order to support this particular transformation, all stack roots are
+// allocated together in a single frame object on the stack. This allows a
+// fully target-independent stack map while introducing only minor runtime
+// overhead.
+//
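+// As an illustration (a simplified sketch, not the exact IR), a function with
+// a single gcroot is lowered roughly as:
+//
+// struct { StackEntry Hdr; void *Root0; } gc_frame;
+// gc_frame.Hdr.Next = llvm_gc_root_chain;
+// gc_frame.Hdr.Map = &__gc_<function>; // constant FrameMap for this frame
+// llvm_gc_root_chain = &gc_frame.Hdr;
+// ... function body, with the root living in gc_frame.Root0 ...
+// llvm_gc_root_chain = gc_frame.Hdr.Next; // restored on every return/unwind
+//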
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// Head - The global variable ("llvm_gc_root_chain") that heads the linked
+ /// list of shadow stack entries containing the GC roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
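+ ///
+ /// Typical use, as in ShadowStackGC::performCustomLowering below:
+ /// EscapeEnumerator EE(F, "cleanup");
+ /// while (IRBuilder<> *AtExit = EE.Next())
+ /// ... emit the "finally"-style code at AtExit's insertion point ...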
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(),
+ CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
+ NewBB, CleanupBB,
+ Args, CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // initializeCustomLowering creates the abstract type of this value.
+ const Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
+ };
+
+ Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
+ StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys);
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // initializeCustomLowering creates the generic version of this type.
+ std::vector<Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys);
+}
+
+/// initializeCustomLowering - Create the FrameMap and StackEntry types used by
+/// the shadow stack and find or create the global root chain.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type*> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::createNamed("gc_map", EltTys);
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+
+ StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 000000000000..160f38f69236
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop, its spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping data flow sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getNameStr();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save/restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ dbgs() << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn != AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut != AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets of anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG({
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs()
+ << "-----------------------------------------------------------\n"
+ << " Antic/Avail Sets:\n"
+ << "-----------------------------------------------------------\n"
+ << "iterations = " << iterations << "\n"
+ << "-----------------------------------------------------------\n"
+ << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+ << "-----------------------------------------------------------\n";
+
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+
+ dbgs()
+ << "-----------------------------------------------------------\n";
+ }
+ });
+}
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ dbgs() << "ENABLED: " << Fn.getFunction()->getName();
+ if (HasFastExitPath)
+ dbgs() << " (fast exit path)";
+ dbgs() << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
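+/// Informally, the equation below places a spill of CSR r in MBB when r is
+/// anticipated at MBB (needed on every path leaving MBB), is not already
+/// available (not saved on every path reaching MBB), and is not anticipated
+/// at any predecessor of MBB -- i.e. MBB is the earliest block at which the
+/// spill is certain to be needed.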
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry regions.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
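+/// Informally, and symmetrically to calcSpillPlacements, the equation below
+/// places a restore of CSR r in MBB when r is available at the end of MBB
+/// (saved on every path reaching it), is no longer anticipated (not needed
+/// on any path leaving MBB), and is not available at the exit of any
+/// successor -- i.e. MBB is the latest block at which the restore can be
+/// placed.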
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry regions.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
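+ // When only the placements change (SRChanged), the affected blocks are
+ // re-examined for multi-entry/exit coverage below; when the use sets
+ // themselves grow (changed), the Antic/Avail sets are recomputed and the
+ // placements are rebuilt from scratch on the next pass.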
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "-----------------------------------------------------------\n";
+ dbgs() << "total iterations = " << iterations << " ( "
+ << Fn.getFunction()->getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ dbgs() << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ dbgs() << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block to a return block,
+/// either directly or through a chain of blocks that do not branch.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+ // Find a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+ // If we are at a return block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n");
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n");
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n");
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ if (!MBB)
+ return "";
+
+ if (MBB->getBasicBlock())
+ return MBB->getBasicBlock()->getNameStr();
+
+ std::ostringstream name;
+ name << "_MBB_" << MBB->getNumber();
+ return name.str();
+}
+
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ DEBUG({
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (!CSRSave[MBB].empty()) {
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ dbgs() << '\n';
+ }
+
+ if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
+ dbgs() << " "
+ << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ });
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 000000000000..65a33da93afe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,588 @@
+//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class SjLjEHPass : public FunctionPass {
+
+ const TargetLowering *TLI;
+
+ const Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *SelectorFn;
+ Constant *ExceptionFn;
+ Constant *CallSiteFn;
+ Constant *DispatchSetupFn;
+
+ Value *CallSite;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ : FunctionPass(ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+ void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
+ SwitchInst *CatchSwitch);
+ void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ bool insertSjLjEHSupport(Function &F);
+ };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public Interface To the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+ return new SjLjEHPass(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
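+ // In C terms the layout built below is roughly (the field names are
+ // illustrative only, they are not part of the declared type):
+ //
+ //   struct SjLjFunctionContext {
+ //     void    *prev;          // __prev: chain of registered contexts
+ //     int32_t  call_site;     // call_site: index of the active call site
+ //     int32_t  data[4];       // __data: exception pointer / selector
+ //     void    *personality;   // __personality: personality routine
+ //     void    *lsda;          // __lsda: language-specific data area
+ //     void    *jbuf[5];       // __jbuf: builtin_setjmp buffer
+ //   };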
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+ ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ DispatchSetupFn
+ = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
+ PersonalityFn = 0;
+
+ return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+ Value *CallSite) {
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ // Insert a store of the call-site number
+ new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
+}
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+ // The runtime comes back to the dispatcher with the call_site - 1 in
+ // the context. Odd, but there it is.
+ ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo - 1);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert the store of the call site value
+ insertCallSiteStore(II, InvokeNo, CallSite);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ // We still want this to look like an invoke so we emit the LSDA properly,
+ // so we don't transform the invoke into a call here.
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge
+/// we spill into a stack location, guaranteeing that there is nothing live
+/// across the unwind edge. This process also splits all critical edges
+/// coming out of invokes.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
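+/// The approach below: (1) split the critical edges out of each invoke,
+/// (2) copy each incoming argument through a no-op instruction in the entry
+/// block so that argument values are not live across it, and (3) demote any
+/// remaining value that is live into an unwind destination to a stack slot
+/// with DemoteRegToStack.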
+void SjLjEHPass::
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the argument.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the argument.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ // Look through the terminators of the basic blocks to find invokes, returns
+ // and unwinds.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+ }
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // If we don't have any invokes, there's nothing to do.
+ if (Invokes.empty()) return false;
+
+ // Find the eh.selector.*, eh.exception and alloca calls.
+ //
+ // Remember any allocas() that aren't in the entry block, as the
+ // jmpbuf saved SP will need to be updated for them.
+ //
+ // We'll use the first eh.selector to determine the right personality
+ // function to use. For SJLJ, we always use the same personality for the
+ // whole function, not on a per-selector basis.
+ // FIXME: That's a bit ugly. Better way?
+ SmallVector<CallInst*,16> EH_Selectors;
+ SmallVector<CallInst*,16> EH_Exceptions;
+ SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
+ // Note: Skip the entry block since there's nothing there that interests
+ // us. eh.selector and eh.exception shouldn't ever be there, and we
+ // want to disregard any allocas that are there.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() == SelectorFn) {
+ if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
+ EH_Selectors.push_back(CI);
+ } else if (CI->getCalledFunction() == ExceptionFn) {
+ EH_Exceptions.push_back(CI);
+ } else if (CI->getCalledFunction() == StackRestoreFn) {
+ JmpbufUpdatePoints.push_back(CI);
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ JmpbufUpdatePoints.push_back(AI);
+ }
+ }
+ }
+
+ // If we don't have any eh.selector calls, we can't determine the personality
+ // function. Without a personality function, we can't process exceptions.
+ if (!PersonalityFn) return false;
+
+ // We have invokes, so we need to add register/unregister calls to get this
+ // function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge we
+ // spill into a stack location, guaranteeing that there is nothing live across
+ // the unwind edge. This process also splits all critical edges coming out of
+// invokes.
+ splitLiveRangesAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ // We need to also keep around a reference to the call_site field
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The result of the eh.selector call will be replaced with a reference to
+ // the selector value returned in the function context. We leave the selector
+ // itself so the EH analysis later can use it.
+ for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
+
+ // eh.exception calls are replaced with references to the proper location in
+ // the context. Unlike eh.selector, the eh.exception calls are removed
+ // entirely.
+ for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+ // Possible for there to be duplicates, so check to make sure the
+ // instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
+
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context and
+ // continue.
+
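+ // After the transformation the start of the function looks roughly like:
+ //
+ //   EntryBB: ... eh.sjlj.setjmp(jbuf) ...
+ //      |  zero                 |  non-zero
+ //      v                       v
+ //   eh.sjlj.setjmp.cont     eh.sjlj.setjmp.catch
+ //   (register the context,  (switch on call_site; cases branch to the
+ //    continue normally)      invoke landing pads, default traps)
+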
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Insert a load of the callsite in the dispatch block, and a switch on its
+ // value. By default, we issue a trap statement.
+ BasicBlock *TrapBlock =
+ BasicBlock::Create(F.getContext(), "trapbb", &F);
+ CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
+ "", TrapBlock);
+ new UnreachableInst(F.getContext(), TrapBlock);
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
+ DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "jbuf_gep",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "dispatch",
+ EntryBB->getTerminator());
+
+ // Add a call to dispatch_setup after the setjmp call. This is expanded to any
+ // target-specific setup that needs to be done.
+ CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator());
+
+ // Check the return value of the setjmp; a non-zero value goes to the dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values, and fill in the dispatch switch accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block: before the function context is registered,
+ // any unexpected exception thrown there goes directly to the caller's
+ // context, which is what we want anyway, so there is nothing to do here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore calls to the EH builtins (eh.selector, eh.exception)
+ Constant *Callee = CI->getCalledFunction();
+ if (Callee != SelectorFn && Callee != ExceptionFn
+ && !CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ }
+ }
+
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(TrapBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
+ return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+ bool Res = insertSjLjEHSupport(F);
+ return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 000000000000..ca79cafcf4be
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,179 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ MBBRanges.clear();
+ idx2MBBMap.clear();
+ clearList();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator.
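+ // The resulting numbering is one sentinel entry at index 0, one entry per
+ // instruction spaced SlotIndex::InstrDist apart, and one blank
+ // (null-instruction) entry between consecutive blocks; the gaps leave room
+ // for later insertions, with renumberIndexes() catching up locally when an
+ // inserted instruction cannot get a fresh index.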
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+ initList();
+
+ // Check that the list contains only the sentinel.
+ assert(indexListHead->getNext() == 0 &&
+ "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(MBBRanges.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ functionSize = 0;
+ unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
+
+ push_back(createEntry(0, index));
+
+ // Iterate over the function.
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ MachineBasicBlock *mbb = &*mbbItr;
+
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+
+ for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ MachineInstr *mi = miItr;
+ if (mi->isDebugValue())
+ continue;
+
+ // Insert a store index for the instr.
+ push_back(createEntry(mi, index += SlotIndex::InstrDist));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+
+ ++functionSize;
+ }
+
+ // We insert one blank instruction between basic blocks.
+ push_back(createEntry(0, index += SlotIndex::InstrDist));
+
+ MBBRanges[mbb->getNumber()].first = blockStartIndex;
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ }
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(dump());
+
+ // And we're done!
+ return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+ // Renumber updates the index of every element of the index list.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ ++NumGlobalRenum;
+
+ unsigned index = 0;
+
+ for (IndexListEntry *curEntry = front(); curEntry != getTail();
+ curEntry = curEntry->getNext()) {
+ curEntry->setIndex(index);
+ index += SlotIndex::InstrDist;
+ }
+}
+
+// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+ IndexListEntry *start = curEntry->getPrev();
+ unsigned index = start->getIndex();
+ IndexListEntry *tail = getTail();
+ do {
+ curEntry->setIndex(index += Space);
+ curEntry = curEntry->getNext();
+ // If the next index is bigger, we have caught up.
+ } while (curEntry != tail && curEntry->getIndex() <= index);
+
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+ << index << " ***\n");
+ ++NumLocalRenum;
+}
+
+
+void SlotIndexes::dump() const {
+ for (const IndexListEntry *itr = front(); itr != getTail();
+ itr = itr->getNext()) {
+ dbgs() << itr->getIndex() << " ";
+
+ if (itr->getInstr() != 0) {
+ dbgs() << *itr->getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
+}
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+ if (isValid())
+ os << entry().getIndex() << "LudS"[getSlot()];
+ else
+ os << "invalid";
+}
+
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..694961863261
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,352 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
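+// Here V_n is the output value of node n (in [-1;1], positive meaning the
+// variable should stay in a register across bundle n), B_n is the bias
+// accumulated from the block constraints, and F_b is the normalized
+// frequency of the transparent block b linking nodes n and m.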
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+ /// block frequency estimates means that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Scale[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive and add up to at most 2, weights
+ /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // Actually, we must spill if Bias < sum(weights).
+ // It may be worth it to compute the weight sum here?
+ return Bias < -2.0f;
+ }
+
+ /// Node - Create a blank Node.
+ Node() {
+ Scale[0] = Scale[1] = 0;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+ /// the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
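+ /// For example (made-up numbers): with Bias = 0.5 and a single link of
+ /// weight 0.25 to a node whose Value is -1, the weighted sum is 0.25,
+ /// which is above Thres, so Value becomes 1 and this node prefers a
+ /// register; a sum inside [-Thres;Thres] leaves the node undecided
+ /// (Value = 0), which preferReg() treats as "stay on the stack".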
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequency.resize(mf.getNumBlockIDs());
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = LiveIntervals::getSpillWeight(true, false,
+ loops->getLoopDepth(I));
+ unsigned Num = I->getNumber();
+ BlockFrequency[Num] = Freq;
+ nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
+ }
+
+ // Scales are reciprocal frequencies.
+ for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
+ for (unsigned d = 0; d != 2; ++d)
+ if (nodes[i].Scale[d] > 0)
+ nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+}
+
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// Set a bit in ActiveNodes for each node that becomes active.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I->Number);
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+ ++I) {
+ unsigned Number = *I;
+ unsigned ib = bundles->getBundle(Number, 0);
+ unsigned ob = bundles->getBundle(Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+ Linked.push_back(ib);
+ if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+ Linked.push_back(ob);
+ float Freq = getBlockFrequency(Number);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ Linked.clear();
+ RecentPositive.clear();
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (!nodes[n].Links.empty())
+ Linked.push_back(n);
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+/// iterate - Repeatedly update the Hopfield nodes in Linked until stability or
+/// the maximum number of iterations is reached.
+void SpillPlacement::iterate() {
+ // First update the recently positive nodes. They have likely received new
+ // negative bias that will turn them off.
+ while (!RecentPositive.empty())
+ nodes[RecentPositive.pop_back_val()].update(nodes);
+
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ Linked.clear();
+ RecentPositive.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = 0;
+ return Perfect;
+}
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..6952ad800965
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,142 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<float, 4> BlockFrequency;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or until new
+  /// positive bundles are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(unsigned Number) const {
+ return BlockFrequency[Number];
+ }
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+};
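+
+// A typical client drives the analysis roughly as follows (a minimal sketch;
+// SP is assumed to be the SpillPlacement analysis, e.g. obtained through
+// getAnalysis<SpillPlacement>(), and all block numbers and constraints below
+// are purely illustrative):
+//
+//   BitVector RegBundles;
+//   SP.prepare(RegBundles);
+//
+//   SpillPlacement::BlockConstraint BC[1];
+//   BC[0].Number = 3;                         // Hypothetical MBB number.
+//   BC[0].Entry = SpillPlacement::PrefReg;    // Live-in, prefer a register.
+//   BC[0].Exit = SpillPlacement::MustSpill;   // Live-out, must be on stack.
+//   SP.addConstraints(BC);
+//
+//   SP.addLinks(ThroughBlocks);  // Hypothetical ArrayRef<unsigned> of blocks
+//                                // the variable merely passes through.
+//
+//   if (SP.scanActiveBundles())
+//     SP.iterate();
+//   bool Perfect = SP.finish();
+//   // A set bit in RegBundles now means: keep the variable in a register
+//   // through that edge bundle.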
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp
new file mode 100644
index 000000000000..b6bbcd7176dd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,241 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ enum SpillerName { trivial, standard, inline_ };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+ cl::desc("Spiller to use: (default: standard)"),
+ cl::Prefix,
+ cl::values(clEnumVal(trivial, "trivial spiller"),
+ clEnumVal(standard, "default spiller"),
+ clEnumValN(inline_, "inline", "inline spiller"),
+ clEnumValEnd),
+ cl::init(standard));
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+ MachineFunctionPass *pass;
+ MachineFunction *mf;
+ VirtRegMap *vrm;
+ LiveIntervals *lis;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : pass(&pass), mf(&mf), vrm(&vrm)
+ {
+ lis = &pass.getAnalysis<LiveIntervals>();
+ mfi = mf.getFrameInfo();
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
+ }
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding or
+ /// remat is attempted.
+ void trivialSpillEverywhere(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &newIntervals) {
+ DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
+ "Trying to spill a stack slot.");
+
+ DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ // Iterate over reg uses/defs.
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ // Grab the use/def instr.
+ MachineInstr *mi = &*regItr;
+
+ DEBUG(dbgs() << " Processing " << *mi);
+
+ // Step regItr to the next use/def instr.
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ // Collect uses & defs for this instr.
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+ indices.push_back(i);
+ }
+
+ // Create a new vreg & interval for this instr.
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ vrm->assignVirt2StackSlot(newVReg, ss);
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ newLI->weight = HUGE_VALF;
+
+ // Update the reg operands & kill flags.
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ unsigned mopIdx = indices[i];
+ MachineOperand &mop = mi->getOperand(mopIdx);
+ mop.setReg(newVReg);
+ if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+ mop.setIsKill(true);
+ }
+ }
+ assert(hasUse || hasDef);
+
+ // Insert reload if necessary.
+ MachineBasicBlock::iterator miItr(mi);
+ if (hasUse) {
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+ tri);
+ MachineInstr *loadInstr(prior(miItr));
+ SlotIndex loadIndex =
+ lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, loadInstr);
+ SlotIndex endIndex = loadIndex.getNextIndex();
+ VNInfo *loadVNI =
+ newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+ }
+
+ // Insert store if necessary.
+ if (hasDef) {
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+ true, ss, trc, tri);
+ MachineInstr *storeInstr(llvm::next(miItr));
+ SlotIndex storeIndex =
+ lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, storeInstr);
+ SlotIndex beginIndex = storeIndex.getPrevIndex();
+ VNInfo *storeVNI =
+ newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+ }
+
+ newIntervals.push_back(newLI);
+ }
+ }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+ TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : SpillerBase(pass, mf, vrm) {}
+
+ void spill(LiveRangeEdit &LRE) {
+ // Ignore spillIs - we don't use it.
+ trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
+ }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Falls back on LiveIntervals::addIntervalsForSpills.
+class StandardSpiller : public Spiller {
+protected:
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ MachineLoopInfo *loopInfo;
+ VirtRegMap *vrm;
+public:
+ StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : mf(&mf),
+ lis(&pass.getAnalysis<LiveIntervals>()),
+ lss(&pass.getAnalysis<LiveStacks>()),
+ loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
+ vrm(&vrm) {}
+
+ /// Falls back on LiveIntervals::addIntervalsForSpills.
+ void spill(LiveRangeEdit &LRE) {
+ std::vector<LiveInterval*> added =
+ lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
+ loopInfo, *vrm);
+ LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
+ added.begin(), added.end());
+
+ // Update LiveStacks.
+ int SS = vrm->getStackSlot(LRE.getReg());
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+ const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
+ LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+ if (!SI.hasAtLeastOneValue())
+ SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+ SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
+ }
+};
+
+} // end anonymous namespace
+
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ switch (spillerOpt) {
+ default: assert(0 && "unknown spiller");
+ case trivial: return new TrivialSpiller(pass, mf, vrm);
+ case standard: return new StandardSpiller(pass, mf, vrm);
+ case inline_: return createInlineSpiller(pass, mf, vrm);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
new file mode 100644
index 000000000000..41f1727da439
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -0,0 +1,46 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+ class LiveRangeEdit;
+ class MachineFunction;
+ class MachineFunctionPass;
+ class VirtRegMap;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ public:
+ virtual ~Spiller() = 0;
+
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
+
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring through VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
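+
+  // A register allocator typically owns a single spiller for the whole
+  // function. A minimal sketch (pass, mf, vrm and the LiveRangeEdit LRE are
+  // assumed to be provided by the caller):
+  //
+  //   Spiller *TheSpiller = createSpiller(pass, mf, vrm);
+  //   ...
+  //   TheSpiller->spill(LRE);   // Spill the LRE.getParent() live interval.
+  //   ...
+  //   delete TheSpiller;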
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 000000000000..761cab7ce850
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1386 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "SplitKit.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
+ const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = 0;
+ DidRepairRange = false;
+}
+
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+ const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+ std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+
+ // Compute split points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LSP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+ if (FirstTerm == MBB->end())
+ LSP.first = LIS.getMBBEndIdx(MBB);
+ else
+ LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (!LPad)
+ return LSP.first;
+    // There may not be a call instruction, in which case we ignore LPad.
+ LSP.second = LSP.first;
+ for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ I != E;) {
+ --I;
+ if (I->getDesc().isCall()) {
+ LSP.second = LIS.getInstructionIndex(I);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last split point
+ // back to the call that may throw.
+ if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.second;
+ else
+ return LSP.first;
+}
+
+/// analyzeUses - Count instructions and basic blocks using CurLI.
+void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+ E = CurLI->vni_end(); I != E; ++I)
+ if (!(*I)->isPHIDef() && !(*I)->isUnused())
+ UseSlots.push_back((*I)->def);
+
+  // Get use slots from the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+ ++I)
+ if (!I.getOperand().isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ if (!calcLiveBlockInfo()) {
+ // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+ // I am looking at you, RegisterCoalescer!
+ DidRepairRange = true;
+ ++NumRepairs;
+ DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ const_cast<LiveIntervals&>(LIS)
+ .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ bool fixed = calcLiveBlockInfo();
+ (void)fixed;
+ assert(fixed && "Couldn't fix broken live interval");
+ }
+
+ DEBUG(dbgs() << "Analyze counted "
+ << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the UseBlocks and ThroughBlocks arrays with
+/// information about the blocks where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return true;
+
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+      // With no uses, the range shouldn't end mid-block. If it does, the live
+      // interval is inconsistent and the caller will repair it.
+ if (LVI->end < Stop)
+ return false;
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstUse = *UseI;
+ assert(BI.FirstUse >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastUse = UseI[-1];
+ assert(BI.LastUse < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastUse = LastStop;
+ break;
+ }
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveThrough = false;
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastUse = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstUse = LVI->start;
+ }
+ }
+
+ // Don't set LiveThrough when the block has a gap.
+ BI.LiveThrough = BI.LiveIn && BI.LiveOut;
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
+ }
+
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
+ }
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
+ return true;
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ for (;;) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+ // Range does not contain Idx, previous must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ CurLI = li;
+ analyzeUses();
+}
+
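+// A minimal sketch of how the analysis is queried (vrm, lis, and mli are
+// assumed to be the current function's VirtRegMap, LiveIntervals, and
+// MachineLoopInfo; LI is the live interval being considered for splitting):
+//
+//   SplitAnalysis SA(vrm, lis, mli);
+//   SA.analyze(&LI);
+//   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
+//   for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i)
+//     dbgs() << "BB#" << UseBlocks[i].MBB->getNumber()
+//            << " first use " << UseBlocks[i].FirstUse << '\n';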
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(0),
+ OpenIdx(0),
+ RegAssign(Allocator)
+{}
+
+void SplitEditor::reset(LiveRangeEdit &lre) {
+ Edit = &lre;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+
+ // We don't need to clear LiveOutCache, only LiveOutSeen entries are read.
+ LiveOutSeen.clear();
+
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit->anyRematerializable(LIS, TII, 0);
+}
+
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
+}
+
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped.
+ // Keep it as a simple def without any liveness.
+ if (InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second) {
+ SlotIndex Def = OldVNI->def;
+ LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+ // No longer a simple mapping.
+ InsP.first->second = 0;
+ }
+
+  // This is a complex mapping; add liveness for VNI.
+ SlotIndex Def = VNI->def;
+ LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+
+ return VNI;
+}
+
+void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) {
+ assert(ParentVNI && "Mapping NULL value");
+ VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)];
+
+  // ParentVNI was either unmapped or already complex mapped. Either way,
+  // there is nothing more to do.
+ if (!VNI)
+ return;
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ SlotIndex Def = VNI->def;
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ VNI = 0;
+}
+
+// extendRange - Extend the live range to reach Idx.
+// Potentially create phi-def values.
+void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) {
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
+ assert(IdxMBB && "No MBB at Idx");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Is there a def in the same MBB we can extend?
+ if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx))
+ return;
+
+ // Now for the fun part. We know that ParentVNI potentially has multiple defs,
+ // and we may need to create even more phi-defs to preserve VNInfo SSA form.
+ // Perform a search for all predecessor blocks where we know the dominating
+ // VNInfo.
+ VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot());
+
+ // When there were multiple different values, we may need new PHIs.
+ if (!VNI)
+ return updateSSA();
+
+ // Poor man's SSA update for the single-value case.
+ LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]);
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+ E = LiveInBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ if (I->Kill.isValid())
+ LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ LiveOutCache[MBB] = LOP;
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+ }
+ }
+}
+
+/// findReachingDefs - Search the CFG for known live-out values.
+/// Add required live-in blocks to LiveInBlocks.
+VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill) {
+ // Initialize the live-out cache the first time it is needed.
+ if (LiveOutSeen.empty()) {
+ unsigned N = VRM.getMachineFunction().getNumBlockIDs();
+ LiveOutSeen.resize(N);
+ LiveOutCache.resize(N);
+ }
+
+ // Blocks where LI should be live-in.
+ SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = 0;
+
+ // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = WorkList[i];
+ assert(!MBB->pred_empty() && "Value live-in to entry block?");
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ LiveOutPair &LOP = LiveOutCache[Pred];
+
+ // Is this a known live-out block?
+ if (LiveOutSeen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = LOP.first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ // First time. LOP is garbage and must be cleared below.
+ LiveOutSeen.set(Pred->getNumber());
+
+ // Does Pred provide a live-out value?
+ SlotIndex Start, Last;
+ tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred);
+ Last = Last.getPrevSlot();
+ VNInfo *VNI = LI->extendInBlock(Start, Last);
+ LOP.first = VNI;
+ if (VNI) {
+ LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)];
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+ LOP.second = 0;
+
+ // No, we need a live-in value for Pred as well
+ if (Pred != KillMBB)
+ WorkList.push_back(Pred);
+ else
+ // Loopback to KillMBB, so value is really live through.
+ Kill = SlotIndex();
+ }
+ }
+
+ // Transfer WorkList to LiveInBlocks in reverse order.
+ // This ordering works best with updateSSA().
+ LiveInBlocks.clear();
+ LiveInBlocks.reserve(WorkList.size());
+ while(!WorkList.empty())
+ LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]);
+
+ // The kill block may not be live-through.
+ assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB);
+ LiveInBlocks.back().Kill = Kill;
+
+ return UniqueVNI ? TheVNI : 0;
+}
+
+void SplitEditor::updateSSA() {
+ // This is essentially the same iterative algorithm that SSAUpdater uses,
+ // except we already have a dominator tree, so we don't have to recompute it.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+ E = LiveInBlocks.end(); I != E; ++I) {
+ MachineDomTreeNode *Node = I->DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = LiveOutCache[IDom->getBlock()];
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair Value = LiveOutCache[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (MDT.dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extendRange. In that case LiveOutSeen is true, and
+ // LiveOutCache indicates a foreign or missing value.
+ LiveOutPair &LOP = LiveOutCache[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ unsigned RegIdx = RegAssign.lookup(Start);
+ LiveInterval *LI = Edit->get(RegIdx);
+ VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ I->Value = VNI;
+ // This block is done, we know the final value.
+ I->DomNode = 0;
+ if (I->Kill.isValid())
+ LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I->Value = IDomValue.first;
+ if (I->Kill.isValid())
+ continue;
+ // Propagate IDomValue if needed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.second != Node && LOP.first != IDomValue.first) {
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ }
+ } while (Changes);
+
+ // The values in LiveInBlocks are now accurate. No more phi-defs are needed
+ // for these blocks, so we can color the live ranges.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+ E = LiveInBlocks.end(); I != E; ++I) {
+ if (!I->DomNode)
+ continue;
+ assert(I->Value && "No live-in value found");
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ unsigned RegIdx = RegAssign.lookup(Start);
+ LiveInterval *LI = Edit->get(RegIdx);
+ LI->addRange(LiveRange(Start, I->Kill.isValid() ?
+ I->Kill : LIS.getMBBEndIdx(MBB), I->Value));
+ }
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+ ++NumRemats;
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit->getReg());
+ Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
+ .getDefIndex();
+ ++NumCopies;
+ }
+
+ // Define the value in Reg.
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
+ VNI->setCopy(CopyMI);
+ return VNI;
+}
+
+/// Create a new virtual register and live interval.
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->create(LIS, VRM);
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->create(LIS, VRM);
+ return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
+}
+
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ LIS.getLastSplitPoint(Edit->getParent(), &MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+
+ // The interval must be live beyond the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by extendRange().
+ if (ParentVNI)
+ markComplexMapped(0, ParentVNI);
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need extendRange().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ LiveInBlocks.clear();
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
+ ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
+ DEBUG(dbgs() << " blit " << *ParentI << ':');
+ VNInfo *ParentVNI = ParentI->valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = ParentI->start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = ParentI->end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Check for a simply defined value that can be blitted directly.
+ if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) {
+ DEBUG(dbgs() << ':' << VNI->id);
+ LI->addRange(LiveRange(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip rematerialized values, we need to use extendRange() and
+ // extendPHIKillRanges() to completely recompute the live ranges.
+ if (Edit->didRematerialize(ParentVNI)) {
+ DEBUG(dbgs() << "(remat)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ // Initialize the live-out cache the first time it is needed.
+ if (LiveOutSeen.empty()) {
+ unsigned N = VRM.getMachineFunction().getNumBlockIDs();
+ LiveOutSeen.resize(N);
+ LiveOutCache.resize(N);
+ }
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex BlockStart, BlockEnd;
+ tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LI->extendInBlock(BlockStart,
+ std::min(BlockEnd, End).getPrevSlot());
+ assert(VNI && "Missing def for complex mapped value");
+ DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End) {
+ LiveOutSeen.set(MBB->getNumber());
+ LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
+ }
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ BlockEnd = LIS.getMBBEndIdx(MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LI->extendInBlock(BlockStart,
+ std::min(BlockEnd, End).getPrevSlot());
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd) {
+ // Live-out as well.
+ LiveOutSeen.set(MBB->getNumber());
+ LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
+ }
+ } else {
+ // This block needs a live-in value.
+ LiveInBlocks.push_back(MDT[MBB]);
+ // The last block covered may not be live-out.
+ if (End < BlockEnd)
+ LiveInBlocks.back().Kill = End;
+ else {
+ // Live-out, but we need updateSSA to tell us the value.
+ LiveOutSeen.set(MBB->getNumber());
+ LiveOutCache[MBB] = LiveOutPair((VNInfo*)0,
+ (MachineDomTreeNode*)0);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != ParentI->end);
+ DEBUG(dbgs() << '\n');
+ }
+
+ if (!LiveInBlocks.empty())
+ updateSSA();
+
+ return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (Edit->getParent().liveAt(End)) {
+ assert(RegAssign.lookup(End) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ extendRange(RegIdx, End);
+ }
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ MO.setReg(0);
+ continue;
+ }
+
+ // <undef> operands don't really read the register, so just assign them to
+ // the complement.
+ if (MO.isUse() && MO.isUndef()) {
+ MO.setReg(Edit->get(0)->reg);
+ continue;
+ }
+
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (MO.isDef())
+ Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ MO.setReg(Edit->get(RegIdx)->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges)
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+      // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getUseIndex();
+
+ extendRange(RegIdx, Idx);
+ }
+}
+
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = *I;
+ for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+ LII != LIE; ++LII) {
+ // Dead defs end at the store slot.
+ if (LII->end != LII->valno->def.getNextSlot())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg, &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
+ }
+ }
+
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
+ VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ VNI->setCopy(ParentVNI->getCopy());
+
+ // Mark rematted values as complex everywhere to force liveness computation.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ markComplexMapped(i, ParentVNI);
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ LRMap->clear();
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ LRMap->push_back(i);
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit->get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned j = 1; j != NumComp; ++j)
+ dups.push_back(&Edit->create(LIS, VRM));
+ ConEQ.Distribute(&dups[0], MRI);
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getMultiUseBlocks - If CurLI has more than one use in a basic block, it
+/// may be advantageous to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+ // If CurLI is local to one block, there is no point to splitting it.
+ if (UseBlocks.size() <= 1)
+ return false;
+ // Add blocks with multiple uses.
+ for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) {
+ const BlockInfo &BI = UseBlocks[i];
+ if (BI.FirstUse == BI.LastUse)
+ continue;
+ Blocks.insert(BI.MBB);
+ }
+ return !Blocks.empty();
+}
+
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastUse < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastUse);
+ }
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (Blocks.count(BI.MBB))
+ splitSingleBlock(BI);
+ }
+ finish();
+}
+
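+// Putting the pieces together, per-block splitting of a live interval LI is
+// driven roughly like this (a minimal sketch; SA and SE are assumed to be the
+// caller's SplitAnalysis and SplitEditor, and LRE a LiveRangeEdit for LI):
+//
+//   SA.analyze(&LI);
+//   SE.reset(LRE);
+//   SplitAnalysis::BlockPtrSet Blocks;
+//   if (SA.getMultiUseBlocks(Blocks))
+//     SE.splitSingleBlocks(Blocks);  // Calls finish() internally.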
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a method of global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP;
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(Cut);
+ useIntv(Idx, Stop);
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastUse);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastUse < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastUse);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastUse);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastUse < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastUse);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastUse);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstUse, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse));
+ useIntv(From, Idx);
+}
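One plausible per-block dispatch over the three methods above, shaped the way a global splitter would use them; the wrapper and the precomputed IntvIn/IntvOut/interference bounds are assumptions for illustration and are not taken from this patch.

#include "SplitKit.h"   // SplitAnalysis, SplitEditor (added by this patch)

using namespace llvm;

// For one use block, pick the appropriate splitting primitive. IntvIn/IntvOut
// are interval indexes from openIntv(); 0 means the value is on the stack on
// that side of the block.
static void splitUseBlock(SplitEditor &SE, const SplitAnalysis::BlockInfo &BI,
                          unsigned IntvIn, SlotIndex LeaveBefore,
                          unsigned IntvOut, SlotIndex EnterAfter) {
  if (!IntvIn && !IntvOut) {
    // Isolated block: keep the uses in their own local interval.
    SE.splitSingleBlock(BI);
    return;
  }
  if (IntvIn && IntvOut)
    // In a register on both sides; uses are ignored when placing splits.
    SE.splitLiveThroughBlock(BI.MBB->getNumber(),
                             IntvIn, LeaveBefore, IntvOut, EnterAfter);
  else if (IntvIn)
    // Enters in a register, leaves on the stack (or dies in the block).
    SE.splitRegInBlock(BI, IntvIn, LeaveBefore);
  else
    // Enters on the stack (or is defined here), leaves in a register.
    SE.splitRegOutBlock(BI, IntvOut, EnterAfter);
}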
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 000000000000..7948b725f856
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,469 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class ConnectedVNInfoEqClasses;
+class LiveInterval;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VNInfo;
+class raw_ostream;
+
+/// At some point we should just include MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstUse; ///< First instr using current reg.
+ SlotIndex LastUse; ///< Last instr using current reg.
+ bool LiveThrough; ///< Live in whole block (Templ 5. above).
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstUse, LastUse);
+ }
+ };
+
+private:
+ // Current live interval.
+ const LiveInterval *CurLI;
+
+ /// LastSplitPoint - Last legal split point in each basic block in the current
+ /// function. The first entry is the first terminator, the second entry is the
+ /// last valid split point for a variable that is live in to a landing pad
+ /// successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ /// DidRepairRange - analyze was forced to shrinkToUses().
+ bool DidRepairRange;
+
+ SlotIndex computeLastSplitPoint(unsigned Num);
+
+ // Summarize statistics by counting instructions using CurLI.
+ void analyzeUses();
+
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ bool calcLiveBlockInfo();
+
+public:
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+ /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+ /// can create live ranges that end in mid-air.
+ bool didRepairRange() const { return DidRepairRange; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// getLastSplitPoint - Return the base index of the last valid split point
+ /// in the basic block numbered Num.
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ // Inline the common simple case.
+ if (LastSplitPoint[Num].first.isValid() &&
+ !LastSplitPoint[Num].second.isValid())
+ return LastSplitPoint[Num].first;
+ return computeLastSplitPoint(Num);
+ }
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+
+ /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+ /// having CurLI split to a new live interval. Return true if Blocks can be
+ /// passed to SplitEditor::splitSingleBlocks.
+ bool getMultiUseBlocks(BlockPtrSet &Blocks);
+};
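A small, hypothetical consumer of the accessors above; the threshold it applies is an illustrative assumption, not a heuristic taken from this patch.

// Sketch: decide whether region splitting looks promising for VirtReg.
// Assumes #include "SplitKit.h" and using namespace llvm.
static bool looksWorthSplitting(SplitAnalysis &SA,
                                const LiveInterval &VirtReg) {
  SA.analyze(&VirtReg);
  if (SA.getNumLiveBlocks() <= 1)
    return false;                      // Purely local; nothing to carve up.
  // Many live-through blocks relative to use blocks suggest the value could
  // sit on the stack (or in another register) across large CFG regions.
  return SA.getNumThroughBlocks() >= SA.getUseBlocks().size();
}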
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Repeat from openIntv for each additional interval.
+/// - Rewrite instructions with finish().
+///
+class SplitEditor {
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx).
+ /// Each value is represented by a minimal live range at its def.
+ /// 3. A non-null VNInfo - the value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ // LiveOutCache - Map each basic block where a new register is live out to the
+ // live-out value and its defining block.
+ // One of these conditions shall be true:
+ //
+ // 1. !LiveOutCache.count(MBB)
+ // 2. LiveOutCache[MBB].second.getNode() == MBB
+ // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+ //
+ // This is only a cache, the values can be computed as:
+ //
+ // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+ // Node = MDT[LIS.getMBBFromIndex(VNI->def)]
+ //
+ // The cache is also used as a visited set by extendRange(). It can be shared
+ // by all the new registers because at most one is live out of each block.
+ LiveOutMap LiveOutCache;
+
+ // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid
+ // entry in LiveOutCache.
+ BitVector LiveOutSeen;
+
+ /// LiveInBlock - Info for updateSSA() about a block where a register is
+ /// live-in.
+ /// The updateSSA() caller provides DomNode and Kill inside MBB; updateSSA()
+ /// adds the computed live-in value.
+ struct LiveInBlock {
+ // Dominator tree node for the block.
+ // Cleared by updateSSA when the final value has been determined.
+ MachineDomTreeNode *DomNode;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block.
+ SlotIndex Kill;
+
+ LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {}
+ };
+
+ /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and
+ /// updateSSA(). This list is usually empty; it exists here to avoid frequent
+ /// reallocations.
+ SmallVector<LiveInBlock, 16> LiveInBlocks;
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless
+ /// of the number of defs.
+ void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx.
+ /// Insert PHIDefs as needed to preserve SSA form.
+ void extendRange(unsigned RegIdx, SlotIndex Idx);
+
+ /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all
+ /// reaching defs for LI are found.
+ /// @param LI Live interval whose value is needed.
+ /// @param MBB Block where LI should be live-in.
+ /// @param Kill Kill point in MBB.
+ /// @return Unique value seen, or NULL.
+ VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB,
+ SlotIndex Kill);
+
+ /// updateSSA - Compute and insert PHIDefs such that all blocks in
+ /// LiveInBlocks get a known live-in value. Add live ranges to the blocks.
+ void updateSSA();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals are appended to the LiveRangeEdit passed to reset().
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ MachineDominatorTree&);
+
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&);
+
+ /// Create a new virtual register and live interval.
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in MBB should use the open interval.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use the open interval.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
+
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
+
+ // ===--- High level methods ---===
+
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+ /// splitSingleBlocks - Split CurLI into a separate live interval inside each
+ /// basic block in Blocks.
+ void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+}
+
+#endif
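To make the workflow described in the SplitEditor class comment concrete, here is a minimal sketch that carves one block's uses into a new interval by hand. It mirrors the simple branch of splitSingleBlock() and ignores the last-split-point and interference handling, so treat it as an illustration of the API rather than a drop-in replacement.

#include "SplitKit.h"   // SplitEditor, SplitAnalysis (this header)

using namespace llvm;

// openIntv -> enterIntv* -> useIntv -> leaveIntv* -> finish, as in the
// SplitEditor class comment.
static void splitUsesByHand(SplitEditor &SE,
                            const SplitAnalysis::BlockInfo &BI) {
  SE.openIntv();                                       // New interval index.
  SlotIndex Start = SE.enterIntvBefore(BI.FirstUse);   // COPY before 1st use.
  SlotIndex End   = SE.leaveIntvAfter(BI.LastUse);     // COPY after last use.
  SE.useIntv(Start, End);                              // Map the uses to it.
  SE.finish();                                         // Rewrite instructions.
}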
diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp
new file mode 100644
index 000000000000..ec75df4b7d1f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Splitter.cpp
@@ -0,0 +1,827 @@
+//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "RegisterCoalescer.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+
+namespace llvm {
+
+ class StartSlotComparator {
+ public:
+ StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+ bool operator()(const MachineBasicBlock *mbb1,
+ const MachineBasicBlock *mbb2) const {
+ return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+ }
+ private:
+ LiveIntervals &lis;
+ };
+
+ class LoopSplit {
+ public:
+ LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+ : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Cannot split physical registers.");
+ }
+
+ LiveInterval& getLI() const { return li; }
+
+ MachineLoop& getLoop() const { return loop; }
+
+ bool isValid() const { return valid; }
+
+ bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+ void invalidate() { valid = false; }
+
+ void splitIncoming() { inSplit = true; }
+
+ void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+ void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+ void apply() {
+ assert(valid && "Attempt to apply invalid split.");
+ applyIncoming();
+ applyOutgoing();
+ copyRanges();
+ renameInside();
+ }
+
+ private:
+ LoopSplitter &ls;
+ LiveInterval &li;
+ MachineLoop &loop;
+ bool valid, inSplit;
+ std::set<MachineLoop::Edge> outSplits;
+ std::vector<MachineInstr*> loopInstrs;
+
+ LiveInterval *newLI;
+ std::map<VNInfo*, VNInfo*> vniMap;
+
+ LiveInterval* getNewLI() {
+ if (newLI == 0) {
+ const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+ unsigned vreg = ls.mri->createVirtualRegister(trc);
+ newLI = &ls.lis->getOrCreateInterval(vreg);
+ }
+ return newLI;
+ }
+
+ VNInfo* getNewVNI(VNInfo *oldVNI) {
+ VNInfo *newVNI = vniMap[oldVNI];
+
+ if (newVNI == 0) {
+ newVNI = getNewLI()->createValueCopy(oldVNI,
+ ls.lis->getVNInfoAllocator());
+ vniMap[oldVNI] = newVNI;
+ }
+
+ return newVNI;
+ }
+
+ void applyIncoming() {
+ if (!inSplit) {
+ return;
+ }
+
+ MachineBasicBlock *preHeader = loop.getLoopPreheader();
+ if (preHeader == 0) {
+ assert(ls.canInsertPreHeader(loop) &&
+ "Can't insert required preheader.");
+ preHeader = &ls.insertPreHeader(loop);
+ }
+
+ LiveRange *preHeaderRange =
+ ls.lis->findExitingRange(li, preHeader);
+ assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+ // Insert the new copy.
+ MachineInstr *copy = BuildMI(*preHeader,
+ preHeader->getFirstTerminator(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(getNewLI()->reg, RegState::Define)
+ .addReg(li.reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ VNInfo *newVal = getNewVNI(preHeaderRange->valno);
+ newVal->def = copyDefIdx;
+ newVal->setCopy(copy);
+ li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
+
+ getNewLI()->addRange(LiveRange(copyDefIdx,
+ ls.lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+ void applyOutgoing() {
+
+ for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
+ osEnd = outSplits.end();
+ osItr != osEnd; ++osItr) {
+ MachineLoop::Edge edge = *osItr;
+ MachineBasicBlock *outBlock = edge.second;
+ if (ls.isCriticalEdge(edge)) {
+ assert(ls.canSplitEdge(edge) && "Unsplittable critical edge.");
+ outBlock = &ls.splitEdge(edge, loop);
+ }
+ LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
+ assert(outRange != 0 && "No exiting range?");
+
+ MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(li.reg, RegState::Define)
+ .addReg(getNewLI()->reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ // Blow away output range definition.
+ outRange->valno->def = ls.lis->getInvalidIndex();
+ li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+
+ SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
+ assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal =
+ getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
+
+ getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
+ copyDefIdx, newVal));
+
+ }
+ }
+
+ void copyRange(LiveRange &lr) {
+ std::pair<bool, LoopSplitter::SlotPair> lsr =
+ ls.getLoopSubRange(lr, loop);
+
+ if (!lsr.first)
+ return;
+
+ LiveRange loopRange(lsr.second.first, lsr.second.second,
+ getNewVNI(lr.valno));
+
+ li.removeRange(loopRange.start, loopRange.end, true);
+
+ getNewLI()->addRange(loopRange);
+ }
+
+ void copyRanges() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
+ if (instr.modifiesRegister(li.reg, 0)) {
+ LiveRange *defRange =
+ li.getLiveRangeContaining(instrIdx.getDefIndex());
+ if (defRange != 0) // May have caught this already.
+ copyRange(*defRange);
+ }
+ if (instr.readsRegister(li.reg, 0)) {
+ LiveRange *useRange =
+ li.getLiveRangeContaining(instrIdx.getUseIndex());
+ if (useRange != 0) { // May have caught this already.
+ copyRange(*useRange);
+ }
+ }
+ }
+
+ for (MachineLoop::block_iterator bbItr = loop.block_begin(),
+ bbEnd = loop.block_end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock &loopBlock = **bbItr;
+ LiveRange *enteringRange =
+ ls.lis->findEnteringRange(li, &loopBlock);
+ if (enteringRange != 0) {
+ copyRange(*enteringRange);
+ }
+ }
+ }
+
+ void renameInside() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
+ MachineOperand &mop = instr.getOperand(i);
+ if (mop.isReg() && mop.getReg() == li.reg) {
+ mop.setReg(getNewLI()->reg);
+ }
+ }
+ }
+ }
+
+ };
+
+ void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addPreserved<RegisterCoalescer>();
+ au.addPreserved<CalculateSpillWeights>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ sis = &getAnalysis<SlotIndexes>();
+ lis = &getAnalysis<LiveIntervals>();
+ mli = &getAnalysis<MachineLoopInfo>();
+ mdt = &getAnalysis<MachineDominatorTree>();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
+
+ dumpOddTerminators();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+// std::deque<MachineLoop*> loops;
+// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+// dbgs() << "Loops:\n";
+// while (!loops.empty()) {
+// MachineLoop &loop = *loops.front();
+// loops.pop_front();
+// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+
+// dumpLoopInfo(loop);
+// }
+
+ //lis->dump();
+ //exit(0);
+
+ // Setup initial intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
+ !lis->intervalIsInOneMBB(*li)) {
+ intervals.push_back(li);
+ }
+ }
+
+ processIntervals();
+
+ intervals.clear();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+ dumpOddTerminators();
+
+ //exit(1);
+
+ return false;
+ }
+
+ void LoopSplitter::releaseMemory() {
+ fqn.clear();
+ intervals.clear();
+ loopRangeMap.clear();
+ }
+
+ void LoopSplitter::dumpOddTerminators() {
+ for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock *mbb = &*bbItr;
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ if (tii->AnalyzeBranch(*mbb, a, b, c)) {
+ dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
+ dbgs() << " Terminators:\n";
+ for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr *instr = &*iItr;
+ dbgs() << " " << *instr;
+ }
+ dbgs() << "\n Listed successors: [ ";
+ for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock *succMBB = *sItr;
+ dbgs() << succMBB->getNumber() << " ";
+ }
+ dbgs() << "]\n\n";
+ }
+ }
+ }
+
+ void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
+ MachineBasicBlock &headerBlock = *loop.getHeader();
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ loop.getExitEdges(exitEdges);
+
+ dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
+ for (std::vector<MachineBasicBlock*>::const_iterator
+ subBlockItr = loop.getBlocks().begin(),
+ subBlockEnd = loop.getBlocks().end();
+ subBlockItr != subBlockEnd; ++subBlockItr) {
+ MachineBasicBlock &subBlock = **subBlockItr;
+ dbgs() << "BB#" << subBlock.getNumber() << " ";
+ }
+ dbgs() << "], Exit edges: [ ";
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge &exitEdge = *exitEdgeItr;
+ dbgs() << "(MBB#" << exitEdge.first->getNumber()
+ << ", MBB#" << exitEdge.second->getNumber() << ") ";
+ }
+ dbgs() << "], Sub-Loop Headers: [ ";
+ for (MachineLoop::iterator subLoopItr = loop.begin(),
+ subLoopEnd = loop.end();
+ subLoopItr != subLoopEnd; ++subLoopItr) {
+ MachineLoop &subLoop = **subLoopItr;
+ MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
+ dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
+ mbb.updateTerminator();
+
+ for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
+ miItr != miEnd; ++miItr) {
+ if (lis->isNotInMIMap(miItr)) {
+ lis->InsertMachineInstrInMaps(miItr);
+ }
+ }
+ }
+
+ bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
+ MachineBasicBlock *header = loop.getHeader();
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+
+ for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
+ pbEnd = header->pred_end();
+ pbItr != pbEnd; ++pbItr) {
+ MachineBasicBlock *predBlock = *pbItr;
+ if (tii->AnalyzeBranch(*predBlock, a, b, c)) {
+ return false;
+ }
+ }
+
+ MachineFunction::iterator headerItr(header);
+ if (headerItr == mf->begin())
+ return true;
+ MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
+ assert(headerLayoutPred != 0 && "Header should have layout pred.");
+
+ return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
+ assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
+
+ MachineBasicBlock &header = *loop.getHeader();
+
+ // Save the preds - we'll need to update them once we insert the preheader.
+ typedef std::set<MachineBasicBlock*> HeaderPreds;
+ HeaderPreds headerPreds;
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (!loop.contains(*predItr))
+ headerPreds.insert(*predItr);
+ }
+
+ assert(!headerPreds.empty() && "No predecessors for header?");
+
+ //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
+
+ MachineBasicBlock *preHeader =
+ mf->CreateMachineBasicBlock(header.getBasicBlock());
+
+ assert(preHeader != 0 && "Failed to create pre-header.");
+
+ mf->insert(header, preHeader);
+
+ for (HeaderPreds::iterator hpItr = headerPreds.begin(),
+ hpEnd = headerPreds.end();
+ hpItr != hpEnd; ++hpItr) {
+ assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
+ MachineBasicBlock &hp = **hpItr;
+ hp.ReplaceUsesOfBlockWith(&header, preHeader);
+ }
+ preHeader->addSuccessor(&header);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(preHeader));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(preHeader);
+
+ if (MachineLoop *parentLoop = loop.getParentLoop()) {
+ assert(parentLoop->getHeader() != loop.getHeader() &&
+ "Parent loop has same header?");
+ parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (parentLoop != 0) {
+ loopRangeMap.erase(parentLoop);
+ parentLoop = parentLoop->getParentLoop();
+ }
+ }
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+
+ // Is this safe for physregs?
+ // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
+ if (!lis->isLiveInToMBB(li, &header))
+ continue;
+
+ if (lis->isLiveInToMBB(li, preHeader)) {
+ assert(lis->isLiveOutOfMBB(li, preHeader) &&
+ "Range terminates in newly added preheader?");
+ continue;
+ }
+
+ bool insertRange = false;
+
+ for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
+ predEnd = preHeader->pred_end();
+ predItr != predEnd; ++predItr) {
+ MachineBasicBlock *predMBB = *predItr;
+ if (lis->isLiveOutOfMBB(li, predMBB)) {
+ insertRange = true;
+ break;
+ }
+ }
+
+ if (!insertRange)
+ continue;
+
+ SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
+ lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+
+ //dbgs() << "Dumping SlotIndexes:\n";
+ //sis->dump();
+
+ //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
+
+ return *preHeader;
+ }
+
+ bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
+ assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
+ return edge.second->pred_size() > 1;
+ }
+
+ bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
+ MachineFunction::iterator outBlockItr(edge.second);
+ if (outBlockItr == mf->begin())
+ return true;
+ MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
+ assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
+ !tii->AnalyzeBranch(*edge.first, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
+ MachineLoop &loop) {
+
+ MachineBasicBlock &inBlock = *edge.first;
+ MachineBasicBlock &outBlock = *edge.second;
+
+ assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
+ "Splitting non-critical edge?");
+
+ //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
+ // << " -> MBB#" << outBlock.getNumber() << ")...";
+
+ MachineBasicBlock *splitBlock =
+ mf->CreateMachineBasicBlock();
+
+ assert(splitBlock != 0 && "Failed to create split block.");
+
+ mf->insert(&outBlock, splitBlock);
+
+ inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
+ splitBlock->addSuccessor(&outBlock);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(splitBlock));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(splitBlock);
+
+ loopRangeMap.erase(&loop);
+
+ MachineLoop *splitParentLoop = loop.getParentLoop();
+ while (splitParentLoop != 0 &&
+ !splitParentLoop->contains(&outBlock)) {
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+
+ if (splitParentLoop != 0) {
+ assert(splitParentLoop->contains(&loop) &&
+ "Split-block parent doesn't contain original loop?");
+ splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (splitParentLoop != 0) {
+ loopRangeMap.erase(splitParentLoop);
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+ }
+
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+ bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
+ lis->isLiveInToMBB(li, &outBlock);
+ if (lis->isLiveInToMBB(li, splitBlock)) {
+ if (!intersects) {
+ li.removeRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock), true);
+ }
+ } else if (intersects) {
+ SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0,
+ lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock),
+ newVal));
+ }
+ }
+
+ //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
+
+ return *splitBlock;
+ }
+
+ LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
+ typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
+ LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
+ if (lrItr == loopRangeMap.end()) {
+ LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
+ std::copy(loop.block_begin(), loop.block_end(),
+ std::inserter(loopMBBs, loopMBBs.begin()));
+
+ assert(!loopMBBs.empty() && "No blocks in loop?");
+
+ LoopRanges &loopRanges = loopRangeMap[&loop];
+ assert(loopRanges.empty() && "Loop encountered but not processed?");
+ SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
+ loopRanges.push_back(
+ std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
+ lis->getInvalidIndex()));
+ for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
+ curBlockEnd = loopMBBs.end();
+ curBlockItr != curBlockEnd; ++curBlockItr) {
+ SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
+ if (newStart != oldEnd) {
+ loopRanges.back().second = oldEnd;
+ loopRanges.push_back(std::make_pair(newStart,
+ lis->getInvalidIndex()));
+ }
+ oldEnd = lis->getMBBEndIdx(*curBlockItr);
+ }
+
+ loopRanges.back().second =
+ lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
+
+ return loopRanges;
+ }
+ return lrItr->second;
+ }
+
+ std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
+ const LiveRange &lr,
+ MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ LoopRanges::iterator lrItr = loopRanges.begin(),
+ lrEnd = loopRanges.end();
+ while (lrItr != lrEnd && lr.start >= lrItr->second) {
+ ++lrItr;
+ }
+
+ if (lrItr == lrEnd) {
+ SlotIndex invalid = lis->getInvalidIndex();
+ return std::make_pair(false, SlotPair(invalid, invalid));
+ }
+
+ SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
+ SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
+
+ return std::make_pair(true, SlotPair(srStart, srEnd));
+ }
+
+ void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
+ for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
+ lrItr != lrEnd; ++lrItr) {
+ dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::processHeader(LoopSplit &split) {
+ MachineBasicBlock &header = *split.getLoop().getHeader();
+ //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
+
+ if (!lis->isLiveInToMBB(split.getLI(), &header))
+ return; // Not live in, but nothing wrong so far.
+
+ MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
+ if (!preHeader) {
+
+ if (!canInsertPreHeader(split.getLoop())) {
+ split.invalidate();
+ return; // Couldn't insert a pre-header. Bail on this interval.
+ }
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
+ split.splitIncoming();
+ break;
+ }
+ }
+ } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
+ split.splitIncoming();
+ }
+ }
+
+ void LoopSplitter::processLoopExits(LoopSplit &split) {
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ split.getLoop().getExitEdges(exitEdges);
+
+ //dbgs() << " Processing loop exits:\n";
+
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge exitEdge = *exitEdgeItr;
+
+ LiveRange *outRange =
+ split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
+
+ if (outRange != 0) {
+ if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
+ split.invalidate();
+ return;
+ }
+
+ split.splitOutgoing(exitEdge);
+ }
+ }
+ }
+
+ void LoopSplitter::processLoopUses(LoopSplit &split) {
+ std::set<MachineInstr*> processed;
+
+ for (MachineRegisterInfo::reg_iterator
+ rItr = mri->reg_begin(split.getLI().reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ MachineInstr &instr = *rItr;
+ if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
+ split.addLoopInstr(&instr);
+ processed.insert(&instr);
+ }
+ }
+
+ //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
+ // << " in blocks [ ";
+ //dbgs() << "]\n";
+ }
+
+ bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Attempt to split physical register.");
+
+ LoopSplit split(*this, li, loop);
+ processHeader(split);
+ if (split.isValid())
+ processLoopExits(split);
+ if (split.isValid())
+ processLoopUses(split);
+ if (split.isValid() /* && split.isWorthwhile() */) {
+ split.apply();
+ DEBUG(dbgs() << "Success.\n");
+ return true;
+ }
+ DEBUG(dbgs() << "Failed.\n");
+ return false;
+ }
+
+ void LoopSplitter::processInterval(LiveInterval &li) {
+ std::deque<MachineLoop*> loops;
+ std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+
+ while (!loops.empty()) {
+ MachineLoop &loop = *loops.front();
+ loops.pop_front();
+ DEBUG(
+ dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
+ << loop.getHeader()->getNumber() << " ";
+ );
+ if (!splitOverLoop(li, loop)) {
+ // Couldn't split over outer loop, schedule sub-loops to be checked.
+ std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+ }
+ }
+ }
+
+ void LoopSplitter::processIntervals() {
+ while (!intervals.empty()) {
+ LiveInterval &li = *intervals.front();
+ intervals.pop_front();
+
+ assert(!lis->intervalIsInOneMBB(li) &&
+ "Single interval in process worklist.");
+
+ processInterval(li);
+ }
+ }
+
+}
diff --git a/contrib/llvm/lib/CodeGen/Splitter.h b/contrib/llvm/lib/CodeGen/Splitter.h
new file mode 100644
index 000000000000..9fb1b8b30139
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Splitter.h
@@ -0,0 +1,101 @@
+//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITTER_H
+#define LLVM_CODEGEN_SPLITTER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ struct LiveRange;
+ class LoopSplit;
+ class MachineDominatorTree;
+ class MachineRegisterInfo;
+ class SlotIndexes;
+ class TargetInstrInfo;
+ class VNInfo;
+
+ class LoopSplitter : public MachineFunctionPass {
+ friend class LoopSplit;
+ public:
+ static char ID;
+
+ LoopSplitter() : MachineFunctionPass(ID) {
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+
+ private:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ MachineLoopInfo *mli;
+ MachineRegisterInfo *mri;
+ MachineDominatorTree *mdt;
+ SlotIndexes *sis;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ std::string fqn;
+ std::deque<LiveInterval*> intervals;
+
+ typedef std::pair<SlotIndex, SlotIndex> SlotPair;
+ typedef std::vector<SlotPair> LoopRanges;
+ typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
+ LoopRangeMap loopRangeMap;
+
+ void dumpLoopInfo(MachineLoop &loop);
+
+ void dumpOddTerminators();
+
+ void updateTerminators(MachineBasicBlock &mbb);
+
+ bool canInsertPreHeader(MachineLoop &loop);
+ MachineBasicBlock& insertPreHeader(MachineLoop &loop);
+
+ bool isCriticalEdge(MachineLoop::Edge &edge);
+ bool canSplitEdge(MachineLoop::Edge &edge);
+ MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
+
+ LoopRanges& getLoopRanges(MachineLoop &loop);
+ std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
+ MachineLoop &loop);
+
+ void dumpLoopRanges(MachineLoop &loop);
+
+ void processHeader(LoopSplit &split);
+ void processLoopExits(LoopSplit &split);
+ void processLoopUses(LoopSplit &split);
+
+ bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
+
+ void processInterval(LiveInterval &li);
+
+ void processIntervals();
+ };
+
+}
+
+#endif
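This patch registers LoopSplitter with INITIALIZE_PASS_BEGIN/END but adds no createLoopSplitter() factory, so the sketch below constructs the pass directly. The hook name and the assumption that a machine-level PassManagerBase is available at this point are illustrative only; how (or whether) the codegen pipeline actually schedules the pass is outside this diff.

#include "Splitter.h"            // LoopSplitter (added by this patch)
#include "llvm/PassManager.h"    // PassManagerBase

// Hypothetical hook that schedules the loop splitter; its required analyses
// (SlotIndexes, LiveIntervals, MachineLoopInfo, MachineDominatorTree) are
// pulled in through LoopSplitter::getAnalysisUsage().
static void addLoopSplittingPass(llvm::PassManagerBase &PM) {
  PM.add(new llvm::LoopSplitter());
}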
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 000000000000..d3cbd15b64e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,260 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// SSPBufferSize - The lower bound for a buffer to be considered for stack
+// smashing protection.
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
+
+namespace {
+ class StackProtector : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ Function *F;
+ Module *M;
+
+ DominatorTree* DT;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector() const;
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+ StackProtector(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+INITIALIZE_PASS(StackProtector, "stack-protector",
+ "Insert stack protectors", false, false)
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+
+ if (!RequiresStackProtector()) return false;
+
+ return InsertStackProtectors();
+}
+
+/// RequiresStackProtector - Check whether or not this function needs a stack
+/// protector based upon the stack protector level. The heuristic we use is to
+/// add a guard variable to functions that call alloca, and functions with
+/// buffers larger than SSPBufferSize bytes.
+bool StackProtector::RequiresStackProtector() const {
+ if (F->hasFnAttr(Attribute::StackProtectReq))
+ return true;
+
+ if (!F->hasFnAttr(Attribute::StackProtect))
+ return false;
+
+ const TargetData *TD = TLI->getTargetData();
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation())
+ // This is a call to alloca with a variable size. Emit stack
+ // protectors.
+ return true;
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+ // We apparently only care about character arrays.
+ if (!AT->getElementType()->isIntegerTy(8))
+ continue;
+
+ // If an array has at least SSPBufferSize bytes of allocated space,
+ // then we emit stack protectors.
+ if (SSPBufferSize <= TD->getTypeAllocSize(AT))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Value *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ Args, "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) : BB;
+ }
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ if (DT && FailBBDom)
+ DT->addNewBlock(FailBB, FailBBDom);
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+ "CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail",
+ Type::getVoidTy(F->getContext()), NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(F->getContext(), FailBB);
+ return FailBB;
+}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 000000000000..57cbe1ba5960
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,768 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+ cl::init(false), cl::Hidden,
+ cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class StackSlotColoring : public MachineFunctionPass {
+ bool ColorWithRegs;
+ LiveStacks* LS;
+ VirtRegMap* VRM;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+ // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+ // FIXME: This assumes PEI locates spill slots with smaller indices
+ // closest to the stack pointer / frame pointer. Therefore, smaller
+ // index == better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+ StackSlotColoring(bool RegColor) :
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char* getPassName() const {
+ return "Stack Slot Coloring";
+ }
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ bool PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg, const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF);
+ bool AllMemRefsCanBeUnfolded(int SS);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+ return new StackSlotColoring(RegColor);
+}
+
+namespace {
+ // IntervalSorter - Comparison predicate that sorts live intervals by
+ // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ if (!MI->isDebugValue())
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slot references with registers instead.
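+/// As a sketch of the intent: if every reference to a colored slot is a plain
+/// reload/spill or can be unfolded, and VirtRegMap still has an unused
+/// register of the right class, the slot's accesses are rewritten to use that
+/// register and the now-empty slot can be removed.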
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg) {
+ if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+ return false;
+
+ bool Changed = false;
+ DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ if (!UsedColors[SS] || li->weight < 20)
+ // If the weight is < 20, i.e. two references in a loop with depth 1,
+ // don't bother with it.
+ continue;
+
+ // These slots are allowed to share the same registers.
+ bool AllColored = true;
+ SmallVector<unsigned, 4> ColoredRegs;
+ for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+ int RSS = RevMap[SS][j];
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+ // If it's not colored to another stack slot, try coloring it
+ // to a "free" register.
+ if (!RC) {
+ AllColored = false;
+ continue;
+ }
+ unsigned Reg = VRM->getFirstUnusedRegister(RC);
+ if (!Reg) {
+ AllColored = false;
+ continue;
+ }
+ if (!AllMemRefsCanBeUnfolded(RSS)) {
+ AllColored = false;
+ continue;
+ } else {
+ DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
+ << TRI->getName(Reg) << '\n');
+ ColoredRegs.push_back(Reg);
+ SlotMapping[RSS] = Reg;
+ SlotIsReg.set(RSS);
+ Changed = true;
+ }
+ }
+
+ // Register and its sub-registers are no longer free.
+ while (!ColoredRegs.empty()) {
+ unsigned Reg = ColoredRegs.back();
+ ColoredRegs.pop_back();
+ VRM->setRegisterUsed(Reg);
+ // If reg is a callee-saved register, it will have to be spilled in
+ // the prologue.
+ MRI->setPhysRegUsed(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ VRM->setRegisterUsed(*AS);
+ MRI->setPhysRegUsed(*AS);
+ }
+ }
+ // This spill slot is dead after the rewrites
+ if (AllColored) {
+ MFI->RemoveStackObject(SS);
+ ++NumEliminated;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+
+ return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
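+/// For example, if fi#4's interval does not overlap any interval already
+/// assigned to color fi#1, fi#4 reuses fi#1 and fi#1's size and alignment are
+/// grown to cover both objects; otherwise fi#4 takes the next unused color.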
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector SlotIsReg(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Can we "color" a stack slot with an unused register?
+ Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ bool isReg = SlotIsReg[SS];
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ continue;
+
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ if (!isReg)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ else {
+ // Rewrite to use a register instead.
+ unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+ SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+ UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+ }
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = RefMIs[i];
+ if (TII->isLoadFromStackSlot(MI, SS) ||
+ TII->isStoreToStackSlot(MI, SS))
+ // Restore and spill will become copies.
+ return true;
+ if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+ return false;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (MO.isFI() && MO.getIndex() != SS)
+ // If it uses another frameindex, we can't currently unfold it.
+ return false;
+ }
+ }
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ // Update the operands.
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the memory references. This changes the MachineMemOperands
+ // directly. They may be in use by multiple instructions, however all
+ // instructions using OldFI are being rewritten to use NewFI.
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I)
+ if ((*I)->getValue() == OldSV)
+ (*I)->setValue(NewSV);
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
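+/// (Used below when a spill to a colored slot is rewritten to a register: if
+/// the stored value is killed by the spill, its def is renamed to NewReg so
+/// the copy/store can be elided.)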
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->begin())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ SmallVector<MachineOperand*, 4> Refs;
+ while (--MII != MBB->begin()) {
+ bool FoundDef = false; // Not counting 2address def.
+
+ Uses.clear();
+ const MCInstrDesc &MCID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isImplicit())
+ return false;
+
+ // Abort if the use is actually a sub-register def. We don't have enough
+ // information to figure out if it is really legal.
+ if (MO.getSubReg() || MII->isSubregToReg())
+ return false;
+
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+ if (RC && !RC->contains(NewReg))
+ return false;
+
+ if (MO.isUse()) {
+ Uses.push_back(&MO);
+ } else {
+ Refs.push_back(&MO);
+ if (!MII->isRegTiedToUseOperand(i))
+ FoundDef = true;
+ }
+ } else if (TRI->regsOverlap(Reg, NewReg)) {
+ return false;
+ } else if (TRI->regsOverlap(Reg, OldReg)) {
+ if (!MO.isUse() || !MO.isKill())
+ return false;
+ }
+ }
+
+ if (FoundDef) {
+ // Found non-two-address def. Stop here.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+ Refs[i]->setReg(NewReg);
+ return true;
+ }
+
+ // Two-address uses must be updated as well.
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Refs.push_back(Uses[i]);
+ }
+ return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->end())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ while (++MII != MBB->end()) {
+ bool FoundKill = false;
+ const MCInstrDesc &MCID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isDef() || MO.isImplicit())
+ return false;
+
+ // Abort if the use is actually a sub-register use. We don't have enough
+ // information to figure out if it is really legal.
+ if (MO.getSubReg())
+ return false;
+
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+ if (RC && !RC->contains(NewReg))
+ return false;
+ if (MO.isKill())
+ FoundKill = true;
+
+ Uses.push_back(&MO);
+ } else if (TRI->regsOverlap(Reg, NewReg) ||
+ TRI->regsOverlap(Reg, OldReg))
+ return false;
+ }
+ if (FoundKill) {
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Uses[i]->setReg(NewReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
+/// folded memory references and replacing those references with register
+/// references instead.
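+/// Sketch of the cases handled below: a reload from OldFI becomes a COPY from
+/// Reg (or is deleted if the value can be propagated forward), a spill to
+/// OldFI becomes a COPY into Reg (or is deleted via backward propagation),
+/// and any other folded reference is unfolded with unfoldMemoryOperand so it
+/// operates on Reg directly.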
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg,
+ const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF) {
+ MachineBasicBlock *MBB = MI->getParent();
+ if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+ if (PropagateForward(MI, MBB, DstReg, Reg)) {
+ DEBUG(dbgs() << "Eliminated load: ");
+ DEBUG(MI->dump());
+ ++NumLoadElim;
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DstReg).addReg(Reg);
+ ++NumRegRepl;
+ }
+
+ if (!Defs.count(Reg)) {
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+ if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+ DEBUG(dbgs() << "Eliminated store: ");
+ DEBUG(MI->dump());
+ ++NumStoreElim;
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(SrcReg);
+ ++NumRegRepl;
+ }
+
+ // Remember reg has been defined in MBB.
+ Defs.insert(Reg);
+ } else {
+ SmallVector<MachineInstr*, 4> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+ (void)Success; // Silence compiler warning.
+ assert(Success && "Failed to unfold!");
+ MachineInstr *NewMI = NewMIs[0];
+ MBB->insert(MI, NewMI);
+ ++NumRegRepl;
+
+ if (NewMI->readsRegister(Reg)) {
+ if (!Defs.count(Reg))
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ }
+ MBB->erase(MI);
+}
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
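+/// For example (pseudo machine code), in the pair
+///
+///   %r0 = LOAD <fi#3>
+///   STORE <fi#3>, %r0
+///
+/// the store writes back the value just loaded and is deleted; if the store
+/// was also the last use (kill) of %r0, the load is deleted as well.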
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = llvm::next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n';
+ });
+
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ LS = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots < 2) {
+ if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+ // Nothing to do!
+ return false;
+ }
+
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.callsSetJmp())
+ return false;
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 000000000000..227eb47e6827
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,829 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
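+//
+// As a rough illustration: for "x3 = phi(x1 from BB1, x2 from BB2)", a naive
+// lowering would insert a copy to x3 at the end of both BB1 and BB2. This
+// pass instead places x1, x2 and x3 in one congruence class and only
+// materializes copies for members that are later proven to interfere
+// (isolated).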
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure. We steal two bits from the parent
+ /// pointer. One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
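+ ///
+ /// Encoding sketch: parent is a PointerIntPair<Node*, 2>, so
+ /// parent.getPointer() is the union-find parent while parent.getInt()
+ /// holds kRegisterIsolatedFlag and/or kPHIIsolatedFlag in its two low bits.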
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
+ };
+
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
+
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
+
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ // Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ // Lowers a PHI instruction, inserting copies of the source and destination
+ // registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ // everywhere that Reg appears. Requires Reg and NewReg to have non-
+ // overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a MachineInstr* and a virtual register, which
+ // is the operand of that PHI corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
+ };
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
+ }
+
+ LiveIntervals *LI;
+ };
+} // namespace
+
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert (RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
+ }
+ }
+ return NULL;
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
+ }
+ }
+ }
+
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
+
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
+ }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+ // the preserved equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+ }
+ }
+ }
+
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
+ }
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
+ }
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed to not overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
+ }
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
+ }
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (isLiveOut)
+ continue;
+
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
+ }
+
+ LI->renumber();
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ if (RegNodeMap.count(Reg))
+ return;
+ RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
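+/// getLeader - Return the representative of this node's congruence class,
+/// compressing the traversed path (re-pointing nodes at their grandparents)
+/// along the way.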
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
+ }
+
+ return Parent;
+}
+
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
+}
+
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
+ }
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
+ }
+ return 0;
+}
+
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
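+/// A tiny worked example (a and b are virtual registers in one congruence
+/// class): if def(a) dominates def(b) and a is still live at def(b), the two
+/// interfere and b is isolated while a stays the current dominating parent;
+/// if they do not interfere, b becomes the current dominating parent and a is
+/// remembered as b's immediate dominating parent, to be restored when the
+/// traversal leaves b's subtree.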
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass sometimes is not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
+ } else {
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
+ }
+ }
+ }
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+ // is necessary because a PHI's use of each operand is logically contained
+ // in the corresponding predecessor block. The def of a PHI's destination
+ // register is processed along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+ break;
+ }
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
+ else
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
+ }
+ }
+}
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
+ continue;
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex());
+ assert(SrcVNI);
+ SrcVNI->setHasPHIKill(true);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
+ } else {
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
+ }
+
+ if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
+ InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
+ }
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
+ }
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->setIsPHIDef(true);
+
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getDefIndex(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ CopyInstr,
+ LI->getVNInfoAllocator());
+ CopyVNI->setIsPHIDef(true);
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getDefIndex(),
+ CopyVNI));
+
+ // Adjust DestReg's live interval to adjust for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getDefIndex();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
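+ // Create a copy of the value number the first time it is seen; subsequent
+ // ranges carrying the same value number reuse that copy.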
+ VNInfo *&NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+ VNMap[OldVN] = NewVN;
+ }
+
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
+ }
+
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
+}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 000000000000..6b801cbf6e1e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,938 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumTails , "Number of tails duplicated");
+STATISTIC(NumTailDups , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs , "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"),
+ cl::init(2), cl::Hidden);
+
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
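+// Each AvailableValsTy entry is a (predecessor block, new virtual register)
+// pair recording where a value of the original register is available.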
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
+namespace {
+ /// TailDuplicatePass - Perform tail duplication.
+ class TailDuplicatePass : public MachineFunctionPass {
+ bool PreRegAlloc;
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+ MachineRegisterInfo *MRI;
+
+ // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ SmallVector<unsigned, 16> SSAUpdateVRs;
+
+ // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+ // source virtual registers.
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+
+ public:
+ static char ID;
+ explicit TailDuplicatePass(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Tail Duplication"; }
+
+ private:
+ void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB);
+ void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &UsedByPhi,
+ bool Remove);
+ void DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi);
+ void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*, 8> &Succs);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF);
+
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ };
+
+ char TailDuplicatePass::ID = 0;
+}
+
+FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
+ return new TailDuplicatePass(PreRegAlloc);
+}
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ bool MadeChange = false;
+ while (TailDuplicateBlocks(MF))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
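+/// VerifyPHIs - Sanity-check the PHI nodes in every non-entry block: each PHI
+/// must have an incoming entry for every predecessor and, if CheckExtra is
+/// set, no entries from blocks that are not predecessors.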
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor BB#"
+ << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it be SSAUpdate's job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
+
+ MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
+ return MadeChange;
+}
+
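+/// isDefLiveOut - Return true if Reg has a non-debug use outside BB, i.e. its
+/// value is needed beyond this block.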
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
+ if (UseMI->getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
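+/// getPHISrcRegOpIdx - Return the operand index of the PHI source register
+/// coming from SrcBB, or 0 if SrcBB is not a source of MI (operand 0 is the
+/// PHI's def, so 0 never denotes a source).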
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i+1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for (MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register that's contributed by PredBB and update the SSA
+/// update map.
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ bool Remove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+ // Insert a copy from source to the end of the block. The def register is the
+ // available value liveout of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, SrcReg));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx+1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
+ MachineInstr *NewMI = TII->duplicate(MI, MF);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, NewReg));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ AddSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end())
+ MO.setReg(VI->second);
+ }
+ }
+ PredBB->insert(PredBB->end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*,8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or an earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i+1);
+ II->RemoveOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
+ DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correctly
+ // recompute SSA. In that case, avoid adding a dummy extra argument
+ // to this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ unsigned SrcReg = LI->second[j].second;
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ II->addOperand(MachineOperand::CreateReg(Reg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ }
+ }
+ if (Idx != 0) {
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
+ }
+ }
+}
+
+/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+bool
+TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
+ ++I) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && I->getDesc().isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && I->getDesc().isCall())
+ return false;
+
+ if (!I->isPHI() && !I->isDebugValue())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->getDesc().isUnconditionalBranch();
+}
+
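+/// bothUsedInPHI - Return true if some successor of A is also in SuccsB and
+/// begins with a PHI node.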
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
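+ // Skip predecessors that have a landing-pad successor; we do not rewrite
+ // their branches.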
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect branches that targeted TailBB to NewTarget.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ while (I != TailBB->end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ }
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+
+ // Simplify
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
+ NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ if (MI->getParent())
+ MI->eraseFromParent();
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first)
+ .addReg(CopyInfos[i].second));
+ }
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+
+ // Handle the nasty case in that we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ // 1 -> 2 <-> 3 |
+ // \ |
+ // \---> rest |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ // 12 -> 3 <-> 2 -> rest |
+ // \ / |
+ // \----->-----/ |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+ // EH edges
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+ }
+
+ return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 000000000000..86e71d8ccbb6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,443 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class, which just provides default
+// implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If MBB isn't immediately before NewDest, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI->getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+
+ assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(Idx1).getReg();
+ unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ bool ChangeReg0 = false;
+ if (HasDef && MI->getOperand(0).getReg() == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = HasDef
+ ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
+ bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ if (HasDef)
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill));
+ else
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill));
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(Idx2).setReg(Reg1);
+ MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = MCID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
+
+
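+/// PredicateInstruction - The default implementation rewrites each predicate
+/// operand of MI in place with the corresponding operand from Pred and returns
+/// true if any operand was changed.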
+bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
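+/// reMaterialize - The default implementation clones Orig, rewrites the
+/// clone's definition to DestReg (with subregister index SubIdx), and inserts
+/// the clone before I.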
+void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
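+/// produceSameValue - By default, two instructions are considered to produce
+/// the same value if they are identical, ignoring virtual register defs.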
+bool
+TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->getDesc().isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+ assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ // Ask the target to do the actual folding.
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ NewMI = MBB.insert(MI, NewMI);
+
+ // Copy the memoperands from the load to the folded instruction.
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+
+ return NewMI;
+}
+
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.def_empty(Reg))
+ return false;
+ BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
+ if (AllocatableRegs.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI.def_empty(AliasReg))
+ return false;
+ if (AllocatableRegs.test(AliasReg))
+ return false;
+ }
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def, and that in the first operand.
+ if (MO.isDef() != (i == 0))
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const{
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 000000000000..a3c562013b59
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,1160 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+ : TargetLoweringObjectFile(),
+ TLSDataSection(0),
+ TLSBSSSection(0),
+ DataRelSection(0),
+ DataRelLocalSection(0),
+ DataRelROSection(0),
+ DataRelROLocalSection(0),
+ MergeableConst4Section(0),
+ MergeableConst8Section(0),
+ MergeableConst16Section(0) {
+}
+
+void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ BSSSection =
+ getContext().getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ TextSection =
+ getContext().getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC,
+ SectionKind::getText());
+
+ DataSection =
+ getContext().getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ ReadOnlySection =
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ TLSDataSection =
+ getContext().getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+
+ TLSBSSSection =
+ getContext().getELFSection(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+
+ DataRelSection =
+ getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ DataRelLocalSection =
+ getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
+
+ DataRelROSection =
+ getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+
+ DataRelROLocalSection =
+ getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+
+ MergeableConst4Section =
+ getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
+
+ MergeableConst8Section =
+ getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
+
+ MergeableConst16Section =
+ getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
+
+ StaticCtorSection =
+ getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ StaticDtorSection =
+ getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Exception Handling Sections.
+
+ // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
+ // it contains relocatable pointers. In PIC mode, this is probably a big
+ // runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ // Debug Info Sections.
+ DwarfAbbrevSection =
+ getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
+ return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+}
+
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+ Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+  case dwarf::DW_EH_PE_absptr:
+    return Mang->getSymbol(GV);
+  case dwarf::DW_EH_PE_pcrel: {
+    return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+                                          Mang->getSymbol(GV)->getName());
+  }
+ }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ StringRef Prefix = ".data.";
+ NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ const MCSection *Sec = getContext().getELFSection(NameData,
+ ELF::SHT_PROGBITS,
+ Flags,
+ SectionKind::getDataRel(),
+ 0, Label->getName());
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(8);
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ Streamer.EmitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ unsigned Size = TM.getTargetData()->getPointerSize();
+ Streamer.EmitSymbolValue(Sym, Size);
+}
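+// Illustrative sketch of what emitPersonalityValue() above produces (exact
+// directive spelling and order depend on the streamer; __gxx_personality_v0 is
+// just an example personality): a hidden, weak, 8-byte object in its own
+// COMDAT group, holding the personality address, roughly:
+//
+//   .section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
+//   .hidden DW.ref.__gxx_personality_v0
+//   .weak   DW.ref.__gxx_personality_v0
+//   .align  8
+//   .type   DW.ref.__gxx_personality_v0, @object
+//   .size   DW.ref.__gxx_personality_v0, 8
+// DW.ref.__gxx_personality_v0:
+//   .quad   __gxx_personality_v0        (pointer-sized; .quad on 64-bit targets)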
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+  // N.B.: The defaults used here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce
+ // .section .eh_frame,"a",@progbits
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
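+// For example, a global that names an explicit ".tbss.mine" section (a
+// hypothetical name) is classified as ThreadBSS here, so the explicit section
+// is emitted as SHT_NOBITS with the TLS flag set (see getELFSectionType() and
+// getELFSectionFlags() below).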
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
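+// For example (derived from the mapping above): a ThreadData kind yields
+// SHF_ALLOC | SHF_WRITE | SHF_TLS, matching the .tdata section created in
+// Initialize(), and a Mergeable1ByteCString kind yields
+// SHF_ALLOC | SHF_MERGE | SHF_STRINGS, matching the .rodata.str1.* sections
+// chosen in SelectSectionForGlobal().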
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind);
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".text.";
+ if (Kind.isReadOnly()) return ".rodata.";
+
+ if (Kind.isThreadData()) return ".tdata.";
+ if (Kind.isThreadBSS()) return ".tbss.";
+
+ if (Kind.isDataNoRel()) return ".data.";
+ if (Kind.isDataRelLocal()) return ".data.rel.local.";
+ if (Kind.isDataRel()) return ".data.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro.";
+}
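+// For example, with -ffunction-sections a function whose mangled symbol is
+// 'foo' ends up in '.text.foo', and with -fdata-sections a relocation-free
+// writable global 'bar' ends up in '.data.bar' ('foo' and 'bar' are just
+// illustrative names; SelectSectionForGlobal() below appends the mangled
+// symbol name to the prefix returned here).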
+
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+  // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ !Kind.isCommon() && !Kind.isBSS()) {
+    const char *Prefix = getSectionPrefixForGlobal(Kind);
+
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ return getContext().getELFSection(Name.str(),
+ getELFSectionType(Name.str(), Kind),
+ Flags, Kind, 0, Group);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ unsigned Encoding, MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
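+// Illustrative example: with an indirect, pc-relative encoding, a reference to
+// a type-info global such as _ZTIi is lowered to a pc-relative reference to a
+// stub symbol '_ZTIi.DW.stub' (the '.DW.stub' suffix comes from the code
+// above), and the stub entry recorded in MachineModuleInfoELF is what the
+// AsmPrinter later emits as a pointer-sized slot holding the address of _ZTIi.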
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile(),
+ TLSDataSection(0),
+ TLSBSSSection(0),
+ TLSTLVSection(0),
+ TLSThreadInitSection(0),
+ CStringSection(0),
+ UStringSection(0),
+ TextCoalSection(0),
+ ConstTextCoalSection(0),
+ ConstDataSection(0),
+ DataCoalSection(0),
+ DataCommonSection(0),
+ DataBSSSection(0),
+ FourByteConstantSection(0),
+ EightByteConstantSection(0),
+ SixteenByteConstantSection(0),
+ LazySymbolPointerSection(0),
+ NonLazySymbolPointerSection(0) {
+}
+
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ IsFunctionEHFrameSymbolPrivate = false;
+ SupportsWeakOmittedEHFrame = false;
+
+ // .comm doesn't support alignment before Leopard.
+ Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
+ CommDirectiveSupportsAlignment = false;
+
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ TextSection // .text
+ = getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ DataSection // .data
+ = getContext().getMachOSection("__DATA", "__data", 0,
+ SectionKind::getDataRel());
+
+ TLSDataSection // .tdata
+ = getContext().getMachOSection("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+ SectionKind::getDataRel());
+ TLSBSSSection // .tbss
+ = getContext().getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ SectionKind::getThreadBSS());
+
+ // TODO: Verify datarel below.
+ TLSTLVSection // .tlv
+ = getContext().getMachOSection("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
+ SectionKind::getDataRel());
+
+ TLSThreadInitSection
+ = getContext().getMachOSection("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
+
+ CStringSection // .cstring
+ = getContext().getMachOSection("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
+ UStringSection
+ = getContext().getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
+ FourByteConstantSection // .literal4
+ = getContext().getMachOSection("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
+ EightByteConstantSection // .literal8
+ = getContext().getMachOSection("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
+
+ // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
+ // to using it in -static mode.
+ SixteenByteConstantSection = 0;
+ if (TM.getRelocationModel() != Reloc::Static &&
+ TM.getTargetData()->getPointerSize() == 32)
+ SixteenByteConstantSection = // .literal16
+ getContext().getMachOSection("__TEXT", "__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
+
+ ReadOnlySection // .const
+ = getContext().getMachOSection("__TEXT", "__const", 0,
+ SectionKind::getReadOnly());
+
+ TextCoalSection
+ = getContext().getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = getContext().getMachOSection("__TEXT", "__const_coal",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getReadOnly());
+ ConstDataSection // .const_data
+ = getContext().getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
+ DataCoalSection
+ = getContext().getMachOSection("__DATA","__datacoal_nt",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
+ DataCommonSection
+ = getContext().getMachOSection("__DATA","__common",
+ MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+ DataBSSSection
+ = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+
+ LazySymbolPointerSection
+ = getContext().getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ NonLazySymbolPointerSection
+ = getContext().getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection
+ = getContext().getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = getContext().getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getDataRel());
+ } else {
+ StaticCtorSection
+ = getContext().getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = getContext().getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ }
+
+ // Exception Handling.
+ LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
+ SectionKind::getReadOnlyWithRel());
+
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ CompactUnwindSection =
+ getContext().getMachOSection("__LD", "__compact_unwind",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getReadOnly());
+
+ // Debug Information.
+ DwarfAbbrevSection =
+ getContext().getMachOSection("__DWARF", "__debug_abbrev",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getContext().getMachOSection("__DWARF", "__debug_info",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getContext().getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getContext().getMachOSection("__DWARF", "__debug_frame",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getContext().getMachOSection("__DWARF", "__debug_pubnames",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getContext().getMachOSection("__DWARF", "__debug_pubtypes",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getContext().getMachOSection("__DWARF", "__debug_str",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getContext().getMachOSection("__DWARF", "__debug_loc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getContext().getMachOSection("__DWARF", "__debug_aranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getContext().getMachOSection("__DWARF", "__debug_ranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getContext().getMachOSection("__DWARF", "__debug_macinfo",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ getContext().getMachOSection("__DWARF", "__debug_inlined",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
+ TLSExtraDataSection = TLSTLVSection;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const {
+ return getContext().getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
+ "' has an invalid section specifier '" + GV->getSection()+
+ "': " + ErrorCode + ".");
+ // Fall back to dropping it into the data section.
+ return DataSection;
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+  // externally visible label; this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+  // __DATA,__common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+  // Put zero initialized globals with local linkage in the __DATA,__bss
+  // section with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF()
+ : TargetLoweringObjectFile(),
+ DrectveSection(0),
+ PDataSection(0),
+ XDataSection(0) {
+}
+
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ TextSection =
+ getContext().getCOFFSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE |
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection =
+ getContext().getCOFFSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection =
+ getContext().getCOFFSection(".rdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ StaticCtorSection =
+ getContext().getCOFFSection(".ctors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getCOFFSection(".dtors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+
+ // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+ // though it contains relocatable pointers. In PIC mode, this is probably a
+ // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getContext().getCOFFSection(".gcc_except_table",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ // Debug info.
+ DwarfAbbrevSection =
+ getContext().getCOFFSection(".debug_abbrev",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getContext().getCOFFSection(".debug_info",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getContext().getCOFFSection(".debug_line",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getContext().getCOFFSection(".debug_frame",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getContext().getCOFFSection(".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getContext().getCOFFSection(".debug_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getContext().getCOFFSection(".debug_str",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getContext().getCOFFSection(".debug_loc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getContext().getCOFFSection(".debug_aranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getContext().getCOFFSection(".debug_ranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getContext().getCOFFSection(".debug_macinfo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
+ DrectveSection =
+ getContext().getCOFFSection(".drectve",
+ COFF::IMAGE_SCN_LNK_INFO,
+ SectionKind::getMetadata());
+
+ PDataSection =
+ getContext().getCOFFSection(".pdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+
+ XDataSection =
+ getContext().getCOFFSection(".xdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const {
+ return getContext().getCOFFSection(".eh_frame",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getWin64EHFuncTableSection(
+ StringRef suffix) const {
+ if (suffix == "")
+ return PDataSection;
+ return getContext().getCOFFSection((".pdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getWin64EHTableSection(
+ StringRef suffix) const {
+ if (suffix == "")
+ return XDataSection;
+ return getContext().getCOFFSection((".xdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE;
+  else if (K.isBSS())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
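+// For example (derived from the mapping above): a ReadOnly kind yields
+// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, matching the .rdata
+// section set up in Initialize(), while a writable DataRel kind additionally
+// gets IMAGE_SCN_MEM_WRITE.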
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ return getContext().getCOFFSection(GV->getSection(),
+ getCOFFSectionFlags(Kind),
+ Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$";
+  if (Kind.isBSS())
+ return ".bss$";
+ if (Kind.isWriteable())
+ return ".data$";
+ return ".rdata$";
+}
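+// For example, a linkonce/weak function whose mangled symbol is '_foo' (an
+// illustrative name; the leading mangling character is dropped below) is
+// placed in '.text$foo' and marked IMAGE_SCN_LNK_COMDAT with
+// IMAGE_COMDAT_SELECT_ANY, so the linker keeps a single copy.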
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name.str(), Characteristics,
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ return getDataSection();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 000000000000..6d6244e4f879
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1530 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class TwoAddressInstructionPass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ AliasAnalysis *AA;
+
+    // DistanceMap - Keep track of the distance of an MI from the start of
+    // the current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // SrcRegMap - A map from virtual registers to physical registers which
+ // are likely targets to be coalesced to due to copies from physical
+ // registers to virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to physical registers which
+ // are likely targets to be coalesced to due to copies to physical
+ // registers from virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+    /// RegSequences - Keep track of the list of REG_SEQUENCE instructions
+    /// seen during the initial walk of the machine function.
+ SmallVector<MachineInstr*, 16> RegSequences;
+
+ bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc);
+
+ bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef);
+
+ MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+
+ bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
+
+ typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
+ bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist);
+ bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi, unsigned Dist);
+
+ bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+
+ /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+    /// of the de-ssa process. This replaces the sources of a REG_SEQUENCE
+    /// with sub-register references of the register it defines.
+ bool EliminateRegSequences();
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+ };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+ MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill()) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+ const MCInstrDesc &MCID = UseMI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg &&
+ (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+ // Earlier use is a two-address one.
+ return true;
+ }
+ return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+ const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc) {
+ bool OtherUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = UseMO.getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end() && DI->second == Loc)
+ continue; // Current use.
+ OtherUse = true;
+ // There is at least one other use in the MBB that will clobber the
+ // register.
+ if (isTwoAddrUse(UseMI, Reg))
+ return true;
+ }
+ }
+
+ // If other uses in MBB are not two-address uses, then don't remat.
+ if (OtherUse)
+ return false;
+
+  // No other uses in the same block; remat if it's defined in the same
+ // block so it does not unnecessarily extend the live range.
+ return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+ MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+ MachineBasicBlock *MBB,
+ unsigned Dist) {
+ unsigned LastUseDist = 0;
+ MachineInstr *LastUse = 0;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (DI->second >= Dist)
+ continue;
+
+ if (MO.isUse() && DI->second > LastUseDist) {
+ LastUse = DI->first;
+ LastUseDist = DI->second;
+ }
+ }
+ return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy, insert_subreg or
+/// subreg_to_reg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
+
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ if (!DefMI->killsRegister(Reg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (llvm::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm()
+ ? MI.getNumOperands() : MCID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single in-basic
+/// block use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // None or more than one use.
+ return 0;
+ MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist) {
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!MI->killsRegister(regC))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ unsigned ToRegB = getMappedReg(regB, DstRegMap);
+ unsigned ToRegC = getMappedReg(regC, DstRegMap);
+ if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+ ((!FromRegC && !ToRegC) ||
+ regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ regsAreCompatible(FromRegC, ToRegB, TRI)))
+ return true;
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ return true;
+
+  // Since there are no intervening uses of either register, commute if the
+  // def of regC is closer; its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+  // If the commute created a new instruction, update live variables.
+ if (NewMI != MI) {
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(RegC, MI, NewMI);
+
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ }
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
+ MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+ if (NewMI) {
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+      // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+ Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
+ }
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
+ }
+
+ return false;
+}
+
+/// ScanUses - Scan forward recursively looking only at uses; update the maps
+/// if a use is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI))
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ ScanUses(DstReg, MBB, Processed);
+ }
+
+ Processed.insert(MI);
+ return;
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI,
+ const TargetInstrInfo *TII,
+ SmallVector<unsigned, 4> &Kills) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall())
+ return false;
+ if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ return false;
+
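+ // Every def must already be marked dead; collect the kill flags on the uses
+ // so the caller can transfer them to earlier instructions.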
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ Kills.push_back(MO.getReg());
+ }
+ return true;
+}
+
+/// canUpdateDeletedKills - Check if all the registers listed in Kills are
+/// killed by instructions in MBB preceding the current instruction at
+/// position Dist. If so, return true and record information about the
+/// preceding kills in NewKills.
+bool TwoAddressInstructionPass::
+canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist) {
+ while (!Kills.empty()) {
+ unsigned Kill = Kills.back();
+ Kills.pop_back();
+ if (TargetRegisterInfo::isPhysicalRegister(Kill))
+ return false;
+
+ MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
+ if (!LastKill)
+ return false;
+
+ bool isModRef = LastKill->definesRegister(Kill);
+ NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
+ LastKill));
+ }
+ return true;
+}
+
+/// DeleteUnusedInstr - If an instruction with a tied register operand can
+/// be safely deleted, just delete it.
+bool
+TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned Dist) {
+ // Check if the instruction has no side effects and if all its defs are dead.
+ SmallVector<unsigned, 4> Kills;
+ if (!isSafeToDelete(mi, TII, Kills))
+ return false;
+
+ // If this instruction kills some virtual registers, we need to
+ // update the kill information. If it's not possible to do so,
+ // then bail out.
+ SmallVector<NewKill, 4> NewKills;
+ if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
+ return false;
+
+ if (LV) {
+ while (!NewKills.empty()) {
+ MachineInstr *NewKill = NewKills.back().second;
+ unsigned Kill = NewKills.back().first.first;
+ bool isDead = NewKills.back().first.second;
+ NewKills.pop_back();
+ if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+ if (isDead)
+ LV->addVirtualRegisterDead(Kill, NewKill);
+ else
+ LV->addVirtualRegisterKilled(Kill, NewKill);
+ }
+ }
+ }
+
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = nmi;
+ return true;
+}
+
+/// TryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if the tied operands
+/// are eliminated altogether.
+bool TwoAddressInstructionPass::
+TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ const MCInstrDesc &MCID = mi->getDesc();
+ unsigned regA = mi->getOperand(DstIdx).getReg();
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+ // If regA is dead and the instruction can be deleted, just delete
+ // it so it doesn't clobber regB.
+ bool regBKilled = isKilled(*mi, regB, MRI, TII);
+ if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
+ ++NumDeletes;
+ return true; // Done with this instruction.
+ }
+
+ // Check if it is profitable to commute the operands.
+ unsigned SrcOp1, SrcOp2;
+ unsigned regC = 0;
+ unsigned regCIdx = ~0U;
+ bool TryCommute = false;
+ bool AggressiveCommute = false;
+ if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (SrcIdx == SrcOp1)
+ regCIdx = SrcOp2;
+ else if (SrcIdx == SrcOp2)
+ regCIdx = SrcOp1;
+
+ if (regCIdx != ~0U) {
+ regC = mi->getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ // If C dies but B does not, swap the B and C operands.
+ // This makes the live ranges of A and C joinable.
+ TryCommute = true;
+ else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ TryCommute = true;
+ AggressiveCommute = true;
+ }
+ }
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return false;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ ScanUses(regA, &*mbbi, Processed);
+
+ if (MCID.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
+ // Try to convert it.
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+ // because it's preferable to schedule a load rather than a register copy.
+ if (MCID.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
+ MachineFunction &MF = *mbbi->getParent();
+
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ const TargetRegisterClass *RC =
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
+ NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ mbbi->insert(mi, NewMIs[0]);
+ mbbi->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformSuccess =
+ TryInstructionTransform(NewMI, mi, mbbi,
+ NewSrcIdx, NewDstIdx, Dist, Processed);
+ if (TransformSuccess ||
+ NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ }
+ mi->eraseFromParent();
+ mi = NewMIs[1];
+ if (TransformSuccess)
+ return true;
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n");
+ const TargetMachine &TM = MF.getTarget();
+ MRI = &MF.getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool MadeChange = false;
+
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+
+ // ReMatRegs - Keep track of the registers whose defs are remat'ed.
+ BitVector ReMatRegs(MRI->getNumVirtRegs());
+
+ typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
+ TiedOperandMap;
+ TiedOperandMap TiedOperands(4);
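+ // Maps a tied source register to the (use index, def index) operand pairs
+ // that reference it within the current instruction.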
+
+ SmallPtrSet<MachineInstr*, 8> Processed;
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ if (mi->isDebugValue()) {
+ mi = nmi;
+ continue;
+ }
+
+ // Remember REG_SEQUENCE instructions; we'll deal with them later.
+ if (mi->isRegSequence())
+ RegSequences.push_back(&*mi);
+
+ const MCInstrDesc &MCID = mi->getDesc();
+ bool FirstTied = true;
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ ProcessCopy(&*mi, &*mbbi, Processed);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ unsigned NumOps = mi->isInlineAsm()
+ ? mi->getNumOperands() : MCID.getNumOperands();
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+
+ if (FirstTied) {
+ FirstTied = false;
+ ++NumTwoAddressInstrs;
+ DEBUG(dbgs() << '\t' << *mi);
+ }
+
+ assert(mi->getOperand(SrcIdx).isReg() &&
+ mi->getOperand(SrcIdx).getReg() &&
+ mi->getOperand(SrcIdx).isUse() &&
+ "two address instruction invalid");
+
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+ TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+
+ // If the registers are already equal, nothing needs to be done.
+ if (mi->getOperand(SrcIdx).getReg() ==
+ mi->getOperand(DstIdx).getReg())
+ break; // Done with this instruction.
+
+ if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+ Processed))
+ break; // The tied operands have been eliminated.
+ }
+
+ bool IsEarlyClobber = false;
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned regB = OI->first;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = mi->getOperand(DstIdx);
+ unsigned regA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
+ // Grab regB from the instruction because it may have changed if the
+ // instruction was commuted.
+ regB = mi->getOperand(SrcIdx).getReg();
+
+ if (regA == regB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = regA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !mi->getOperand(i).isReg() ||
+ mi->getOperand(i).getReg() != regA);
+#endif
+
+ // Emit a copy or rematerialize the definition.
+ const TargetRegisterClass *rc = MRI->getRegClass(regB);
+ MachineInstr *DefMI = MRI->getVRegDef(regB);
+ // If it's safe and profitable, remat the definition instead of
+ // copying it.
+ if (DefMI &&
+ DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isSafeToReMat(TII, AA, regB) &&
+ isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
+ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
+ unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
+ ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
+ ++NumReMats;
+ } else {
+ BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ regA).addReg(regB);
+ }
+
+ MachineBasicBlock::iterator prevMI = prior(mi);
+ // Update DistanceMap.
+ DistanceMap.insert(std::make_pair(prevMI, Dist));
+ DistanceMap[mi] = ++Dist;
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
+
+ MachineOperand &MO = mi->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(regA);
+ }
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
+ LV->addVirtualRegisterKilled(regB, prior(mi));
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+
+ // Schedule the source copy / remat inserted to form the two-address
+ // instruction. FIXME: Does it matter that the distance map may not be
+ // accurate after it's scheduled?
+ TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+
+ MadeChange = true;
+
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ mi = nmi;
+ }
+ }
+
+ // Some remat'ed instructions are dead.
+ for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->use_nodbg_empty(VReg)) {
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ DefMI->eraseFromParent();
+ }
+ }
+
+ // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
+ // SSA form. It's now safe to de-SSA.
+ MadeChange |= EliminateRegSequences();
+
+ return MadeChange;
+}
+
+static void UpdateRegSequenceSrcs(unsigned SrcReg,
+ unsigned DstReg, unsigned SubIdx,
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo &TRI) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ MO.substVirtReg(DstReg, SubIdx, TRI);
+ }
+}
+
+/// CoalesceExtSubRegs - If a number of the REG_SEQUENCE's sources are
+/// EXTRACT_SUBREGs from the same register into the same virtual register,
+/// just with different sub-register indices, attempt to combine the
+/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
+/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
+/// reg1026 to reg1029.
+void
+TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
+ unsigned DstReg) {
+ SmallSet<unsigned, 4> Seen;
+ for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
+ unsigned SrcReg = Srcs[i];
+ if (!Seen.insert(SrcReg))
+ continue;
+
+ // Check that the instructions are all in the same basic block.
+ MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
+ MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
+ if (SrcDefMI->getParent() != DstDefMI->getParent())
+ continue;
+
+ // If there are no other uses than copies which feed into
+ // the reg_sequence, then we might be able to coalesce them.
+ bool CanCoalesce = true;
+ SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
+ CanCoalesce = false;
+ break;
+ }
+ SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
+ DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
+ }
+
+ if (!CanCoalesce || SrcSubIndices.size() < 2)
+ continue;
+
+ // Check that the source subregisters can be combined.
+ std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
+ unsigned NewSrcSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
+ NewSrcSubIdx))
+ continue;
+
+ // Check that the destination subregisters can also be combined.
+ std::sort(DstSubIndices.begin(), DstSubIndices.end());
+ unsigned NewDstSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
+ NewDstSubIdx))
+ continue;
+
+ // If neither source nor destination can be combined to the full register,
+ // just give up. This could be improved if it ever matters.
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
+ continue;
+
+ // Now that we know that all the uses are extract_subregs and that those
+ // subregs can somehow be combined, scan all the extract_subregs again to
+ // make sure the subregs are in the right order and can be composed.
+ MachineInstr *SomeMI = 0;
+ CanCoalesce = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ assert(UseMI->isCopy());
+ unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
+ unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
+ assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
+ if ((NewDstSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
+ (NewSrcSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
+ CanCoalesce = false;
+ break;
+ }
+ // Keep track of one of the uses.
+ SomeMI = UseMI;
+ }
+ if (!CanCoalesce)
+ continue;
+
+ // Insert a copy to replace the original.
+ MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
+ SomeMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(SrcReg, 0, NewSrcSubIdx);
+
+ // Remove all the old extract instructions.
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI == CopyMI)
+ continue;
+ assert(UseMI->isCopy());
+ // Move any kills to the new copy or extract instruction.
+ if (UseMI->getOperand(1).isKill()) {
+ CopyMI->getOperand(1).setIsKill();
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
+ }
+ UseMI->eraseFromParent();
+ }
+ }
+}
+
+static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
+ MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI != RegSeq && UseMI->isRegSequence())
+ return true;
+ }
+ return false;
+}
+
+/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+/// of the de-SSA process. This replaces the sources of a REG_SEQUENCE with
+/// sub-register references of the register defined by the REG_SEQUENCE. e.g.
+///
+/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
+/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
+/// =>
+/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+bool TwoAddressInstructionPass::EliminateRegSequences() {
+ if (RegSequences.empty())
+ return false;
+
+ for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
+ MachineInstr *MI = RegSequences[i];
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ bool IsImpDef = true;
+ SmallVector<unsigned, 4> RealSrcs;
+ SmallSet<unsigned, 4> Seen;
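+ // REG_SEQUENCE operands come in (source register, sub-register index) pairs
+ // after the def, hence the stride-2 walk below.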
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ if (MI->getOperand(i).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ DefMI->eraseFromParent();
+ continue;
+ }
+ IsImpDef = false;
+
+ // Remember COPY sources. These might be candidates for coalescing.
+ if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
+ RealSrcs.push_back(DefMI->getOperand(1).getReg());
+
+ bool isKill = MI->getOperand(i).isKill();
+ if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
+ !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
+ MRI->getRegClass(SrcReg), SubIdx)) {
+ // REG_SEQUENCE cannot have duplicated operands, so add a copy.
+ // Also add a copy if the source is live-in to the block. We don't want
+ // to end up with a partial redef of a live-in, e.g.
+ // BB0:
+ // reg1051:10<def> =
+ // ...
+ // BB1:
+ // ... = reg1051:10
+ // BB2:
+ // reg1051:9<def> =
+ // LiveIntervalAnalysis won't like it.
+ //
+ // If the REG_SEQUENCE doesn't kill its source, keeping live variables
+ // correctly up to date becomes very difficult. Insert a copy.
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ isKill = false;
+ break;
+ }
+
+ MachineBasicBlock::iterator InsertLoc = MI;
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
+ MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ MI->getOperand(i).setReg(0);
+ if (LV && isKill)
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (!SrcReg) continue;
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
+ }
+
+ if (IsImpDef) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
+ }
+
+ // Try coalescing some EXTRACT_SUBREG instructions. This can create
+ // INSERT_SUBREG instructions that must have <undef> flags added by
+ // LiveIntervalAnalysis, so only run it when LiveVariables is available.
+ if (LV)
+ CoalesceExtSubRegs(RealSrcs, DstReg);
+ }
+
+ RegSequences.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 000000000000..52693f03e828
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,215 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ }
+ };
+}
+char UnreachableBlockElim::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
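+ // Zap PHI nodes first: their uses may live in other dead blocks, so give
+ // them null values before dropping references.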
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ if (PI) PI->removeBlock(DeadBlocks[i]);
+ DeadBlocks[i]->eraseFromParent();
+ }
+
+ return DeadBlocks.size();
+}
+
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() && start->isPHI()) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() && phi->isPHI()) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
+ }
+
+ if (phi->getNumOperands() == 3) {
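+ // Only one incoming (value, block) pair is left, so the PHI is a plain
+ // copy; merge the registers and delete it.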
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+ ModifiedPHI = true;
+
+ if (Input != Output) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ MRI.constrainRegClass(Input, MRI.getRegClass(Output));
+ MRI.replaceRegWith(Output, Input);
+ }
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return (DeadBlocks.size() || ModifiedPHI);
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 000000000000..7557979ec9bb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,371 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregmap"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills , "Number of register spills");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ ReMatId = MAX_STACK_SLOT+1;
+ LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2ReMatIdMap.clear();
+ Virt2SplitMap.clear();
+ Virt2SplitKillMap.clear();
+ ReMatMap.clear();
+ ImplicitDefed.clear();
+ SpillSlotToUsesMap.clear();
+ MI2VirtMap.clear();
+ SpillPt2VirtMap.clear();
+ RestorePt2VirtMap.clear();
+ EmergencySpillMap.clear();
+ EmergencySpillSlots.clear();
+
+ SpillSlotToUsesMap.resize(8);
+ ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
+
+ allocatableRCRegs.clear();
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ allocatableRCRegs.insert(std::make_pair(*I,
+ TRI->getAllocatableSet(mf, *I)));
+
+ grow();
+
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2ReMatIdMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+ Virt2SplitKillMap.resize(NumRegs);
+ ReMatMap.resize(NumRegs);
+ ImplicitDefed.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ assert(SS >= LowSpillSlot && "Unexpected low spill slot");
+ unsigned Idx = SS-LowSpillSlot;
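+ // Grow the per-slot use map geometrically so indexing by SS - LowSpillSlot
+ // stays in bounds.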
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ return SS;
+}
+
+unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
+ unsigned physReg = Hint.second;
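+ // A hint type of 0 is a plain physreg preference (possibly reached through
+ // another virtreg's assignment); other hint types are target-specific and
+ // must be resolved by TRI.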
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ physReg = getPhys(physReg);
+ if (Hint.first == 0)
+ return (TargetRegisterInfo::isPhysicalRegister(physReg))
+ ? physReg : 0;
+ return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ ++NumSpills;
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = ReMatId;
+ return ReMatId++;
+}
+
+void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = id;
+}
+
+int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
+ std::map<const TargetRegisterClass*, int>::iterator I =
+ EmergencySpillSlots.find(RC);
+ if (I != EmergencySpillSlots.end())
+ return I->second;
+ return EmergencySpillSlots[RC] = createSpillSlot(RC);
+}
+
+void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
+ if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
+ // If FI < LowSpillSlot, this stack reference was produced by
+ // instruction selection and is not a spill
+ if (FI >= LowSpillSlot) {
+ assert(FI >= 0 && "Spill slot index should not be negative!");
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
+ }
+ }
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+ MachineInstr *NewMI, ModRef MRInfo) {
+ // Move previous memory references folded to new instruction.
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+ for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+ E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+ MI2VirtMap.erase(I++);
+ }
+
+ // add new memory reference
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
+ MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (MF->getFrameInfo()->isFixedObjectIndex(FI))
+ continue;
+ // This stack reference was produced by instruction selection and
+ // is not a spill
+ if (FI < LowSpillSlot)
+ continue;
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
+ }
+ MI2VirtMap.erase(MI);
+ SpillPt2VirtMap.erase(MI);
+ RestorePt2VirtMap.erase(MI);
+ EmergencySpillMap.erase(MI);
+}
+
+/// FindUnusedRegisters - Gather a list of allocatable registers that
+/// have not been allocated to any virtual register.
+bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
+ unsigned NumRegs = TRI->getNumRegs();
+ UnusedRegs.reset();
+ UnusedRegs.resize(NumRegs);
+
+ BitVector Used(NumRegs);
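+ // A physreg counts as used if any virtual register has been assigned to it.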
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[Reg]);
+ }
+
+ BitVector Allocatable = TRI->getAllocatableSet(*MF);
+ bool AnyUnused = false;
+ for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
+ if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
+ bool ReallyUnused = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ if (Used[*AS] || LIs->hasInterval(*AS)) {
+ ReallyUnused = false;
+ break;
+ }
+ }
+ if (ReallyUnused) {
+ AnyUnused = true;
+ UnusedRegs.set(Reg);
+ }
+ }
+ }
+
+ return AnyUnused;
+}
+
+void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getFunction()->getName() << '\n');
+ DEBUG(dump());
+ SmallVector<unsigned, 8> SuperDeads;
+ SmallVector<unsigned, 8> SuperDefs;
+ SmallVector<unsigned, 8> SuperKills;
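+ // Per-instruction scratch lists: super-register flags that must be re-added
+ // after sub-register operands have been rewritten to physical registers.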
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
+ MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = getPhys(VirtReg);
+ assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register.
+ if (MO.isUse()) {
+ if (MO.isKill() && !MO.isUndef())
+ SuperKills.push_back(PhysReg);
+ } else if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ ++NumIdCopies;
+ if (MI->getNumOperands() == 2) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ RemoveMachineInstrFromMaps(MI);
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ } else {
+ // Transform identity copy to a KILL to deal with subregisters.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "Identity copy: " << *MI);
+ }
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 000000000000..03abff356934
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,528 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineFunction;
+ class MachineRegisterInfo;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+ class raw_ostream;
+ class SlotIndexes;
+
+ class VirtRegMap : public MachineFunctionPass {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
+
+ enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+ typedef std::multimap<MachineInstr*,
+ std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+ private:
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MF;
+
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+ /// Virt2PhysMap - This is a virtual to physical register
+ /// mapping. Each virtual register is required to have an entry in
+ /// it; even spilled virtual registers (the register mapped to a
+ /// spilled register is the temporary used to load it from the
+ /// stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+ /// Virt2StackSlotMap - This is virtual register to stack slot
+ /// mapping. Each spilled virtual register has an entry in it
+ /// which corresponds to the stack slot this register is spilled
+ /// at.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+ /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+ /// mapping. Each spilled virtual register that should be remat'd has an
+ /// entry in it which corresponds to the remat id.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+ /// Virt2SplitMap - This is a virtual register to split virtual register
+ /// mapping.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+ /// Virt2SplitKillMap - This maps a split virtual register to its last use
+ /// (kill) index.
+ IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
+
+ /// ReMatMap - This is virtual register to re-materialized instruction
+ /// mapping. Each virtual register whose definition is going to be
+ /// re-materialized has an entry in it.
+ IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+ /// MI2VirtMap - This is MachineInstr to virtual register
+ /// mapping. In the case of memory spill code being folded into
+ /// instructions, we need to know which virtual register was
+ /// read/written by this instruction.
+ MI2VirtMapTy MI2VirtMap;
+
+ /// SpillPt2VirtMap - This records the virtual registers which should
+ /// be spilled right after the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+ SpillPt2VirtMap;
+
+ /// RestorePt2VirtMap - This records the virtual registers which should
+ /// be restored right before the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+ /// EmergencySpillMap - This records the physical registers that should
+ /// be spilled / restored around the MachineInstr since the register
+ /// allocator has run out of registers.
+ std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+ /// EmergencySpillSlots - This records emergency spill slots used to
+ /// spill physical registers when the register allocator runs out of
+ /// registers. Ideally only one stack slot is used per function per
+ /// register class.
+ std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+ /// ReMatId - Instead of assigning a stack slot to a to-be-rematerialized
+ /// virtual register, a unique id is assigned. This keeps track of
+ /// the highest id used so far. Note that this starts at (1<<18) to avoid
+ /// conflicts with stack slot numbers.
+ int ReMatId;
+
+ /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+ int LowSpillSlot, HighSpillSlot;
+
+ /// SpillSlotToUsesMap - Records uses for each register spill slot.
+ SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
+
+ /// ImplicitDefed - One bit for each virtual register. If set it indicates
+ /// the register is implicitly defined.
+ BitVector ImplicitDefed;
+
+ /// UnusedRegs - A list of physical registers that have not been used.
+ BitVector UnusedRegs;
+
+ /// createSpillSlot - Allocate a spill slot for RC from MFI.
+ unsigned createSpillSlot(const TargetRegisterClass *RC);
+
+ VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
+ void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+ public:
+ static char ID;
+ VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT),
+ Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+ Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
+ ReMatId(MAX_STACK_SLOT+1),
+ LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunction &getMachineFunction() const {
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
+ return *MF;
+ }
+
+ MachineRegisterInfo &getRegInfo() const { return *MRI; }
+ const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+ /// @brief clears the specified virtual register's physical
+ /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ "attempt to clear a not assigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+ /// @brief returns the register allocation preference.
+ unsigned getRegAllocPref(unsigned virtReg);
+
+ /// @brief returns true if VirtReg is assigned to its preferred physreg.
+ bool hasPreferredPhys(unsigned VirtReg) {
+ return getPhys(VirtReg) == getRegAllocPref(VirtReg);
+ }
+
+ /// @brief records virtReg is a split live interval from SReg.
+ void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg] = SReg;
+ }
+
+ /// @brief returns the live interval virtReg is split from.
+ unsigned getPreSplitReg(unsigned virtReg) const {
+ return Virt2SplitMap[virtReg];
+ }
+
+ /// getOriginal - Return the original virtual register that VirtReg descends
+ /// from through splitting.
+ /// A register that was not created by splitting is its own original.
+ /// This operation is idempotent.
+ unsigned getOriginal(unsigned VirtReg) const {
+ unsigned Orig = getPreSplitReg(VirtReg);
+ return Orig ? Orig : VirtReg;
+ }
+
+ /// @brief returns true if the specified virtual register is not
+ /// mapped to a stack slot or rematerialized.
+ bool isAssignedReg(unsigned virtReg) const {
+ if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+ getReMatId(virtReg) == NO_STACK_SLOT)
+ return true;
+ // Split register can be assigned a physical register as well as a
+ // stack slot or remat id.
+ return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+ /// @brief returns the rematerialization id mapped to the specified virtual
+ /// register
+ int getReMatId(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2ReMatIdMap[virtReg];
+ }
+
+ /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ int assignVirtReMatId(unsigned virtReg);
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ void assignVirtReMatId(unsigned virtReg, int id);
+
+ /// @brief returns true if the specified virtual register is being
+ /// re-materialized.
+ bool isReMaterialized(unsigned virtReg) const {
+ return ReMatMap[virtReg] != NULL;
+ }
+
+ /// @brief returns the original machine instruction being re-issued
+ /// to re-materialize the specified virtual register.
+ MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+ return ReMatMap[virtReg];
+ }
+
+ /// @brief records that the specified virtual register will be
+ /// re-materialized and the original instruction which will be re-issued
+ /// for this purpose.
+ void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+ ReMatMap[virtReg] = def;
+ }
+
+ /// @brief record the last use (kill) of a split virtual register.
+ void addKillPoint(unsigned virtReg, SlotIndex index) {
+ Virt2SplitKillMap[virtReg] = index;
+ }
+
+ SlotIndex getKillPoint(unsigned virtReg) const {
+ return Virt2SplitKillMap[virtReg];
+ }
+
+ /// @brief remove the last use (kill) of a split virtual register.
+ void removeKillPoint(unsigned virtReg) {
+ Virt2SplitKillMap[virtReg] = SlotIndex();
+ }
+
+ /// @brief returns true if the specified MachineInstr is a spill point.
+ bool isSpillPt(MachineInstr *Pt) const {
+ return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be spilled due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+ return SpillPt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a spill point for virtReg.
+ void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Pt);
+ if (I != SpillPt2VirtMap.end())
+ I->second.push_back(std::make_pair(virtReg, isKill));
+ else {
+ std::vector<std::pair<unsigned,bool> > Virts;
+ Virts.push_back(std::make_pair(virtReg, isKill));
+ SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer spill point information from one instruction to
+ /// another.
+ void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Old);
+ if (I == SpillPt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back().first;
+ bool isKill = I->second.back().second;
+ I->second.pop_back();
+ addSpillPoint(virtReg, isKill, New);
+ }
+ SpillPt2VirtMap.erase(I);
+ }
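+
+ // Usage sketch (hypothetical spiller code; MI and NewMI are placeholders
+ // for MachineInstr pointers):
+ //   VRM.addSpillPoint(VReg, /*isKill=*/true, MI);  // spill VReg after MI
+ //   if (VRM.isSpillPt(MI)) {
+ //     std::vector<std::pair<unsigned,bool> > &S = VRM.getSpillPtSpills(MI);
+ //     // each entry is (virtual register, isKill)
+ //   }
+ //   VRM.transferSpillPts(MI, NewMI);  // MI was replaced by NewMI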
+
+ /// @brief returns true if the specified MachineInstr is a restore point.
+ bool isRestorePt(MachineInstr *Pt) const {
+ return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be restored due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+ return RestorePt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a restore point for virtReg.
+ void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Pt);
+ if (I != RestorePt2VirtMap.end())
+ I->second.push_back(virtReg);
+ else {
+ std::vector<unsigned> Virts;
+ Virts.push_back(virtReg);
+ RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer restore point information from one instruction to
+ /// another.
+ void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Old);
+ if (I == RestorePt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addRestorePoint(virtReg, New);
+ }
+ RestorePt2VirtMap.erase(I);
+ }
+
+ /// @brief records that the specified physical register must be spilled
+ /// around the specified machine instr.
+ void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+ if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+ EmergencySpillMap[MI].push_back(PhysReg);
+ else {
+ std::vector<unsigned> PhysRegs;
+ PhysRegs.push_back(PhysReg);
+ EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+ }
+ }
+
+ /// @brief returns true if one or more physical registers must be spilled
+ /// around the specified instruction.
+ bool hasEmergencySpills(MachineInstr *MI) const {
+ return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+ }
+
+ /// @brief returns the physical registers to be spilled and restored around
+ /// the instruction.
+ std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+ return EmergencySpillMap[MI];
+ }
+
+ /// @brief - transfer emergency spill information from one instruction to
+ /// another.
+ void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+ EmergencySpillMap.find(Old);
+ if (I == EmergencySpillMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addEmergencySpill(virtReg, New);
+ }
+ EmergencySpillMap.erase(I);
+ }
+
+ /// @brief return or get an emergency spill slot for the register class.
+ int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+ /// @brief Return lowest spill slot index.
+ int getLowSpillSlot() const {
+ return LowSpillSlot;
+ }
+
+ /// @brief Return highest spill slot index.
+ int getHighSpillSlot() const {
+ return HighSpillSlot;
+ }
+
+ /// @brief Records a spill slot use.
+ void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+ /// @brief Returns true if spill slot has been used.
+ bool isSpillSlotUsed(int FrameIndex) const {
+ assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+ return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+ }
+
+ /// @brief Mark the specified register as being implicitly defined.
+ void setIsImplicitlyDefined(unsigned VirtReg) {
+ ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+
+ /// @brief Returns true if the virtual register is implicitly defined.
+ bool isImplicitlyDefined(unsigned VirtReg) const {
+ return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
+ }
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into newMI machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+ ModRef MRInfo);
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into the specified machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+ /// @brief returns the virtual registers' values folded in memory
+ /// operands of this instruction
+ std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+ getFoldedVirts(MachineInstr* MI) const {
+ return MI2VirtMap.equal_range(MI);
+ }
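+
+ // Typical query pattern, mirroring the loop used by the local rewriter in
+ // VirtRegRewriter.cpp (MI is a MachineInstr pointer):
+ //   VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ //   for (tie(I, End) = VRM.getFoldedVirts(MI); I != End; ++I) {
+ //     unsigned VirtReg = I->second.first;        // folded virtual register
+ //     VirtRegMap::ModRef MR = I->second.second;  // how MI touches it
+ //   }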
+
+ /// RemoveMachineInstrFromMaps - MI is being erased; remove it from the
+ /// folded instruction map and spill point map.
+ void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+ /// FindUnusedRegisters - Gather a list of allocatable registers that
+ /// have not been allocated to any virtual register.
+ bool FindUnusedRegisters(LiveIntervals* LIs);
+
+ /// HasUnusedRegisters - Return true if there are any allocatable registers
+ /// that have not been allocated to any virtual register.
+ bool HasUnusedRegisters() const {
+ return !UnusedRegs.none();
+ }
+
+ /// setRegisterUsed - Remember the physical register is now used.
+ void setRegisterUsed(unsigned Reg) {
+ UnusedRegs.reset(Reg);
+ }
+
+ /// isRegisterUnused - Return true if the physical register has not been
+ /// used.
+ bool isRegisterUnused(unsigned Reg) const {
+ return UnusedRegs[Reg];
+ }
+
+ /// getFirstUnusedRegister - Return the first physical register that has not
+ /// been used.
+ unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+ int Reg = UnusedRegs.find_first();
+ while (Reg != -1) {
+ if (allocatableRCRegs[RC][Reg])
+ return (unsigned)Reg;
+ Reg = UnusedRegs.find_next(Reg);
+ }
+ return 0;
+ }
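+
+ // Usage sketch (hypothetical caller): after allocation, gather the
+ // allocatable registers that were never assigned and pick one as scratch:
+ //   VRM.FindUnusedRegisters(LIs);                 // populate UnusedRegs
+ //   if (VRM.HasUnusedRegisters()) {
+ //     unsigned Scratch = VRM.getFirstUnusedRegister(RC); // 0 if none in RC
+ //   }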
+
+ /// rewrite - Rewrite all instructions in MF to use only physical registers
+ /// by mapping all virtual register operands to their assigned physical
+ /// registers.
+ ///
+ /// @param Indexes Optionally remove deleted instructions from indexes.
+ void rewrite(SlotIndexes *Indexes);
+
+ void print(raw_ostream &OS, const Module* M = 0) const;
+ void dump() const;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 000000000000..a5ec797b27db
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2633 @@
+//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDSE , "Number of dead stores elided");
+STATISTIC(NumDSS , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM , "Number of re-materializable defs elided");
+STATISTIC(NumStores , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats , "Number of re-materialization");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused , "Number of values reused");
+STATISTIC(NumDCE , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+ enum RewriterName { local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+ cl::desc("Rewriter to use (default=local)"),
+ cl::Prefix,
+ cl::values(clEnumVal(local, "local rewriter"),
+ clEnumVal(trivial, "trivial rewriter"),
+ clEnumValEnd),
+ cl::init(local));
+
+static cl::opt<bool>
+ScheduleSpills("schedule-spills",
+ cl::desc("Schedule spill code"),
+ cl::init(false));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
+/// Note that operands may be added, so the MO reference is no longer valid.
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ if (MO.getSubReg()) {
+ MO.substPhysReg(Reg, TRI);
+
+ // Any kill flags apply to the full virtual register, so they also apply to
+ // the full physical register.
+ // We assume that partial defs have already been decorated with a super-reg
+ // <imp-def> operand by LiveIntervals.
+ MachineInstr &MI = *MO.getParent();
+ if (MO.isUse() && !MO.isUndef() &&
+ (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+ } else {
+ MO.setReg(Reg);
+ }
+}
+
+namespace {
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct TrivialRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ DEBUG(dbgs() << "**** Machine Instrs"
+ << "(NOTE! Does not include spills and reloads!) ****\n");
+ DEBUG(MF.dump());
+
+ MachineRegisterInfo *mri = &MF.getRegInfo();
+ const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
+
+ bool changed = false;
+
+ for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = li->reg;
+
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ if (!li->empty())
+ mri->setPhysRegUsed(reg);
+ }
+ else {
+ if (!VRM.hasPhys(reg))
+ continue;
+ unsigned pReg = VRM.getPhys(reg);
+ mri->setPhysRegUsed(pReg);
+ // Copy the register use-list before traversing it.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
+ for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
+ E = mri->reg_end(); I != E; ++I)
+ reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
+ for (unsigned N=0; N != reglist.size(); ++N)
+ substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
+ pReg, *tri);
+ changed |= !reglist.empty();
+ }
+ }
+
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.dump());
+
+ return changed;
+ }
+
+};
+
+}
+
+// ************************************************************************ //
+
+namespace {
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remat are available in
+/// each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+ /// Other physregs are values that the register-allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+ /// this bit, and addAvailable sets it when its CanClobber argument is true.
+class AvailableSpills {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+ // or remat'ed virtual register values that are still available, due to
+ // being loaded or stored to, but not invalidated yet.
+ std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+ // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+ // indicating which stack slot values are currently held by a physreg. This
+ // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+ // physreg is modified.
+ std::multimap<unsigned, int> PhysRegsAvailable;
+
+ void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+ void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+ AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+ : TRI(tri), TII(tii) {
+ }
+
+ /// clear - Reset the state.
+ void clear() {
+ SpillSlotsOrReMatsAvailable.clear();
+ PhysRegsAvailable.clear();
+ }
+
+ const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+ /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+ /// available in a physical register, return that PhysReg, otherwise
+ /// return 0.
+ unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+ std::map<int, unsigned>::const_iterator I =
+ SpillSlotsOrReMatsAvailable.find(Slot);
+ if (I != SpillSlotsOrReMatsAvailable.end()) {
+ return I->second >> 1; // Remove the CanClobber bit.
+ }
+ return 0;
+ }
+
+ /// addAvailable - Mark that the specified stack slot / remat is available
+ /// in the specified physreg. If CanClobber is true, the physreg can be
+ /// modified at any time without changing the semantics of the program.
+ void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+ // If this stack slot is thought to be available in some other physreg,
+ // remove its record.
+ ModifyStackSlotOrReMat(SlotOrReMat);
+
+ PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+ SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
+ (unsigned)CanClobber;
+
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Remembering RM#"
+ << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+ << (CanClobber ? " canclobber" : "") << "\n");
+ }
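+
+ // Worked example of the encoding used above: addAvailable(SS, /*Reg=*/5,
+ // /*CanClobber=*/true) stores (5 << 1) | 1 == 11 in
+ // SpillSlotsOrReMatsAvailable; getSpillSlotOrReMatPhysReg(SS) recovers
+ // 11 >> 1 == 5, and canClobberPhysRegForSS(SS) reads the low bit (11 & 1).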
+
+ /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+ /// the value of the specified stackslot register if it desires. The
+ /// specified stack slot must be available in a physreg for this query to
+ /// make sense.
+ bool canClobberPhysRegForSS(int SlotOrReMat) const {
+ assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+ "Value not available!");
+ return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+ }
+
+ /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+ /// physical register where values for some stack slot(s) might be
+ /// available.
+ bool canClobberPhysReg(unsigned PhysReg) const {
+ std::multimap<unsigned, int>::const_iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ if (!canClobberPhysRegForSS(SlotOrReMat))
+ return false;
+ }
+ return true;
+ }
+
+ /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+ /// stackslot register. The register is still available but is no longer
+ /// allowed to be modified.
+ void disallowClobberPhysReg(unsigned PhysReg);
+
+ /// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff that lives in
+ /// it and any of its aliases.
+ void ClobberPhysReg(unsigned PhysReg);
+
+ /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+ /// slot changes. This removes information about which register the
+ /// previous value for this slot lives in (as the previous value is dead
+ /// now).
+ void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+ /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
+ /// other stack slots sharing the same register are no longer valid.
+ void ClobberSharingStackSlots(int StackSlot);
+
+ /// AddAvailableRegsToLiveIn - Availability information is being carried
+ /// into the specified MBB. Add available physical registers as potential
+ /// live-ins. If they are reused in the MBB, they will be added to the
+ /// live-in set so the register scavenger and post-allocation scheduler see
+ /// correct liveness.
+ void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+
+}
+
+// ************************************************************************ //
+
+// Given a location where a reload of a spilled register or a remat of
+// a constant is to be inserted, attempt to find a safe location to
+// insert the load at an earlier point in the basic-block, to hide
+// latency of the load and to avoid address-generation interlock
+// issues.
+static MachineBasicBlock::iterator
+ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
+ MachineBasicBlock::iterator const Begin,
+ unsigned PhysReg,
+ const TargetRegisterInfo *TRI,
+ bool DoReMat,
+ int SSorRMId,
+ const TargetInstrInfo *TII,
+ const MachineFunction &MF)
+{
+ if (!ScheduleSpills)
+ return InsertLoc;
+
+ // Spill backscheduling is of primary interest to addresses, so
+ // don't do anything if the register isn't in the register class
+ // used for pointers.
+
+ const TargetLowering *TL = MF.getTarget().getTargetLowering();
+
+ if (!TL->isTypeLegal(TL->getPointerTy()))
+ // Believe it or not, this is true on 16-bit targets like PIC16.
+ return InsertLoc;
+
+ const TargetRegisterClass *ptrRegClass =
+ TL->getRegClassFor(TL->getPointerTy());
+ if (!ptrRegClass->contains(PhysReg))
+ return InsertLoc;
+
+ // Scan upwards through the preceding instructions. If an instruction doesn't
+ // reference the stack slot or the register we're loading, we can
+ // backschedule the reload up past it.
+ MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
+ while (NewInsertLoc != Begin) {
+ MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
+ for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
+ MachineOperand &Op = Prev->getOperand(i);
+ if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
+ goto stop;
+ }
+ if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
+ Prev->findRegisterDefOperand(PhysReg))
+ goto stop;
+ for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
+ if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
+ Prev->findRegisterDefOperand(*Alias))
+ goto stop;
+ NewInsertLoc = Prev;
+ }
+stop:;
+
+ // If we made it to the beginning of the block, turn around and move back
+ // down just past any existing reloads. They're likely to be reloads/remats
+ // for instructions earlier than what our current reload/remat is for, so
+ // they should be scheduled earlier.
+ if (NewInsertLoc == Begin) {
+ int FrameIdx;
+ while (InsertLoc != NewInsertLoc &&
+ (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
+ TII->isTriviallyReMaterializable(NewInsertLoc)))
+ ++NewInsertLoc;
+ }
+
+ return NewInsertLoc;
+}
+
+namespace {
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+ // The MachineInstr operand that reused an available value.
+ unsigned Operand;
+
+ // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+ unsigned StackSlotOrReMat;
+
+ // PhysRegReused - The physical register the value was available in.
+ unsigned PhysRegReused;
+
+ // AssignedPhysReg - The physreg that was assigned for use by the reload.
+ unsigned AssignedPhysReg;
+
+ // VirtReg - The virtual register itself.
+ unsigned VirtReg;
+
+ ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+ unsigned vreg)
+ : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+ AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
+/// is reused instead of reloaded.
+class ReuseInfo {
+ MachineInstr &MI;
+ std::vector<ReusedOp> Reuses;
+ BitVector PhysRegsClobbered;
+public:
+ ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+ PhysRegsClobbered.resize(tri->getNumRegs());
+ }
+
+ bool hasReuses() const {
+ return !Reuses.empty();
+ }
+
+ /// addReuse - If we choose to reuse a virtual register that is already
+ /// available instead of reloading it, remember that we did so.
+ void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+ unsigned PhysRegReused, unsigned AssignedPhysReg,
+ unsigned VirtReg) {
+ // If the reload is to the assigned register anyway, no undo will be
+ // required.
+ if (PhysRegReused == AssignedPhysReg) return;
+
+ // Otherwise, remember this.
+ Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+ AssignedPhysReg, VirtReg));
+ }
+
+ void markClobbered(unsigned PhysReg) {
+ PhysRegsClobbered.set(PhysReg);
+ }
+
+ bool isClobbered(unsigned PhysReg) const {
+ return PhysRegsClobbered.test(PhysReg);
+ }
+
+ /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+ /// is some other operand that is using the specified register, either pick
+ /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
+ MachineFunction &MF, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+ /// rejected for the reload. This avoids infinite looping in cases like
+ /// this:
+ /// t1 := op t2, t3
+ /// t2 <- assigned r0 for use by the reload but ended up reusing r1
+ /// t3 <- assigned r1 for use by the reload but ended up reusing r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallSet<unsigned, 8> Rejected;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+ return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+ }
+};
+
+}
+
+// ****************** //
+// Utility Functions //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs){
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and unset the kill's marker and last kill operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+ MachineOperand *KillOp = KillOps[Reg];
+ KillOp->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOp->getReg();
+ if (!RegKills[KReg])
+ return;
+
+ assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
+ "invalid superreg kill flags");
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ // If it's a def of a super-register, its sub-registers are no
+ // longer killed either.
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+ assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
+ "invalid subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1) <kill>
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+ /// until it rewrites vreg2. At that point it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo* TRI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ return;
+ }
+ // No previous kill for this reg. Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+ ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
+ }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (KillRegs)
+ KillRegs->push_back(Reg);
+ assert(Reg < KillOps.size());
+ if (KillOps[Reg] == &MO) {
+ // This operand was the kill, now no longer.
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ assert(KillOps[*SR] == &MO && "bad subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+ else {
+ // This operand may have reused a previously killed reg. Keep it live in
+ // case it continues to be used after erasing this instruction.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+ }
+ }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction is removed), mark it isDead. Also checks if
+ /// the def MI has other definition operands that are not dead, and reports
+ /// that via the HasLiveDef reference.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+ MachineInstr &NewDef, unsigned Reg,
+ bool &HasLiveDef,
+ const TargetRegisterInfo *TRI) {
+ // Due to remat, it's possible this reg isn't being reused. That is,
+ // the def of this reg (by prev MI) is now dead.
+ MachineInstr *DefMI = I;
+ MachineOperand *DefOp = NULL;
+ for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
+ continue;
+ if (MO.getReg() == Reg)
+ DefOp = &MO;
+ else if (!MO.isDead())
+ HasLiveDef = true;
+ }
+ if (!DefOp)
+ return false;
+
+ bool FoundUse = false, Done = false;
+ MachineBasicBlock::iterator E = &NewDef;
+ ++I; ++E;
+ for (; !Done && I != E; ++I) {
+ MachineInstr *NMI = I;
+ for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = NMI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() == 0 ||
+ (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
+ continue;
+ if (MO.isUse())
+ FoundUse = true;
+ Done = true; // Stop after scanning all the operands of this MI.
+ }
+ }
+ if (!FoundUse) {
+ // Def is dead!
+ DefOp->setIsDead();
+ return true;
+ }
+ return false;
+}
+
+/// UpdateKills - Track and update kill info. If a MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ // These do not affect kill info at all.
+ if (MI.isDebugValue())
+ return;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // This operand may have reused a previously killed reg. Keep it live.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.set(*SR);
+ KillOps[*SR] = &MO;
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ // It also defines (or partially define) aliases.
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned DestReg, unsigned Reg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
+#ifndef NDEBUG
+ const MCInstrDesc &MCID = ReMatDefMI->getDesc();
+ assert(MCID.getNumDefs() == 1 &&
+ "Don't know how to remat instructions that define > 1 values!");
+#endif
+ TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
+ MachineInstr *NewMI = prior(MII);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+ continue;
+ assert(MO.isUse());
+ unsigned Phys = VRM.getPhys(VirtReg);
+ assert(Phys && "Virtual register is not assigned a register?");
+ substitutePhysReg(MO, Phys, *TRI);
+ }
+ ++NumReMats;
+}
+
+/// findSuperReg - Find the SubReg's super-register of given register class
+/// where its SubIdx sub-register is SubReg.
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+ unsigned SubIdx, const TargetRegisterInfo *TRI) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ unsigned Reg = *I;
+ if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+ return Reg;
+ }
+ return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+ /// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n");
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+ /// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ");
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+ else
+ DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+ /// AddAvailableRegsToLiveIn - Availability information is being carried
+ /// into the specified MBB. Add available physical registers as potential
+ /// live-ins. If they are reused in the MBB, they will be added to the
+ /// live-in set so the register scavenger and post-allocation scheduler see
+ /// correct liveness.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ std::set<unsigned> NotAvailable;
+ for (std::multimap<unsigned, int>::iterator
+ I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+ I != E; ++I) {
+ unsigned Reg = I->first;
+ const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
+ // FIXME: A temporary workaround. We can't reuse an available value if it's
+ // not safe to move the def of the virtual register's class, e.g. the
+ // X86::RFP* register classes. Do not add it as a live-in.
+ if (!TII->isSafeToMoveRegClassDefs(RC))
+ // This is no longer available.
+ NotAvailable.insert(Reg);
+ else {
+ MBB.addLiveIn(Reg);
+ if (RegKills[Reg])
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ }
+
+ // Skip over the same register.
+ std::multimap<unsigned, int>::iterator NI = llvm::next(I);
+ while (NI != E && NI->first == Reg) {
+ ++I;
+ ++NI;
+ }
+ }
+
+ for (std::set<unsigned>::iterator I = NotAvailable.begin(),
+ E = NotAvailable.end(); I != E; ++I) {
+ ClobberPhysReg(*I);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
+ *SubRegs; ++SubRegs)
+ ClobberPhysReg(*SubRegs);
+ }
+}
+
+/// ModifyStackSlotOrReMat - This method is called when the value in a stack
+/// slot changes. This removes information about which register the previous
+/// value for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+ SpillSlotsOrReMatsAvailable.erase(It);
+
+ // This register may hold the value of multiple stack slots, only remove this
+ // stack slot from the set of values the register contains.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ for (; ; ++I) {
+ assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+ "Map inverse broken!");
+ if (I->second == SlotOrReMat) break;
+ }
+ PhysRegsAvailable.erase(I);
+}
+
+void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(StackSlot);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+
+ // Erase entries in PhysRegsAvailable for other stack slots.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ while (I != PhysRegsAvailable.end() && I->first == Reg) {
+ std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
+ if (I->second != StackSlot) {
+ DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
+ << PrintReg(Reg, TRI) << '\n');
+ SpillSlotsOrReMatsAvailable.erase(I->second);
+ PhysRegsAvailable.erase(I);
+ }
+ I = NextI;
+ }
+}
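+
+// Scenario (sketch): if physreg R is currently recorded as holding the
+// values of both SS#1 and SS#2, then ClobberSharingStackSlots(SS#1) erases
+// the SS#2 entries from both maps and keeps the SS#1 <-> R association.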
+
+// ************************** //
+// Reuse Info Implementation //
+// ************************** //
+
+/// GetRegForReload - We are about to emit a reload into PhysReg. If there
+/// is some other operand that is using the specified register, either pick
+/// a new register to use, or evict the previous reload and use this reg.
+unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
+ unsigned PhysReg,
+ MachineFunction &MF,
+ MachineInstr *MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
+
+ if (Reuses.empty()) return PhysReg; // This is most often empty.
+
+ for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+ ReusedOp &Op = Reuses[ro];
+ // If we find some other reuse that was supposed to use this register
+ // exactly for its reload, we can change this reload to use ITS reload
+ // register. That is, unless its reload register has already been
+ // considered and subsequently rejected because it has also been reused
+ // by another operand.
+ if (Op.PhysRegReused == PhysReg &&
+ Rejected.count(Op.AssignedPhysReg) == 0 &&
+ RC->contains(Op.AssignedPhysReg)) {
+ // Yup, use the reload register that we didn't use before.
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
+ return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ if (TRI->regsOverlap(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+ // MI may be using only a sub-register of PhysRegUsed.
+ unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
+ unsigned SubIdx = 0;
+ assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
+ "A reuse cannot be a virtual register");
+ if (PRRU != RealPhysRegUsed) {
+ // What was the sub-register index?
+ SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+ assert(SubIdx &&
+ "Operand physreg is not a sub-register of PhysRegUsed");
+ }
+
+ // Ok, we're going to try to reload the assigned physreg into the
+ // slot that we were supposed to in the first place. However, that
+ // register could hold a reuse. Check to see if it conflicts or
+ // would prefer us to use a different register.
+ unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
+ MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
+ DoReMat, SSorRMId, TII, MF);
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
+ TRI, VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
+ NewOp.StackSlotOrReMat, AliasRC, TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+ ++NumLoads;
+ }
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg;
+ MI->getOperand(NewOp.Operand).setReg(RReg);
+ MI->getOperand(NewOp.Operand).setSubReg(0);
+
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+
+ DEBUG(dbgs() << "Reuse undone!\n");
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+ return false;
+
+ bool Found = false;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (MR & VirtRegMap::isModRef)
+ if (VRM.getStackSlot(VirtReg) == SS) {
+ Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+ break;
+ }
+ }
+ if (!Found)
+ return false;
+
+ // Does the instruction use a register that overlaps the scratch register?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!VRM.hasPhys(Reg))
+ continue;
+ Reg = VRM.getPhys(Reg);
+ }
+ if (TRI->regsOverlap(PhysReg, Reg))
+ return false;
+ }
+ return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+ MachineBasicBlock &MBB,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ BitVector &AllocatableRegs) {
+ BitVector Defs(TRI->getNumRegs());
+ BitVector Uses(TRI->getNumRegs());
+ SmallVector<unsigned, 4> LocalUses;
+ SmallVector<unsigned, 4> Kills;
+
+ // Take a look at 2 instructions at most.
+ unsigned Count = 0;
+ while (Count < 2) {
+ if (MII == MBB.begin())
+ break;
+ MachineInstr *PrevMI = prior(MII);
+ MII = PrevMI;
+
+ if (PrevMI->isDebugValue())
+ continue; // Skip over dbg_value instructions.
+ ++Count;
+
+ for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = PrevMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.set(*AS);
+ } else {
+ LocalUses.push_back(Reg);
+ if (MO.isKill() && AllocatableRegs[Reg])
+ Kills.push_back(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned Kill = Kills[i];
+ if (!Defs[Kill] && !Uses[Kill] &&
+ RC->contains(Kill))
+ return Kill;
+ }
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.set(*AS);
+ }
+ }
+
+ return 0;
+}
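+
+// Sketch of what qualifies: a register that is marked <kill> in one of the
+// two preceding (non-debug) instructions, is allocatable, belongs to RC, and
+// is neither defined nor used again before MII is returned; otherwise 0.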
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == VirtReg)
+ substitutePhysReg(MO, PhysReg, TRI);
+ }
+}
+
+namespace {
+
+struct RefSorter {
+ bool operator()(const std::pair<MachineInstr*, int> &A,
+ const std::pair<MachineInstr*, int> &B) {
+ return A.second < B.second;
+ }
+};
+
+// ***************************** //
+// Local Spiller Implementation //
+// ***************************** //
+
+class LocalRewriter : public VirtRegRewriter {
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ VirtRegMap *VRM;
+ LiveIntervals *LIs;
+ BitVector AllocatableRegs;
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+ DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
+
+ MachineBasicBlock *MBB; // Basic block currently being processed.
+
+public:
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs);
+
+private:
+ void EraseInstr(MachineInstr *MI) {
+ VRM->RemoveMachineInstrFromMaps(MI);
+ LIs->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+
+ bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI);
+
+ void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ void TransferDeadness(unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool InsertEmergencySpills(MachineInstr *MI);
+
+ bool InsertRestores(MachineInstr *MI,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool InsertSpills(MachineInstr *MI);
+
+ void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps);
+
+ void RewriteMBB(LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+}
+
+bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
+ LiveIntervals* lis) {
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ VRM = &vrm;
+ LIs = lis;
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n");
+ DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+ " reloads!) ****\n");
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
+
+ // Spills - Keep track of which spilled values are available in physregs
+ // so that we can choose to reuse the physregs instead of emitting
+ // reloads. This is usually refreshed per basic block.
+ AvailableSpills Spills(TRI, TII);
+
+ // Keep track of kill information.
+ BitVector RegKills(TRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ // SingleEntrySuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(LIs, Spills, RegKills, KillOps);
+
+ // If this MBB is the only predecessor of a successor, keep the
+ // availability information and visit it next.
+ do {
+ // Keep visiting single predecessor successor as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+ // FIXME: More than one successor, each of which has MBB as
+ // its only predecessor.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+ RewriteMBB(LIs, Spills, RegKills, KillOps);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM->getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT) {
+ for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
+ SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
+ if (!VRM->isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
+ MachineInstr *DVMI = DbgValues[j];
+ DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
+ EraseInstr(DVMI);
+ }
+ ++NumDSS;
+ }
+ DbgValues.clear();
+ }
+ }
+ Slot2DbgValues.clear();
+
+ return true;
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+/// xorq %r12<kill>, %r13
+/// addq %rax, -184(%rbp)
+/// addq %r13, -184(%rbp)
+/// ==>
+/// xorq %r12<kill>, %r13
+/// movq -184(%rbp), %r12
+/// addq %rax, %r12
+/// addq %r13, %r12
+/// movq %r12, -184(%rbp)
+bool LocalRewriter::
+OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
+ // Skip over dbg_value instructions.
+ while (NextMII != MBB->end() && NextMII->isDebugValue())
+ NextMII = llvm::next(NextMII);
+ if (NextMII == MBB->end())
+ return false;
+
+ if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+ return false;
+
+ // Now let's see if the last couple of instructions happens to have freed up
+ // a register.
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
+ if (!PhysReg)
+ return false;
+
+ MachineFunction &MF = *MBB->getParent();
+ TRI = MF.getTarget().getRegisterInfo();
+ MachineInstr &MI = *MII;
+ if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
+ return false;
+
+ // If the next instruction also folds the same SS modref and can be unfolded,
+ // then it's worthwhile to issue a load from SS into the free register and
+ // then unfold these instructions.
+ if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
+ return false;
+
+ // Back-schedule reloads and remats.
+ ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
+
+ // Load from SS to the spare physical register.
+ TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(PhysReg);
+ // Remember it's available.
+ Spills.addAvailable(SS, PhysReg);
+ MaybeDeadStores[SS] = NULL;
+
+ // Unfold current MI.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+ VRM->transferRestorePts(&MI, NewMIs[0]);
+ MII = MBB->insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ ++NumModRefUnfold;
+
+ // Unfold next instructions that fold the same SS.
+ do {
+ MachineInstr &NextMI = *NextMII;
+ NextMII = llvm::next(NextMII);
+ NewMIs.clear();
+ if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+ VRM->transferRestorePts(&NextMI, NewMIs[0]);
+ MBB->insert(NextMII, NewMIs[0]);
+ InvalidateKills(NextMI, TRI, RegKills, KillOps);
+ EraseInstr(&NextMI);
+ ++NumModRefUnfold;
+ // Skip over dbg_value instructions.
+ while (NextMII != MBB->end() && NextMII->isDebugValue())
+ NextMII = llvm::next(NextMII);
+ if (NextMII == MBB->end())
+ break;
+ } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
+
+ // Store the value back into SS.
+ TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
+ MachineInstr *StoreMI = prior(NextMII);
+ VRM->addSpillSlotUse(SS, StoreMI);
+ VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+ return true;
+}
+
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+/// xorl %edi, %eax
+/// movl %eax, -32(%ebp)
+/// movl -36(%ebp), %eax
+/// orl %eax, -32(%ebp)
+/// ==>
+/// xorl %edi, %eax
+/// orl -36(%ebp), %eax
+/// mov %eax, -32(%ebp)
+/// This enables unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalRewriter::
+OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+ // Only transform a MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+    // MI2VirtMap can be updated, which would invalidate the iterator.
+ // Increment the iterator first.
+ ++I;
+ if (VRM->isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM->getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc) {
+ if (!UnfoldVR)
+ return false;
+
+ // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
+ RegKills, KillOps);
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+ continue;
+ if (VRM->isAssignedReg(VirtReg)) {
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ } else if (VRM->isReMaterialized(VirtReg))
+ continue;
+ int SS = VRM->getStackSlot(VirtReg);
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ if (PhysReg) {
+ if (TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ continue;
+ }
+ if (VRM->hasPhys(VirtReg)) {
+ PhysReg = VRM->getPhys(VirtReg);
+ if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+ continue;
+ }
+
+ // Ok, we'll need to reload the value into a register which makes
+ // it impossible to perform the store unfolding optimization later.
+ // Let's see if it is possible to fold the load if the store is
+ // unfolded. This allows us to perform the store unfolding
+ // optimization.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+ assert(NewMIs.size() == 1);
+ MachineInstr *NewMI = NewMIs.back();
+ MBB->insert(MII, NewMI);
+ NewMIs.clear();
+ int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+ assert(Idx != -1);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(Idx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
+ NewMI->eraseFromParent();
+ if (FoldedMI) {
+ VRM->addSpillSlotUse(SS, FoldedMI);
+ if (!VRM->hasPhys(UnfoldVR))
+ VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
+ VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ MII = FoldedMI;
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// CommuteChangesDestination - We are looking for r0 = op r1, r2,
+/// where SrcReg is r1 and it is tied to r0. Return true if after
+/// commuting this instruction it will be r0 = op r2, r1.
+static bool CommuteChangesDestination(MachineInstr *DefMI,
+ const MCInstrDesc &MCID,
+ unsigned SrcReg,
+ const TargetInstrInfo *TII,
+ unsigned &DstIdx) {
+ if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
+ return false;
+ if (!DefMI->getOperand(1).isReg() ||
+ DefMI->getOperand(1).getReg() != SrcReg)
+ return false;
+ unsigned DefIdx;
+ if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+ return false;
+ unsigned SrcIdx1, SrcIdx2;
+ if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+ return false;
+ if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+ DstIdx = 2;
+ return true;
+ }
+ return false;
+}
+
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2<kill>
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalRewriter::
+CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI) {
+ if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
+ return false;
+
+ MachineInstr &MI = *MII;
+ MachineBasicBlock::iterator DefMII = prior(MII);
+ MachineInstr *DefMI = DefMII;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (DefMII != MBB->begin() &&
+ MCID.isCommutable() &&
+ CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+ return false;
+ MachineInstr *ReloadMI = prior(DefMII);
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+ if (DestReg != SrcReg || FrameIdx != SS)
+ return false;
+ int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+ if (UseIdx == -1)
+ return false;
+ unsigned DefIdx;
+ if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+ return false;
+ assert(DefMI->getOperand(DefIdx).isReg() &&
+ DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+ // Now commute def instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+ if (!CommutedMI)
+ return false;
+ MBB->insert(MII, CommutedMI);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(NewDstIdx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
+ // Not needed since foldMemoryOperand returns new MI.
+ CommutedMI->eraseFromParent();
+ if (!FoldedMI)
+ return false;
+
+ VRM->addSpillSlotUse(SS, FoldedMI);
+ VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ // Insert new def MI and spill MI.
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
+ MII = prior(MII);
+ MachineInstr *StoreMI = MII;
+ VRM->addSpillSlotUse(SS, StoreMI);
+ VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ MII = FoldedMI; // Update MII to backtrack.
+
+ // Delete all 3 old instructions.
+ InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+ EraseInstr(ReloadMI);
+ InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+ EraseInstr(DefMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+
+ // If NewReg was previously holding value of some SS, it's now clobbered.
+ // This has to be done now because it's a physical register. When this
+ // instruction is re-visited, it's ignored.
+ Spills.ClobberPhysReg(NewReg);
+
+ ++NumCommutes;
+ return true;
+ }
+
+ return false;
+}
+
+/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+/// the last store to the same slot is now dead. If so, remove the last store.
+void LocalRewriter::
+SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
+ TRI);
+ MachineInstr *StoreMI = prior(oldNextMII);
+ VRM->addSpillSlotUse(StackSlot, StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
+
+ // If there is a dead store to this stack slot, nuke it now.
+ if (LastStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB->begin();
+ if (CheckDef)
+ --PrevMII;
+ EraseInstr(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side effects.
+ EraseInstr(DeadDef);
+ ++NumDRM;
+ }
+ }
+ }
+ }
+ }
+
+ // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
+ // the last of multiple instructions is the actual store.
+ LastStore = prior(oldNextMII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register available,
+ // in PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+ ++NumStores;
+}
+
+/// isSafeToDelete - Return true if this instruction doesn't produce any side
+/// effect and all of its defs are dead.
+static bool isSafeToDelete(MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
+ MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
+ MI.isLabel() || MI.isDebugValue() ||
+ MI.hasUnmodeledSideEffects())
+ return false;
+
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // let them be.
+ if (MI.isInlineAsm())
+ return false;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ // FIXME: We can't remove kill markers or else the scavenger will assert.
+ // An alternative is to add a ADD pseudo instruction to replace kill
+ // markers.
+ return false;
+ }
+ return true;
+}
+
+/// TransferDeadness - An identity copy definition is dead and is being
+/// removed. Find the last def or use of the register and mark it dead / kill.
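+/// For illustration only (hypothetical registers): when a dead identity copy
+/// such as
+///   %r0 = COPY %r0
+/// is deleted, the most recent def or use of %r0 in this block inherits the
+/// marker: a side-effect-free def is deleted, another def is marked dead, and
+/// a use is marked kill.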
+void LocalRewriter::
+TransferDeadness(unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ SmallPtrSet<MachineInstr*, 4> Seens;
+ SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (Seens.insert(UDMI))
+ Refs.push_back(std::make_pair(UDMI, DI->second));
+ }
+
+ if (Refs.empty())
+ return;
+ std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+ while (!Refs.empty()) {
+ MachineInstr *LastUDMI = Refs.back().first;
+ Refs.pop_back();
+
+ MachineOperand *LastUD = NULL;
+ for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastUDMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (!LastUD || (LastUD->isUse() && MO.isDef()))
+ LastUD = &MO;
+ if (LastUDMI->isRegTiedToDefOperand(i))
+ break;
+ }
+ if (LastUD->isDef()) {
+ // If the instruction has no side effect, delete it and propagate
+      // backward further. Otherwise, mark it dead and we are done.
+ if (!isSafeToDelete(*LastUDMI)) {
+ LastUD->setIsDead();
+ break;
+ }
+ EraseInstr(LastUDMI);
+ } else {
+ LastUD->setIsKill();
+ RegKills.set(Reg);
+ KillOps[Reg] = LastUD;
+ break;
+ }
+ }
+}
+
+/// InsertEmergencySpills - Insert emergency spills before MI if requested by
+/// VRM. Return true if spills were inserted.
+bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
+ if (!VRM->hasEmergencySpills(MI))
+ return false;
+ MachineBasicBlock::iterator MII = MI;
+ SmallSet<int, 4> UsedSS;
+ std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
+ for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+ unsigned PhysReg = EmSpills[i];
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+ assert(RC && "Unable to determine register class!");
+ int SS = VRM->getEmergencySpillSlot(RC);
+ if (UsedSS.count(SS))
+ llvm_unreachable("Need to spill more than one physical registers!");
+ UsedSS.insert(SS);
+ TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
+ MachineInstr *StoreMI = prior(MII);
+ VRM->addSpillSlotUse(SS, StoreMI);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
+ TII, *MBB->getParent());
+
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
+
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SS, LoadMI);
+ ++NumPSpills;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ return true;
+}
+
+/// InsertRestores - Restore registers before MI if requested by VRM. Return
+/// true if any instructions were inserted.
+bool LocalRewriter::InsertRestores(MachineInstr *MI,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (!VRM->isRestorePt(MI))
+ return false;
+ MachineBasicBlock::iterator MII = MI;
+ std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
+ for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+ unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+ if (!VRM->getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+
+    // Check if the value being restored is available. If so, it must be
+    // from a predecessor BB that falls through into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId)
+ DEBUG(dbgs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+ <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << '\n');
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to update
+ // the kill flags for the current instruction after processing it.
+
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId)
+ DEBUG(dbgs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+ <<" by copying it into physreg "
+ << TRI->getName(Phys) << '\n');
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+ *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), Phys)
+ .addReg(InReg, RegState::Kill);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ DEBUG(dbgs() << '\t' << *CopyMI);
+ ++NumCopified;
+ continue;
+ }
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+ *MBB->getParent());
+
+ if (VRM->isReMaterialized(VirtReg)) {
+ ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(MII));
+ }
+ return true;
+}
+
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
+/// true if spills were inserted.
+bool LocalRewriter::InsertSpills(MachineInstr *MI) {
+ if (!VRM->isSpillPt(MI))
+ return false;
+ MachineBasicBlock::iterator MII = MI;
+ std::vector<std::pair<unsigned,bool> > &SpillRegs =
+ VRM->getSpillPtSpills(MI);
+ for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+ unsigned VirtReg = SpillRegs[i].first;
+ bool isKill = SpillRegs[i].second;
+ if (!VRM->getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ unsigned Phys = VRM->getPhys(VirtReg);
+ int StackSlot = VRM->getStackSlot(VirtReg);
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
+ RC, TRI);
+ MachineInstr *StoreMI = prior(oldNextMII);
+ VRM->addSpillSlotUse(StackSlot, StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
+ VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ }
+ return true;
+}
+
+
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps) {
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ MRI->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+      // If the virtual register is implicitly defined, emit an implicit_def
+      // before it so the scavenger knows it's "defined".
+      // FIXME: This is a horrible hack done by the register allocator to
+ // remat a definition with virtual register operand.
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+
+ // A partial def causes problems because the same operand both reads and
+ // writes the register. This rewriter is designed to rewrite uses and defs
+ // separately, so a partial def would already have been rewritten to a
+ // physreg by the time we get to processing defs.
+ // Add an implicit use operand to model the partial def.
+ if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+ MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+ VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+ MI.addOperand(MachineOperand::CreateReg(VirtReg,
+ false, // isDef
+ true)); // isImplicit
+ DEBUG(dbgs() << "Partial redef: " << MI);
+ }
+ }
+
+ // Process all of the spilled uses and all non spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ unsigned VirtReg = MI.getOperand(i).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MI.getOperand(i).getSubReg();
+ if (VRM->isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+ if (MI.getOperand(i).isDef())
+ ReusedOperands.markClobbered(Phys);
+ substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+ if (VRM->isImplicitlyDefined(VirtReg))
+ // FIXME: Is this needed?
+ BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MI.getOperand(i).isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = MI.getOperand(i).isUndef();
+ // Check if it is defined by an implicit def. It should not be spilled.
+    // Note, this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+    // The live range [12, 14) is not part of the r1024 live interval since
+    // it's defined by an implicit def. It will not conflict with the live
+    // interval of r1025. Now suppose both registers are spilled: you can
+    // easily see a situation where both registers are reloaded before
+    // the INSERT_SUBREG and both target registers would overlap.
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload && SubIdx) {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
+ if (MI.isInlineAsm()) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+ CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for " << PrintReg(VirtReg) << ": " << MOk
+ << '\n');
+ break;
+ }
+ }
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
+ << " instead of reloading into "
+ << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to
+ // update the kill flags for the current instr after processing it.
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM->getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+          // Mark it isKill if there are no other uses of the same virtual
+          // register and it's not a two-address operand. IsKill will be
+          // unset if the reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
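+      // For illustration only (hypothetical registers): if fi#1 is available
+      // in %r0 and the two-address instruction is 'r1 = add r1, r2' with r1
+      // reloaded from fi#1, we emit '%r1 = COPY %r0' rather than reusing %r0
+      // directly, so the value held in %r0 survives the add.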
+ unsigned DesignatedReg = VRM->getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.
+ GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+ MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ // If the mapped designated register is actually the physreg we have
+      // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+ << " for " << PrintReg(VirtReg)
+ << " instead of reloading into same physreg.\n");
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ MRI->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ MRI->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+}
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them. If possible, avoid reloading vregs.
+void
+LocalRewriter::RewriteMBB(LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+ << MBB->getName() << "':\n");
+
+ MachineFunction &MF = *MBB->getParent();
+
+ // MaybeDeadStores - When we need to write a value back into a stack slot,
+ // keep track of the inserted store. If the stack slot value is never read
+ // (because the value was used from some available register, for example), and
+ // subsequently stored to, the original store is dead. This map keeps track
+ // of inserted stores that are not used. If we see a subsequent store to the
+ // same stack slot, the original store is deleted.
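+  // For illustration only (hypothetical slot): a 'store %r0 -> fi#3' followed
+  // later by another store to fi#3 with no intervening read of fi#3 makes the
+  // first store dead, and it is deleted.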
+ std::vector<MachineInstr*> MaybeDeadStores;
+ MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+ // ReMatDefs - These are rematerializable def MIs which are not deleted.
+ SmallSet<MachineInstr*, 4> ReMatDefs;
+
+ // Keep track of the registers we have already spilled in case there are
+ // multiple defs of the same register in MI.
+ SmallSet<unsigned, 8> SpilledMIRegs;
+
+ RegKills.reset();
+ KillOps.clear();
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ DistanceMap.clear();
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ) {
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
+
+ if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
+ NextMII = llvm::next(MII);
+
+ if (InsertEmergencySpills(MII))
+ NextMII = llvm::next(MII);
+
+ InsertRestores(MII, Spills, RegKills, KillOps);
+
+ if (InsertSpills(MII))
+ NextMII = llvm::next(MII);
+
+ bool Erased = false;
+ bool BackTracked = false;
+ MachineInstr &MI = *MII;
+
+ // Remember DbgValue's which reference stack slots.
+ if (MI.isDebugValue() && MI.getOperand(0).isFI())
+ Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, TRI);
+
+ ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
+
+ DEBUG(dbgs() << '\t' << MI);
+
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+ // register
+
+ // Copy the folded virts to a small vector, we may change MI2VirtMap.
+ SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
+ // C++0x FTW!
+ for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
+ VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
+ VRM->getFoldedVirts(&MI);
+ FVRange.first != FVRange.second; ++FVRange.first)
+ FoldedVirts.push_back(FVRange.first->second);
+
+ SmallSet<int, 2> FoldedSS;
+ for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
+ unsigned VirtReg = FoldedVirts[FVI].first;
+ VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
+ DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR);
+
+ int SS = VRM->getStackSlot(VirtReg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ continue;
+ FoldedSS.insert(SS);
+ DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+ if (DestReg && FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
+ if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+ DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
+ if (DestReg != InReg) {
+ MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+ MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DestReg, RegState::Define, DefMO->getSubReg())
+ .addReg(InReg, RegState::Kill);
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = CopyMI;
+ NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ BackTracked = true;
+ } else {
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ // InvalidateKills resurrects any prior kill of the copy's source
+ // allowing the source reg to be reused in place of the copy.
+ Spills.disallowClobberPhysReg(InReg);
+ }
+
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ } else {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (PhysReg &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
+ MBB->insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ MachineInstr* DeadStore = MaybeDeadStores[SS];
+ if (DeadStore) {
+ bool isDead = !(MR & VirtRegMap::isRef);
+ MachineInstr *NewStore = NULL;
+ if (MR & VirtRegMap::isModRef) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ // We can reuse this physreg as long as we are allowed to clobber
+ // the value and there isn't an earlier def that has already clobbered
+ // the physreg.
+ if (PhysReg &&
+ !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg) &&
+ !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+ MachineOperand *KillOpnd =
+ DeadStore->findRegisterUseOperand(PhysReg, true);
+ // Note, if the store is storing a sub-register, it's possible the
+ // super-register is needed below.
+ if (KillOpnd && !KillOpnd->getSubReg() &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+ MBB->insert(MII, NewMIs[0]);
+ NewStore = NewMIs[1];
+ MBB->insert(MII, NewStore);
+ VRM->addSpillSlotUse(SS, NewStore);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ --NextMII;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ isDead = true;
+ ++NumSUnfold;
+ }
+ }
+ }
+
+ if (isDead) { // Previous store is dead.
+ // If we get here, the store is dead, nuke it now.
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ if (!NewStore)
+ ++NumDSE;
+ }
+
+ MaybeDeadStores[SS] = NULL;
+ if (NewStore) {
+ // Treat this store as a spill merged into a copy. That makes the
+ // stack slot value available.
+ VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+ goto ProcessNextInst;
+ }
+ }
+
+ // If the spill slot value is available, and this is a new definition of
+ // the value, the value is not available anymore.
+ if (MR & VirtRegMap::isMod) {
+ // Notice that the value in this stack slot has been modified.
+ Spills.ModifyStackSlotOrReMat(SS);
+
+ // If this is *just* a mod of the value, check to see if this is just a
+ // store to the spill slot (i.e. the spill got merged into the copy). If
+ // so, realize that the vreg is available now, and add the store to the
+ // MaybeDeadStore info.
+ int StackSlot;
+ if (!(MR & VirtRegMap::isRef)) {
+ if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Src hasn't been allocated yet?");
+
+ if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
+ Spills, RegKills, KillOps, TRI)) {
+ NextMII = llvm::next(MII);
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+
+ // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+ // this as a potentially dead store in case there is a subsequent
+ // store into the stack slot without a read from it.
+ MaybeDeadStores[StackSlot] = &MI;
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register
+ // available in PhysReg.
+ Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
+ }
+ }
+ }
+ }
+
+ // Process all of the spilled defs.
+ SpilledMIRegs.clear();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+ continue;
+
+ unsigned VirtReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+        // Also check if it's copying from an "undef"; if so, we can't
+        // eliminate this or else the undef marker is lost and it will
+        // confuse the scavenger. This is extremely rare.
+ if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
+ MI.getNumOperands() == 2) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
+ // Last def is now dead.
+ TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
+ }
+ EraseInstr(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
+
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
+
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+ // If it is a folded reference, then it's not safe to clobber.
+ bool Folded = FoldedSS.count(FrameIdx);
+ // Otherwise, if it wasn't available, remember that it is now!
+ Spills.addAvailable(FrameIdx, DestReg, !Folded);
+ goto ProcessNextInst;
+ }
+
+ continue;
+ }
+
+ unsigned SubIdx = MO.getSubReg();
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ if (DoReMat)
+ ReMatDefs.insert(&MI);
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM->getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
+ unsigned PhysReg;
+ unsigned TiedOp;
+ if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ if (SubIdx) {
+ unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+ assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+ "Can't find corresponding super-register!");
+ PhysReg = SuperReg;
+ }
+ } else {
+ PhysReg = VRM->getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+ }
+ }
+
+ // If StackSlot is available in a register that also holds other stack
+ // slots, clobber those stack slots now.
+ Spills.ClobberSharingStackSlots(StackSlot);
+
+ assert(PhysReg && "VR not assigned a physical register?");
+ MRI->setPhysRegUsed(PhysReg);
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ ReusedOperands.markClobbered(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
+ LastStore, Spills, ReMatDefs, RegKills, KillOps);
+ NextMII = llvm::next(MII);
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ if (MI.isIdentityCopy()) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ ProcessNextInst:
+ // Delete dead instructions without side effects.
+ if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ }
+ if (!Erased)
+ DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
+ if (!Erased && !BackTracked) {
+ for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+ UpdateKills(*II, TRI, RegKills, KillOps);
+ }
+ MII = NextMII;
+ }
+
+}
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
+ switch (RewriterOpt) {
+ default: llvm_unreachable("Unreachable!");
+ case local:
+ return new LocalRewriter();
+ case trivial:
+ return new TrivialRewriter();
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.h b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 000000000000..93474e0d7ff7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,32 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineFunction;
+ class VirtRegMap;
+
+ /// VirtRegRewriter interface: Implementations of this interface assign
+ /// spilled virtual registers to stack slots, rewriting the code.
+ struct VirtRegRewriter {
+ virtual ~VirtRegRewriter();
+ virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) = 0;
+ };
+
+  /// createVirtRegRewriter - Create and return a rewriter object, as specified
+ /// on the command line.
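+  /// A sketch of typical use (ownership is assumed to rest with the caller):
+  ///   VirtRegRewriter *RW = createVirtRegRewriter();
+  ///   RW->runOnMachineFunction(MF, VRM, LIs);
+  ///   delete RW;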
+ VirtRegRewriter* createVirtRegRewriter();
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CompilerDriver/Action.cpp b/contrib/llvm/lib/CompilerDriver/Action.cpp
new file mode 100644
index 000000000000..a8d625c7ac04
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/Action.cpp
@@ -0,0 +1,134 @@
+//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Action class - implementation and auxiliary functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/Action.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Error.h"
+#include "llvm/CompilerDriver/Main.h"
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TimeValue.h"
+
+#include <stdexcept>
+#include <string>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace llvmc {
+
+extern const char* ProgramName;
+
+}
+
+namespace {
+
+ void PrintString (const std::string& str) {
+ errs() << str << ' ';
+ }
+
+ void PrintCommand (const std::string& Cmd, const StrVector& Args) {
+ errs() << Cmd << ' ';
+ std::for_each(Args.begin(), Args.end(), &PrintString);
+ errs() << '\n';
+ }
+
+ bool IsSegmentationFault (int returnCode) {
+#ifdef LLVM_ON_WIN32
+    return (returnCode >= 0xc0000000UL);
+#else
+ return (returnCode < 0);
+#endif
+ }
+
+ int ExecuteProgram (const std::string& name, const StrVector& args) {
+ sys::Path prog(name);
+
+ if (sys::path::is_relative(prog.str())) {
+ prog = PrependMainExecutablePath(name, ProgramName,
+ (void *)(intptr_t)&Main);
+
+ if (!prog.canExecute()) {
+ prog = sys::Program::FindProgramByName(name);
+ if (prog.isEmpty()) {
+ PrintError("Can't find program '" + name + "'");
+ return -1;
+ }
+ }
+ }
+ if (!prog.canExecute()) {
+ PrintError("Program '" + name + "' is not executable.");
+ return -1;
+ }
+
+ // Build the command line vector and the redirects array.
+ const sys::Path* redirects[3] = {0,0,0};
+ sys::Path stdout_redirect;
+
+ std::vector<const char*> argv;
+ argv.reserve((args.size()+2));
+ argv.push_back(name.c_str());
+
+ for (StrVector::const_iterator B = args.begin(), E = args.end();
+ B!=E; ++B) {
+ if (*B == ">") {
+ ++B;
+ stdout_redirect.set(*B);
+ redirects[1] = &stdout_redirect;
+ }
+ else {
+ argv.push_back((*B).c_str());
+ }
+ }
+ argv.push_back(0); // null terminate list.
+
+ // Invoke the program.
+ int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
+
+ if (IsSegmentationFault(ret)) {
+ errs() << "Segmentation fault: ";
+ PrintCommand(name, args);
+ }
+
+ return ret;
+ }
+}
+
+namespace llvmc {
+ void AppendToGlobalTimeLog (const std::string& cmd, double time);
+}
+
+int llvmc::Action::Execute () const {
+ if (DryRun || VerboseMode)
+ PrintCommand(Command_, Args_);
+
+ if (!DryRun) {
+ if (Time) {
+ sys::TimeValue now = sys::TimeValue::now();
+ int ret = ExecuteProgram(Command_, Args_);
+ sys::TimeValue now2 = sys::TimeValue::now();
+ now2 -= now;
+ double elapsed = now2.seconds() + now2.microseconds() / 1000000.0;
+ AppendToGlobalTimeLog(Command_, elapsed);
+
+ return ret;
+ }
+ else {
+ return ExecuteProgram(Command_, Args_);
+ }
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp b/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp
new file mode 100644
index 000000000000..38442038d738
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp
@@ -0,0 +1,61 @@
+//===--- BuiltinOptions.cpp - The LLVM Compiler Driver ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions of all global command-line option variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+
+#ifdef ENABLE_LLVMC_DYNAMIC_PLUGINS
+#include "llvm/Support/PluginLoader.h"
+#endif
+
+namespace cl = llvm::cl;
+
+namespace llvmc {
+
+cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
+ cl::ZeroOrMore);
+cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"),
+ cl::value_desc("file"), cl::Prefix);
+cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
+ cl::value_desc("<directory>"), cl::Prefix);
+cl::list<std::string> Languages("x",
+ cl::desc("Specify the language of the following input files"),
+ cl::ZeroOrMore);
+
+cl::opt<bool> DryRun("dry-run",
+ cl::desc("Only pretend to run commands"));
+cl::opt<bool> Time("time", cl::desc("Time individual commands"));
+cl::opt<bool> VerboseMode("v",
+ cl::desc("Enable verbose mode"));
+
+cl::opt<bool> CheckGraph("check-graph",
+ cl::desc("Check the compilation graph for errors"),
+ cl::Hidden);
+cl::opt<bool> WriteGraph("write-graph",
+ cl::desc("Write compilation-graph.dot file"),
+ cl::Hidden);
+cl::opt<bool> ViewGraph("view-graph",
+ cl::desc("Show compilation graph in GhostView"),
+ cl::Hidden);
+
+cl::opt<SaveTempsEnum::Values> SaveTemps
+("save-temps", cl::desc("Keep temporary files"),
+ cl::init(SaveTempsEnum::Unset),
+ cl::values(clEnumValN(SaveTempsEnum::Obj, "obj",
+ "Save files in the directory specified with -o"),
+ clEnumValN(SaveTempsEnum::Cwd, "cwd",
+ "Use current working directory"),
+ clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"),
+ clEnumValEnd),
+ cl::ValueOptional);
+
+} // End namespace llvmc.
diff --git a/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
new file mode 100644
index 000000000000..33c6566499b8
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
@@ -0,0 +1,655 @@
+//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Compilation graph - implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <queue>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace llvmc {
+
+ const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
+ // Remove the '.'.
+ StringRef suf = sys::path::extension(File.str()).substr(1);
+ LanguageMap::const_iterator Lang =
+ this->find(suf.empty() ? "*empty*" : suf);
+ if (Lang == this->end()) {
+ PrintError("File '" + File.str() + "' has unknown suffix '"
+ + suf.str() + '\'');
+ return 0;
+ }
+ return &Lang->second;
+ }
+}
+
+namespace {
+
+ /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error.
+ template <class C>
+ const Edge* ChooseEdge(const C& EdgesContainer,
+ const InputLanguagesSet& InLangs,
+ const std::string& NodeName = "root") {
+ const Edge* MaxEdge = 0;
+ int MaxWeight = 0;
+ bool SingleMax = true;
+
+ // TODO: fix calculation of SingleMax.
+ for (typename C::const_iterator B = EdgesContainer.begin(),
+ E = EdgesContainer.end(); B != E; ++B) {
+ const Edge* e = B->getPtr();
+ int EW = e->Weight(InLangs);
+ if (EW < 0) {
+ // (error) invocation in TableGen -> we don't need to print an error
+ // message.
+ return 0;
+ }
+ if (EW > MaxWeight) {
+ MaxEdge = e;
+ MaxWeight = EW;
+ SingleMax = true;
+ } else if (EW == MaxWeight) {
+ SingleMax = false;
+ }
+ }
+
+ if (!SingleMax) {
+ PrintError("Node " + NodeName + ": multiple maximal outward edges found!"
+ " Most probably a specification error.");
+ return 0;
+ }
+ if (!MaxEdge) {
+ PrintError("Node " + NodeName + ": no maximal outward edge found!"
+ " Most probably a specification error.");
+ return 0;
+ }
+ return MaxEdge;
+ }
+
+}
+
+void Node::AddEdge(Edge* Edg) {
+ // If there already was an edge between two nodes, modify it instead
+ // of adding a new edge.
+ const std::string& ToolName = Edg->ToolName();
+ for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end();
+ B != E; ++B) {
+ if ((*B)->ToolName() == ToolName) {
+ llvm::IntrusiveRefCntPtr<Edge>(Edg).swap(*B);
+ return;
+ }
+ }
+ OutEdges.push_back(llvm::IntrusiveRefCntPtr<Edge>(Edg));
+}
+
+CompilationGraph::CompilationGraph() {
+ NodesMap["root"] = Node(this);
+}
+
+Node* CompilationGraph::getNode(const std::string& ToolName) {
+ nodes_map_type::iterator I = NodesMap.find(ToolName);
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph");
+ return 0;
+ }
+ return &I->second;
+}
+
+const Node* CompilationGraph::getNode(const std::string& ToolName) const {
+ nodes_map_type::const_iterator I = NodesMap.find(ToolName);
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph!");
+ return 0;
+ }
+ return &I->second;
+}
+
+// Find the tools list corresponding to the given language name.
+const CompilationGraph::tools_vector_type*
+CompilationGraph::getToolsVector(const std::string& LangName) const
+{
+ tools_map_type::const_iterator I = ToolsMap.find(LangName);
+ if (I == ToolsMap.end()) {
+ PrintError("No tool corresponding to the language " + LangName + " found");
+ return 0;
+ }
+ return &I->second;
+}
+
+void CompilationGraph::insertNode(Tool* V) {
+ if (NodesMap.count(V->Name()) == 0)
+ NodesMap[V->Name()] = Node(this, V);
+}
+
+int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
+ Node* B = getNode(Edg->ToolName());
+ if (B == 0)
+ return 1;
+
+ if (A == "root") {
+ const char** InLangs = B->ToolPtr->InputLanguages();
+ for (;*InLangs; ++InLangs)
+ ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
+ NodesMap["root"].AddEdge(Edg);
+ }
+ else {
+ Node* N = getNode(A);
+ if (N == 0)
+ return 1;
+
+ N->AddEdge(Edg);
+ }
+ // Increase the inward edge counter.
+ B->IncrInEdges();
+
+ return 0;
+}
+
+// Pass the input file through the chain until we reach a Join node or an
+// action that stops further compilation.
+int CompilationGraph::PassThroughGraph (const sys::Path& InFile,
+ const Node* StartNode,
+ const InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) const {
+ sys::Path In = InFile;
+ const Node* CurNode = StartNode;
+
+ while(true) {
+ Tool* CurTool = CurNode->ToolPtr.getPtr();
+
+ if (CurTool->IsJoin()) {
+ JoinTool& JT = static_cast<JoinTool&>(*CurTool);
+ JT.AddToJoinList(In);
+ break;
+ }
+
+ Action CurAction;
+ if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
+
+ if (int ret = CurAction.Execute())
+ return ret;
+
+ if (CurAction.StopCompilation())
+ return 0;
+
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ CurNode = getNode(Edg->ToolName());
+ if (CurNode == 0)
+ return 1;
+
+ In = CurAction.OutFile();
+ }
+
+ return 0;
+}
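+
+// As an illustration (tool names here are hypothetical; the real ones come
+// from the plugin's TableGen description), a single C input might flow
+// through the chain as
+//
+//   hello.c --[compiler]--> tmp.s --[assembler]--> tmp.o --> join node (linker)
+//
+// with each intermediate file placed in TempDir and the traversal stopping
+// once a Join node collects the file via AddToJoinList().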
+
+// Find the head of the toolchain corresponding to the given file.
+// Also, insert an input language into InLangs.
+const Node* CompilationGraph::
+FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
+ InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
+
+ // Determine the input language.
+ const std::string* InLang = (ForceLanguage ? ForceLanguage
+ : LangMap.GetLanguage(In));
+ if (InLang == 0)
+ return 0;
+ const std::string& InLanguage = *InLang;
+
+ // Add the current input language to the input language set.
+ InLangs.insert(InLanguage);
+
+ // Find the toolchain for the input language.
+ const tools_vector_type* pTV = getToolsVector(InLanguage);
+ if (pTV == 0)
+ return 0;
+
+ const tools_vector_type& TV = *pTV;
+ if (TV.empty()) {
+ PrintError("No toolchain corresponding to language "
+ + InLanguage + " found");
+ return 0;
+ }
+
+ const Edge* Edg = ChooseEdge(TV, InLangs);
+ if (Edg == 0)
+ return 0;
+
+ return getNode(Edg->ToolName());
+}
+
+// Helper function used by Build().
+// Traverses initial portions of the toolchains (up to the first Join node).
+// This function is also responsible for handling the -x option.
+int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
+ // This is related to -x option handling.
+ cl::list<std::string>::const_iterator xIter = Languages.begin(),
+ xBegin = xIter, xEnd = Languages.end();
+ bool xEmpty = true;
+ const std::string* xLanguage = 0;
+ unsigned xPos = 0, xPosNext = 0, filePos = 0;
+
+ if (xIter != xEnd) {
+ xEmpty = false;
+ xPos = Languages.getPosition(xIter - xBegin);
+ cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+ xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max()
+ : Languages.getPosition(xNext - xBegin);
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
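+
+ // As a worked example (file names and the "c++" language are illustrative):
+ //
+ //   llvmc a.c -x c++ b.ii c.ii -x none d.c
+ //
+ // leaves 'a.c' with its suffix-derived language, forces 'b.ii' and 'c.ii'
+ // to be treated as "c++", and lets 'd.c' fall back to suffix-based
+ // detection, since each -x occurrence applies to the input files that
+ // appear after it and before the next -x.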
+
+ // For each input file:
+ for (cl::list<std::string>::const_iterator B = InputFilenames.begin(),
+ CB = B, E = InputFilenames.end(); B != E; ++B) {
+ sys::Path In = sys::Path(*B);
+
+ // Code for handling the -x option.
+ // Output: std::string* xLanguage (can be NULL).
+ if (!xEmpty) {
+ filePos = InputFilenames.getPosition(B - CB);
+
+ if (xPos < filePos) {
+ if (filePos < xPosNext) {
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+ else { // filePos >= xPosNext
+ // Skip xIters while filePos > xPosNext
+ while (filePos > xPosNext) {
+ ++xIter;
+ xPos = xPosNext;
+
+ cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+ if (xNext == xEnd)
+ xPosNext = std::numeric_limits<unsigned>::max();
+ else
+ xPosNext = Languages.getPosition(xNext - xBegin);
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+ }
+ }
+ }
+
+ // Find the toolchain corresponding to this file.
+ const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
+ if (N == 0)
+ return 1;
+ // Pass file through the chain starting at head.
+ if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap))
+ return ret;
+ }
+
+ return 0;
+}
+
+// Sort the nodes in topological order.
+int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
+ std::queue<const Node*> Q;
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
+
+ while (!Q.empty()) {
+ const Node* A = Q.front();
+ Q.pop();
+ Out.push_back(A);
+ for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+ EB != EE; ++EB) {
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
+ B->DecrInEdges();
+ if (B->HasNoInEdges())
+ Q.push(B);
+ }
+ }
+
+ return 0;
+}
+
+namespace {
+ bool NotJoinNode(const Node* N) {
+ return N->ToolPtr ? !N->ToolPtr->IsJoin() : true;
+ }
+}
+
+// Call TopologicalSort and filter the resulting list to include
+// only Join nodes.
+int CompilationGraph::
+TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
+ std::vector<const Node*> TopSorted;
+ if (int ret = TopologicalSort(TopSorted))
+ return ret;
+ std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
+ std::back_inserter(Out), NotJoinNode);
+
+ return 0;
+}
+
+int CompilationGraph::Build (const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
+ InputLanguagesSet InLangs;
+ bool WasSomeActionGenerated = !InputFilenames.empty();
+
+ // Traverse initial parts of the toolchains and fill in InLangs.
+ if (int ret = BuildInitial(InLangs, TempDir, LangMap))
+ return ret;
+
+ std::vector<const Node*> JTV;
+ if (int ret = TopologicalSortFilterJoinNodes(JTV))
+ return ret;
+
+ // For all join nodes in topological order:
+ for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
+ B != E; ++B) {
+
+ const Node* CurNode = *B;
+ JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
+
+ // Are there any files in the join list?
+ if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
+ continue;
+
+ WasSomeActionGenerated = true;
+ Action CurAction;
+ if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
+
+ if (int ret = CurAction.Execute())
+ return ret;
+
+ if (CurAction.StopCompilation())
+ return 0;
+
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ const Node* NextNode = getNode(Edg->ToolName());
+ if (NextNode == 0)
+ return 1;
+
+ if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
+ InLangs, TempDir, LangMap)) {
+ return ret;
+ }
+ }
+
+ if (!WasSomeActionGenerated) {
+ PrintError("no input files");
+ return 1;
+ }
+
+ return 0;
+}
+
+int CompilationGraph::CheckLanguageNames() const {
+ int ret = 0;
+
+ // Check that names for output and input languages on all edges do match.
+ for (const_nodes_iterator B = this->NodesMap.begin(),
+ E = this->NodesMap.end(); B != E; ++B) {
+
+ const Node & N1 = B->second;
+ if (N1.ToolPtr) {
+ for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
+ EB != EE; ++EB) {
+ const Node* N2 = this->getNode((*EB)->ToolName());
+ if (N2 == 0)
+ return 1;
+
+ if (!N2->ToolPtr) {
+ ++ret;
+ errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
+ << "' back to the root!\n\n";
+ continue;
+ }
+
+ const char** OutLangs = N1.ToolPtr->OutputLanguages();
+ const char** InLangs = N2->ToolPtr->InputLanguages();
+ bool eq = false;
+ const char* OutLang = 0;
+ for (;*OutLangs; ++OutLangs) {
+ OutLang = *OutLangs;
+ for (;*InLangs; ++InLangs) {
+ if (std::strcmp(OutLang, *InLangs) == 0) {
+ eq = true;
+ break;
+ }
+ }
+ }
+
+ if (!eq) {
+ ++ret;
+ errs() << "Error: Output->input language mismatch in the edge '"
+ << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name()
+ << "'!\n"
+ << "Expected one of { ";
+
+ InLangs = N2->ToolPtr->InputLanguages();
+ for (;*InLangs; ++InLangs) {
+ errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
+ }
+
+ errs() << " }, but got '" << OutLang << "'!\n\n";
+ }
+
+ }
+ }
+ }
+
+ return ret;
+}
+
+int CompilationGraph::CheckMultipleDefaultEdges() const {
+ int ret = 0;
+ InputLanguagesSet Dummy;
+
+ // For all nodes, just iterate over the outgoing edges and check if there is
+ // more than one edge with maximum weight.
+ for (const_nodes_iterator B = this->NodesMap.begin(),
+ E = this->NodesMap.end(); B != E; ++B) {
+ const Node& N = B->second;
+ int MaxWeight = -1024;
+
+ // Ignore the root node.
+ if (!N.ToolPtr)
+ continue;
+
+ for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
+ EB != EE; ++EB) {
+ int EdgeWeight = (*EB)->Weight(Dummy);
+ if (EdgeWeight > MaxWeight) {
+ MaxWeight = EdgeWeight;
+ }
+ else if (EdgeWeight == MaxWeight) {
+ ++ret;
+ errs() << "Error: there are multiple maximal edges stemming from the '"
+ << N.ToolPtr->Name() << "' node!\n\n";
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+int CompilationGraph::CheckCycles() {
+ unsigned deleted = 0;
+ std::queue<Node*> Q;
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
+
+ // Try to delete all nodes that have no incoming edges, starting from the
+ // root. If there are any nodes left after this operation, then we have a
+ // cycle. This relies on '--check-graph' not performing the topological sort.
+ while (!Q.empty()) {
+ Node* A = Q.front();
+ Q.pop();
+ ++deleted;
+
+ for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+ EB != EE; ++EB) {
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
+ B->DecrInEdges();
+ if (B->HasNoInEdges())
+ Q.push(B);
+ }
+ }
+
+ if (deleted != NodesMap.size()) {
+ errs() << "Error: there are cycles in the compilation graph!\n"
+ << "Try inspecting the diagram produced by "
+ << "'llvmc --view-graph'.\n\n";
+ return 1;
+ }
+
+ return 0;
+}
+
+int CompilationGraph::Check () {
+ // We try to catch as many errors as we can in one go.
+ int errs = 0;
+ int ret = 0;
+
+ // Check that output/input language names match.
+ ret = this->CheckLanguageNames();
+ if (ret < 0)
+ return 1;
+ errs += ret;
+
+ // Check for multiple default edges.
+ ret = this->CheckMultipleDefaultEdges();
+ if (ret < 0)
+ return 1;
+ errs += ret;
+
+ // Check for cycles.
+ ret = this->CheckCycles();
+ if (ret < 0)
+ return 1;
+ errs += ret;
+
+ return errs;
+}
+
+// Code related to graph visualization.
+
+namespace {
+
+std::string SquashStrArray (const char** StrArr) {
+ std::string ret;
+
+ for (; *StrArr; ++StrArr) {
+ if (*(StrArr + 1)) {
+ ret += *StrArr;
+ ret += ", ";
+ }
+ else {
+ ret += *StrArr;
+ }
+ }
+
+ return ret;
+}
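+
+// For example, SquashStrArray applied to the NULL-terminated array
+// { "c", "c++", "objective-c", NULL } yields the string "c, c++, objective-c".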
+
+} // End anonymous namespace.
+
+namespace llvm {
+ template <>
+ struct DOTGraphTraits<llvmc::CompilationGraph*>
+ : public DefaultDOTGraphTraits
+ {
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ template<typename GraphType>
+ static std::string getNodeLabel(const Node* N, const GraphType&)
+ {
+ if (N->ToolPtr)
+ if (N->ToolPtr->IsJoin())
+ return N->Name() + "\n (join" +
+ (N->HasChildren() ? ")"
+ : std::string(": ") +
+ SquashStrArray(N->ToolPtr->OutputLanguages()) + ')');
+ else
+ return N->Name();
+ else
+ return "root";
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
+ if (N->ToolPtr) {
+ return SquashStrArray(N->ToolPtr->OutputLanguages());
+ }
+ else {
+ return SquashStrArray(I->ToolPtr->InputLanguages());
+ }
+ }
+ };
+
+} // End namespace llvm
+
+int CompilationGraph::writeGraph(const std::string& OutputFilename) {
+ std::string ErrorInfo;
+ raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty()) {
+ errs() << "Writing '"<< OutputFilename << "' file...";
+ llvm::WriteGraph(O, this);
+ errs() << "done.\n";
+ }
+ else {
+ PrintError("Error opening file '" + OutputFilename + "' for writing!");
+ return 1;
+ }
+
+ return 0;
+}
+
+void CompilationGraph::viewGraph() {
+ llvm::ViewGraph(this, "compilation-graph");
+}
diff --git a/contrib/llvm/lib/CompilerDriver/Main.cpp b/contrib/llvm/lib/CompilerDriver/Main.cpp
new file mode 100644
index 000000000000..7120027f7ce0
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/Main.cpp
@@ -0,0 +1,146 @@
+//===--- Main.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// llvmc::Main function - driver entry point.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/AutoGenerated.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+
+#include <sstream>
+#include <string>
+
+namespace cl = llvm::cl;
+namespace sys = llvm::sys;
+using namespace llvmc;
+
+namespace {
+
+ std::stringstream* GlobalTimeLog;
+
+ /// GetTempDir - Get the temporary directory location. Returns non-zero value
+ /// on error.
+ int GetTempDir(sys::Path& tempDir) {
+ // The --temp-dir option.
+ if (!TempDirname.empty()) {
+ tempDir = TempDirname;
+ }
+ // GCC 4.5-style -save-temps handling.
+ else if (SaveTemps == SaveTempsEnum::Unset) {
+ tempDir = sys::Path::GetTemporaryDirectory();
+ return 0;
+ }
+ else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
+ tempDir = sys::path::parent_path(OutputFilename);
+ }
+ else {
+ // SaveTemps == Cwd --> use current dir (leave tempDir empty).
+ return 0;
+ }
+
+ bool Exists;
+ if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
+ std::string ErrMsg;
+ if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
+ PrintError(ErrMsg);
+ return 1;
+ }
+ }
+
+ return 0;
+ }
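+
+ // For instance (paths are illustrative): with no -save-temps the temporaries
+ // go to a fresh system temporary directory, '--temp-dir=mytmp' puts them in
+ // 'mytmp', '-save-temps=obj -o build/hello' puts them in 'build', and
+ // '-save-temps=cwd' leaves them in the current working directory.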
+
+ /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns
+ /// non-zero value in case of error.
+ int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
+ int ret;
+ sys::Path tempDir;
+ bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
+
+ if (int ret = GetTempDir(tempDir))
+ return ret;
+
+ ret = graph.Build(tempDir, langMap);
+
+ if (toDelete)
+ tempDir.eraseFromDisk(true);
+
+ return ret;
+ }
+}
+
+namespace llvmc {
+
+// Used to implement -time option. External linkage is intentional.
+void AppendToGlobalTimeLog(const std::string& cmd, double time) {
+ *GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
+}
+
+// Sometimes user code wants to access the argv[0] value.
+const char* ProgramName;
+
+int Main(int argc, char** argv) {
+ int ret = 0;
+ LanguageMap langMap;
+ CompilationGraph graph;
+
+ ProgramName = argv[0];
+
+ cl::ParseCommandLineOptions
+ (argc, argv,
+ /* Overview = */ "LLVM Compiler Driver (Work In Progress)",
+ /* ReadResponseFiles = */ false);
+
+ if (int ret = autogenerated::RunInitialization(langMap, graph))
+ return ret;
+
+ if (CheckGraph) {
+ ret = graph.Check();
+ if (!ret)
+ llvm::errs() << "check-graph: no errors found.\n";
+
+ return ret;
+ }
+
+ if (ViewGraph) {
+ graph.viewGraph();
+ if (!WriteGraph)
+ return 0;
+ }
+
+ if (WriteGraph) {
+ const std::string& Out = (OutputFilename.empty()
+ ? std::string("compilation-graph.dot")
+ : OutputFilename);
+ return graph.writeGraph(Out);
+ }
+
+ if (Time) {
+ GlobalTimeLog = new std::stringstream;
+ GlobalTimeLog->precision(2);
+ }
+
+ ret = BuildTargets(graph, langMap);
+
+ if (Time) {
+ llvm::errs() << GlobalTimeLog->str();
+ delete GlobalTimeLog;
+ }
+
+ return ret;
+}
+
+} // end namespace llvmc
diff --git a/contrib/llvm/lib/CompilerDriver/Tool.cpp b/contrib/llvm/lib/CompilerDriver/Tool.cpp
new file mode 100644
index 000000000000..876759aa72b0
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/Tool.cpp
@@ -0,0 +1,95 @@
+//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tool base class - implementation details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Tool.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Path.h"
+
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace {
+ sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
+ const std::string& Suffix) {
+ sys::Path Out;
+
+ // Make sure we don't end up with path names like '/file.o' if the
+ // TempDir is empty.
+ if (TempDir.empty()) {
+ Out.set(BaseName);
+ }
+ else {
+ Out = TempDir;
+ Out.appendComponent(BaseName);
+ }
+ Out.appendSuffix(Suffix);
+ // NOTE: makeUnique always *creates* a unique temporary file,
+ // which is good, since there will be no races. However, some
+ // tools do not like it when the output file already exists, so
+ // they need to be placated with -f or something like that.
+ Out.makeUnique(true, NULL);
+ return Out;
+ }
+}
+
+sys::Path Tool::OutFilename(const sys::Path& In,
+ const sys::Path& TempDir,
+ bool StopCompilation,
+ const char* OutputSuffix) const {
+ sys::Path Out;
+
+ if (StopCompilation) {
+ if (!OutputFilename.empty()) {
+ Out.set(OutputFilename);
+ }
+ else if (IsJoin()) {
+ Out.set("a");
+ Out.appendSuffix(OutputSuffix);
+ }
+ else {
+ Out.set(sys::path::stem(In.str()));
+ Out.appendSuffix(OutputSuffix);
+ }
+ }
+ else {
+ if (IsJoin())
+ Out = MakeTempFile(TempDir, "tmp", OutputSuffix);
+ else
+ Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix);
+ }
+ return Out;
+}
+
+namespace {
+ template <class A, class B>
+ bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) {
+ return std::less<A>()(p1.first, p2.first);
+ }
+}
+
+StrVector Tool::SortArgs(ArgsVector& Args) const {
+ StrVector Out;
+
+ // HACK: this won't be needed once we migrate away from CommandLine.
+ std::stable_sort(Args.begin(), Args.end(),
+ &CompareFirst<unsigned, std::string>);
+ for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) {
+ Out.push_back(B->second);
+ }
+
+ return Out;
+}
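+
+// For example, an ArgsVector of (position, argument) pairs such as
+// { (5, "-o"), (6, "out"), (2, "-O2") } comes back as the StrVector
+// { "-O2", "-o", "out" }, i.e. the original command-line order is restored.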
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
new file mode 100644
index 000000000000..7652090af8f5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -0,0 +1,1130 @@
+//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interface used by the various execution engine
+// subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cmath>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
+STATISTIC(NumGlobals , "Number of global vars initialized");
+
+ExecutionEngine *(*ExecutionEngine::JITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) = 0;
+ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) = 0;
+ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
+ std::string *ErrorStr) = 0;
+
+ExecutionEngine::ExecutionEngine(Module *M)
+ : EEState(*this),
+ LazyFunctionCreator(0),
+ ExceptionTableRegister(0),
+ ExceptionTableDeregister(0) {
+ CompilingLazily = false;
+ GVCompilationDisabled = false;
+ SymbolSearchingDisabled = false;
+ Modules.push_back(M);
+ assert(M && "Module is null?");
+}
+
+ExecutionEngine::~ExecutionEngine() {
+ clearAllGlobalMappings();
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ delete Modules[i];
+}
+
+void ExecutionEngine::DeregisterAllTables() {
+ if (ExceptionTableDeregister) {
+ DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin();
+ DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end();
+ for (; it != ite; ++it)
+ ExceptionTableDeregister(it->second);
+ AllExceptionTables.clear();
+ }
+}
+
+namespace {
+/// \brief Helper class which uses a value handle to automatically delete the
+/// memory block when the GlobalVariable is destroyed.
+class GVMemoryBlock : public CallbackVH {
+ GVMemoryBlock(const GlobalVariable *GV)
+ : CallbackVH(const_cast<GlobalVariable*>(GV)) {}
+
+public:
+ /// \brief Returns the address the GlobalVariable should be written into. The
+ /// GVMemoryBlock object is allocated in the same block, immediately before
+ /// that address.
+ static char *Create(const GlobalVariable *GV, const TargetData& TD) {
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
+ void *RawMemory = ::operator new(
+ TargetData::RoundUpAlignment(sizeof(GVMemoryBlock),
+ TD.getPreferredAlignment(GV))
+ + GVSize);
+ new(RawMemory) GVMemoryBlock(GV);
+ return static_cast<char*>(RawMemory) + sizeof(GVMemoryBlock);
+ }
+
+ virtual void deleted() {
+ // We allocated with operator new and with some extra memory hanging off the
+ // end, so don't just delete this. I'm not sure if this is actually
+ // required.
+ this->~GVMemoryBlock();
+ ::operator delete(this);
+ }
+};
+} // anonymous namespace
+
+char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
+ return GVMemoryBlock::Create(GV, *getTargetData());
+}
+
+bool ExecutionEngine::removeModule(Module *M) {
+ for(SmallVector<Module *, 1>::iterator I = Modules.begin(),
+ E = Modules.end(); I != E; ++I) {
+ Module *Found = *I;
+ if (Found == M) {
+ Modules.erase(I);
+ clearGlobalMappingsFromModule(M);
+ return true;
+ }
+ }
+ return false;
+}
+
+Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
+ if (Function *F = Modules[i]->getFunction(FnName))
+ return F;
+ }
+ return 0;
+}
+
+
+void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
+ const GlobalValue *ToUnmap) {
+ GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
+ void *OldVal;
+
+ // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
+ // GlobalAddressMap.
+ if (I == GlobalAddressMap.end())
+ OldVal = 0;
+ else {
+ OldVal = I->second;
+ GlobalAddressMap.erase(I);
+ }
+
+ GlobalAddressReverseMap.erase(OldVal);
+ return OldVal;
+}
+
+void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
+ << "\' to [" << Addr << "]\n";);
+ void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
+ assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too.
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+}
+
+void ExecutionEngine::clearAllGlobalMappings() {
+ MutexGuard locked(lock);
+
+ EEState.getGlobalAddressMap(locked).clear();
+ EEState.getGlobalAddressReverseMap(locked).clear();
+}
+
+void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
+ MutexGuard locked(lock);
+
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ EEState.RemoveMapping(locked, FI);
+ for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+ GI != GE; ++GI)
+ EEState.RemoveMapping(locked, GI);
+}
+
+void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ ExecutionEngineState::GlobalAddressMapTy &Map =
+ EEState.getGlobalAddressMap(locked);
+
+ // Deleting from the mapping?
+ if (Addr == 0)
+ return EEState.RemoveMapping(locked, GV);
+
+ void *&CurVal = Map[GV];
+ void *OldVal = CurVal;
+
+ if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
+ EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too.
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+ return OldVal;
+}
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+
+ ExecutionEngineState::GlobalAddressMapTy::iterator I =
+ EEState.getGlobalAddressMap(locked).find(GV);
+ return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
+}
+
+const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
+ MutexGuard locked(lock);
+
+ // If we haven't computed the reverse mapping yet, do so first.
+ if (EEState.getGlobalAddressReverseMap(locked).empty()) {
+ for (ExecutionEngineState::GlobalAddressMapTy::iterator
+ I = EEState.getGlobalAddressMap(locked).begin(),
+ E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
+ EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(
+ I->second, I->first));
+ }
+
+ std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
+ EEState.getGlobalAddressReverseMap(locked).find(Addr);
+ return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
+}
+
+namespace {
+class ArgvArray {
+ char *Array;
+ std::vector<char*> Values;
+public:
+ ArgvArray() : Array(NULL) {}
+ ~ArgvArray() { clear(); }
+ void clear() {
+ delete[] Array;
+ Array = NULL;
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ delete[] Values[I];
+ }
+ Values.clear();
+ }
+ /// Turn a vector of strings into a nice argv style array of pointers to null
+ /// terminated strings.
+ void *reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv);
+};
+} // anonymous namespace
+void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv) {
+ clear(); // Free the old contents.
+ unsigned PtrSize = EE->getTargetData()->getPointerSize();
+ Array = new char[(InputArgv.size()+1)*PtrSize];
+
+ DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
+ const Type *SBytePtr = Type::getInt8PtrTy(C);
+
+ for (unsigned i = 0; i != InputArgv.size(); ++i) {
+ unsigned Size = InputArgv[i].size()+1;
+ char *Dest = new char[Size];
+ Values.push_back(Dest);
+ DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
+
+ std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
+ Dest[Size-1] = 0;
+
+ // Endian safe: Array[i] = (PointerTy)Dest;
+ EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize),
+ SBytePtr);
+ }
+
+ // Null terminate it
+ EE->StoreValueToMemory(PTOGV(0),
+ (GenericValue*)(Array+InputArgv.size()*PtrSize),
+ SBytePtr);
+ return Array;
+}
+
+void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
+ bool isDtors) {
+ const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
+ GlobalVariable *GV = module->getNamedGlobal(Name);
+
+ // If this global has internal linkage, or if it has a use, then it must be
+ // an old-style (llvmgcc3) static ctor with __main linked in and in use. If
+ // this is the case, don't execute any of the global ctors, __main will do
+ // it.
+ if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
+
+ // Should be an array of '{ i32, void ()* }' structs. The first value is
+ // the init priority, which we ignore.
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return;
+ ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(InitList->getOperand(i)))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+
+ Constant *FP = CS->getOperand(1);
+ if (FP->isNullValue())
+ continue; // Found a sentinel value, ignore.
+
+ // Strip off constant expression casts.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+
+ // Execute the ctor/dtor function!
+ if (Function *F = dyn_cast<Function>(FP))
+ runFunction(F, std::vector<GenericValue>());
+
+ // FIXME: It is marginally lame that we just do nothing here if we see an
+ // entry we don't recognize. It might not be unreasonable for the verifier
+ // to not even allow this and just assert here.
+ }
+}
+
+void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
+ // Execute global ctors/dtors for each module in the program.
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ runStaticConstructorsDestructors(Modules[i], isDtors);
+}
+
+#ifndef NDEBUG
+/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
+static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
+ unsigned PtrSize = EE->getTargetData()->getPointerSize();
+ for (unsigned i = 0; i < PtrSize; ++i)
+ if (*(i + (uint8_t*)Loc))
+ return false;
+ return true;
+}
+#endif
+
+int ExecutionEngine::runFunctionAsMain(Function *Fn,
+ const std::vector<std::string> &argv,
+ const char * const * envp) {
+ std::vector<GenericValue> GVArgs;
+ GenericValue GVArgc;
+ GVArgc.IntVal = APInt(32, argv.size());
+
+ // Check main() type
+ unsigned NumArgs = Fn->getFunctionType()->getNumParams();
+ const FunctionType *FTy = Fn->getFunctionType();
+ const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
+
+ // Check the argument types.
+ if (NumArgs > 3)
+ report_fatal_error("Invalid number of arguments of main() supplied");
+ if (NumArgs >= 3 && FTy->getParamType(2) != PPInt8Ty)
+ report_fatal_error("Invalid type for third argument of main() supplied");
+ if (NumArgs >= 2 && FTy->getParamType(1) != PPInt8Ty)
+ report_fatal_error("Invalid type for second argument of main() supplied");
+ if (NumArgs >= 1 && !FTy->getParamType(0)->isIntegerTy(32))
+ report_fatal_error("Invalid type for first argument of main() supplied");
+ if (!FTy->getReturnType()->isIntegerTy() &&
+ !FTy->getReturnType()->isVoidTy())
+ report_fatal_error("Invalid return type of main() supplied");
+
+ ArgvArray CArgv;
+ ArgvArray CEnv;
+ if (NumArgs) {
+ GVArgs.push_back(GVArgc); // Arg #0 = argc.
+ if (NumArgs > 1) {
+ // Arg #1 = argv.
+ GVArgs.push_back(PTOGV(CArgv.reset(Fn->getContext(), this, argv)));
+ assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) &&
+ "argv[0] was null after CreateArgv");
+ if (NumArgs > 2) {
+ std::vector<std::string> EnvVars;
+ for (unsigned i = 0; envp[i]; ++i)
+ EnvVars.push_back(envp[i]);
+ // Arg #2 = envp.
+ GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars)));
+ }
+ }
+ }
+
+ return runFunction(Fn, GVArgs).IntVal.getZExtValue();
+}
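+
+// A minimal usage sketch (assumes an already-created ExecutionEngine *EE and a
+// Function *MainFn looked up from its module; error handling is elided):
+//
+//   std::vector<std::string> Args;
+//   Args.push_back("prog");              // becomes argv[0]
+//   const char *const Envp[] = { 0 };    // empty environment
+//   int Res = EE->runFunctionAsMain(MainFn, Args, Envp);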
+
+ExecutionEngine *ExecutionEngine::create(Module *M,
+ bool ForceInterpreter,
+ std::string *ErrorStr,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) {
+ return EngineBuilder(M)
+ .setEngineKind(ForceInterpreter
+ ? EngineKind::Interpreter
+ : EngineKind::JIT)
+ .setErrorStr(ErrorStr)
+ .setOptLevel(OptLevel)
+ .setAllocateGVsWithCode(GVsWithCode)
+ .create();
+}
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine; it does not fall back to the interpreter. This takes ownership
+/// of the module.
+ExecutionEngine *ExecutionEngine::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ CodeModel::Model CMM) {
+ if (ExecutionEngine::JITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ return 0;
+ }
+
+ // Use the defaults for extra parameters. Users can use EngineBuilder to
+ // set them.
+ StringRef MArch = "";
+ StringRef MCPU = "";
+ SmallVector<std::string, 1> MAttrs;
+
+ TargetMachine *TM =
+ EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+ if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+ TM->setCodeModel(CMM);
+
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
+}
+
+ExecutionEngine *EngineBuilder::create() {
+ // Make sure we can resolve symbols in the program as well. The zero arg
+ // to the function tells DynamicLibrary to load the program, not a library.
+ if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
+ return 0;
+
+ // If the user specified a memory manager but didn't specify which engine to
+ // create, we assume they only want the JIT, and we fail if they only want
+ // the interpreter.
+ if (JMM) {
+ if (WhichEngine & EngineKind::JIT)
+ WhichEngine = EngineKind::JIT;
+ else {
+ if (ErrorStr)
+ *ErrorStr = "Cannot create an interpreter with a memory manager.";
+ return 0;
+ }
+ }
+
+ // Unless the interpreter was explicitly selected or the JIT is not linked,
+ // try making a JIT.
+ if (WhichEngine & EngineKind::JIT) {
+ if (TargetMachine *TM =
+ EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr)) {
+ TM->setCodeModel(CMModel);
+
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
+ AllocateGVsWithCode, TM);
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
+ AllocateGVsWithCode, TM);
+ if (EE) return EE;
+ }
+ }
+ }
+
+ // If we can't make a JIT and we didn't request one specifically, try making
+ // an interpreter instead.
+ if (WhichEngine & EngineKind::Interpreter) {
+ if (ExecutionEngine::InterpCtor)
+ return ExecutionEngine::InterpCtor(M, ErrorStr);
+ if (ErrorStr)
+ *ErrorStr = "Interpreter has not been linked in.";
+ return 0;
+ }
+
+ if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ }
+
+ return 0;
+}
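+
+// A minimal sketch of the intended builder-style use (assumes a Module *M that
+// the engine takes ownership of; only the error string is checked):
+//
+//   std::string Err;
+//   ExecutionEngine *EE = EngineBuilder(M)
+//                           .setEngineKind(EngineKind::JIT)
+//                           .setErrorStr(&Err)
+//                           .setOptLevel(CodeGenOpt::Default)
+//                           .create();
+//   if (EE == 0)
+//     errs() << "Could not create an execution engine: " << Err << "\n";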
+
+void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
+ if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
+ return getPointerToFunction(F);
+
+ MutexGuard locked(lock);
+ if (void *P = EEState.getGlobalAddressMap(locked)[GV])
+ return P;
+
+ // Global variable might have been added since interpreter started.
+ if (GlobalVariable *GVar =
+ const_cast<GlobalVariable *>(dyn_cast<GlobalVariable>(GV)))
+ EmitGlobalVariable(GVar);
+ else
+ llvm_unreachable("Global hasn't had an address allocated yet!");
+
+ return EEState.getGlobalAddressMap(locked)[GV];
+}
+
+/// \brief Converts a Constant* into a GenericValue, including handling of
+/// ConstantExpr values.
+GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
+ // If it's undefined, just return garbage.
+ if (isa<UndefValue>(C)) {
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ // Although the value is undefined, we still have to construct an APInt
+ // with the correct bit width.
+ Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
+ break;
+ default:
+ break;
+ }
+ return Result;
+ }
+
+ // Otherwise, if the value is a ConstantExpr...
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *Op0 = CE->getOperand(0);
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ // Compute the index
+ GenericValue Result = getConstantValue(Op0);
+ SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+ uint64_t Offset =
+ TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size());
+
+ char* tmp = (char*) Result.PointerVal;
+ Result = PTOGV(tmp + Offset);
+ return Result;
+ }
+ case Instruction::Trunc: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.trunc(BitWidth);
+ return GV;
+ }
+ case Instruction::ZExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.zext(BitWidth);
+ return GV;
+ }
+ case Instruction::SExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.sext(BitWidth);
+ return GV;
+ }
+ case Instruction::FPTrunc: {
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.FloatVal = float(GV.DoubleVal);
+ return GV;
+ }
+ case Instruction::FPExt:{
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.DoubleVal = double(GV.FloatVal);
+ return GV;
+ }
+ case Instruction::UIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType()->isFloatTy())
+ GV.FloatVal = float(GV.IntVal.roundToDouble());
+ else if (CE->getType()->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.roundToDouble();
+ else if (CE->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
+ false,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::SIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType()->isFloatTy())
+ GV.FloatVal = float(GV.IntVal.signedRoundToDouble());
+ else if (CE->getType()->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.signedRoundToDouble();
+ else if (CE->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
+ true,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::FPToUI: // double->APInt conversion handles sign
+ case Instruction::FPToSI: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ if (Op0->getType()->isFloatTy())
+ GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth);
+ else if (Op0->getType()->isDoubleTy())
+ GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
+ else if (Op0->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat(GV.IntVal);
+ uint64_t v;
+ bool ignored;
+ (void)apf.convertToInteger(&v, BitWidth,
+ CE->getOpcode()==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ GV.IntVal = v; // endian?
+ }
+ return GV;
+ }
+ case Instruction::PtrToInt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getPointerSizeInBits();
+ GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
+ return GV;
+ }
+ case Instruction::IntToPtr: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getPointerSizeInBits();
+ if (PtrWidth != GV.IntVal.getBitWidth())
+ GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
+ assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
+ GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue()));
+ return GV;
+ }
+ case Instruction::BitCast: {
+ GenericValue GV = getConstantValue(Op0);
+ const Type* DestTy = CE->getType();
+ switch (Op0->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid bitcast operand");
+ case Type::IntegerTyID:
+ assert(DestTy->isFloatingPointTy() && "invalid bitcast");
+ if (DestTy->isFloatTy())
+ GV.FloatVal = GV.IntVal.bitsToFloat();
+ else if (DestTy->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.bitsToDouble();
+ break;
+ case Type::FloatTyID:
+ assert(DestTy->isIntegerTy(32) && "Invalid bitcast");
+ GV.IntVal = APInt::floatToBits(GV.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ assert(DestTy->isIntegerTy(64) && "Invalid bitcast");
+ GV.IntVal = APInt::doubleToBits(GV.DoubleVal);
+ break;
+ case Type::PointerTyID:
+ assert(DestTy->isPointerTy() && "Invalid bitcast");
+ break; // getConstantValue(Op0) above already converted it
+ }
+ return GV;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ GenericValue LHS = getConstantValue(Op0);
+ GenericValue RHS = getConstantValue(CE->getOperand(1));
+ GenericValue GV;
+ switch (CE->getOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Bad add type!");
+ case Type::IntegerTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid integer opcode");
+ case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break;
+ case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break;
+ case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break;
+ case Instruction::UDiv:GV.IntVal = LHS.IntVal.udiv(RHS.IntVal); break;
+ case Instruction::SDiv:GV.IntVal = LHS.IntVal.sdiv(RHS.IntVal); break;
+ case Instruction::URem:GV.IntVal = LHS.IntVal.urem(RHS.IntVal); break;
+ case Instruction::SRem:GV.IntVal = LHS.IntVal.srem(RHS.IntVal); break;
+ case Instruction::And: GV.IntVal = LHS.IntVal & RHS.IntVal; break;
+ case Instruction::Or: GV.IntVal = LHS.IntVal | RHS.IntVal; break;
+ case Instruction::Xor: GV.IntVal = LHS.IntVal ^ RHS.IntVal; break;
+ }
+ break;
+ case Type::FloatTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid float opcode");
+ case Instruction::FAdd:
+ GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
+ case Instruction::FSub:
+ GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
+ case Instruction::FMul:
+ GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
+ case Instruction::FDiv:
+ GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
+ case Instruction::FRem:
+ GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
+ }
+ break;
+ case Type::DoubleTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid double opcode");
+ case Instruction::FAdd:
+ GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
+ case Instruction::FSub:
+ GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
+ case Instruction::FMul:
+ GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
+ case Instruction::FDiv:
+ GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
+ case Instruction::FRem:
+ GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
+ }
+ break;
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ APFloat apfLHS = APFloat(LHS.IntVal);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid long double opcode");
+ case Instruction::FAdd:
+ apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FSub:
+ apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FMul:
+ apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FDiv:
+ apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FRem:
+ apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ }
+ }
+ break;
+ }
+ return GV;
+ }
+ default:
+ break;
+ }
+
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ConstantExpr not handled: " << *CE;
+ report_fatal_error(OS.str());
+ }
+
+ // Otherwise, we have a simple constant.
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble();
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ Result.IntVal = cast <ConstantFP>(C)->getValueAPF().bitcastToAPInt();
+ break;
+ case Type::IntegerTyID:
+ Result.IntVal = cast<ConstantInt>(C)->getValue();
+ break;
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(C))
+ Result.PointerVal = 0;
+ else if (const Function *F = dyn_cast<Function>(C))
+ Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
+ else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
+ else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ Result = PTOGV(getPointerToBasicBlock(const_cast<BasicBlock*>(
+ BA->getBasicBlock())));
+ else
+ llvm_unreachable("Unknown constant pointer type!");
+ break;
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ERROR: Constant unimplemented for type: " << *C->getType();
+ report_fatal_error(OS.str());
+ }
+
+ return Result;
+}
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+ unsigned StoreBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+ uint8_t *Src = (uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost()) {
+ // Little-endian host - the source is ordered from LSB to MSB. Order the
+ // destination from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, StoreBytes);
+ } else {
+ // Big-endian host - the source is an array of 64 bit words ordered from
+ // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
+ // from MSB to LSB: Reverse the word order, but not the bytes in a word.
+ while (StoreBytes > sizeof(uint64_t)) {
+ StoreBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
+ Src += sizeof(uint64_t);
+ }
+
+ memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+ }
+}
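+
+// As a worked example: storing the 80-bit value 0x0123456789ABCDEF0011 with
+// StoreBytes == 10 writes the bytes 11 00 EF CD AB 89 67 45 23 01 on a
+// little-endian host (a straight copy of the first ten bytes of the word
+// array), and 01 23 45 67 89 AB CD EF 00 11 on a big-endian host (word order
+// reversed, bytes within each word kept).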
+
+void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
+ GenericValue *Ptr, const Type *Ty) {
+ const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
+ break;
+ case Type::FloatTyID:
+ *((float*)Ptr) = Val.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ *((double*)Ptr) = Val.DoubleVal;
+ break;
+ case Type::X86_FP80TyID:
+ memcpy(Ptr, Val.IntVal.getRawData(), 10);
+ break;
+ case Type::PointerTyID:
+ // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
+ if (StoreBytes != sizeof(PointerTy))
+ memset(&(Ptr->PointerVal), 0, StoreBytes);
+
+ *((PointerTy*)Ptr) = Val.PointerVal;
+ break;
+ default:
+ dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ }
+
+ if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian())
+ // Host and target are different endian - reverse the stored bytes.
+ std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+ uint8_t *Dst = (uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost())
+ // Little-endian host - the destination must be ordered from LSB to MSB.
+ // The source is ordered from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, LoadBytes);
+ else {
+ // Big-endian - the destination is an array of 64 bit words ordered from
+ // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
+ // ordered from MSB to LSB: Reverse the word order, but not the bytes in
+ // a word.
+ while (LoadBytes > sizeof(uint64_t)) {
+ LoadBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
+ Dst += sizeof(uint64_t);
+ }
+
+ memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+ }
+}
+
+/// FIXME: document
+///
+void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
+ GenericValue *Ptr,
+ const Type *Ty) {
+ const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ // An APInt with all words initially zero.
+ Result.IntVal = APInt(cast<IntegerType>(Ty)->getBitWidth(), 0);
+ LoadIntFromMemory(Result.IntVal, (uint8_t*)Ptr, LoadBytes);
+ break;
+ case Type::FloatTyID:
+ Result.FloatVal = *((float*)Ptr);
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = *((double*)Ptr);
+ break;
+ case Type::PointerTyID:
+ Result.PointerVal = *((PointerTy*)Ptr);
+ break;
+ case Type::X86_FP80TyID: {
+ // This is endian dependent, but it will only work on x86 anyway.
+ // FIXME: Will not trap if loading a signaling NaN.
+ uint64_t y[2];
+ memcpy(y, Ptr, 10);
+ Result.IntVal = APInt(80, 2, y);
+ break;
+ }
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "Cannot load value of type " << *Ty << "!";
+ report_fatal_error(OS.str());
+ }
+}
+
+void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
+ DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
+ DEBUG(Init->dump());
+ if (isa<UndefValue>(Init)) {
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+ unsigned ElementSize =
+ getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
+ return;
+ } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+ unsigned ElementSize =
+ getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+ const StructLayout *SL =
+ getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
+ return;
+ } else if (Init->getType()->isFirstClassType()) {
+ GenericValue Val = getConstantValue(Init);
+ StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
+ return;
+ }
+
+ DEBUG(dbgs() << "Bad Type: " << *Init->getType() << "\n");
+ llvm_unreachable("Unknown constant type to initialize memory with!");
+}
+
+/// EmitGlobals - Emit all of the global variables to memory, storing their
+/// addresses into GlobalAddress. This must make sure to copy the contents of
+/// their initializers into the memory.
+void ExecutionEngine::emitGlobals() {
+ // Loop over all of the global variables in the program, allocating the memory
+ // to hold them. If there is more than one module, do a prepass over globals
+ // to figure out how the different modules should link together.
+ std::map<std::pair<std::string, const Type*>,
+ const GlobalValue*> LinkedGlobalsMap;
+
+ if (Modules.size() != 1) {
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m];
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ const GlobalValue *GV = I;
+ if (GV->hasLocalLinkage() || GV->isDeclaration() ||
+ GV->hasAppendingLinkage() || !GV->hasName())
+ continue;// Ignore external globals and globals with internal linkage.
+
+ const GlobalValue *&GVEntry =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+
+ // If this is the first time we've seen this global, it is the canonical
+ // version.
+ if (!GVEntry) {
+ GVEntry = GV;
+ continue;
+ }
+
+ // If the existing global is strong, never replace it.
+ if (GVEntry->hasExternalLinkage() ||
+ GVEntry->hasDLLImportLinkage() ||
+ GVEntry->hasDLLExportLinkage())
+ continue;
+
+ // Otherwise, we know it's linkonce/weak, replace it if this is a strong
+ // symbol. FIXME is this right for common?
+ if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
+ GVEntry = GV;
+ }
+ }
+ }
+
+ std::vector<const GlobalValue*> NonCanonicalGlobals;
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m];
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ // In the multi-module case, see what this global maps to.
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
+ // If something else is the canonical global, ignore this one.
+ if (GVEntry != &*I) {
+ NonCanonicalGlobals.push_back(I);
+ continue;
+ }
+ }
+ }
+
+ if (!I->isDeclaration()) {
+ addGlobalMapping(I, getMemoryForGV(I));
+ } else {
+ // External variable reference. Try to use the dynamic loader to
+ // get a pointer to it.
+ if (void *SymAddr =
+ sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName()))
+ addGlobalMapping(I, SymAddr);
+ else {
+ report_fatal_error("Could not resolve external global address: "
+ +I->getName());
+ }
+ }
+ }
+
+ // If there are multiple modules, map the non-canonical globals to their
+ // canonical location.
+ if (!NonCanonicalGlobals.empty()) {
+ for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) {
+ const GlobalValue *GV = NonCanonicalGlobals[i];
+ const GlobalValue *CGV =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+ void *Ptr = getPointerToGlobalIfAvailable(CGV);
+ assert(Ptr && "Canonical global wasn't codegen'd!");
+ addGlobalMapping(GV, Ptr);
+ }
+ }
+
+ // Now that all of the globals are set up in memory, loop through them all
+ // and initialize their contents.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
+ if (GVEntry != &*I) // Not the canonical variable.
+ continue;
+ }
+ EmitGlobalVariable(I);
+ }
+ }
+ }
+}
+
+// EmitGlobalVariable - This method emits the specified global variable to the
+// address specified in GlobalAddresses, or allocates new memory if it's not
+// already in the map.
+void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
+ void *GA = getPointerToGlobalIfAvailable(GV);
+
+ if (GA == 0) {
+ // If it's not already specified, allocate memory for the global.
+ GA = getMemoryForGV(GV);
+ addGlobalMapping(GV, GA);
+ }
+
+ // Don't initialize if it's thread local, let the client do it.
+ if (!GV->isThreadLocal())
+ InitializeMemory(GV->getInitializer(), GA);
+
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+ NumInitBytes += (unsigned)GVSize;
+ ++NumGlobals;
+}
+
+ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
+ : EE(EE), GlobalAddressMap(this) {
+}
+
+sys::Mutex *
+ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
+ return &EES->EE.lock;
+}
+
+void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
+ const GlobalValue *Old) {
+ void *OldVal = EES->GlobalAddressMap.lookup(Old);
+ EES->GlobalAddressReverseMap.erase(OldVal);
+}
+
+void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
+ const GlobalValue *,
+ const GlobalValue *) {
+ assert(false && "The ExecutionEngine doesn't know how to handle a"
+ " RAUW on a value it has a global mapping for.");
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
new file mode 100644
index 000000000000..f8f1f4a78ee5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -0,0 +1,254 @@
+//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings for the ExecutionEngine library.
+//
+//===----------------------------------------------------------------------===//
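+//
+// A minimal usage sketch of these bindings (illustrative only; it assumes a
+// module Mod that contains an externally visible function "sum" of type
+// i32 (i32, i32)):
+//
+//   LLVMExecutionEngineRef EE;
+//   char *Err = 0;
+//   if (LLVMCreateJITCompilerForModule(&EE, Mod, 2, &Err))
+//     return 1;                                  // Err holds the message.
+//   LLVMValueRef Fn;
+//   if (!LLVMFindFunction(EE, "sum", &Fn)) {     // 0 means "found".
+//     LLVMGenericValueRef Args[2] = {
+//       LLVMCreateGenericValueOfInt(LLVMInt32Type(), 2, 0),
+//       LLVMCreateGenericValueOfInt(LLVMInt32Type(), 3, 0)
+//     };
+//     LLVMGenericValueRef Res = LLVMRunFunction(EE, Fn, 2, Args);
+//     printf("%llu\n", LLVMGenericValueToInt(Res, 0));
+//   }
+//   LLVMDisposeExecutionEngine(EE);
+//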
+
+#define DEBUG_TYPE "jit"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstring>
+
+using namespace llvm;
+
+/*===-- Operations on generic values --------------------------------------===*/
+
+LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
+ unsigned long long N,
+ LLVMBool IsSigned) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->IntVal = APInt(unwrap<IntegerType>(Ty)->getBitWidth(), N, IsSigned);
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->PointerVal = P;
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) {
+ GenericValue *GenVal = new GenericValue();
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ GenVal->FloatVal = N;
+ break;
+ case Type::DoubleTyID:
+ GenVal->DoubleVal = N;
+ break;
+ default:
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
+ }
+ return wrap(GenVal);
+}
+
+unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) {
+ return unwrap(GenValRef)->IntVal.getBitWidth();
+}
+
+unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef,
+ LLVMBool IsSigned) {
+ GenericValue *GenVal = unwrap(GenValRef);
+ if (IsSigned)
+ return GenVal->IntVal.getSExtValue();
+ else
+ return GenVal->IntVal.getZExtValue();
+}
+
+void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal) {
+ return unwrap(GenVal)->PointerVal;
+}
+
+double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ return unwrap(GenVal)->FloatVal;
+ case Type::DoubleTyID:
+ return unwrap(GenVal)->DoubleVal;
+ default:
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
+ break;
+ }
+ return 0; // Not reached
+}
+
+void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
+ delete unwrap(GenVal);
+}
+
+/*===-- Operations on execution engines -----------------------------------===*/
+
+LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleRef M,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::Either)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *EE = builder.create()){
+ *OutEE = wrap(EE);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleRef M,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::Interpreter)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *Interp = builder.create()) {
+ *OutInterp = wrap(Interp);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleRef M,
+ unsigned OptLevel,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setOptLevel((CodeGenOpt::Level)OptLevel);
+ if (ExecutionEngine *JIT = builder.create()) {
+ *OutJIT = wrap(JIT);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateExecutionEngineForModule(OutEE,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateInterpreterForModule(OutInterp,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateJITCompilerForModule(OutJIT,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OptLevel, OutError);
+}
+
+
+void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
+ delete unwrap(EE);
+}
+
+void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(false);
+}
+
+void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(true);
+}
+
+int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned ArgC, const char * const *ArgV,
+ const char * const *EnvP) {
+ std::vector<std::string> ArgVec;
+ for (unsigned I = 0; I != ArgC; ++I)
+ ArgVec.push_back(ArgV[I]);
+
+ return unwrap(EE)->runFunctionAsMain(unwrap<Function>(F), ArgVec, EnvP);
+}
+
+LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned NumArgs,
+ LLVMGenericValueRef *Args) {
+ std::vector<GenericValue> ArgVec;
+ ArgVec.reserve(NumArgs);
+ for (unsigned I = 0; I != NumArgs; ++I)
+ ArgVec.push_back(*unwrap(Args[I]));
+
+ GenericValue *Result = new GenericValue();
+ *Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec);
+ return wrap(Result);
+}
+
+void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
+ unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
+}
+
+void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
+ unwrap(EE)->addModule(unwrap(M));
+}
+
+void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
+ /* The module provider is now actually a module. */
+ LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP));
+}
+
+LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
+ LLVMModuleRef *OutMod, char **OutError) {
+ Module *Mod = unwrap(M);
+ unwrap(EE)->removeModule(Mod);
+ *OutMod = wrap(Mod);
+ return 0;
+}
+
+LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+ LLVMModuleProviderRef MP,
+ LLVMModuleRef *OutMod, char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod,
+ OutError);
+}
+
+LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+ LLVMValueRef *OutFn) {
+ if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) {
+ *OutFn = wrap(F);
+ return 0;
+ }
+ return 1;
+}
+
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+ return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
+}
+
+LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
+ return wrap(unwrap(EE)->getTargetData());
+}
+
+void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
+ void* Addr) {
+ unwrap(EE)->addGlobalMapping(unwrap<GlobalValue>(Global), Addr);
+}
+
+void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
+ return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
new file mode 100644
index 000000000000..498063bf6555
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -0,0 +1,1350 @@
+//===-- Execution.cpp - Implement code to simulate the program ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the actual instruction interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "interpreter"
+#include "Interpreter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
+
+static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
+ cl::desc("make the interpreter print every volatile load and store"));
+
+//===----------------------------------------------------------------------===//
+// Various Helper Functions
+//===----------------------------------------------------------------------===//
+
+static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
+ SF.Values[V] = Val;
+}
+
+//===----------------------------------------------------------------------===//
+// Binary Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+#define IMPLEMENT_BINARY_OPERATOR(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \
+ break
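+
+// For instance, IMPLEMENT_BINARY_OPERATOR(+, Float) expands (at its use site)
+// to
+//
+//   case Type::FloatTyID:
+//     Dest.FloatVal = Src1.FloatVal + Src2.FloatVal;
+//     break;
+//
+// so the executeF*Inst helpers below mostly reduce to the corresponding host
+// C++ operator applied to the right GenericValue field.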
+
+static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(+, Float);
+ IMPLEMENT_BINARY_OPERATOR(+, Double);
+ default:
+ dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(-, Float);
+ IMPLEMENT_BINARY_OPERATOR(-, Double);
+ default:
+ dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(*, Float);
+ IMPLEMENT_BINARY_OPERATOR(*, Double);
+ default:
+ dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(/, Float);
+ IMPLEMENT_BINARY_OPERATOR(/, Double);
+ default:
+ dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal);
+ break;
+ default:
+ dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+#define IMPLEMENT_INTEGER_ICMP(OP, TY) \
+ case Type::IntegerTyID: \
+ Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
+ break;
+
+// Handle pointers specially because they must be compared with only as much
+// width as the host has. We _do not_ want to be comparing 64 bit values when
+// running on a 32-bit target, otherwise the upper 32 bits might mess up
+// comparisons if they contain garbage.
+#define IMPLEMENT_POINTER_ICMP(OP) \
+ case Type::PointerTyID: \
+ Dest.IntVal = APInt(1,(void*)(intptr_t)Src1.PointerVal OP \
+ (void*)(intptr_t)Src2.PointerVal); \
+ break;
+
+static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(eq,Ty);
+ IMPLEMENT_POINTER_ICMP(==);
+ default:
+ dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ne,Ty);
+ IMPLEMENT_POINTER_ICMP(!=);
+ default:
+ dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ult,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(slt,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ule,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sle,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(uge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+void Interpreter::visitICmpInst(ICmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULT: R = executeICMP_ULT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLT: R = executeICMP_SLT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGT: R = executeICMP_UGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGT: R = executeICMP_SGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULE: R = executeICMP_ULE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLE: R = executeICMP_SLE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
+ default:
+ dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+#define IMPLEMENT_FCMP(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
+ break
+
+static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(==, Float);
+ IMPLEMENT_FCMP(==, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(!=, Float);
+ IMPLEMENT_FCMP(!=, Double);
+
+ default:
+ dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<=, Float);
+ IMPLEMENT_FCMP(<=, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>=, Float);
+ IMPLEMENT_FCMP(>=, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<, Float);
+ IMPLEMENT_FCMP(<, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>, Float);
+ IMPLEMENT_FCMP(>, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+#define IMPLEMENT_UNORDERED(TY, X,Y) \
+ if (TY->isFloatTy()) { \
+ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ } \
+ } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ }
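+// The macro relies on NaN comparing unequal to itself: if either operand is
+// NaN, e.g. X.FloatVal != X.FloatVal evaluates to true and the unordered
+// predicate is satisfied immediately.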
+
+
+static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OEQ(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_ONE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ if (Ty->isFloatTy())
+ Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
+ Src2.FloatVal == Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
+ Src2.DoubleVal == Src2.DoubleVal));
+ return Dest;
+}
+
+static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ if (Ty->isFloatTy())
+ Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
+ Src2.FloatVal != Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
+ Src2.DoubleVal != Src2.DoubleVal));
+ return Dest;
+}
+
+void Interpreter::visitFCmpInst(FCmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
+ case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break;
+ case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OEQ: R = executeFCMP_OEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNE: R = executeFCMP_UNE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ONE: R = executeFCMP_ONE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULT: R = executeFCMP_ULT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLT: R = executeFCMP_OLT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGT: R = executeFCMP_UGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGT: R = executeFCMP_OGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULE: R = executeFCMP_ULE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
+ default:
+ dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ GenericValue Result;
+ switch (predicate) {
+ case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty);
+ case ICmpInst::ICMP_NE: return executeICMP_NE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGT: return executeICMP_UGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGT: return executeICMP_SGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULT: return executeICMP_ULT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLT: return executeICMP_SLT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGE: return executeICMP_UGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGE: return executeICMP_SGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULE: return executeICMP_ULE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLE: return executeICMP_SLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ORD: return executeFCMP_ORD(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNO: return executeFCMP_UNO(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OEQ: return executeFCMP_OEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UEQ: return executeFCMP_UEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ONE: return executeFCMP_ONE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNE: return executeFCMP_UNE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLT: return executeFCMP_OLT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULT: return executeFCMP_ULT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGT: return executeFCMP_OGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGT: return executeFCMP_UGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLE: return executeFCMP_OLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_FALSE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, false);
+ return Result;
+ }
+ case FCmpInst::FCMP_TRUE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, true);
+ return Result;
+ }
+ default:
+ dbgs() << "Unhandled Cmp predicate\n";
+ llvm_unreachable(0);
+ }
+}
+
+void Interpreter::visitBinaryOperator(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
+ case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
+ case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
+ case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
+ case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
+ case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+ case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+ case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+ case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+ case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
+ case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
+ case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
+ GenericValue Src3) {
+ return Src1.IntVal == 0 ? Src3 : Src2;
+}
+
+void Interpreter::visitSelectInst(SelectInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+ GenericValue R = executeSelectInst(Src1, Src2, Src3);
+ SetValue(&I, R, SF);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Terminator Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::exitCalled(GenericValue GV) {
+ // runAtExitHandlers() assumes there are no stack frames, but
+ // if exit() was called, then it had a stack frame. Blow away
+ // the stack before interpreting atexit handlers.
+ ECStack.clear();
+ runAtExitHandlers();
+ exit(GV.IntVal.zextOrTrunc(32).getZExtValue());
+}
+
+/// Pop the last stack frame off of ECStack and then copy the result
+/// back into the result variable if we are not returning void. The
+/// result variable may be the ExitValue, or the Value of the calling
+/// CallInst if there was a previous stack frame. This method may
+/// invalidate any ECStack iterators you have. This method also takes
+/// care of switching to the normal destination BB, if we are returning
+/// from an invoke.
+///
+void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
+ GenericValue Result) {
+ // Pop the current stack frame.
+ ECStack.pop_back();
+
+ if (ECStack.empty()) { // Finished main. Put result into exit code...
+ if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type?
+ ExitValue = Result; // Capture the exit value of the program
+ } else {
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ }
+ } else {
+ // If we have a previous stack frame, and we have a previous call,
+ // fill in the return value...
+ ExecutionContext &CallingSF = ECStack.back();
+ if (Instruction *I = CallingSF.Caller.getInstruction()) {
+ // Save result...
+ if (!CallingSF.Caller.getType()->isVoidTy())
+ SetValue(I, Result, CallingSF);
+ if (InvokeInst *II = dyn_cast<InvokeInst> (I))
+ SwitchToNewBasicBlock (II->getNormalDest (), CallingSF);
+ CallingSF.Caller = CallSite(); // We returned from the call...
+ }
+ }
+}
+
+void Interpreter::visitReturnInst(ReturnInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *RetTy = Type::getVoidTy(I.getContext());
+ GenericValue Result;
+
+ // Save away the return value... (if we are not 'ret void')
+ if (I.getNumOperands()) {
+ RetTy = I.getReturnValue()->getType();
+ Result = getOperandValue(I.getReturnValue(), SF);
+ }
+
+ popStackAndReturnValueToCaller(RetTy, Result);
+}
+
+void Interpreter::visitUnwindInst(UnwindInst &I) {
+ // Unwind stack
+ Instruction *Inst;
+ do {
+ ECStack.pop_back();
+ if (ECStack.empty())
+ report_fatal_error("Empty stack during unwind!");
+ Inst = ECStack.back().Caller.getInstruction();
+ } while (!(Inst && isa<InvokeInst>(Inst)));
+
+ // Return from invoke
+ ExecutionContext &InvokingSF = ECStack.back();
+ InvokingSF.Caller = CallSite();
+
+ // Go to exceptional destination BB of invoke instruction
+ SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF);
+}
+
+void Interpreter::visitUnreachableInst(UnreachableInst &I) {
+ report_fatal_error("Program executed an 'unreachable' instruction!");
+}
+
+void Interpreter::visitBranchInst(BranchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ BasicBlock *Dest;
+
+ Dest = I.getSuccessor(0); // Uncond branches have a fixed dest...
+ if (!I.isUnconditional()) {
+ Value *Cond = I.getCondition();
+ if (getOperandValue(Cond, SF).IntVal == 0) // If false cond...
+ Dest = I.getSuccessor(1);
+ }
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitSwitchInst(SwitchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue CondVal = getOperandValue(I.getOperand(0), SF);
+ const Type *ElTy = I.getOperand(0)->getType();
+
+ // Check to see if any of the cases match...
+ BasicBlock *Dest = 0;
+ for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2)
+ if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy)
+ .IntVal != 0) {
+ Dest = cast<BasicBlock>(I.getOperand(i+1));
+ break;
+ }
+
+ if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitIndirectBrInst(IndirectBrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ void *Dest = GVTOP(getOperandValue(I.getAddress(), SF));
+ SwitchToNewBasicBlock((BasicBlock*)Dest, SF);
+}
+
+
+// SwitchToNewBasicBlock - This method is used to jump to a new basic block.
+// This function handles the actual updating of block and instruction iterators
+// as well as execution of all of the PHI nodes in the destination block.
+//
+// This method does this because all of the PHI nodes must be executed
+// atomically, reading their inputs before any of the results are updated. Not
+// doing this can cause problems if the PHI nodes depend on other PHI nodes for
+// their inputs. If the input PHI node is updated before it is read, incorrect
+// results can happen. Thus we use a two phase approach.
+//
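+// As a hypothetical example, a block such as
+//
+//   loop:
+//     %a = phi i32 [ 0, %entry ], [ %b, %loop ]
+//     %b = phi i32 [ 1, %entry ], [ %a, %loop ]
+//
+// swaps %a and %b on every iteration; if %a were updated before %b's incoming
+// value was read, both PHIs would see the new %a. Reading every input first
+// and only then writing the results preserves the swap.
+//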
+void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){
+ BasicBlock *PrevBB = SF.CurBB; // Remember where we came from...
+ SF.CurBB = Dest; // Update CurBB to branch destination
+ SF.CurInst = SF.CurBB->begin(); // Update new instruction ptr...
+
+ if (!isa<PHINode>(SF.CurInst)) return; // Nothing fancy to do
+
+ // Loop over all of the PHI nodes in the current block, reading their inputs.
+ std::vector<GenericValue> ResultValues;
+
+ for (; PHINode *PN = dyn_cast<PHINode>(SF.CurInst); ++SF.CurInst) {
+ // Search for the value corresponding to this previous bb...
+ int i = PN->getBasicBlockIndex(PrevBB);
+ assert(i != -1 && "PHINode doesn't contain entry for predecessor??");
+ Value *IncomingValue = PN->getIncomingValue(i);
+
+ // Save the incoming value for this PHI node...
+ ResultValues.push_back(getOperandValue(IncomingValue, SF));
+ }
+
+ // Now loop over all of the PHI nodes setting their values...
+ SF.CurInst = SF.CurBB->begin();
+ for (unsigned i = 0; isa<PHINode>(SF.CurInst); ++SF.CurInst, ++i) {
+ PHINode *PN = cast<PHINode>(SF.CurInst);
+ SetValue(PN, ResultValues[i], SF);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitAllocaInst(AllocaInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ const Type *Ty = I.getType()->getElementType(); // Type to be allocated
+
+ // Get the number of elements being allocated by the array...
+ unsigned NumElements =
+ getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
+
+ unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty);
+
+ // Avoid malloc-ing zero bytes, use max()...
+ unsigned MemToAlloc = std::max(1U, NumElements * TypeSize);
+
+ // Allocate enough memory to hold the type...
+ void *Memory = malloc(MemToAlloc);
+
+ DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
+ << NumElements << " (Total: " << MemToAlloc << ") at "
+ << uintptr_t(Memory) << '\n');
+
+ GenericValue Result = PTOGV(Memory);
+ assert(Result.PointerVal != 0 && "Null pointer returned by malloc!");
+ SetValue(&I, Result, SF);
+
+ if (I.getOpcode() == Instruction::Alloca)
+ ECStack.back().Allocas.add(Memory);
+}
+
+// executeGEPOperation - The workhorse for getelementptr: it walks the index
+// list and accumulates the byte offset from the base pointer.
+//
+GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E,
+ ExecutionContext &SF) {
+ assert(Ptr->getType()->isPointerTy() &&
+ "Cannot getElementOffset of a nonpointer type!");
+
+ uint64_t Total = 0;
+
+ for (; I != E; ++I) {
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ const StructLayout *SLO = TD.getStructLayout(STy);
+
+ const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
+ unsigned Index = unsigned(CPU->getZExtValue());
+
+ Total += SLO->getElementOffset(Index);
+ } else {
+ const SequentialType *ST = cast<SequentialType>(*I);
+ // Get the index number for the array... which must be a 32- or 64-bit
+ // integer type.
+ GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
+
+ int64_t Idx;
+ unsigned BitWidth =
+ cast<IntegerType>(I.getOperand()->getType())->getBitWidth();
+ if (BitWidth == 32)
+ Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue();
+ else {
+ assert(BitWidth == 64 && "Invalid index type for getelementptr");
+ Idx = (int64_t)IdxGV.IntVal.getZExtValue();
+ }
+ Total += TD.getTypeAllocSize(ST->getElementType())*Idx;
+ }
+ }
+
+ GenericValue Result;
+ Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total;
+ DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n");
+ return Result;
+}
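+
+// A worked example (hypothetical IR, assuming the usual x86 struct layout in
+// which { i32, double } is 16 bytes with the double at offset 8):
+//
+//   getelementptr { i32, double }* %p, i64 2, i32 1
+//
+// The i64 index steps over two whole structs (Total += 2 * 16) and the i32
+// index adds the field offset from the StructLayout (Total += 8), so the
+// result is (char*)%p + 40.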
+
+void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeGEPOperation(I.getPointerOperand(),
+ gep_type_begin(I), gep_type_end(I), SF), SF);
+}
+
+void Interpreter::visitLoadInst(LoadInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ GenericValue *Ptr = (GenericValue*)GVTOP(SRC);
+ GenericValue Result;
+ LoadValueFromMemory(Result, Ptr, I.getType());
+ SetValue(&I, Result, SF);
+ if (I.isVolatile() && PrintVolatile)
+ dbgs() << "Volatile load " << I;
+}
+
+void Interpreter::visitStoreInst(StoreInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Val = getOperandValue(I.getOperand(0), SF);
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
+ I.getOperand(0)->getType());
+ if (I.isVolatile() && PrintVolatile)
+ dbgs() << "Volatile store: " << I;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitCallSite(CallSite CS) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Check to see if this is an intrinsic function call...
+ Function *F = CS.getCalledFunction();
+ if (F && F->isDeclaration())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ break;
+ case Intrinsic::vastart: { // va_start
+ GenericValue ArgIndex;
+ ArgIndex.UIntPairVal.first = ECStack.size() - 1;
+ ArgIndex.UIntPairVal.second = 0;
+ SetValue(CS.getInstruction(), ArgIndex, SF);
+ return;
+ }
+ case Intrinsic::vaend: // va_end is a noop for the interpreter
+ return;
+ case Intrinsic::vacopy: // va_copy: dest = src
+ SetValue(CS.getInstruction(), getOperandValue(*CS.arg_begin(), SF), SF);
+ return;
+ default:
+ // If it is an unknown intrinsic function, use the intrinsic lowering
+ // class to transform it into hopefully tasty LLVM code.
+ //
+ BasicBlock::iterator me(CS.getInstruction());
+ BasicBlock *Parent = CS.getInstruction()->getParent();
+ bool atBegin(Parent->begin() == me);
+ if (!atBegin)
+ --me;
+ IL->LowerIntrinsicCall(cast<CallInst>(CS.getInstruction()));
+
+ // Restore the CurInst pointer to the first instruction newly inserted, if
+ // any.
+ if (atBegin) {
+ SF.CurInst = Parent->begin();
+ } else {
+ SF.CurInst = me;
+ ++SF.CurInst;
+ }
+ return;
+ }
+
+
+ SF.Caller = CS;
+ std::vector<GenericValue> ArgVals;
+ const unsigned NumArgs = SF.Caller.arg_size();
+ ArgVals.reserve(NumArgs);
+ uint16_t pNum = 1;
+ for (CallSite::arg_iterator i = SF.Caller.arg_begin(),
+ e = SF.Caller.arg_end(); i != e; ++i, ++pNum) {
+ Value *V = *i;
+ ArgVals.push_back(getOperandValue(V, SF));
+ }
+
+ // To handle indirect calls, we must get the pointer value from the argument
+ // and treat it as a function pointer.
+ GenericValue SRC = getOperandValue(SF.Caller.getCalledValue(), SF);
+ callFunction((Function*)GVTOP(SRC), ArgVals);
+}
+
+void Interpreter::visitShl(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitLShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitAShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.sext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.zext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
+ "Invalid FPTrunc instruction");
+ Dest.FloatVal = (float) Src.DoubleVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
+ "Invalid FPTrunc instruction");
+ Dest.DoubleVal = (double) Src.FloatVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ const Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ const Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
+ return Dest;
+
+}
+
+GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction");
+
+ Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isPointerTy() && "Invalid IntToPtr instruction");
+
+ uint32_t PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize != Src.IntVal.getBitWidth())
+ Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
+
+ Dest.PointerVal = PointerTy(intptr_t(Src.IntVal.getZExtValue()));
+ return Dest;
+}
+
+GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+
+ const Type *SrcTy = SrcVal->getType();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ if (DstTy->isPointerTy()) {
+ assert(SrcTy->isPointerTy() && "Invalid BitCast");
+ Dest.PointerVal = Src.PointerVal;
+ } else if (DstTy->isIntegerTy()) {
+ if (SrcTy->isFloatTy()) {
+ Dest.IntVal = APInt::floatToBits(Src.FloatVal);
+ } else if (SrcTy->isDoubleTy()) {
+ Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
+ } else if (SrcTy->isIntegerTy()) {
+ Dest.IntVal = Src.IntVal;
+ } else
+ llvm_unreachable("Invalid BitCast");
+ } else if (DstTy->isFloatTy()) {
+ if (SrcTy->isIntegerTy())
+ Dest.FloatVal = Src.IntVal.bitsToFloat();
+ else
+ Dest.FloatVal = Src.FloatVal;
+ } else if (DstTy->isDoubleTy()) {
+ if (SrcTy->isIntegerTy())
+ Dest.DoubleVal = Src.IntVal.bitsToDouble();
+ else
+ Dest.DoubleVal = Src.DoubleVal;
+ } else
+ llvm_unreachable("Invalid Bitcast");
+
+ return Dest;
+}
+
+void Interpreter::visitTruncInst(TruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSExtInst(SExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitZExtInst(ZExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeZExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPTruncInst(FPTruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPExtInst(FPExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitUIToFPInst(UIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeUIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSIToFPInst(SIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToUIInst(FPToUIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToUIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToSIInst(FPToSIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToSIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitPtrToIntInst(PtrToIntInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executePtrToIntInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitIntToPtrInst(IntToPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeIntToPtrInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitBitCastInst(BitCastInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeBitCastInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+#define IMPLEMENT_VAARG(TY) \
+ case Type::TY##TyID: Dest.TY##Val = Src.TY##Val; break
+
+void Interpreter::visitVAArgInst(VAArgInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Get the incoming valist parameter. LLI treats the valist as a
+ // (ec-stack-depth var-arg-index) pair.
+ GenericValue VAList = getOperandValue(I.getOperand(0), SF);
+ GenericValue Dest;
+ GenericValue Src = ECStack[VAList.UIntPairVal.first]
+ .VarArgs[VAList.UIntPairVal.second];
+ const Type *Ty = I.getType();
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: Dest.IntVal = Src.IntVal; break;
+ IMPLEMENT_VAARG(Pointer);
+ IMPLEMENT_VAARG(Float);
+ IMPLEMENT_VAARG(Double);
+ default:
+ dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+
+ // Set the Value of this Instruction.
+ SetValue(&I, Dest, SF);
+
+ // Move the pointer to the next vararg.
+ ++VAList.UIntPairVal.second;
+}
+
+GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
+ ExecutionContext &SF) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ return executeTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::ZExt:
+ return executeZExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SExt:
+ return executeSExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPTrunc:
+ return executeFPTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPExt:
+ return executeFPExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::UIToFP:
+ return executeUIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SIToFP:
+ return executeSIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToUI:
+ return executeFPToUIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToSI:
+ return executeFPToSIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::PtrToInt:
+ return executePtrToIntInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::IntToPtr:
+ return executeIntToPtrInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::BitCast:
+ return executeBitCastInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::GetElementPtr:
+ return executeGEPOperation(CE->getOperand(0), gep_type_begin(CE),
+ gep_type_end(CE), SF);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ return executeCmpInst(CE->getPredicate(),
+ getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ CE->getOperand(0)->getType());
+ case Instruction::Select:
+ return executeSelectInst(getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ getOperandValue(CE->getOperand(2), SF));
+ default :
+ break;
+ }
+
+ // The cases below here require a GenericValue parameter for the result
+ // so we initialize one, compute it and then return it.
+ GenericValue Op0 = getOperandValue(CE->getOperand(0), SF);
+ GenericValue Op1 = getOperandValue(CE->getOperand(1), SF);
+ GenericValue Dest;
+ const Type * Ty = CE->getOperand(0)->getType();
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break;
+ case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break;
+ case Instruction::Mul: Dest.IntVal = Op0.IntVal * Op1.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
+ case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
+ case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
+ case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
+ case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
+ case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
+ case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
+ case Instruction::Shl:
+ Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::LShr:
+ Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::AShr:
+ Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
+ break;
+ default:
+ dbgs() << "Unhandled ConstantExpr: " << *CE << "\n";
+ llvm_unreachable(0);
+ return GenericValue();
+ }
+ return Dest;
+}
+
+GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return getConstantExprValue(CE, SF);
+ } else if (Constant *CPV = dyn_cast<Constant>(V)) {
+ return getConstantValue(CPV);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ return PTOGV(getPointerToGlobal(GV));
+ } else {
+ return SF.Values[V];
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Dispatch and Execution Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// callFunction - Execute the specified function...
+//
+void Interpreter::callFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 ||
+ ECStack.back().Caller.arg_size() == ArgVals.size()) &&
+ "Incorrect number of arguments passed into function call!");
+ // Make a new stack frame... and fill it in.
+ ECStack.push_back(ExecutionContext());
+ ExecutionContext &StackFrame = ECStack.back();
+ StackFrame.CurFunction = F;
+
+ // Special handling for external functions.
+ if (F->isDeclaration()) {
+ GenericValue Result = callExternalFunction (F, ArgVals);
+ // Simulate a 'ret' instruction of the appropriate type.
+ popStackAndReturnValueToCaller (F->getReturnType (), Result);
+ return;
+ }
+
+ // Get pointers to first LLVM BB & Instruction in function.
+ StackFrame.CurBB = F->begin();
+ StackFrame.CurInst = StackFrame.CurBB->begin();
+
+ // Run through the function arguments and initialize their values...
+ assert((ArgVals.size() == F->arg_size() ||
+ (ArgVals.size() > F->arg_size() && F->getFunctionType()->isVarArg()))&&
+ "Invalid number of values passed to function invocation!");
+
+ // Handle non-varargs arguments...
+ unsigned i = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++i)
+ SetValue(AI, ArgVals[i], StackFrame);
+
+ // Handle varargs arguments...
+ StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end());
+}
+
+
+void Interpreter::run() {
+ while (!ECStack.empty()) {
+ // Interpret a single instruction & increment the "PC".
+ ExecutionContext &SF = ECStack.back(); // Current stack frame
+ Instruction &I = *SF.CurInst++; // Increment before execute
+
+ // Track the number of dynamic instructions executed.
+ ++NumDynamicInsts;
+
+ DEBUG(dbgs() << "About to interpret: " << I);
+ visit(I); // Dispatch to one of the visit* methods...
+#if 0
+ // This is not safe, as visiting the instruction could lower it and free I.
+DEBUG(
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
+ I.getType() != Type::VoidTy) {
+ dbgs() << " --> ";
+ const GenericValue &Val = SF.Values[&I];
+ switch (I.getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid GenericValue Type");
+ case Type::VoidTyID: dbgs() << "void"; break;
+ case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break;
+ case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break;
+ case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal);
+ break;
+ case Type::IntegerTyID:
+ dbgs() << "i" << Val.IntVal.getBitWidth() << " "
+ << Val.IntVal.toStringUnsigned(10)
+ << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
+ break;
+ }
+ });
+#endif
+ }
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
new file mode 100644
index 000000000000..f7e2a4df951e
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -0,0 +1,491 @@
+//===-- ExternalFunctions.cpp - Implement External Functions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains both the code to deal with invoking "external" functions
+// and the code that implements "exported" external functions.
+//
+// There are currently two mechanisms for handling external functions in the
+// Interpreter. The first is to implement lle_* wrapper functions that are
+// specific to well-known library functions which manually translate the
+// arguments from GenericValues and make the call. If such a wrapper does
+// not exist, and libffi is available, then the Interpreter will attempt to
+// invoke the function using libffi, after finding its address.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Config/config.h" // Detect libffi
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include <csignal>
+#include <cstdio>
+#include <map>
+#include <cmath>
+#include <cstring>
+
+#ifdef HAVE_FFI_CALL
+#ifdef HAVE_FFI_H
+#include <ffi.h>
+#define USE_LIBFFI
+#elif HAVE_FFI_FFI_H
+#include <ffi/ffi.h>
+#define USE_LIBFFI
+#endif
+#endif
+
+using namespace llvm;
+
+static ManagedStatic<sys::Mutex> FunctionsLock;
+
+typedef GenericValue (*ExFunc)(const FunctionType *,
+ const std::vector<GenericValue> &);
+static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
+static std::map<std::string, ExFunc> FuncNames;
+
+#ifdef USE_LIBFFI
+typedef void (*RawFunc)();
+static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
+#endif
+
+static Interpreter *TheInterpreter;
+
+static char getTypeID(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return 'V';
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 1: return 'o';
+ case 8: return 'B';
+ case 16: return 'S';
+ case 32: return 'I';
+ case 64: return 'L';
+ default: return 'N';
+ }
+ case Type::FloatTyID: return 'F';
+ case Type::DoubleTyID: return 'D';
+ case Type::PointerTyID: return 'P';
+ case Type::FunctionTyID:return 'M';
+ case Type::StructTyID: return 'T';
+ case Type::ArrayTyID: return 'A';
+ default: return 'U';
+ }
+}
+
+// Try to find the address of an external function, given a Function object.
+// Note that the interpreter does not know how to assemble a real call in the
+// general case (that is the JIT's job), so it assumes that all external
+// functions have the same (and fairly "general") signature. The typical
+// examples of such functions are the "lle_X_" ones.
+static ExFunc lookupFunction(const Function *F) {
+ // Function not found, look it up... start by figuring out what the
+ // composite function name should be.
+ std::string ExtName = "lle_";
+ const FunctionType *FT = F->getFunctionType();
+ for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
+ ExtName += getTypeID(FT->getContainedType(i));
+  ExtName += "_" + F->getNameStr();
+
+ sys::ScopedLock Writer(*FunctionsLock);
+ ExFunc FnPtr = FuncNames[ExtName];
+ if (FnPtr == 0)
+ FnPtr = FuncNames["lle_X_" + F->getNameStr()];
+ if (FnPtr == 0) // Try calling a generic function... if it exists...
+ FnPtr = (ExFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr());
+ if (FnPtr != 0)
+ ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
+ return FnPtr;
+}
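+
+// Example (hypothetical, illustrative only): a declaration such as
+// "i32 @hook(i8*)" is first looked up under the typed name "lle_IP_hook"
+// (return type 'I', then one 'P' parameter, following getTypeID above) and
+// then under the generic name "lle_X_hook". A wrapper like the one below,
+// if it were linked into the process, would be found by the
+// SearchForAddressOfSymbol() call above; every such handler must use this
+// generic signature.
+extern "C" GenericValue lle_X_hook(const FunctionType *FT,
+                                   const std::vector<GenericValue> &Args) {
+  GenericValue GV;
+  GV.IntVal = APInt(32, 0);    // e.g. report success back to the IR caller
+  return GV;
+}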
+
+#ifdef USE_LIBFFI
+static ffi_type *ffiTypeFor(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return &ffi_type_void;
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: return &ffi_type_sint8;
+ case 16: return &ffi_type_sint16;
+ case 32: return &ffi_type_sint32;
+ case 64: return &ffi_type_sint64;
+  }
+  break;
+ case Type::FloatTyID: return &ffi_type_float;
+ case Type::DoubleTyID: return &ffi_type_double;
+ case Type::PointerTyID: return &ffi_type_pointer;
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ report_fatal_error("Type could not be mapped for use with libffi.");
+ return NULL;
+}
+
+static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
+ void *ArgDataPtr) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: {
+ int8_t *I8Ptr = (int8_t *) ArgDataPtr;
+ *I8Ptr = (int8_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 16: {
+ int16_t *I16Ptr = (int16_t *) ArgDataPtr;
+ *I16Ptr = (int16_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 32: {
+ int32_t *I32Ptr = (int32_t *) ArgDataPtr;
+ *I32Ptr = (int32_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 64: {
+ int64_t *I64Ptr = (int64_t *) ArgDataPtr;
+ *I64Ptr = (int64_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+  }
+  break;
+ case Type::FloatTyID: {
+ float *FloatPtr = (float *) ArgDataPtr;
+ *FloatPtr = AV.FloatVal;
+ return ArgDataPtr;
+ }
+ case Type::DoubleTyID: {
+ double *DoublePtr = (double *) ArgDataPtr;
+ *DoublePtr = AV.DoubleVal;
+ return ArgDataPtr;
+ }
+ case Type::PointerTyID: {
+ void **PtrPtr = (void **) ArgDataPtr;
+ *PtrPtr = GVTOP(AV);
+ return ArgDataPtr;
+ }
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ report_fatal_error("Type value could not be mapped for use with libffi.");
+ return NULL;
+}
+
+static bool ffiInvoke(RawFunc Fn, Function *F,
+ const std::vector<GenericValue> &ArgVals,
+ const TargetData *TD, GenericValue &Result) {
+ ffi_cif cif;
+ const FunctionType *FTy = F->getFunctionType();
+ const unsigned NumArgs = F->arg_size();
+
+ // TODO: We don't have type information about the remaining arguments, because
+ // this information is never passed into ExecutionEngine::runFunction().
+ if (ArgVals.size() > NumArgs && F->isVarArg()) {
+ report_fatal_error("Calling external var arg function '" + F->getName()
+ + "' is not supported by the Interpreter.");
+ }
+
+ unsigned ArgBytes = 0;
+
+ std::vector<ffi_type*> args(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ const Type *ArgTy = FTy->getParamType(ArgNo);
+ args[ArgNo] = ffiTypeFor(ArgTy);
+ ArgBytes += TD->getTypeStoreSize(ArgTy);
+ }
+
+ SmallVector<uint8_t, 128> ArgData;
+ ArgData.resize(ArgBytes);
+ uint8_t *ArgDataPtr = ArgData.data();
+ SmallVector<void*, 16> values(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ const Type *ArgTy = FTy->getParamType(ArgNo);
+ values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
+ ArgDataPtr += TD->getTypeStoreSize(ArgTy);
+ }
+
+ const Type *RetTy = FTy->getReturnType();
+ ffi_type *rtype = ffiTypeFor(RetTy);
+
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
+ SmallVector<uint8_t, 128> ret;
+ if (RetTy->getTypeID() != Type::VoidTyID)
+ ret.resize(TD->getTypeStoreSize(RetTy));
+ ffi_call(&cif, Fn, ret.data(), values.data());
+ switch (RetTy->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(RetTy)->getBitWidth()) {
+ case 8: Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break;
+ case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break;
+ case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break;
+ case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break;
+ }
+ break;
+ case Type::FloatTyID: Result.FloatVal = *(float *) ret.data(); break;
+ case Type::DoubleTyID: Result.DoubleVal = *(double*) ret.data(); break;
+ case Type::PointerTyID: Result.PointerVal = *(void **) ret.data(); break;
+ default: break;
+ }
+ return true;
+ }
+
+ return false;
+}
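+
+// For reference, a minimal sketch of what the machinery above reduces to for
+// a declared-but-undefined "double sin(double)": one fixed argument, a double
+// return slot, and a raw libffi call. This helper is illustrative only and is
+// not used by the interpreter itself.
+static double ffiInvokeSinExample(double X) {
+  double (*SinFn)(double) = std::sin;             // the resolved native symbol
+  ffi_cif cif;
+  ffi_type *ArgTypes[1] = { &ffi_type_double };
+  void *ArgValues[1] = { &X };
+  double Ret = 0.0;
+  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_double, ArgTypes) ==
+      FFI_OK)
+    ffi_call(&cif, (void (*)())(intptr_t)SinFn, &Ret, ArgValues);
+  return Ret;
+}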
+#endif // USE_LIBFFI
+
+GenericValue Interpreter::callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ TheInterpreter = this;
+
+ FunctionsLock->acquire();
+
+ // Do a lookup to see if the function is in our cache... this should just be a
+ // deferred annotation!
+ std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F);
+ if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F)
+ : FI->second) {
+ FunctionsLock->release();
+ return Fn(F->getFunctionType(), ArgVals);
+ }
+
+#ifdef USE_LIBFFI
+ std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F);
+ RawFunc RawFn;
+ if (RF == RawFunctions->end()) {
+ RawFn = (RawFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
+ if (!RawFn)
+ RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
+ if (RawFn != 0)
+ RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later
+ } else {
+ RawFn = RF->second;
+ }
+
+ FunctionsLock->release();
+
+ GenericValue Result;
+ if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result))
+ return Result;
+#endif // USE_LIBFFI
+
+ if (F->getName() == "__main")
+ errs() << "Tried to execute an unknown external function: "
+ << *F->getType() << " __main\n";
+ else
+ report_fatal_error("Tried to execute an unknown external function: " +
+ F->getName());
+#ifndef USE_LIBFFI
+ errs() << "Recompiling LLVM with --enable-libffi might help.\n";
+#endif
+ return GenericValue();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions "exported" to the running application...
+//
+
+// Visual Studio warns about returning GenericValue in extern "C" linkage
+#ifdef _MSC_VER
+ #pragma warning(disable : 4190)
+#endif
+
+extern "C" { // Don't add C++ manglings to llvm mangling :)
+
+// void atexit(Function*)
+GenericValue lle_X_atexit(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() == 1);
+ TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
+ GenericValue GV;
+ GV.IntVal = 0;
+ return GV;
+}
+
+// void exit(int)
+GenericValue lle_X_exit(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ TheInterpreter->exitCalled(Args[0]);
+ return GenericValue();
+}
+
+// void abort(void)
+GenericValue lle_X_abort(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ //FIXME: should we report or raise here?
+ //report_fatal_error("Interpreted program raised SIGABRT");
+ raise (SIGABRT);
+ return GenericValue();
+}
+
+// int sprintf(char *, const char *, ...) - a very rough implementation to make
+// output useful.
+GenericValue lle_X_sprintf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char *OutputBuffer = (char *)GVTOP(Args[0]);
+ const char *FmtStr = (const char *)GVTOP(Args[1]);
+ unsigned ArgNo = 2;
+
+ // printf should return # chars printed. This is completely incorrect, but
+ // close enough for now.
+ GenericValue GV;
+ GV.IntVal = APInt(32, strlen(FmtStr));
+ while (1) {
+ switch (*FmtStr) {
+ case 0: return GV; // Null terminator...
+ default: // Normal nonspecial character
+ sprintf(OutputBuffer++, "%c", *FmtStr++);
+ break;
+ case '\\': { // Handle escape codes
+ sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1));
+ FmtStr += 2; OutputBuffer += 2;
+ break;
+ }
+ case '%': { // Handle format specifiers
+ char FmtBuf[100] = "", Buffer[1000] = "";
+ char *FB = FmtBuf;
+ *FB++ = *FmtStr++;
+ char Last = *FB++ = *FmtStr++;
+ unsigned HowLong = 0;
+ while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' &&
+ Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' &&
+ Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' &&
+ Last != 'p' && Last != 's' && Last != '%') {
+ if (Last == 'l' || Last == 'L') HowLong++; // Keep track of l's
+ Last = *FB++ = *FmtStr++;
+ }
+ *FB = 0;
+
+ switch (Last) {
+ case '%':
+ memcpy(Buffer, "%", 2); break;
+ case 'c':
+ sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'd': case 'i':
+ case 'u': case 'o':
+ case 'x': case 'X':
+ if (HowLong >= 1) {
+ if (HowLong == 1 &&
+ TheInterpreter->getTargetData()->getPointerSizeInBits() == 64 &&
+ sizeof(long) < sizeof(int64_t)) {
+ // Make sure we use %lld with a 64 bit argument because we might be
+ // compiling LLI on a 32 bit compiler.
+ unsigned Size = strlen(FmtBuf);
+ FmtBuf[Size] = FmtBuf[Size-1];
+ FmtBuf[Size+1] = 0;
+ FmtBuf[Size-1] = 'l';
+ }
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue());
+ } else
+ sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'e': case 'E': case 'g': case 'G': case 'f':
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break;
+ case 'p':
+ sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break;
+ case 's':
+ sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break;
+ default:
+ errs() << "<unknown printf code '" << *FmtStr << "'!>";
+ ArgNo++; break;
+ }
+ size_t Len = strlen(Buffer);
+ memcpy(OutputBuffer, Buffer, Len + 1);
+ OutputBuffer += Len;
+ }
+ break;
+ }
+ }
+ return GV;
+}
+
+// int printf(const char *, ...) - a very rough implementation to make output
+// useful.
+GenericValue lle_X_printf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV((void*)&Buffer[0]));
+ NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+ outs() << Buffer;
+ return GV;
+}
+
+// int sscanf(const char *str, const char *format, ...);
+GenericValue lle_X_sscanf(const FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ return GV;
+}
+
+// int scanf(const char *format, ...);
+GenericValue lle_X_scanf(const FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ return GV;
+}
+
+// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
+// output useful.
+GenericValue lle_X_fprintf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() >= 2);
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV(Buffer));
+ NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+
+ fputs(Buffer, (FILE *) GVTOP(Args[0]));
+ return GV;
+}
+
+} // End extern "C"
+
+// Done with externals; turn the warning back on
+#ifdef _MSC_VER
+ #pragma warning(default: 4190)
+#endif
+
+
+void Interpreter::initializeExternalFunctions() {
+ sys::ScopedLock Writer(*FunctionsLock);
+ FuncNames["lle_X_atexit"] = lle_X_atexit;
+ FuncNames["lle_X_exit"] = lle_X_exit;
+ FuncNames["lle_X_abort"] = lle_X_abort;
+
+ FuncNames["lle_X_printf"] = lle_X_printf;
+ FuncNames["lle_X_sprintf"] = lle_X_sprintf;
+ FuncNames["lle_X_sscanf"] = lle_X_sscanf;
+ FuncNames["lle_X_scanf"] = lle_X_scanf;
+ FuncNames["lle_X_fprintf"] = lle_X_fprintf;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
new file mode 100644
index 000000000000..43e34533c7ba
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -0,0 +1,98 @@
+//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top-level functionality for the LLVM interpreter.
+// This interpreter is designed to be a very simple, portable, inefficient
+// interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include <cstring>
+using namespace llvm;
+
+namespace {
+
+static struct RegisterInterp {
+ RegisterInterp() { Interpreter::Register(); }
+} InterpRegistrator;
+
+}
+
+extern "C" void LLVMLinkInInterpreter() { }
+
+/// create - Create a new interpreter object. This can only fail if the
+/// Module cannot be fully materialized.
+///
+ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
+ // Tell this Module to materialize everything and release the GVMaterializer.
+ if (M->MaterializeAllPermanently(ErrStr))
+ // We got an error, just return 0
+ return 0;
+
+ return new Interpreter(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Interpreter ctor - Initialize stuff
+//
+Interpreter::Interpreter(Module *M)
+ : ExecutionEngine(M), TD(M) {
+
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ setTargetData(&TD);
+ // Initialize the "backend"
+ initializeExecutionEngine();
+ initializeExternalFunctions();
+ emitGlobals();
+
+ IL = new IntrinsicLowering(TD);
+}
+
+Interpreter::~Interpreter() {
+ delete IL;
+}
+
+void Interpreter::runAtExitHandlers () {
+ while (!AtExitHandlers.empty()) {
+ callFunction(AtExitHandlers.back(), std::vector<GenericValue>());
+ AtExitHandlers.pop_back();
+ run();
+ }
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue
+Interpreter::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert (F && "Function *F was null at entry to run()");
+
+ // Try extra hard not to pass extra args to a function that isn't
+ // expecting them. C programmers frequently bend the rules and
+ // declare main() with fewer parameters than it actually gets
+ // passed, and the interpreter barfs if you pass a function more
+ // parameters than it is declared to take. This does not attempt to
+ // take into account gratuitous differences in declared types,
+ // though.
+ std::vector<GenericValue> ActualArgs;
+ const unsigned ArgCount = F->getFunctionType()->getNumParams();
+ for (unsigned i = 0; i < ArgCount; ++i)
+ ActualArgs.push_back(ArgValues[i]);
+
+ // Set up the function call.
+ callFunction(F, ActualArgs);
+
+ // Start executing the function.
+ run();
+
+ return ExitValue;
+}
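+
+// A minimal usage sketch (illustrative only; not used by lli): build a
+// trivial module, select the interpreter engine through EngineBuilder, and
+// go through runFunction() above. The function name "forty_two" is invented
+// for the example.
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+static GenericValue interpretFortyTwoExample() {
+  LLVMContext Context;
+  Module *M = new Module("example", Context);
+
+  // Equivalent IR:  define i32 @forty_two() { ret i32 42 }
+  Function *F = Function::Create(
+      FunctionType::get(Type::getInt32Ty(Context), false),
+      Function::ExternalLinkage, "forty_two", M);
+  BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
+  ReturnInst::Create(Context,
+                     ConstantInt::get(Type::getInt32Ty(Context), 42), BB);
+
+  std::string Err;
+  ExecutionEngine *EE = EngineBuilder(M)
+                            .setEngineKind(EngineKind::Interpreter)
+                            .setErrorStr(&Err)
+                            .create();             // ends up in create() above
+  if (!EE) {
+    errs() << "Could not build interpreter: " << Err << "\n";
+    return GenericValue();
+  }
+  std::vector<GenericValue> NoArgs;
+  GenericValue Result = EE->runFunction(F, NoArgs); // Result.IntVal == 42
+  delete EE;                                        // also deletes M
+  return Result;
+}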
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
new file mode 100644
index 000000000000..bfebe3debfcd
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -0,0 +1,242 @@
+//===-- Interpreter.h ------------------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interpreter structure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLI_INTERPRETER_H
+#define LLI_INTERPRETER_H
+
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+
+class IntrinsicLowering;
+struct FunctionInfo;
+template<typename T> class generic_gep_type_iterator;
+class ConstantExpr;
+typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
+
+
+// AllocaHolder - Object to track all of the blocks of memory allocated by
+// alloca. When the function returns, this object is popped off the execution
+// stack, which causes the dtor to be run, which frees all the alloca'd memory.
+//
+class AllocaHolder {
+ friend class AllocaHolderHandle;
+ std::vector<void*> Allocations;
+ unsigned RefCnt;
+public:
+ AllocaHolder() : RefCnt(0) {}
+ void add(void *mem) { Allocations.push_back(mem); }
+ ~AllocaHolder() {
+ for (unsigned i = 0; i < Allocations.size(); ++i)
+ free(Allocations[i]);
+ }
+};
+
+// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into
+// a vector...
+//
+class AllocaHolderHandle {
+ AllocaHolder *H;
+public:
+ AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; }
+ AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; }
+ ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; }
+
+ void add(void *mem) { H->add(mem); }
+};
+
+typedef std::vector<GenericValue> ValuePlaneTy;
+
+// ExecutionContext struct - This struct represents one stack frame currently
+// executing.
+//
+struct ExecutionContext {
+ Function *CurFunction;// The currently executing function
+ BasicBlock *CurBB; // The currently executing BB
+ BasicBlock::iterator CurInst; // The next instruction to execute
+ std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
+ std::vector<GenericValue> VarArgs; // Values passed through an ellipsis
+ CallSite Caller; // Holds the call that called subframes.
+ // NULL if main func or debugger invoked fn
+ AllocaHolderHandle Allocas; // Track memory allocated by alloca
+};
+
+// Interpreter - This class represents the entirety of the interpreter.
+//
+class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
+ GenericValue ExitValue; // The return value of the called function
+ TargetData TD;
+ IntrinsicLowering *IL;
+
+ // The runtime stack of executing code. The top of the stack is the current
+ // function record.
+ std::vector<ExecutionContext> ECStack;
+
+ // AtExitHandlers - List of functions to call when the program exits,
+ // registered with the atexit() library function.
+ std::vector<Function*> AtExitHandlers;
+
+public:
+ explicit Interpreter(Module *M);
+ ~Interpreter();
+
+ /// runAtExitHandlers - Run any functions registered by the program's calls to
+ /// atexit(3), which we intercept and store in AtExitHandlers.
+ ///
+ void runAtExitHandlers();
+
+ static void Register() {
+ InterpCtor = create;
+ }
+
+ /// create - Create an interpreter ExecutionEngine. This can only fail if
+ /// the Module cannot be fully materialized.
+ ///
+ static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0);
+
+ /// run - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// recompileAndRelinkFunction - For the interpreter, functions are always
+ /// up-to-date.
+ ///
+ virtual void *recompileAndRelinkFunction(Function *F) {
+ return getPointerToFunction(F);
+ }
+
+ /// freeMachineCodeForFunction - The interpreter does not generate any code.
+ ///
+ void freeMachineCodeForFunction(Function *F) { }
+
+ // Methods used to execute code:
+ // Place a call on the stack
+ void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
+ void run(); // Execute instructions until nothing left to do
+
+ // Opcode Implementations
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+
+ void visitBinaryOperator(BinaryOperator &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitPHINode(PHINode &PN) {
+ llvm_unreachable("PHI nodes already handled!");
+ }
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitSelectInst(SelectInst &I);
+
+
+ void visitCallSite(CallSite CS);
+ void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
+ void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
+ void visitUnwindInst(UnwindInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitShl(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+
+ void visitVAArgInst(VAArgInst &I);
+ void visitInstruction(Instruction &I) {
+ errs() << I;
+ llvm_unreachable("Instruction not interpretable yet!");
+ }
+
+ GenericValue callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals);
+ void exitCalled(GenericValue GV);
+
+ void addAtExitHandler(Function *F) {
+ AtExitHandlers.push_back(F);
+ }
+
+ GenericValue *getFirstVarArg () {
+ return &(ECStack.back ().VarArgs[0]);
+ }
+
+private: // Helper functions
+ GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, ExecutionContext &SF);
+
+ // SwitchToNewBasicBlock - Start execution in a new basic block and run any
+ // PHI nodes in the top of the block. This is used for intraprocedural
+ // control flow.
+ //
+ void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
+
+ void *getPointerToFunction(Function *F) { return (void*)F; }
+ void *getPointerToBasicBlock(BasicBlock *BB) { return (void*)BB; }
+
+ void initializeExecutionEngine() { }
+ void initializeExternalFunctions();
+ GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
+ GenericValue getOperandValue(Value *V, ExecutionContext &SF);
+ GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+ const Type *Ty, ExecutionContext &SF);
+ void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
new file mode 100644
index 000000000000..fa8bee460427
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -0,0 +1,161 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If a function call occurs to an external function, the JIT is designed to use
+// the dynamic loader interface to find a function to call. This is useful for
+// calling system calls and library functions that are not available in LLVM.
+// Some system calls, however, need to be handled specially. For this reason,
+// we intercept some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled()) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke the parent's ctors/dtors from the generated main()!
+ // On Mingw and Cygwin, the symbol __main would otherwise resolve to the
+ // callee's (e.g. tools/lli's) copy, invoking the wrong duplicated ctors
+ // (and registering the wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors() to be
+ // called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // and has an asm specifier, try again without the underscore.
+ if (Name[0] == 1 && NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
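+
+// A small sketch of the LazyFunctionCreator hook consulted just above. The
+// resolver and the "bump_counter" symbol are hypothetical; a client installs
+// the resolver with ExecutionEngine::InstallLazyFunctionCreator() before
+// running code that references otherwise unresolvable symbols.
+static int ExampleCounter = 0;
+static void ExampleBumpCounter() { ++ExampleCounter; }
+
+static void *ExampleSymbolResolver(const std::string &Name) {
+  if (Name == "bump_counter")
+    return (void*)(intptr_t)&ExampleBumpCounter;
+  return 0;    // not ours: fall back to the AbortOnFailure handling above
+}
+// Typical installation, with EE being the JIT's ExecutionEngine:
+//   EE->InstallLazyFunctionCreator(ExampleSymbolResolver);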
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
new file mode 100644
index 000000000000..445d2d0670c8
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
@@ -0,0 +1,821 @@
+//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool implements a just-in-time compiler for LLVM, allowing direct
+// execution of LLVM bitcode in an efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#ifdef __APPLE__
+// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
+// of atexit). It passes the address of linker generated symbol __dso_handle
+// to the function.
+// This configuration change happened at version 5330.
+# include <AvailabilityMacros.h>
+# if defined(MAC_OS_X_VERSION_10_4) && \
+ ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \
+ (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \
+ __APPLE_CC__ >= 5330))
+# ifndef HAVE___DSO_HANDLE
+# define HAVE___DSO_HANDLE 1
+# endif
+# endif
+#endif
+
+#if HAVE___DSO_HANDLE
+extern void *__dso_handle __attribute__ ((__visibility__ ("hidden")));
+#endif
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { JIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInJIT() {
+}
+
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+
+// libgcc defines the __register_frame function to dynamically register new
+// dwarf frames for exception handling. This functionality is not portable
+// across compilers and is only provided by GCC. We use the __register_frame
+// function here so that code generated by the JIT cooperates with the unwinding
+// runtime of libgcc. When JITting with exception handling enabled, LLVM
+// generates dwarf frames and registers them with libgcc via __register_frame.
+//
+// The __register_frame function works with Linux.
+//
+// Unfortunately, this functionality seems to have been added to libgcc after
+// the darwin unwinding library of libgcc was written. The darwin code
+// overwrites the value updated by __register_frame with a value fetched with
+// "keymgr".
+// "keymgr" is an obsolete functionality, which should be rewritten some day.
+// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we
+// need a workaround in LLVM which uses the "keymgr" to dynamically modify the
+// values of an opaque key, used by libgcc to find dwarf tables.
+
+extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
+
+#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
+# define USE_KEYMGR 1
+#else
+# define USE_KEYMGR 0
+#endif
+
+#if USE_KEYMGR
+
+namespace {
+
+// LibgccObject - This is the structure defined in libgcc. There is no #include
+// provided for this structure, so we also define it here. libgcc calls it
+// "struct object". The structure is undocumented in libgcc.
+struct LibgccObject {
+ void *unused1;
+ void *unused2;
+ void *unused3;
+
+ /// frame - Pointer to the exception table.
+ void *frame;
+
+ /// encoding - The encoding of the object?
+ union {
+ struct {
+ unsigned long sorted : 1;
+ unsigned long from_array : 1;
+ unsigned long mixed_encoding : 1;
+ unsigned long encoding : 8;
+ unsigned long count : 21;
+ } b;
+ size_t i;
+ } encoding;
+
+ /// fde_end - libgcc defines this field only if some macro is defined. We
+ /// include this field even if it may not be there, to make libgcc happy.
+ char *fde_end;
+
+ /// next - At least we know it's a chained list!
+ struct LibgccObject *next;
+};
+
+// "kemgr" stuff. Apparently, all frame tables are stored there.
+extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *);
+extern "C" void *_keymgr_get_and_lock_processwide_ptr(int);
+#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */
+
+/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It
+/// probably contains all dwarf tables that are loaded.
+struct LibgccObjectInfo {
+
+ /// seenObjects - LibgccObjects already parsed by the unwinding runtime.
+ ///
+ struct LibgccObject* seenObjects;
+
+ /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
+ ///
+ struct LibgccObject* unseenObjects;
+
+ unsigned unused[2];
+};
+
+/// DarwinRegisterFrame - Since __register_frame does not work with darwin's
+/// libgcc, we provide our own function, which "tricks" libgcc by modifying the
+/// "Dwarf2 object list" key.
+void DarwinRegisterFrame(void* FrameBegin) {
+ // Get the key.
+ LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+ assert(LOI && "This should be preallocated by the runtime");
+
+ // Allocate a new LibgccObject to represent this frame. Deallocation of this
+ // object may be impossible: since darwin code in libgcc was written after
+ // the ability to dynamically register frames, things may crash if we
+ // deallocate it.
+ struct LibgccObject* ob = (struct LibgccObject*)
+ malloc(sizeof(struct LibgccObject));
+
+ // Do as libgcc does for the values of the fields.
+ ob->unused1 = (void *)-1;
+ ob->unused2 = 0;
+ ob->unused3 = 0;
+ ob->frame = FrameBegin;
+ ob->encoding.i = 0;
+ ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
+
+ // Put the info in both places, as libgcc may use either the first or the
+ // second field. Note that we rely on having two pointers here; if fde_end
+ // were a plain char, things would get complicated.
+ ob->fde_end = (char*)LOI->unseenObjects;
+ ob->next = LOI->unseenObjects;
+
+ // Update the key's unseenObjects list.
+ LOI->unseenObjects = ob;
+
+ // Finally update the "key". Apparently, libgcc requires it.
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
+ LOI);
+
+}
+
+}
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine; it does not fall back to the interpreter. This takes ownership
+/// of the module.
+ExecutionEngine *JIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo()) {
+ return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ } else {
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+ }
+}
+
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+ SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
+ mutable sys::Mutex Lock;
+public:
+ void Add(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.insert(jit);
+ }
+ void Remove(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.erase(jit);
+ }
+ void *getPointerToNamedFunction(const char *Name) const {
+ MutexGuard guard(Lock);
+ assert(JITs.size() != 0 && "No Jit registered");
+ // Search for the function in every instance of the JIT.
+ for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+ end = JITs.end();
+ Jit != end; ++Jit) {
+ if (Function *F = (*Jit)->FindFunctionNamed(Name))
+ return (*Jit)->getPointerToFunction(F);
+ }
+ // The function is not available: fall back on the first created JIT (it
+ // will search the symbols of the current program/library).
+ return (*JITs.begin())->getPointerToNamedFunction(Name);
+ }
+};
+ManagedStatic<JitPool> AllJits;
+}
+extern "C" {
+ // getPointerToNamedFunction - This function is used as a global wrapper to
+ // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+ // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+ // need to resolve function(s) that are being mis-codegenerated, so we need to
+ // resolve their addresses at runtime, and this is the way to do it.
+ void *getPointerToNamedFunction(const char *Name) {
+ return AllJits->getPointerToNamedFunction(Name);
+ }
+}
+
+JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+ : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
+ isAlreadyCodeGenerating(false) {
+ setTargetData(TM.getTargetData());
+
+ jitstate = new JITState(M);
+
+ // Initialize JCE
+ JCE = createEmitter(*this, JMM, TM);
+
+ // Register in global list of all JITs.
+ AllJits->Add(this);
+
+ // Add target data
+ MutexGuard locked(lock);
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory that
+ // may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Register routine for informing unwinding runtime about new EH frames
+#if HAVE_EHTABLE_SUPPORT
+#if USE_KEYMGR
+ struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+
+ // The key is created on demand, and libgcc creates it the first time an
+ // exception occurs. Since we need the key to register frames, we create
+ // it now.
+ if (!LOI)
+ LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
+ InstallExceptionTableRegister(DarwinRegisterFrame);
+ // Not sure about how to deregister on Darwin.
+#else
+ InstallExceptionTableRegister(__register_frame);
+ InstallExceptionTableDeregister(__deregister_frame);
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+ // Initialize passes.
+ PM.doInitialization();
+}
+
+JIT::~JIT() {
+ // Unregister all exception tables registered by this JIT.
+ DeregisterAllTables();
+ // Cleanup.
+ AllJits->Remove(this);
+ delete jitstate;
+ delete JCE;
+ delete &TM;
+}
+
+/// addModule - Add a new Module to the JIT. If we previously removed the last
+/// Module, we need to re-initialize jitstate with a valid Module.
+void JIT::addModule(Module *M) {
+ MutexGuard locked(lock);
+
+ if (Modules.empty()) {
+ assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");
+
+ jitstate = new JITState(M);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+
+ ExecutionEngine::addModule(M);
+}
+
+/// removeModule - If we are removing the last Module, invalidate the jitstate
+/// since the PassManager it contains references a released Module.
+bool JIT::removeModule(Module *M) {
+ bool result = ExecutionEngine::removeModule(M);
+
+ MutexGuard locked(lock);
+
+ if (jitstate->getModule() == M) {
+ delete jitstate;
+ jitstate = 0;
+ }
+
+ if (!jitstate && !Modules.empty()) {
+ jitstate = new JITState(Modules[0]);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+ return result;
+}
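+
+// Usage sketch for the module management above (names are hypothetical):
+// extra modules can be added at any time, symbols are found across all of
+// them (see JitPool), and compilation still happens lazily on first use.
+static void *compileFromExtraModule(JIT *EE, Module *Extra) {
+  EE->addModule(Extra);                        // re-creates jitstate if needed
+  if (Function *F = EE->FindFunctionNamed("helper"))
+    return EE->getPointerToFunction(F);        // JIT-compiles on demand
+  return 0;
+}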
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue JIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 &&
+ FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+ // Handle cases where no arguments are passed first.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ return rv;
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ // Okay, this is not one of our quick and easy cases. Because we don't have a
+ // full FFI, we have to codegen a nullary stub function that just calls the
+ // function we are interested in, passing in constants for all of the
+ // arguments. Make this function and return.
+
+ // First, create the function.
+ FunctionType *STy=FunctionType::get(RetTy, false);
+ Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
+ F->getParent());
+
+ // Insert a basic block.
+ BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);
+
+ // Convert all of the GenericValue arguments over to constants. Note that we
+ // currently don't support varargs.
+ SmallVector<Value*, 8> Args;
+ for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
+ Constant *C = 0;
+ const Type *ArgTy = FTy->getParamType(i);
+ const GenericValue &AV = ArgValues[i];
+ switch (ArgTy->getTypeID()) {
+ default: llvm_unreachable("Unknown argument type for function call!");
+ case Type::IntegerTyID:
+ C = ConstantInt::get(F->getContext(), AV.IntVal);
+ break;
+ case Type::FloatTyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
+ break;
+ case Type::DoubleTyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
+ break;
+ case Type::PPC_FP128TyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
+ break;
+ case Type::PointerTyID:
+ void *ArgPtr = GVTOP(AV);
+ if (sizeof(void*) == 4)
+ C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
+ (int)(intptr_t)ArgPtr);
+ else
+ C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
+ (intptr_t)ArgPtr);
+ // Cast the integer to pointer
+ C = ConstantExpr::getIntToPtr(C, ArgTy);
+ break;
+ }
+ Args.push_back(C);
+ }
+
+ CallInst *TheCall = CallInst::Create(F, Args, "", StubBB);
+ TheCall->setCallingConv(F->getCallingConv());
+ TheCall->setTailCall();
+ if (!TheCall->getType()->isVoidTy())
+ // Return result of the call.
+ ReturnInst::Create(F->getContext(), TheCall, StubBB);
+ else
+ ReturnInst::Create(F->getContext(), StubBB); // Just return void.
+
+ // Finally, call our nullary stub function.
+ GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
+ // Erase it, since no other function can have a reference to it.
+ Stub->eraseFromParent();
+ // And return the result.
+ return Result;
+}
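+
+// Beyond runFunction(), the usual way for a client to call JITed code
+// repeatedly is to take the entry address once and cast it. A minimal
+// sketch, assuming F has LLVM type i32 (i32) and lives in one of EE's
+// modules:
+static int callThroughFunctionPointer(JIT *EE, Function *F, int Arg) {
+  // getPointerToFunction() compiles F on first use (see below).
+  int (*FP)(int) = (int (*)(int))(intptr_t)EE->getPointerToFunction(F);
+  return FP(Arg);
+}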
+
+void JIT::RegisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ EventListeners.push_back(L);
+}
+void JIT::UnregisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ std::vector<JITEventListener*>::reverse_iterator I=
+ std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+ if (I != EventListeners.rend()) {
+ std::swap(*I, EventListeners.back());
+ EventListeners.pop_back();
+ }
+}
+void JIT::NotifyFunctionEmitted(
+ const Function &F,
+ void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details);
+ }
+}
+
+void JIT::NotifyFreeingMachineCode(void *OldPtr) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFreeingMachineCode(OldPtr);
+ }
+}
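+
+// A minimal JITEventListener sketch modelled on MCIListener below: it merely
+// tallies how much machine code has been emitted. A client would register it
+// with RegisterJITEventListener() above and unregister it before destroying
+// it.
+namespace {
+class CountingJITEventListener : public JITEventListener {
+  unsigned EmittedFunctions;
+  size_t EmittedBytes;
+public:
+  CountingJITEventListener() : EmittedFunctions(0), EmittedBytes(0) {}
+  virtual void NotifyFunctionEmitted(const Function &, void *, size_t Size,
+                                     const EmittedFunctionDetails &) {
+    ++EmittedFunctions;
+    EmittedBytes += Size;
+  }
+  virtual void NotifyFreeingMachineCode(void *) {}
+  unsigned getEmittedFunctions() const { return EmittedFunctions; }
+  size_t getEmittedBytes() const { return EmittedBytes; }
+};
+}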
+
+/// runJITOnFunction - Run the FunctionPassManager full of
+/// just-in-time compilation passes on F, hopefully filling in
+/// GlobalAddress[F] with the address of F's machine code.
+///
+void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
+ MutexGuard locked(lock);
+
+ class MCIListener : public JITEventListener {
+ MachineCodeInfo *const MCI;
+ public:
+ MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
+ virtual void NotifyFunctionEmitted(const Function &,
+ void *Code, size_t Size,
+ const EmittedFunctionDetails &) {
+ MCI->setAddress(Code);
+ MCI->setSize(Size);
+ }
+ };
+ MCIListener MCIL(MCI);
+ if (MCI)
+ RegisterJITEventListener(&MCIL);
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ if (MCI)
+ UnregisterJITEventListener(&MCIL);
+}
+
+void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
+ assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
+
+ jitTheFunction(F, locked);
+
+ // If the function referred to another function that had not yet been
+ // read from bitcode, and we are jitting non-lazily, emit it now.
+ while (!jitstate->getPendingFunctions(locked).empty()) {
+ Function *PF = jitstate->getPendingFunctions(locked).back();
+ jitstate->getPendingFunctions(locked).pop_back();
+
+ assert(!PF->hasAvailableExternallyLinkage() &&
+ "Externally-defined function should not be in pending list.");
+
+ jitTheFunction(PF, locked);
+
+ // Now that the function has been jitted, ask the JITEmitter to rewrite
+ // the stub with real address of the function.
+ updateFunctionStub(PF);
+ }
+}
+
+void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
+ isAlreadyCodeGenerating = true;
+ jitstate->getPM(locked).run(*F);
+ isAlreadyCodeGenerating = false;
+
+ // clear basic block addresses after this function is done
+ getBasicBlockAddressMap(locked).clear();
+}
+
+/// getPointerToFunction - This method is used to get the address of the
+/// specified function, compiling it if necessary.
+///
+void *JIT::getPointerToFunction(Function *F) {
+
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr; // Check if function already code gen'd
+
+ MutexGuard locked(lock);
+
+ // Now that this thread owns the lock, make sure we read in the function if it
+ // exists in this Module.
+ std::string ErrorMsg;
+ if (F->Materialize(&ErrorMsg)) {
+ report_fatal_error("Error reading function '" + F->getName()+
+ "' from bitcode file: " + ErrorMsg);
+ }
+
+ // ... and check if another thread has already code gen'd the function.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ return Addr;
+}
+
+void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I == getBasicBlockAddressMap(locked).end()) {
+ getBasicBlockAddressMap(locked)[BB] = Addr;
+ } else {
+ // Ignore repeats: a basic block may be split into several machine basic
+ // blocks, so the same BB address can be reported more than once.
+ }
+}
+
+void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
+ MutexGuard locked(lock);
+ getBasicBlockAddressMap(locked).erase(BB);
+}
+
+void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
+ // Make sure its parent function has been compiled by the JIT.
+ (void)getPointerToFunction(BB->getParent());
+
+ // resolve basic block address
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I != getBasicBlockAddressMap(locked).end()) {
+ return I->second;
+ } else {
+ assert(0 && "JIT does not have BB address for address-of-label, was"
+ " it eliminated by optimizer?");
+ return 0;
+ }
+}
+
+/// getOrEmitGlobalVariable - Return the address of the specified global
+/// variable, possibly emitting it to memory if needed. This is used by the
+/// Emitter.
+void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
+ MutexGuard locked(lock);
+
+ void *Ptr = getPointerToGlobalIfAvailable(GV);
+ if (Ptr) return Ptr;
+
+ // If the global is external, just remember the address.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
+#if HAVE___DSO_HANDLE
+ if (GV->getName() == "__dso_handle")
+ return (void*)&__dso_handle;
+#endif
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
+ if (Ptr == 0) {
+ report_fatal_error("Could not resolve external global address: "
+ +GV->getName());
+ }
+ addGlobalMapping(GV, Ptr);
+ } else {
+ // If the global hasn't been emitted to memory yet, allocate space and
+ // emit it into memory.
+ Ptr = getMemoryForGV(GV);
+ addGlobalMapping(GV, Ptr);
+ EmitGlobalVariable(GV); // Initialize the variable.
+ }
+ return Ptr;
+}
+
+/// recompileAndRelinkFunction - This method is used to force a function
+/// which has already been compiled, to be compiled again, possibly
+/// after it has been modified. Then the entry to the old copy is overwritten
+/// with a branch to the new copy. If there was no old copy, this acts
+/// just like JIT::getPointerToFunction().
+///
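+/// Illustrative sketch (not from the original sources): after mutating F's
+/// IR in place, a client might call
+///
+///   void *NewAddr = EE->recompileAndRelinkFunction(F);
+///
+/// and callers holding the old address still end up in the new code, because
+/// the old entry point is patched with a branch to NewAddr.
+///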
+void *JIT::recompileAndRelinkFunction(Function *F) {
+ void *OldAddr = getPointerToGlobalIfAvailable(F);
+
+ // If it's not already compiled there is no reason to patch it up.
+ if (OldAddr == 0) { return getPointerToFunction(F); }
+
+ // Delete the old function mapping.
+ addGlobalMapping(F, 0);
+
+ // Recodegen the function
+ runJITOnFunction(F);
+
+ // Update state, forward the old function to the new function.
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ TJI.replaceMachineCodeForFunction(OldAddr, Addr);
+ return Addr;
+}
+
+/// getMemoryForGV - This method abstracts memory allocation of global
+/// variable so that the JIT can allocate thread local variables depending
+/// on the target.
+///
+char* JIT::getMemoryForGV(const GlobalVariable* GV) {
+ char *Ptr;
+
+ // GlobalVariables which are not "constant" will cause trouble in a server
+ // situation: their memory is returned in the same block as code, which may
+ // not be writable.
+ if (isGVCompilationDisabled() && !GV->isConstant()) {
+ report_fatal_error("Compilation of non-internal GlobalValue is disabled!");
+ }
+
+ // Some applications require globals and code to live together, so they may
+ // be allocated into the same buffer, but in general globals are allocated
+ // through the memory manager which puts them near the code but not in the
+ // same buffer.
+ const Type *GlobalType = GV->getType()->getElementType();
+ size_t S = getTargetData()->getTypeAllocSize(GlobalType);
+ size_t A = getTargetData()->getPreferredAlignment(GV);
+ if (GV->isThreadLocal()) {
+ MutexGuard locked(lock);
+ Ptr = TJI.allocateThreadLocalMemory(S);
+ } else if (TJI.allocateSeparateGVMemory()) {
+ if (A <= 8) {
+ Ptr = (char*)malloc(S);
+ } else {
+ // Allocate S+A bytes of memory, then use an aligned pointer within that
+ // space.
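+ // Worked example (illustrative): with S = 24 and A = 16, malloc(40) might
+ // return ...1008; MisAligned is then 8, so the pointer is advanced by
+ // A - 8 = 8 bytes to the 16-byte-aligned address ...1010.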
+ Ptr = (char*)malloc(S+A);
+ unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+ Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
+ }
+ } else if (AllocateGVsWithCode) {
+ Ptr = (char*)JCE->allocateSpace(S, A);
+ } else {
+ Ptr = (char*)JCE->allocateGlobal(S, A);
+ }
+ return Ptr;
+}
+
+void JIT::addPendingFunction(Function *F) {
+ MutexGuard locked(lock);
+ jitstate->getPendingFunctions(locked).push_back(F);
+}
+
+
+JITEventListener::~JITEventListener() {}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h
new file mode 100644
index 000000000000..b879fc36e59b
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h
@@ -0,0 +1,226 @@
+//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the top-level JIT data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JIT_H
+#define JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+
+class Function;
+struct JITEvent_EmittedFunctionDetails;
+class MachineCodeEmitter;
+class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
+
+class JITState {
+private:
+ FunctionPassManager PM; // Passes to compile a function
+ Module *M; // Module used to create the PM
+
+ /// PendingFunctions - Functions which have not been code generated yet, but
+ /// were called from a function being code generated.
+ std::vector<AssertingVH<Function> > PendingFunctions;
+
+public:
+ explicit JITState(Module *M) : PM(M), M(M) {}
+
+ FunctionPassManager &getPM(const MutexGuard &L) {
+ return PM;
+ }
+
+ Module *getModule() const { return M; }
+ std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
+ return PendingFunctions;
+ }
+};
+
+
+class JIT : public ExecutionEngine {
+ /// types
+ typedef ValueMap<const BasicBlock *, void *>
+ BasicBlockAddressMapTy;
+ /// data
+ TargetMachine &TM; // The current target we are compiling to
+ TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
+ JITCodeEmitter *JCE; // JCE object
+ std::vector<JITEventListener*> EventListeners;
+
+ /// AllocateGVsWithCode - Some applications require that global variables and
+ /// code be allocated into the same region of memory, in which case this flag
+ /// should be set to true. Doing so breaks freeMachineCodeForFunction.
+ bool AllocateGVsWithCode;
+
+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
+ JITState *jitstate;
+
+ /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
+ /// actualized version, only filled for basic blocks that have their address
+ /// taken.
+ BasicBlockAddressMapTy BasicBlockAddressMap;
+
+
+ JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode);
+public:
+ ~JIT();
+
+ static void Register() {
+ JITCtor = createJIT;
+ }
+
+ /// getJITInfo - Return the target JIT information structure.
+ ///
+ TargetJITInfo &getJITInfo() const { return TJI; }
+
+ /// create - Create and return a new JIT compiler if there is one available
+ /// for the current target. Otherwise, return null.
+ ///
+ static ExecutionEngine *create(Module *M,
+ std::string *Err,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel =
+ CodeGenOpt::Default,
+ bool GVsWithCode = true,
+ CodeModel::Model CMM = CodeModel::Default) {
+ return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
+ CMM);
+ }
+
+ virtual void addModule(Module *M);
+
+ /// removeModule - Remove a Module from the list of modules. Returns true if
+ /// M is found.
+ virtual bool removeModule(Module *M);
+
+ /// runFunction - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ // CompilationCallback - Invoked the first time that a call site is found,
+ // which causes lazy compilation of the target function.
+ //
+ static void CompilationCallback();
+
+ /// getPointerToFunction - This returns the address of the specified function,
+ /// compiling it if necessary.
+ ///
+ void *getPointerToFunction(Function *F);
+
+ /// addPointerToBasicBlock - Adds the address of the specified basic block.
+ void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
+
+ /// clearPointerToBasicBlock - Removes the address of the specified basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB);
+
+ /// getPointerToBasicBlock - This returns the address of the specified basic
+ /// block, assuming the function has already been compiled.
+ void *getPointerToBasicBlock(BasicBlock *BB);
+
+ /// getOrEmitGlobalVariable - Return the address of the specified global
+ /// variable, possibly emitting it to memory if needed. This is used by the
+ /// Emitter.
+ void *getOrEmitGlobalVariable(const GlobalVariable *GV);
+
+ /// getPointerToFunctionOrStub - If the specified function has been
+ /// code-gen'd, return a pointer to the function. If not, compile it, or use
+ /// a stub to implement lazy compilation if available.
+ ///
+ void *getPointerToFunctionOrStub(Function *F);
+
+ /// recompileAndRelinkFunction - This method is used to force a function
+ /// which has already been compiled, to be compiled again, possibly
+ /// after it has been modified. Then the entry to the old copy is overwritten
+ /// with a branch to the new copy. If there was no old copy, this acts
+ /// just like JIT::getPointerToFunction().
+ ///
+ void *recompileAndRelinkFunction(Function *F);
+
+ /// freeMachineCodeForFunction - deallocate memory used to code-generate this
+ /// Function.
+ ///
+ void freeMachineCodeForFunction(Function *F);
+
+ /// addPendingFunction - While JITing non-lazily, a called but non-codegen'd
+ /// function was encountered. Add it to a pending list to be processed after
+ /// the current function.
+ ///
+ void addPendingFunction(Function *F);
+
+ /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+ ///
+ JITCodeEmitter *getCodeEmitter() const { return JCE; }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // Run the JIT on F and return information about the generated code
+ void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
+
+ virtual void RegisterJITEventListener(JITEventListener *L);
+ virtual void UnregisterJITEventListener(JITEventListener *L);
+ /// These functions correspond to the methods on JITEventListener. They
+ /// iterate over the registered listeners and call the corresponding method on
+ /// each.
+ void NotifyFunctionEmitted(
+ const Function &F, void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details);
+ void NotifyFreeingMachineCode(void *OldPtr);
+
+ BasicBlockAddressMapTy &
+ getBasicBlockAddressMap(const MutexGuard &) {
+ return BasicBlockAddressMap;
+ }
+
+
+private:
+ static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+ TargetMachine &tm);
+ void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
+ void updateFunctionStub(Function *F);
+ void jitTheFunction(Function *F, const MutexGuard &locked);
+
+protected:
+
+ /// getMemoryForGV - Allocate memory for a global variable.
+ virtual char* getMemoryForGV(const GlobalVariable* GV);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
new file mode 100644
index 000000000000..e71c20b89fda
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -0,0 +1,211 @@
+//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITDebugRegisterer.h"
+#include "../../CodeGen/ELF.h"
+#include "../../CodeGen/ELFWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include <string>
+
+namespace llvm {
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+ // Debuggers put a breakpoint in this function.
+ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
+
+ // We put information about the JITed function in this global, which the
+ // debugger reads. Make sure to specify the version statically, because the
+ // debugger checks the version before we can set it during runtime.
+ struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+}
+
+namespace {
+
+ /// JITDebugLock - Used to serialize all code registration events, since they
+ /// modify global variables.
+ sys::Mutex JITDebugLock;
+
+}
+
+JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
+
+JITDebugRegisterer::~JITDebugRegisterer() {
+ // Free all ELF memory.
+ for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
+ I != E; ++I) {
+ // Call the private method that doesn't update the map so our iterator
+ // doesn't break.
+ UnregisterFunctionInternal(I);
+ }
+ FnMap.clear();
+}
+
+std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
+ // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
+ // API. We don't use the real module because then the ELFWriter would write
+ // out unnecessary GlobalValues during finalization.
+ LLVMContext Context;
+ Module M("", Context);
+
+ // Make a buffer for the ELF in memory.
+ std::string Buffer;
+ raw_string_ostream O(Buffer);
+ ELFWriter EW(O, TM);
+ EW.doInitialization(M);
+
+ // Copy the binary into the .text section. This isn't necessary, but it's
+ // useful to be able to disassemble the ELF by hand.
+ ELFSection &Text = EW.getTextSection(const_cast<Function *>(F));
+ Text.Addr = (uint64_t)I.FnStart;
+ // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+ // instead of a vector.
+ Text.getData().assign(I.FnStart, I.FnEnd);
+
+ // Copy the exception handling call frame information into the .eh_frame
+ // section. This allows GDB to get a good stack trace, particularly on
+ // linux x86_64. Mark this as a PROGBITS section that needs to be loaded
+ // into memory at runtime.
+ ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
+ // Pointers in the DWARF EH info are all relative to the EH frame start,
+ // which is stored here.
+ EH.Addr = (uint64_t)I.EhStart;
+ // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+ // instead of a vector.
+ EH.getData().assign(I.EhStart, I.EhEnd);
+
+ // Add this single function to the symbol table, so the debugger prints the
+ // name instead of '???'. We give the symbol default global visibility.
+ ELFSym *FnSym = ELFSym::getGV(F,
+ ELF::STB_GLOBAL,
+ ELF::STT_FUNC,
+ ELF::STV_DEFAULT);
+ FnSym->SectionIdx = Text.SectionIdx;
+ FnSym->Size = I.FnEnd - I.FnStart;
+ FnSym->Value = 0; // Offset from start of section.
+ EW.SymbolList.push_back(FnSym);
+
+ EW.doFinalization(M);
+ O.flush();
+
+ // When trying to debug why GDB isn't getting the debug info right, it's
+ // awfully helpful to write the object file to disk so that it can be
+ // inspected with readelf and objdump.
+ if (JITEmitDebugInfoToDisk) {
+ std::string Filename;
+ raw_string_ostream O2(Filename);
+ O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
+ O2.flush();
+ std::string Errors;
+ raw_fd_ostream O3(Filename.c_str(), Errors);
+ O3 << Buffer;
+ O3.close();
+ }
+
+ return Buffer;
+}
+
+void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
+ // TODO: Support non-ELF platforms.
+ if (!TM.getELFWriterInfo())
+ return;
+
+ std::string Buffer = MakeELF(F, I);
+
+ jit_code_entry *JITCodeEntry = new jit_code_entry();
+ JITCodeEntry->symfile_addr = Buffer.c_str();
+ JITCodeEntry->symfile_size = Buffer.size();
+
+ // Add a mapping from F to the entry and buffer, so we can delete this
+ // info later.
+ FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
+
+ // Acquire the lock and do the registration.
+ {
+ MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+
+ // Insert this entry at the head of the list.
+ JITCodeEntry->prev_entry = NULL;
+ jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
+ JITCodeEntry->next_entry = NextEntry;
+ if (NextEntry != NULL) {
+ NextEntry->prev_entry = JITCodeEntry;
+ }
+ __jit_debug_descriptor.first_entry = JITCodeEntry;
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+}
+
+void JITDebugRegisterer::UnregisterFunctionInternal(
+ RegisteredFunctionsMap::iterator I) {
+ jit_code_entry *&JITCodeEntry = I->second.second;
+
+ // Acquire the lock and do the unregistration.
+ {
+ MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
+
+ // Remove the jit_code_entry from the linked list.
+ jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
+ jit_code_entry *NextEntry = JITCodeEntry->next_entry;
+ if (NextEntry) {
+ NextEntry->prev_entry = PrevEntry;
+ }
+ if (PrevEntry) {
+ PrevEntry->next_entry = NextEntry;
+ } else {
+ assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
+ __jit_debug_descriptor.first_entry = NextEntry;
+ }
+
+ // Tell GDB which entry we removed, and unregister the code.
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+
+ delete JITCodeEntry;
+ JITCodeEntry = NULL;
+
+ // Free the ELF file in memory.
+ std::string &Buffer = I->second.first;
+ Buffer.clear();
+}
+
+void JITDebugRegisterer::UnregisterFunction(const Function *F) {
+ // TODO: Support non-ELF platforms.
+ if (!TM.getELFWriterInfo())
+ return;
+
+ RegisteredFunctionsMap::iterator I = FnMap.find(F);
+ if (I == FnMap.end()) return;
+ UnregisterFunctionInternal(I);
+ FnMap.erase(I);
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
new file mode 100644
index 000000000000..dce506bbfefd
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -0,0 +1,116 @@
+//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+ typedef enum {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN
+ } jit_actions_t;
+
+ struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+ };
+
+ struct jit_descriptor {
+ uint32_t version;
+ // This should be jit_actions_t, but we want to be specific about the
+ // bit-width.
+ uint32_t action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+ };
+
+}
+
+namespace llvm {
+
+class ELFSection;
+class Function;
+class TargetMachine;
+
+
+/// This class encapsulates information we want to send to the debugger.
+///
+struct DebugInfo {
+ uint8_t *FnStart;
+ uint8_t *FnEnd;
+ uint8_t *EhStart;
+ uint8_t *EhEnd;
+
+ DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
+};
+
+typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
+ RegisteredFunctionsMap;
+
+/// This class registers debug info for JITed code with an attached debugger.
+/// Without proper debug info, GDB can't do things like source level debugging
+/// or even produce a proper stack trace on linux-x86_64. To use this class,
+/// whenever a function is JITed, create a DebugInfo struct and pass it to the
+/// RegisterFunction method. The method will then do whatever is necessary to
+/// inform the debugger about the JITed function.
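+///
+/// A minimal usage sketch (illustrative only; in-tree, the JIT's emitter is
+/// expected to perform these steps automatically):
+///
+///   JITDebugRegisterer DR(TM);                // TM: the active TargetMachine
+///   DebugInfo DI;
+///   DI.FnStart = FnStart; DI.FnEnd = FnEnd;   // bounds of the machine code
+///   DI.EhStart = EhStart; DI.EhEnd = EhEnd;   // bounds of the .eh_frame data
+///   DR.RegisterFunction(F, DI);               // F: the JITed llvm::Function*
+///   ...
+///   DR.UnregisterFunction(F);                 // before freeing the code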
+class JITDebugRegisterer {
+
+ TargetMachine &TM;
+
+ /// FnMap - A map of functions that have been registered to the associated
+ /// temporary files. Used for cleanup.
+ RegisteredFunctionsMap FnMap;
+
+ /// MakeELF - Builds the ELF file in memory and returns a std::string that
+ /// contains the ELF.
+ std::string MakeELF(const Function *F, DebugInfo &I);
+
+public:
+ JITDebugRegisterer(TargetMachine &tm);
+
+ /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
+ ///
+ ~JITDebugRegisterer();
+
+ /// RegisterFunction - Register debug info for the given function with an
+ /// attached debugger. Clients must call UnregisterFunction on all
+ /// registered functions before deleting them to free the associated symbol
+ /// file and unregister it from the debugger.
+ void RegisterFunction(const Function *F, DebugInfo &I);
+
+ /// UnregisterFunction - Unregister the debug info for the given function
+ /// from the debugger and free associated memory.
+ void UnregisterFunction(const Function *F);
+
+private:
+ /// UnregisterFunctionInternal - Unregister the debug info for the given
+ /// function from the debugger and delete any temporary files. The private
+ /// version of this method does not remove the function from FnMap so that it
+ /// can be called while iterating over FnMap.
+ void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
new file mode 100644
index 000000000000..ddb0d5478596
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -0,0 +1,598 @@
+//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}
+
+
+unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& jce,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr) {
+ assert(MMI && "MachineModuleInfo not registered!");
+
+ const TargetMachine& TM = F.getTarget();
+ TD = TM.getTargetData();
+ stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
+ RI = TM.getRegisterInfo();
+ TFI = TM.getFrameLowering();
+ JCE = &jce;
+
+ unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
+ EndFunction);
+
+ unsigned char* Result = 0;
+
+ const std::vector<const Function *> Personalities = MMI->getPersonalities();
+ EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
+
+ Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
+ StartFunction, EndFunction, ExceptionTable);
+
+ return Result;
+}
+
+
+void
+JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+ PointerSize : -PointerSize;
+ MCSymbol *BaseLabel = 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ MCSymbol *Label = Move.getLabel();
+
+ // Throw out move if the label is invalid.
+ if (Label && (*JCE->getLabelLocations())[Label] == 0)
+ continue;
+
+ intptr_t LabelPtr = 0;
+ if (Label) LabelPtr = JCE->getLabelAddress(Label);
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabelPtr && Label && BaseLabel != Label) {
+ JCE->emitByte(dwarf::DW_CFA_advance_loc4);
+ JCE->emitInt32(LabelPtr - BaseLabelPtr);
+
+ BaseLabel = Label;
+ BaseLabelPtr = LabelPtr;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
+ }
+
+ JCE->emitULEB128Bytes(-Src.getOffset());
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_register);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended_sf);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitSLEB128Bytes(Offset);
+ } else if (Reg < 64) {
+ JCE->emitByte(dwarf::DW_CFA_offset + Reg);
+ JCE->emitULEB128Bytes(Offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitULEB128Bytes(Offset);
+ }
+ }
+ }
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
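+/// For example (illustrative): TypeIds {1, 2, 3} and {1, 2, 4} share their
+/// first two entries, so this returns 2; identical lists return their common
+/// length.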
+static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+
+/// PadLT - Order landing pads lexicographically by type id.
+static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+namespace {
+
+/// ActionEntry - Structure describing an entry in the actions table.
+struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+};
+
+/// PadRange - Structure holding a try-range and the associated landing pad.
+struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+};
+
+typedef DenseMap<MCSymbol*, PadRange> RangeMapType;
+
+/// CallSiteEntry - Structure describing an entry in the call-site table.
+struct CallSiteEntry {
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+};
+
+}
+
+unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const {
+ assert(MMI && "MachineModuleInfo not registered!");
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads(JCE->getLabelLocations());
+
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return 0;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type
+ // id itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries
+ // are written using a variable width encoding which outputs one byte per
+ // entry as long as the value written is not too large, but can differ.
+ // This kind of complication does not occur for positive type ids because
+ // type infos are output using a fixed width encoding.
+ // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
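+ // Worked example (illustrative): with FilterIds = {5, 300}, the first id
+ // encodes in one ULEB128 byte and the second in two, so FilterOffsets ends
+ // up as {-1, -2} and the running Offset finishes at -4.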
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to the total size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. Entries must be ordered by address.
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ bool MayThrow = false;
+ MCSymbol *LastLabel = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ MayThrow |= MI->getDesc().isCall();
+ continue;
+ }
+
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ assert(BeginLabel && "Invalid label!");
+
+ if (BeginLabel == LastLabel)
+ MayThrow = false;
+
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+
+ if (L == PadMap.end())
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
+
+ assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
+ "Invalid landing pad!");
+
+ // Try to merge with the previous call-site.
+ if (CallSites.size()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ }
+ }
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+ unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
+ sizeof(int32_t) + // Site length.
+ sizeof(int32_t)); // Landing pad.
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ // Call-site table length
+ MCAsmInfo::getULEB128Size(SizeSites) +
+ SizeSites + SizeActions + SizeTypes;
+
+ // Begin the exception table.
+ JCE->emitAlignmentWithFill(4, 0);
+ // Asm->EOL("Padding");
+
+ unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue();
+
+ // Emit the header.
+ JCE->emitByte(dwarf::DW_EH_PE_omit);
+ // Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ JCE->emitByte(dwarf::DW_EH_PE_absptr);
+ // Asm->EOL("TType format (DW_EH_PE_absptr)");
+ JCE->emitULEB128Bytes(TypeOffset);
+ // Asm->EOL("TType base offset");
+ JCE->emitByte(dwarf::DW_EH_PE_udata4);
+ // Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ JCE->emitULEB128Bytes(SizeSites);
+ // Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+ intptr_t BeginLabelPtr = 0;
+ intptr_t EndLabelPtr = 0;
+
+ if (!S.BeginLabel) {
+ BeginLabelPtr = (intptr_t)StartFunction;
+ JCE->emitInt32(0);
+ } else {
+ BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel);
+ JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction);
+ }
+
+ // Asm->EOL("Region start");
+
+ if (!S.EndLabel)
+ EndLabelPtr = (intptr_t)EndFunction;
+ else
+ EndLabelPtr = JCE->getLabelAddress(S.EndLabel);
+
+ JCE->emitInt32(EndLabelPtr - BeginLabelPtr);
+ //Asm->EOL("Region length");
+
+ if (!S.PadLabel) {
+ JCE->emitInt32(0);
+ } else {
+ unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel);
+ JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction);
+ }
+ // Asm->EOL("Landing pad");
+
+ JCE->emitULEB128Bytes(S.Action);
+ // Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ JCE->emitSLEB128Bytes(Action.ValueForTypeID);
+ //Asm->EOL("TypeInfo index");
+ JCE->emitSLEB128Bytes(Action.NextAction);
+ //Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ const GlobalVariable *GV = TypeInfos[M - 1];
+
+ if (GV) {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ else
+ JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ } else {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32(0);
+ else
+ JCE->emitInt64(0);
+ }
+ // Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ JCE->emitULEB128Bytes(TypeID);
+ //Asm->EOL("Filter TypeInfo index");
+ }
+
+ JCE->emitAlignmentWithFill(4, 0);
+
+ return DwarfExceptionTable;
+}
+
+unsigned char*
+JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+ PointerSize : -PointerSize;
+
+ unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // EH Common Frame header
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->emitInt32((int)0);
+ JCE->emitByte(dwarf::DW_CIE_VERSION);
+ JCE->emitString(Personality ? "zPLR" : "zR");
+ JCE->emitULEB128Bytes(1);
+ JCE->emitSLEB128Bytes(stackGrowth);
+ JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+ if (Personality) {
+ // Augmentation Size: 3 small ULEBs of one byte each, plus the personality
+ // function pointer, whose size is PointerSize.
+ JCE->emitULEB128Bytes(3 + PointerSize);
+
+ // We set the encoding of the personality as direct encoding because we use
+ // the function pointer. The encoding is not relative because the current
+ // PC value may be bigger than the personality function pointer.
+ if (PointerSize == 4) {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata4);
+ JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ } else {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata8);
+ JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ }
+
+ // LSDA encoding: This must match the encoding used in EmitEHFrame ()
+ if (PointerSize == 4)
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ else
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ } else {
+ JCE->emitULEB128Bytes(1);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ }
+
+ std::vector<MachineMove> Moves;
+ TFI->getInitialFrameState(Moves);
+ EmitFrameMoves(0, Moves);
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ FrameCommonBeginPtr));
+
+ return StartCommonPtr;
+}
+
+
+unsigned char*
+JITDwarfEmitter::EmitEHFrame(const Function* Personality,
+ unsigned char* StartCommonPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const {
+ unsigned PointerSize = TD->getPointerSize();
+
+ // EH frame header.
+ unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // FDE CIE Offset
+ JCE->emitInt32(FrameBeginPtr - StartCommonPtr);
+ JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue());
+ JCE->emitInt32(EndFunction - StartFunction);
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (Personality) {
+ JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);
+
+ if (PointerSize == 4) {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt32((int)0);
+ } else {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt64((int)0);
+ }
+ } else {
+ JCE->emitULEB128Bytes(0);
+ }
+
+ // Indicate locations of function specific callee saved registers in
+ // frame.
+ EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ // Indicate the size of the table
+ JCE->emitInt32At((uintptr_t*)StartEHPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ StartEHPtr));
+
+ // Double zeroes for the unwind runtime
+ if (PointerSize == 8) {
+ JCE->emitInt64(0);
+ JCE->emitInt64(0);
+ } else {
+ JCE->emitInt32(0);
+ JCE->emitInt32(0);
+ }
+
+ return StartEHPtr;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
new file mode 100644
index 000000000000..e1d00454d8d2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -0,0 +1,73 @@
+//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+
+namespace llvm {
+
+class Function;
+class JITCodeEmitter;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineMove;
+class TargetData;
+class TargetFrameLowering;
+class TargetMachine;
+class TargetRegisterInfo;
+
+class JITDwarfEmitter {
+ const TargetData* TD;
+ JITCodeEmitter* JCE;
+ const TargetRegisterInfo* RI;
+ const TargetFrameLowering *TFI;
+ MachineModuleInfo* MMI;
+ JIT& Jit;
+ bool stackGrowthDirection;
+
+ unsigned char* EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const;
+
+ void EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const;
+
+ unsigned char* EmitCommonEHFrame(const Function* Personality) const;
+
+ unsigned char* EmitEHFrame(const Function* Personality,
+ unsigned char* StartBufferPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const;
+
+public:
+
+ JITDwarfEmitter(JIT& jit);
+
+ unsigned char* EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& JCE,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr);
+
+
+ void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ }
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
new file mode 100644
index 000000000000..d046b8aea641
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -0,0 +1,1308 @@
+//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineCodeEmitter object that is used by the JIT to
+// write machine code to memory and remember where relocatable values are.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "JIT.h"
+#include "JITDebugRegisterer.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
+#include <algorithm>
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumBytes, "Number of bytes of machine code compiled");
+STATISTIC(NumRelos, "Number of relocations applied");
+STATISTIC(NumRetries, "Number of retries with more memory");
+
+
+// A declaration may stop being a declaration once it's fully read from bitcode.
+// This function returns true if F is fully read and is still a declaration.
+static bool isNonGhostDeclaration(const Function *F) {
+ return F->isDeclaration() && !F->isMaterializable();
+}
+
+//===----------------------------------------------------------------------===//
+// JIT lazy compilation code.
+//
+namespace {
+ class JITEmitter;
+ class JITResolverState;
+
+ template<typename ValueTy>
+ struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
+ typedef JITResolverState *ExtraData;
+ static void onRAUW(JITResolverState *, Value *Old, Value *New) {
+ assert(false && "The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+ }
+ };
+
+ struct CallSiteValueMapConfig : public NoRAUWValueMapConfig<Function*> {
+ typedef JITResolverState *ExtraData;
+ static void onDelete(JITResolverState *JRS, Function *F);
+ };
+
+ class JITResolverState {
+ public:
+ typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
+ FunctionToLazyStubMapTy;
+ typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
+ typedef ValueMap<Function *, SmallPtrSet<void*, 1>,
+ CallSiteValueMapConfig> FunctionToCallSitesMapTy;
+ typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
+ private:
+ /// FunctionToLazyStubMap - Keep track of the lazy stub created for a
+ /// particular function so that we can reuse them if necessary.
+ FunctionToLazyStubMapTy FunctionToLazyStubMap;
+
+ /// CallSiteToFunctionMap - Keep track of the function that each lazy call
+ /// site corresponds to, and vice versa.
+ CallSiteToFunctionMapTy CallSiteToFunctionMap;
+ FunctionToCallSitesMapTy FunctionToCallSitesMap;
+
+ /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a
+ /// particular GlobalVariable so that we can reuse them if necessary.
+ GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
+
+ /// Instance of the JIT this ResolverState serves.
+ JIT *TheJIT;
+
+ public:
+ JITResolverState(JIT *jit) : FunctionToLazyStubMap(this),
+ FunctionToCallSitesMap(this),
+ TheJIT(jit) {}
+
+ FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
+ const MutexGuard& locked) {
+ assert(locked.holds(TheJIT->lock));
+ return FunctionToLazyStubMap;
+ }
+
+ GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) {
+ assert(lck.holds(TheJIT->lock));
+ return GlobalToIndirectSymMap;
+ }
+
+ std::pair<void *, Function *> LookupFunctionFromCallSite(
+ const MutexGuard &locked, void *CallSite) const {
+ assert(locked.holds(TheJIT->lock));
+
+ // The address given to us for the stub may not be exactly right; it
+ // might be a little bit after the stub. As such, use upper_bound to
+ // find it.
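+ // Worked example (illustrative): if call sites were recorded at 0x1000 and
+ // 0x2000 and CallSite is 0x1006, upper_bound returns the 0x2000 entry;
+ // stepping back one entry yields the 0x1000 record that owns this call site.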
+ CallSiteToFunctionMapTy::const_iterator I =
+ CallSiteToFunctionMap.upper_bound(CallSite);
+ assert(I != CallSiteToFunctionMap.begin() &&
+ "This is not a known call site!");
+ --I;
+ return *I;
+ }
+
+ void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) {
+ assert(locked.holds(TheJIT->lock));
+
+ bool Inserted = CallSiteToFunctionMap.insert(
+ std::make_pair(CallSite, F)).second;
+ (void)Inserted;
+ assert(Inserted && "Pair was already in CallSiteToFunctionMap");
+ FunctionToCallSitesMap[F].insert(CallSite);
+ }
+
+ void EraseAllCallSitesForPrelocked(Function *F);
+
+ // Erases _all_ call sites regardless of their function. This is used to
+ // unregister the stub addresses from the StubToResolverMap in
+ // ~JITResolver().
+ void EraseAllCallSitesPrelocked();
+ };
+
+ /// JITResolver - Keep track of, and resolve, call sites for functions that
+ /// have not yet been compiled.
+ class JITResolver {
+ typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy;
+ typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy;
+ typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
+
+ /// LazyResolverFn - The target lazy resolver function that we actually
+ /// rewrite instructions to use.
+ TargetJITInfo::LazyResolverFn LazyResolverFn;
+
+ JITResolverState state;
+
+ /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap
+ /// for external functions. TODO: Of course, external functions don't need
+ /// a lazy stub. It's actually here to make it more likely that far calls
+ /// succeed, but no single stub can guarantee that. I'll remove this in a
+ /// subsequent checkin when I actually fix far calls.
+ std::map<void*, void*> ExternalFnToStubMap;
+
+ /// revGOTMap - map addresses to indexes in the GOT
+ std::map<void*, unsigned> revGOTMap;
+ unsigned nextGOTIndex;
+
+ JITEmitter &JE;
+
+ /// Instance of JIT corresponding to this Resolver.
+ JIT *TheJIT;
+
+ public:
+ explicit JITResolver(JIT &jit, JITEmitter &je)
+ : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) {
+ LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
+ }
+
+ ~JITResolver();
+
+ /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
+ /// lazy-compilation stub if it has already been created.
+ void *getLazyFunctionStubIfAvailable(Function *F);
+
+ /// getLazyFunctionStub - This returns a pointer to a function's
+ /// lazy-compilation stub, creating one on demand as needed.
+ void *getLazyFunctionStub(Function *F);
+
+ /// getExternalFunctionStub - Return a stub for the function at the
+ /// specified address, created lazily on demand.
+ void *getExternalFunctionStub(void *FnAddr);
+
+ /// getGlobalValueIndirectSym - Return an indirect symbol containing the
+ /// specified GV address.
+ void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
+
+ /// getGOTIndexForAddress - Return a new or existing index in the GOT for
+ /// an address. This function only manages slots, it does not manage the
+ /// contents of the slots or the memory associated with the GOT.
+ unsigned getGOTIndexForAddr(void *addr);
+
+ /// JITCompilerFn - This function is called to resolve a stub to a compiled
+ /// address. If the LLVM Function corresponding to the stub has not yet
+ /// been compiled, this function compiles it first.
+ static void *JITCompilerFn(void *Stub);
+ };
+
+ class StubToResolverMapTy {
+ /// Map a stub address to a specific instance of a JITResolver so that
+ /// lazily-compiled functions can find the right resolver to use.
+ ///
+ /// Guarded by Lock.
+ std::map<void*, JITResolver*> Map;
+
+ /// Guards Map from concurrent accesses.
+ mutable sys::Mutex Lock;
+
+ public:
+ /// Registers a Stub to be resolved by Resolver.
+ void RegisterStubResolver(void *Stub, JITResolver *Resolver) {
+ MutexGuard guard(Lock);
+ Map.insert(std::make_pair(Stub, Resolver));
+ }
+ /// Unregisters the Stub when it's invalidated.
+ void UnregisterStubResolver(void *Stub) {
+ MutexGuard guard(Lock);
+ Map.erase(Stub);
+ }
+ /// Returns the JITResolver instance that owns the Stub.
+ JITResolver *getResolverFromStub(void *Stub) const {
+ MutexGuard guard(Lock);
+ // The address given to us for the stub may not be exactly right; it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ // This is the same trick as in LookupFunctionFromCallSite from
+ // JITResolverState.
+ std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub);
+ assert(I != Map.begin() && "This is not a known stub!");
+ --I;
+ return I->second;
+ }
+ /// True if any stubs refer to the given resolver. Only used in an assert().
+ /// O(N)
+ bool ResolverHasStubs(JITResolver* Resolver) const {
+ MutexGuard guard(Lock);
+ for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+ if (I->second == Resolver)
+ return true;
+ }
+ return false;
+ }
+ };
+ /// This needs to be static so that a lazy call stub can access it with no
+ /// context except the address of the stub.
+ ManagedStatic<StubToResolverMapTy> StubToResolverMap;
+
+ /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+ /// used to output functions to memory for execution.
+ class JITEmitter : public JITCodeEmitter {
+ JITMemoryManager *MemMgr;
+
+ // When outputting a function stub in the context of some other function, we
+ // save BufferBegin/BufferEnd/CurBufferPtr here.
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+ // When reattempting to JIT a function after running out of space, we store
+ // the estimated size of the function we're trying to JIT here, so we can
+ // ask the memory manager for at least this much space. When we
+ // successfully emit the function, we reset this back to zero.
+ uintptr_t SizeEstimate;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// ConstantPool - The constant pool for the current function.
+ ///
+ MachineConstantPool *ConstantPool;
+
+ /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+ ///
+ void *ConstantPoolBase;
+
+ /// ConstPoolAddresses - Addresses of individual constant pool entries.
+ ///
+ SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+ /// JumpTable - The jump tables for the current function.
+ ///
+ MachineJumpTableInfo *JumpTable;
+
+ /// JumpTableBase - A pointer to the first entry in the jump table.
+ ///
+ void *JumpTableBase;
+
+ /// Resolver - This contains info about the currently resolved functions.
+ JITResolver Resolver;
+
+ /// DE - The dwarf emitter for the jit.
+ OwningPtr<JITDwarfEmitter> DE;
+
+ /// DR - The debug registerer for the jit.
+ OwningPtr<JITDebugRegisterer> DR;
+
+ /// LabelLocations - This vector is a mapping from Label ID's to their
+ /// address.
+ DenseMap<MCSymbol*, uintptr_t> LabelLocations;
+
+ /// MMI - Machine module info for exception information.
+ MachineModuleInfo* MMI;
+
+ // CurFn - The llvm function being emitted. Only valid during
+ // finishFunction().
+ const Function *CurFn;
+
+ /// Information about emitted code, which is passed to the
+ /// JITEventListeners. This is reset in startFunction and used in
+ /// finishFunction.
+ JITEvent_EmittedFunctionDetails EmissionDetails;
+
+ struct EmittedCode {
+ void *FunctionBody; // Beginning of the function's allocation.
+ void *Code; // The address the function's code actually starts at.
+ void *ExceptionTable;
+ EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ };
+ struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+ typedef JITEmitter *ExtraData;
+ static void onDelete(JITEmitter *, const Function*);
+ static void onRAUW(JITEmitter *, const Function*, const Function*);
+ };
+ ValueMap<const Function *, EmittedCode,
+ EmittedFunctionConfig> EmittedFunctions;
+
+ DebugLoc PrevDL;
+
+ /// Instance of the JIT
+ JIT *TheJIT;
+
+ public:
+ JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+ : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+ EmittedFunctions(this), TheJIT(&jit) {
+ MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+ if (jit.getJITInfo().needsGOT()) {
+ MemMgr->AllocateGOT();
+ DEBUG(dbgs() << "JIT is managing a GOT\n");
+ }
+
+ if (JITExceptionHandling || JITEmitDebugInfo) {
+ DE.reset(new JITDwarfEmitter(jit));
+ }
+ if (JITEmitDebugInfo) {
+ DR.reset(new JITDebugRegisterer(TM));
+ }
+ }
+ ~JITEmitter() {
+ delete MemMgr;
+ }
+
+ /// classof - Methods for support type inquiry through isa, cast, and
+ /// dyn_cast:
+ ///
+ static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+ JITResolver &getJITResolver() { return Resolver; }
+
+ virtual void startFunction(MachineFunction &F);
+ virtual bool finishFunction(MachineFunction &F);
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+ void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+ void startGVStub(const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment = 1);
+ void startGVStub(void *Buffer, unsigned StubSize);
+ void finishGVStub();
+ virtual void *allocIndirectGV(const GlobalValue *GV,
+ const uint8_t *Buffer, size_t Size,
+ unsigned Alignment);
+
+ /// allocateSpace - Reserves space in the current block if any, or
+ /// allocates a new one of the given size.
+ virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ if (MBB->hasAddressTaken())
+ TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(),
+ (void*)getCurrentPCValue());
+ DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n");
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+ /// given function. Increase the minimum allocation size so that we get
+ /// more memory next time.
+ void retryWithMoreMemory(MachineFunction &F);
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(const Function *F);
+
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
+ virtual void emitLabel(MCSymbol *Label) {
+ LabelLocations[Label] = getCurrentPCValue();
+ }
+
+ virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() {
+ return &LabelLocations;
+ }
+
+ virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+ assert(LabelLocations.count(Label) && "Label not emitted!");
+ return LabelLocations.find(Label)->second;
+ }
+
+ virtual void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ if (DE.get()) DE->setModuleInfo(Info);
+ }
+
+ private:
+ void *getPointerToGlobal(GlobalValue *GV, void *Reference,
+ bool MayNeedFarStub);
+ void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
+ };
+}
+
+void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
+ JRS->EraseAllCallSitesForPrelocked(F);
+}
+
+void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
+ FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
+ if (F2C == FunctionToCallSitesMap.end())
+ return;
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
+ E = F2C->second.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(*I);
+ bool Erased = CallSiteToFunctionMap.erase(*I);
+ (void)Erased;
+ assert(Erased && "Missing call site->function mapping");
+ }
+ FunctionToCallSitesMap.erase(F2C);
+}
+
+void JITResolverState::EraseAllCallSitesPrelocked() {
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (CallSiteToFunctionMapTy::const_iterator
+ I = CallSiteToFunctionMap.begin(),
+ E = CallSiteToFunctionMap.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(I->first);
+ }
+ CallSiteToFunctionMap.clear();
+ FunctionToCallSitesMap.clear();
+}
+
+JITResolver::~JITResolver() {
+ // No need to lock because we're in the destructor, and state isn't shared.
+ state.EraseAllCallSitesPrelocked();
+ assert(!StubToResolverMap->ResolverHasStubs(this) &&
+ "Resolver destroyed with stubs still alive.");
+}
+
+/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
+/// if it has already been created.
+void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this function, recycle it.
+ return state.getFunctionToLazyStubMap(locked).lookup(F);
+}
+
+/// getLazyFunctionStub - This returns a pointer to a function's
+/// lazy-compilation stub, creating one on demand as needed.
+void *JITResolver::getLazyFunctionStub(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a lazy stub for this function, recycle it.
+ void *&Stub = state.getFunctionToLazyStubMap(locked)[F];
+ if (Stub) return Stub;
+
+ // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
+ // must resolve the symbol now.
+ void *Actual = TheJIT->isCompilingLazily()
+ ? (void *)(intptr_t)LazyResolverFn : (void *)0;
+
+ // If this is an external declaration, attempt to resolve the address now
+ // to place in the stub.
+ if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) {
+ Actual = TheJIT->getPointerToFunction(F);
+
+ // If we resolved the symbol to a null address (e.g. a weak external),
+ // don't emit a stub. Return a null pointer to the application.
+ if (!Actual) return 0;
+ }
+
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(F, SL.Size, SL.Alignment);
+ // Codegen a new stub, calling the lazy resolver or the actual address of the
+ // external function, if it was resolved.
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
+ JE.finishGVStub();
+
+ if (Actual != (void*)(intptr_t)LazyResolverFn) {
+ // If we are getting the stub for an external function, we really want the
+ // address of the stub in the GlobalAddressMap for the JIT, not the address
+ // of the external function.
+ TheJIT->updateGlobalMapping(F, Stub);
+ }
+
+ DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
+ << F->getName() << "'\n");
+
+ if (TheJIT->isCompilingLazily()) {
+ // Register this JITResolver as the one corresponding to this call site so
+ // JITCompilerFn will be able to find it.
+ StubToResolverMap->RegisterStubResolver(Stub, this);
+
+ // Finally, keep track of the stub-to-Function mapping so that the
+ // JITCompilerFn knows which function to compile!
+ state.AddCallSite(locked, Stub, F);
+ } else if (!Actual) {
+ // If we are JIT'ing non-lazily but need to call a function that does not
+ // exist yet, add it to the JIT's work list so that we can fill in the
+ // stub address later.
+ assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
+ "'Actual' should have been set above.");
+ TheJIT->addPendingFunction(F);
+ }
+
+ return Stub;
+}
+
+/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified
+/// GV address.
+void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this global variable, recycle it.
+ void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV];
+ if (IndirectSym) return IndirectSym;
+
+ // Otherwise, codegen a new indirect symbol.
+ IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
+ JE);
+
+ DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ << "] for GV '" << GV->getName() << "'\n");
+
+ return IndirectSym;
+}
+
+/// getExternalFunctionStub - Return a stub for the function at the
+/// specified address, created lazily on demand.
+void *JITResolver::getExternalFunctionStub(void *FnAddr) {
+ // If we already have a stub for this function, recycle it.
+ void *&Stub = ExternalFnToStubMap[FnAddr];
+ if (Stub) return Stub;
+
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(0, SL.Size, SL.Alignment);
+ Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
+ JE.finishGVStub();
+
+ DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
+ << "] for external function at '" << FnAddr << "'\n");
+ return Stub;
+}
+
+unsigned JITResolver::getGOTIndexForAddr(void* addr) {
+ unsigned idx = revGOTMap[addr];
+ if (!idx) {
+ idx = ++nextGOTIndex;
+ revGOTMap[addr] = idx;
+ DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr ["
+ << addr << "]\n");
+ }
+ return idx;
+}
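+// Worked example (hypothetical addresses): the first address asked about gets
+// GOT slot 1, the second gets slot 2, and asking again for either address
+// returns the same slot, because revGOTMap remembers the assignment. Slot 0 is
+// never handed out, which is what lets the "!idx" test above stand in for
+// "not seen before".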
+
+/// JITCompilerFn - This function is called when a lazy compilation stub has
+/// been entered. It looks up which function this stub corresponds to, compiles
+/// it if necessary, then returns the resultant function pointer.
+void *JITResolver::JITCompilerFn(void *Stub) {
+ JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
+ assert(JR && "Unable to find the corresponding JITResolver to the call site");
+
+ Function* F = 0;
+ void* ActualPtr = 0;
+
+ {
+ // Only lock for getting the Function. The call to getPointerToFunction
+ // made later in this function might trigger function materialization,
+ // which requires the JIT lock to be unlocked.
+ MutexGuard locked(JR->TheJIT->lock);
+
+ // The address given to us for the stub may not be exactly right, it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ std::pair<void*, Function*> I =
+ JR->state.LookupFunctionFromCallSite(locked, Stub);
+ F = I.second;
+ ActualPtr = I.first;
+ }
+
+ // If we have already code-generated the function, just return the address.
+ void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F);
+
+ if (!Result) {
+ // Otherwise we don't have it, do lazy compilation now.
+
+ // If lazy compilation is disabled, emit a useful error message and abort.
+ if (!JR->TheJIT->isCompilingLazily()) {
+ report_fatal_error("LLVM JIT requested to do lazy compilation of"
+ " function '"
+ + F->getName() + "' when lazy compiles are disabled!");
+ }
+
+ DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
+ << "' In stub ptr = " << Stub << " actual ptr = "
+ << ActualPtr << "\n");
+
+ Result = JR->TheJIT->getPointerToFunction(F);
+ }
+
+ // Reacquire the lock to update the GOT map.
+ MutexGuard locked(JR->TheJIT->lock);
+
+ // We might like to remove the call site from the CallSiteToFunction map, but
+ // we can't do that! Multiple threads could be stuck, waiting to acquire the
+ // lock above. As soon as the first thread finishes compiling the function,
+ // the next one will be released, and needs to be able to find the function
+ // it needs to call.
+
+ // FIXME: We could rewrite all references to this stub if we knew them.
+
+ // What we will do is set the compiled function address to map to the
+ // same GOT entry as the stub so that later clients may update the GOT
+ // if they see it still using the stub address.
+ // Note: this is done so the Resolver doesn't have to manage GOT memory.
+ // Do this without allocating map space if the target isn't using a GOT.
+ if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+ JR->revGOTMap[Result] = JR->revGOTMap[Stub];
+
+ return Result;
+}
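+// Rough sketch of the lazy-call path summarized by the code above (the exact
+// stub mechanics are target-specific): the emitted stub for a function foo()
+// jumps into the target's lazy resolver, which ends up in JITCompilerFn with
+// the address inside foo's stub; the stub is mapped back to foo's Function,
+// foo is compiled via getPointerToFunction, and the resulting pointer is what
+// the original caller finally branches to. Later calls that re-enter
+// JITCompilerFn through the same stub skip compilation, since
+// getPointerToGlobalIfAvailable already knows foo's address.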
+
+//===----------------------------------------------------------------------===//
+// JITEmitter code.
+//
+void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
+ bool MayNeedFarStub) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return TheJIT->getOrEmitGlobalVariable(GV);
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return TheJIT->getPointerToGlobal(GA->resolveAliasedGlobal(false));
+
+ // If we have already compiled the function, return a pointer to its body.
+ Function *F = cast<Function>(V);
+
+ void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F);
+ if (FnStub) {
+ // Return the function stub if it's already created. We do this first so
+ // that we're returning the same address for the function as any previous
+ // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
+ // close enough to call.
+ return FnStub;
+ }
+
+ // If we know the target can handle arbitrary-distance calls, try to
+ // return a direct pointer.
+ if (!MayNeedFarStub) {
+ // If we have code, go ahead and return that.
+ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+ if (ResultPtr) return ResultPtr;
+
+ // If this is an external function pointer, we can force the JIT to
+ // 'compile' it, which really just adds it to the map.
+ if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage())
+ return TheJIT->getPointerToFunction(F);
+ }
+
+ // Otherwise, we may need to emit a stub, and, conservatively, we always do
+ // so. Note that it's possible to return null from getLazyFunctionStub in the
+ // case of a weak extern that fails to resolve.
+ return Resolver.getLazyFunctionStub(F);
+}
+
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
+ // Make sure GV is emitted first, and create a stub containing the fully
+ // resolved address.
+ void *GVAddress = getPointerToGlobal(V, Reference, false);
+ void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
+ return StubAddr;
+}
+
+void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
+ if (DL.isUnknown()) return;
+ if (!BeforePrintingInsn) return;
+
+ const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
+
+ if (DL.getScope(Context) != 0 && PrevDL != DL) {
+ JITEvent_EmittedFunctionDetails::LineStart NextLine;
+ NextLine.Address = getCurrentPCValue();
+ NextLine.Loc = DL;
+ EmissionDetails.LineStarts.push_back(NextLine);
+ }
+
+ PrevDL = DL;
+}
+
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+ const TargetData *TD) {
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return 0;
+
+ unsigned Size = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Size = (Size + AlignMask) & ~AlignMask;
+ const Type *Ty = CPE.getType();
+ Size += TD->getTypeAllocSize(Ty);
+ }
+ return Size;
+}
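+// Worked example of the align-then-add loop above (made-up entries): with one
+// 4-byte entry aligned to 4 and one 8-byte entry aligned to 8, Size goes
+// 0 -> 4 after the first entry, is rounded up to 8 by (4 + 7) & ~7, and ends
+// at 16 after the second entry's 8 bytes are added.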
+
+void JITEmitter::startFunction(MachineFunction &F) {
+ DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
+ << F.getFunction()->getName() << "\n");
+
+ uintptr_t ActualSize = 0;
+ // Set the memory writable, if it's not already
+ MemMgr->setMemoryWritable();
+
+ if (SizeEstimate > 0) {
+ // SizeEstimate will be non-zero on reallocation attempts.
+ ActualSize = SizeEstimate;
+ }
+
+ BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin;
+
+ // Ensure the constant pool/jump table info is at least 16-byte aligned.
+ emitAlignment(16);
+
+ emitConstantPool(F.getConstantPool());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ initJumpTableInfo(MJTI);
+
+ // About to start emitting the machine code for the function.
+ emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
+ TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
+ EmittedFunctions[F.getFunction()].Code = CurBufferPtr;
+
+ MBBLocations.clear();
+
+ EmissionDetails.MF = &F;
+ EmissionDetails.LineStarts.clear();
+}
+
+bool JITEmitter::finishFunction(MachineFunction &F) {
+ if (CurBufferPtr == BufferEnd) {
+ // We must call endFunctionBody before retrying, because
+ // deallocateMemForFunction requires it.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+ retryWithMoreMemory(F);
+ return true;
+ }
+
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ emitJumpTableInfo(MJTI);
+
+ // FnStart is the start of the text, not the start of the constant pool and
+ // other per-function data.
+ uint8_t *FnStart =
+ (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
+
+ // FnEnd is the end of the function's machine code.
+ uint8_t *FnEnd = CurBufferPtr;
+
+ if (!Relocations.empty()) {
+ CurFn = F.getFunction();
+ NumRelos += Relocations.size();
+
+ // Resolve the relocations to concrete pointers.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ void *ResultPtr = 0;
+ if (!MR.letTargetResolve()) {
+ if (MR.isExternalSymbol()) {
+ ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
+ false);
+ DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
+ << ResultPtr << "]\n");
+
+ // If the target REALLY wants a stub for this function, emit it now.
+ if (MR.mayNeedFarStub()) {
+ ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
+ }
+ } else if (MR.isGlobalValue()) {
+ ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
+ BufferBegin+MR.getMachineCodeOffset(),
+ MR.mayNeedFarStub());
+ } else if (MR.isIndirectSymbol()) {
+ ResultPtr = getPointerToGVIndirectSym(
+ MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset());
+ } else if (MR.isBasicBlock()) {
+ ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
+ } else if (MR.isConstantPoolIndex()) {
+ ResultPtr =
+ (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ } else {
+ assert(MR.isJumpTableIndex());
+ ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex());
+ }
+
+ MR.setResultPointer(ResultPtr);
+ }
+
+ // If we are managing the GOT and the relocation wants an index, give
+ // it one.
+ if (MR.isGOTRelative() && MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
+ MR.setGOTIndex(idx);
+ if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
+ ((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
+ }
+ }
+ }
+
+ CurFn = 0;
+ TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0],
+ Relocations.size(), MemMgr->getGOTBase());
+ }
+
+ // Update the GOT entry for F to point to the new code.
+ if (MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
+ if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
+ ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
+ }
+ }
+
+ // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for
+ // global variables that were referenced in the relocations.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+
+ if (CurBufferPtr == BufferEnd) {
+ retryWithMoreMemory(F);
+ return true;
+ } else {
+ // Now that we've succeeded in emitting the function, reset the
+ // SizeEstimate back down to zero.
+ SizeEstimate = 0;
+ }
+
+ BufferBegin = CurBufferPtr = 0;
+ NumBytes += FnEnd-FnStart;
+
+ // Invalidate the icache if necessary.
+ sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
+
+ TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
+ EmissionDetails);
+
+ // Reset the previous debug location.
+ PrevDL = DebugLoc();
+
+ DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart
+ << "] Function: " << F.getFunction()->getName()
+ << ": " << (FnEnd-FnStart) << " bytes of text, "
+ << Relocations.size() << " relocations\n");
+
+ Relocations.clear();
+ ConstPoolAddresses.clear();
+
+ // Mark code region readable and executable if it's not so already.
+ MemMgr->setMemoryExecutable();
+
+ DEBUG({
+ if (sys::hasDisassembler()) {
+ dbgs() << "JIT: Disassembled code:\n";
+ dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
+ (uintptr_t)FnStart);
+ } else {
+ dbgs() << "JIT: Binary code:\n";
+ uint8_t* q = FnStart;
+ for (int i = 0; q < FnEnd; q += 4, ++i) {
+ if (i == 4)
+ i = 0;
+ if (i == 0)
+ dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
+ bool Done = false;
+ for (int j = 3; j >= 0; --j) {
+ if (q + j >= FnEnd)
+ Done = true;
+ else
+ dbgs() << (unsigned short)q[j];
+ }
+ if (Done)
+ break;
+ dbgs() << ' ';
+ if (i == 3)
+ dbgs() << '\n';
+ }
+ dbgs()<< '\n';
+ }
+ });
+
+ if (JITExceptionHandling || JITEmitDebugInfo) {
+ uintptr_t ActualSize = 0;
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin;
+ uint8_t *EhStart;
+ uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd,
+ EhStart);
+ MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
+ FrameRegister);
+ uint8_t *EhEnd = CurBufferPtr;
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+
+ if (JITExceptionHandling) {
+ TheJIT->RegisterTable(F.getFunction(), FrameRegister);
+ }
+
+ if (JITEmitDebugInfo) {
+ DebugInfo I;
+ I.FnStart = FnStart;
+ I.FnEnd = FnEnd;
+ I.EhStart = EhStart;
+ I.EhEnd = EhEnd;
+ DR->RegisterFunction(F.getFunction(), I);
+ }
+ }
+
+ if (MMI)
+ MMI->EndFunction();
+
+ return false;
+}
+
+void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
+ DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n");
+ Relocations.clear(); // Clear the old relocations or we'll reapply them.
+ ConstPoolAddresses.clear();
+ ++NumRetries;
+ deallocateMemForFunction(F.getFunction());
+ // Try again with at least twice as much free space.
+ SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
+
+ for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){
+ if (MBB->hasAddressTaken())
+ TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock());
+ }
+}
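+// Example (hypothetical first attempt): if the memory manager handed out a
+// 64 KB buffer and the function overflowed it, SizeEstimate becomes 128 KB,
+// and the next startFunction call asks for at least that much before the
+// function is re-emitted from scratch.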
+
+/// deallocateMemForFunction - Deallocate all memory for the specified
+/// function body. Also drop any references the function has to stubs.
+/// May be called while the Function is being destroyed inside ~Value().
+void JITEmitter::deallocateMemForFunction(const Function *F) {
+ ValueMap<const Function *, EmittedCode, EmittedFunctionConfig>::iterator
+ Emitted = EmittedFunctions.find(F);
+ if (Emitted != EmittedFunctions.end()) {
+ MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody);
+ MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable);
+ TheJIT->NotifyFreeingMachineCode(Emitted->second.Code);
+
+ EmittedFunctions.erase(Emitted);
+ }
+
+ if(JITExceptionHandling) {
+ TheJIT->DeregisterTable(F);
+ }
+
+ if (JITEmitDebugInfo) {
+ DR->UnregisterFunction(F);
+ }
+}
+
+
+void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+ if (BufferBegin)
+ return JITCodeEmitter::allocateSpace(Size, Alignment);
+
+ // Create a new memory block if there is no active one. Care must be
+ // taken so that BufferBegin is invalidated when a block is trimmed.
+ BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
+ BufferEnd = BufferBegin+Size;
+ return CurBufferPtr;
+}
+
+void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ // Delegate this call through the memory manager.
+ return MemMgr->allocateGlobal(Size, Alignment);
+}
+
+void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ if (TheJIT->getJITInfo().hasCustomConstantPool())
+ return;
+
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return;
+
+ unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+ unsigned Align = MCP->getConstantPoolAlignment();
+ ConstantPoolBase = allocateSpace(Size, Align);
+ ConstantPool = MCP;
+
+ if (ConstantPoolBase == 0) return; // Buffer overflow.
+
+ DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
+ << "] (size: " << Size << ", alignment: " << Align << ")\n");
+
+ // Initialize the memory for all of the constant pool entries.
+ unsigned Offset = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Offset = (Offset + AlignMask) & ~AlignMask;
+
+ uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+ ConstPoolAddresses.push_back(CAddr);
+ if (CPE.isMachineConstantPoolEntry()) {
+ // FIXME: add support to lower machine constant pool values into bytes!
+ report_fatal_error("Initialize memory with machine specific constant pool"
+ "entry has not been implemented!");
+ }
+ TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+ DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
+ dbgs().write_hex(CAddr) << "]\n");
+
+ const Type *Ty = CPE.Val.ConstVal->getType();
+ Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
+ }
+}
+
+void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline)
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ unsigned NumEntries = 0;
+ for (unsigned i = 0, e = JT.size(); i != e; ++i)
+ NumEntries += JT[i].MBBs.size();
+
+ unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getTargetData());
+
+ // Just allocate space for all the jump tables now. We will fix up the actual
+ // MBB entries in the tables after we emit the code for each block, since then
+ // we will know the final locations of the MBBs in memory.
+ JumpTable = MJTI;
+ JumpTableBase = allocateSpace(NumEntries * EntrySize,
+ MJTI->getEntryAlignment(*TheJIT->getTargetData()));
+}
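+// Example of the sizing above (made-up tables): two jump tables with 3 and 5
+// targets and 4-byte entries reserve (3 + 5) * 4 = 32 bytes at JumpTableBase;
+// the slots themselves are only filled in later by emitJumpTableInfo, once
+// every MBB's final address is known.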
+
+void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty() || JumpTableBase == 0) return;
+
+
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ return;
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+ // .word LBB123
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) &&
+ "Cross JIT'ing?");
+
+ // For each jump table, map each target in the jump table to the address of
+ // an emitted MachineBasicBlock.
+ intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the address of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+ *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+ }
+ break;
+ }
+
+ case MachineJumpTableInfo::EK_Custom32:
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == 4&&"Cross JIT'ing?");
+ // For each jump table, place the offset from the beginning of the table
+ // to the target address.
+ int *SlotPtr = (int*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the offset of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ uintptr_t Base = (uintptr_t)SlotPtr;
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+ // FIXME: Use EntryKind instead of the magic "getPICJumpTableEntry" hook.
+ *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
+ }
+ }
+ break;
+ }
+ }
+}
+
+void JITEmitter::startGVStub(const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::finishGVStub() {
+ assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space.");
+ NumBytes += getCurrentPCOffset();
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+}
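+// Usage sketch (mirroring getLazyFunctionStub above): startGVStub saves the
+// current BufferBegin/BufferEnd/CurBufferPtr and retargets them at a freshly
+// allocated stub, the target's emitFunctionStub writes into that window, and
+// finishGVStub restores the saved pointers so emission of the surrounding
+// function resumes exactly where it left off.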
+
+void *JITEmitter::allocIndirectGV(const GlobalValue *GV,
+ const uint8_t *Buffer, size_t Size,
+ unsigned Alignment) {
+ uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment);
+ memcpy(IndGV, Buffer, Size);
+ return IndGV;
+}
+
+// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
+// in the constant pool that was last emitted with the 'emitConstantPool'
+// method.
+//
+uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
+ assert(ConstantNum < ConstantPool->getConstants().size() &&
+ "Invalid ConstantPoolIndex!");
+ return ConstPoolAddresses[ConstantNum];
+}
+
+// getJumpTableEntryAddress - Return the address of the jump table with index
+// 'Index' that was last initialized with 'initJumpTableInfo'.
+//
+uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
+ assert(Index < JT.size() && "Invalid jump table index!");
+
+ unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getTargetData());
+
+ unsigned Offset = 0;
+ for (unsigned i = 0; i < Index; ++i)
+ Offset += JT[i].MBBs.size();
+
+ Offset *= EntrySize;
+
+ return (uintptr_t)((char *)JumpTableBase + Offset);
+}
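+// Worked example (made-up table shapes): with two jump tables of 3 and 5
+// entries and a 4-byte entry size, getJumpTableEntryAddress(1) skips the 3
+// entries of table 0, giving an offset of 3 * 4 = 12 bytes past JumpTableBase.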
+
+void JITEmitter::EmittedFunctionConfig::onDelete(
+ JITEmitter *Emitter, const Function *F) {
+ Emitter->deallocateMemForFunction(F);
+}
+void JITEmitter::EmittedFunctionConfig::onRAUW(
+ JITEmitter *, const Function*, const Function*) {
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public interface to this file
+//===----------------------------------------------------------------------===//
+
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
+ TargetMachine &tm) {
+ return new JITEmitter(jit, JMM, tm);
+}
+
+// getPointerToFunctionOrStub - If the specified function has been
+// code-gen'd, return a pointer to the function. If not, compile it, or use
+// a stub to implement lazy compilation if available.
+//
+void *JIT::getPointerToFunctionOrStub(Function *F) {
+ // If we have already code-generated the function, just return the address.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ // Get a stub if the target supports it.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ return JE->getJITResolver().getLazyFunctionStub(F);
+}
+
+void JIT::updateFunctionStub(Function *F) {
+ // Get the empty stub we generated earlier.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr != Stub && "Function must have non-stub address to be updated.");
+
+ // Tell the target jit info to rewrite the stub at the specified address,
+ // rather than creating a new one.
+ TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout();
+ JE->startGVStub(Stub, layout.Size);
+ getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter());
+ JE->finishGVStub();
+}
+
+/// freeMachineCodeForFunction - Release machine code memory for the given Function.
+///
+void JIT::freeMachineCodeForFunction(Function *F) {
+ // Delete translation for this from the ExecutionEngine, so it will get
+ // retranslated next time it is used.
+ updateGlobalMapping(F, 0);
+
+ // Free the actual memory for the function body and related stuff.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
new file mode 100644
index 000000000000..eec23cec0af9
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -0,0 +1,727 @@
+//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <vector>
+#include <cassert>
+#include <climits>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
+
+JITMemoryManager::~JITMemoryManager() {}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MemoryRangeHeader - For a range of memory, this is the header that we put
+ /// on the block of memory. It is carefully crafted to be one word of memory.
+ /// Allocated blocks have just this header, free'd blocks have FreeRangeHeader
+ /// which starts with this.
+ struct FreeRangeHeader;
+ struct MemoryRangeHeader {
+ /// ThisAllocated - This is true if this block is currently allocated. If
+ /// not, this can be converted to a FreeRangeHeader.
+ unsigned ThisAllocated : 1;
+
+ /// PrevAllocated - Keep track of whether the block immediately before us is
+ /// allocated. If not, the word immediately before this header is the size
+ /// of the previous block.
+ unsigned PrevAllocated : 1;
+
+ /// BlockSize - This is the size in bytes of this memory block,
+ /// including this header.
+ uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
+
+
+ /// getBlockAfter - Return the memory block immediately after this one.
+ ///
+ MemoryRangeHeader &getBlockAfter() const {
+ return *(MemoryRangeHeader*)((char*)this+BlockSize);
+ }
+
+ /// getFreeBlockBefore - If the block before this one is free, return it,
+ /// otherwise return null.
+ FreeRangeHeader *getFreeBlockBefore() const {
+ if (PrevAllocated) return 0;
+ intptr_t PrevSize = ((intptr_t *)this)[-1];
+ return (FreeRangeHeader*)((char*)this-PrevSize);
+ }
+
+ /// FreeBlock - Turn an allocated block into a free block, adjusting
+ /// bits in the object headers, and adding an end of region memory block.
+ FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
+
+ /// TrimAllocationToSize - If this allocated block is significantly larger
+ /// than NewSize, split it into two pieces (where the former is NewSize
+ /// bytes, including the header), and add the new block to the free list.
+ FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+ uint64_t NewSize);
+ };
+
+ /// FreeRangeHeader - For a memory block that isn't already allocated, this
+ /// keeps track of the current block and has a pointer to the next free block.
+ /// Free blocks are kept on a circularly linked list.
+ struct FreeRangeHeader : public MemoryRangeHeader {
+ FreeRangeHeader *Prev;
+ FreeRangeHeader *Next;
+
+ /// getMinBlockSize - Get the minimum size for a memory block. Blocks
+ /// smaller than this size cannot be created.
+ static unsigned getMinBlockSize() {
+ return sizeof(FreeRangeHeader)+sizeof(intptr_t);
+ }
+
+ /// SetEndOfBlockSizeMarker - The word at the end of every free block is
+ /// known to be the size of the free block. Set it for this block.
+ void SetEndOfBlockSizeMarker() {
+ void *EndOfBlock = (char*)this + BlockSize;
+ ((intptr_t *)EndOfBlock)[-1] = BlockSize;
+ }
+
+ FreeRangeHeader *RemoveFromFreeList() {
+ assert(Next->Prev == this && Prev->Next == this && "Freelist broken!");
+ Next->Prev = Prev;
+ return Prev->Next = Next;
+ }
+
+ void AddToFreeList(FreeRangeHeader *FreeList) {
+ Next = FreeList;
+ Prev = FreeList->Prev;
+ Prev->Next = this;
+ Next->Prev = this;
+ }
+
+ /// GrowBlock - The block after this block just got deallocated. Merge it
+ /// into the current block.
+ void GrowBlock(uintptr_t NewSize);
+
+ /// AllocateBlock - Mark this entire block allocated, updating freelists
+ /// etc. This returns a pointer to the circular free-list.
+ FreeRangeHeader *AllocateBlock();
+ };
+}
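+// Illustrative layout (hypothetical sizes): an allocated 64-byte block is one
+// header word followed by payload, with BlockSize counting both. Once freed,
+// the block becomes a FreeRangeHeader whose last word repeats the 64-byte
+// size, so the next block's getFreeBlockBefore() can walk backwards to it
+// without any global table; PrevAllocated in each header records whether that
+// trailing size word is valid for the block in front of it.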
+
+
+/// AllocateBlock - Mark this entire block allocated, updating freelists
+/// etc. This returns a pointer to the circular free-list.
+FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
+ assert(!ThisAllocated && !getBlockAfter().PrevAllocated &&
+ "Cannot allocate an allocated block!");
+ // Mark this block allocated.
+ ThisAllocated = 1;
+ getBlockAfter().PrevAllocated = 1;
+
+ // Remove it from the free list.
+ return RemoveFromFreeList();
+}
+
+/// FreeBlock - Turn an allocated block into a free block, adjusting
+/// bits in the object headers, and adding an end of region memory block.
+/// If possible, coalesce this block with neighboring blocks. Return the
+/// FreeRangeHeader to allocate from.
+FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
+ MemoryRangeHeader *FollowingBlock = &getBlockAfter();
+ assert(ThisAllocated && "This block is already free!");
+ assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
+
+ FreeRangeHeader *FreeListToReturn = FreeList;
+
+ // If the block after this one is free, merge it into this block.
+ if (!FollowingBlock->ThisAllocated) {
+ FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
+ // "FreeList" always needs to be a valid free block. If we're about to
+ // coalesce with it, update our notion of what the free list is.
+ if (&FollowingFreeBlock == FreeList) {
+ FreeList = FollowingFreeBlock.Next;
+ FreeListToReturn = 0;
+ assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
+ }
+ FollowingFreeBlock.RemoveFromFreeList();
+
+ // Include the following block into this one.
+ BlockSize += FollowingFreeBlock.BlockSize;
+ FollowingBlock = &FollowingFreeBlock.getBlockAfter();
+
+ // Tell the block after the block we are coalescing that this block is
+ // allocated.
+ FollowingBlock->PrevAllocated = 1;
+ }
+
+ assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
+
+ if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
+ PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
+ return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
+ }
+
+ // Otherwise, mark this block free.
+ FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this;
+ FollowingBlock->PrevAllocated = 0;
+ FreeBlock.ThisAllocated = 0;
+
+ // Link this into the linked list of free blocks.
+ FreeBlock.AddToFreeList(FreeList);
+
+ // Add a marker at the end of the block, indicating the size of this free
+ // block.
+ FreeBlock.SetEndOfBlockSizeMarker();
+ return FreeListToReturn ? FreeListToReturn : &FreeBlock;
+}
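+// Coalescing sketch (hypothetical neighbors): freeing a block whose successor
+// is already free first folds the successor into it; if the predecessor is
+// free as well, the whole range is then absorbed into the predecessor via
+// GrowBlock, so the free list never carries two adjacent free blocks.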
+
+/// GrowBlock - The block after this block just got deallocated. Merge it
+/// into the current block.
+void FreeRangeHeader::GrowBlock(uintptr_t NewSize) {
+ assert(NewSize > BlockSize && "Not growing block?");
+ BlockSize = NewSize;
+ SetEndOfBlockSizeMarker();
+ getBlockAfter().PrevAllocated = 0;
+}
+
+/// TrimAllocationToSize - If this allocated block is significantly larger
+/// than NewSize, split it into two pieces (where the former is NewSize
+/// bytes, including the header), and add the new block to the free list.
+FreeRangeHeader *MemoryRangeHeader::
+TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
+ assert(ThisAllocated && getBlockAfter().PrevAllocated &&
+ "Cannot deallocate part of an allocated block!");
+
+ // Don't allow blocks to be trimmed below minimum required size
+ NewSize = std::max<uint64_t>(FreeRangeHeader::getMinBlockSize(), NewSize);
+
+ // Round up size for alignment of header.
+ unsigned HeaderAlign = __alignof(FreeRangeHeader);
+ NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
+
+ // Size is now the size of the block we will remove from the start of the
+ // current block.
+ assert(NewSize <= BlockSize &&
+ "Allocating more space from this block than exists!");
+
+ // If splitting this block will cause the remainder to be too small, do not
+ // split the block.
+ if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
+ return FreeList;
+
+ // Otherwise, we splice the required number of bytes out of this block, form
+ // a new block immediately after it, then mark this block allocated.
+ MemoryRangeHeader &FormerNextBlock = getBlockAfter();
+
+ // Change the size of this block.
+ BlockSize = NewSize;
+
+ // Get the new block we just sliced out and turn it into a free block.
+ FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
+ NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
+ NewNextBlock.ThisAllocated = 0;
+ NewNextBlock.PrevAllocated = 1;
+ NewNextBlock.SetEndOfBlockSizeMarker();
+ FormerNextBlock.PrevAllocated = 0;
+ NewNextBlock.AddToFreeList(FreeList);
+ return &NewNextBlock;
+}
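+// Worked example of the rounding above (made-up request, assuming an 8-byte
+// FreeRangeHeader alignment): a NewSize of 52 becomes (52 + 7) & ~7 = 56, and
+// the block is split only if its current size exceeds 56 plus the minimum
+// block size; otherwise the caller simply keeps the whole block.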
+
+//===----------------------------------------------------------------------===//
+// JIT Memory Manager Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class DefaultJITMemoryManager;
+
+ class JITSlabAllocator : public SlabAllocator {
+ DefaultJITMemoryManager &JMM;
+ public:
+ JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
+ virtual ~JITSlabAllocator() { }
+ virtual MemSlab *Allocate(size_t Size);
+ virtual void Deallocate(MemSlab *Slab);
+ };
+
+ /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
+ /// This splits a large block of MAP_NORESERVE'd memory into two
+ /// sections, one for function stubs, one for the functions themselves. We
+ /// have to do this because we may need to emit a function stub while in the
+ /// middle of emitting a function, and we don't know how large the function we
+ /// are emitting is.
+ class DefaultJITMemoryManager : public JITMemoryManager {
+
+ // Whether to poison freed memory.
+ bool PoisonMemory;
+
+ /// LastSlab - This points to the last slab allocated and is used as the
+ /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all
+ /// stubs, data, and code contiguously in memory. In general, however, this
+ /// is not possible because the NearBlock parameter is ignored on Windows
+ /// platforms and even on Unix it works on a best-effort basis.
+ sys::MemoryBlock LastSlab;
+
+ // Memory slabs allocated by the JIT. We refer to them as slabs so we don't
+ // confuse them with the blocks of memory described above.
+ std::vector<sys::MemoryBlock> CodeSlabs;
+ JITSlabAllocator BumpSlabAllocator;
+ BumpPtrAllocator StubAllocator;
+ BumpPtrAllocator DataAllocator;
+
+ // Circular list of free blocks.
+ FreeRangeHeader *FreeMemoryList;
+
+ // When emitting code into a memory block, this is the block.
+ MemoryRangeHeader *CurBlock;
+
+ uint8_t *GOTBase; // Target Specific reserved memory
+ public:
+ DefaultJITMemoryManager();
+ ~DefaultJITMemoryManager();
+
+ /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the
+ /// last slab it allocated, so that subsequent allocations follow it.
+ sys::MemoryBlock allocateNewSlab(size_t size);
+
+ /// DefaultCodeSlabSize - When we have to go map more memory, we allocate at
+ /// least this much unless more is requested.
+ static const size_t DefaultCodeSlabSize;
+
+ /// DefaultSlabSize - Allocate data into slabs of this size unless we get
+ /// an allocation above SizeThreshold.
+ static const size_t DefaultSlabSize;
+
+ /// DefaultSizeThreshold - For any allocation larger than this threshold, we
+ /// should allocate a separate slab.
+ static const size_t DefaultSizeThreshold;
+
+ void AllocateGOT();
+
+ // Testing methods.
+ virtual bool CheckInvariants(std::string &ErrorStr);
+ size_t GetDefaultCodeSlabSize() { return DefaultCodeSlabSize; }
+ size_t GetDefaultDataSlabSize() { return DefaultSlabSize; }
+ size_t GetDefaultStubSlabSize() { return DefaultSlabSize; }
+ unsigned GetNumCodeSlabs() { return CodeSlabs.size(); }
+ unsigned GetNumDataSlabs() { return DataAllocator.GetNumSlabs(); }
+ unsigned GetNumStubSlabs() { return StubAllocator.GetNumSlabs(); }
+
+ /// startFunctionBody - When a function starts, allocate a block of free
+ /// executable memory, returning a pointer to it and its actual size.
+ uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ largest = largest - sizeof(MemoryRangeHeader);
+
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < ActualSize ||
+ largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)ActualSize);
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader);
+ return (uint8_t *)(CurBlock + 1);
+ }
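+ // Sketch of the search above (hypothetical free list): with free blocks of
+ // 4 KB, 1 MB and 64 KB on the circular list, the scan settles on the 1 MB
+ // block, hands the caller everything past its MemoryRangeHeader, and falls
+ // back to allocateNewCodeSlab only when even the largest block cannot cover
+ // the requested ActualSize.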
+
+ /// allocateNewCodeSlab - Helper method to allocate a new slab of code
+ /// memory from the OS and add it to the free list. Returns the new
+ /// FreeRangeHeader at the base of the slab.
+ FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) {
+ // If the user needs at least MinSize free memory, then we account for
+ // two MemoryRangeHeaders: the one in the user's block, and the one at the
+ // end of the slab.
+ size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader);
+ size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin);
+ sys::MemoryBlock B = allocateNewSlab(SlabSize);
+ CodeSlabs.push_back(B);
+ char *MemBase = (char*)(B.base());
+
+ // Put a tiny allocated block at the end of the memory chunk, so when
+ // FreeBlock calls getBlockAfter it doesn't fall off the end.
+ MemoryRangeHeader *EndBlock =
+ (MemoryRangeHeader*)(MemBase + B.size()) - 1;
+ EndBlock->ThisAllocated = 1;
+ EndBlock->PrevAllocated = 0;
+ EndBlock->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Start out with a vast new block of free memory.
+ FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase;
+ NewBlock->ThisAllocated = 0;
+ // Make sure getFreeBlockBefore doesn't look into unmapped memory.
+ NewBlock->PrevAllocated = 1;
+ NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock;
+ NewBlock->SetEndOfBlockSizeMarker();
+ NewBlock->AddToFreeList(FreeMemoryList);
+
+ assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize &&
+ "The block was too small!");
+ return NewBlock;
+ }
+
+ /// endFunctionBody - The function F is now allocated, and takes the memory
+ /// in the range [FunctionStart,FunctionEnd).
+ void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ assert(FunctionEnd > FunctionStart);
+ assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched function start/end!");
+
+ uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+ }
+
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
+ uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+ CurBlock = FreeMemoryList;
+ FreeMemoryList = FreeMemoryList->AllocateBlock();
+
+ uint8_t *result = (uint8_t *)(CurBlock + 1);
+
+ if (Alignment == 0) Alignment = 1;
+ result = (uint8_t*)(((intptr_t)result+Alignment-1) &
+ ~(intptr_t)(Alignment-1));
+
+ uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+
+ return result;
+ }
+
+ /// allocateStub - Allocate memory for a function stub.
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) {
+ return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment);
+ }
+
+ /// allocateGlobal - Allocate memory for a global.
+ uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
+ /// startExceptionTable - Use startFunctionBody to allocate memory for the
+ /// function's exception table.
+ uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
+ return startFunctionBody(F, ActualSize);
+ }
+
+ /// endExceptionTable - The exception table of F is now allocated,
+ /// and takes the memory in the range [TableStart,TableEnd).
+ void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ assert(TableEnd > TableStart);
+ assert(TableStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched table start/end!");
+
+ uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+ }
+
+ uint8_t *getGOTBase() const {
+ return GOTBase;
+ }
+
+ void deallocateBlock(void *Block) {
+ // Find the block that is allocated for this function.
+ MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
+ assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+ // Fill the buffer with garbage!
+ if (PoisonMemory) {
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+ }
+
+ // Free the memory.
+ FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+ }
+
+ /// deallocateFunctionBody - Deallocate all memory for the specified
+ /// function body.
+ void deallocateFunctionBody(void *Body) {
+ if (Body) deallocateBlock(Body);
+ }
+
+ /// deallocateExceptionTable - Deallocate memory for the specified
+ /// exception table.
+ void deallocateExceptionTable(void *ET) {
+ if (ET) deallocateBlock(ET);
+ }
+
+ /// setMemoryWritable - When code generation is in progress,
+ /// the code pages may need permissions changed.
+ void setMemoryWritable()
+ {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setWritable(CodeSlabs[i]);
+ }
+ /// setMemoryExecutable - When code generation is done and we're ready to
+ /// start execution, the code pages may need permissions changed.
+ void setMemoryExecutable()
+ {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setExecutable(CodeSlabs[i]);
+ }
+
+ /// setPoisonMemory - Controls whether we write garbage over freed memory.
+ ///
+ void setPoisonMemory(bool poison) {
+ PoisonMemory = poison;
+ }
+ };
+}
+
+MemSlab *JITSlabAllocator::Allocate(size_t Size) {
+ sys::MemoryBlock B = JMM.allocateNewSlab(Size);
+ MemSlab *Slab = (MemSlab*)B.base();
+ Slab->Size = B.size();
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void JITSlabAllocator::Deallocate(MemSlab *Slab) {
+ sys::MemoryBlock B(Slab, Slab->Size);
+ sys::Memory::ReleaseRWX(B);
+}
+
+DefaultJITMemoryManager::DefaultJITMemoryManager()
+ :
+#ifdef NDEBUG
+ PoisonMemory(false),
+#else
+ PoisonMemory(true),
+#endif
+ LastSlab(0, 0),
+ BumpSlabAllocator(*this),
+ StubAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator),
+ DataAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator) {
+
+ // Allocate space for code.
+ sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize);
+ CodeSlabs.push_back(MemBlock);
+ uint8_t *MemBase = (uint8_t*)MemBlock.base();
+
+ // We set up the memory chunk with 4 mem regions, like this:
+ // [ START
+ // [ Free #0 ] -> Large space to allocate functions from.
+ // [ Allocated #1 ] -> Tiny space to separate regions.
+ // [ Free #2 ] -> Tiny space so there is always at least 1 free block.
+ // [ Allocated #3 ] -> Tiny space to prevent looking past end of block.
+ // END ]
+ //
+ // The last three blocks are never deallocated or touched.
+
+ // Add MemoryRangeHeader to the end of the memory region, indicating that
+ // the space after the block of memory is allocated. This is block #3.
+ MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
+ Mem3->ThisAllocated = 1;
+ Mem3->PrevAllocated = 0;
+ Mem3->BlockSize = sizeof(MemoryRangeHeader);
+
+  // Add a tiny free region so that the free list always has one entry.
+ FreeRangeHeader *Mem2 =
+ (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
+ Mem2->ThisAllocated = 0;
+ Mem2->PrevAllocated = 1;
+ Mem2->BlockSize = FreeRangeHeader::getMinBlockSize();
+ Mem2->SetEndOfBlockSizeMarker();
+ Mem2->Prev = Mem2; // Mem2 *is* the free list for now.
+ Mem2->Next = Mem2;
+
+  // Add a tiny allocated region so that Mem2 is never coalesced away.
+ MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1;
+ Mem1->ThisAllocated = 1;
+ Mem1->PrevAllocated = 0;
+ Mem1->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Add a FreeRangeHeader to the start of the function body region, indicating
+ // that the space is free. Mark the previous block allocated so we never look
+ // at it.
+ FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase;
+ Mem0->ThisAllocated = 0;
+ Mem0->PrevAllocated = 1;
+ Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
+ Mem0->SetEndOfBlockSizeMarker();
+ Mem0->AddToFreeList(Mem2);
+
+ // Start out with the freelist pointing to Mem0.
+ FreeMemoryList = Mem0;
+
+ GOTBase = NULL;
+}
+
+void DefaultJITMemoryManager::AllocateGOT() {
+  assert(GOTBase == 0 && "Cannot allocate the GOT multiple times");
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
+ HasGOT = true;
+}
+
+DefaultJITMemoryManager::~DefaultJITMemoryManager() {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::ReleaseRWX(CodeSlabs[i]);
+
+ delete[] GOTBase;
+}
+
+sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
+ // Allocate a new block close to the last one.
+ std::string ErrMsg;
+ sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
+ sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
+ if (B.base() == 0) {
+ report_fatal_error("Allocation failed when allocating new memory in the"
+ " JIT\n" + Twine(ErrMsg));
+ }
+ LastSlab = B;
+ ++NumSlabs;
+ // Initialize the slab to garbage when debugging.
+ if (PoisonMemory) {
+ memset(B.base(), 0xCD, B.size());
+ }
+ return B;
+}
+
+/// CheckInvariants - For testing only. Return "" if all internal invariants
+/// are preserved, and a helpful error message otherwise. For free and
+/// allocated blocks, make sure that adding BlockSize gives a valid block.
+/// For free blocks, make sure they're in the free list and that their end of
+/// block size marker is correct. This function should return an error before
+/// accessing bad memory. This function is defined here instead of in
+/// JITMemoryManagerTest.cpp so that we don't have to expose all of the
+/// implementation details of DefaultJITMemoryManager.
+bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
+ raw_string_ostream Err(ErrorStr);
+
+  // Construct the set of FreeRangeHeader pointers so we can query it
+  // efficiently.
+ llvm::SmallPtrSet<MemoryRangeHeader*, 16> FreeHdrSet;
+ FreeRangeHeader* FreeHead = FreeMemoryList;
+ FreeRangeHeader* FreeRange = FreeHead;
+
+ do {
+ // Check that the free range pointer is in the blocks we've allocated.
+ bool Found = false;
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E && !Found; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+ Found = (Start <= (char*)FreeRange && (char*)FreeRange < End);
+ }
+ if (!Found) {
+ Err << "Corrupt free list; points to " << FreeRange;
+ return false;
+ }
+
+ if (FreeRange->Next->Prev != FreeRange) {
+ Err << "Next and Prev pointers do not match.";
+ return false;
+ }
+
+ // Otherwise, add it to the set.
+ FreeHdrSet.insert(FreeRange);
+ FreeRange = FreeRange->Next;
+ } while (FreeRange != FreeHead);
+
+ // Go over each block, and look at each MemoryRangeHeader.
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+
+ // Check each memory range.
+ for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL;
+ Start <= (char*)Hdr && (char*)Hdr < End;
+ Hdr = &Hdr->getBlockAfter()) {
+ if (Hdr->ThisAllocated == 0) {
+ // Check that this range is in the free list.
+ if (!FreeHdrSet.count(Hdr)) {
+ Err << "Found free header at " << Hdr << " that is not in free list.";
+ return false;
+ }
+
+ // Now make sure the size marker at the end of the block is correct.
+ uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1;
+ if (!(Start <= (char*)Marker && (char*)Marker < End)) {
+ Err << "Block size in header points out of current MemoryBlock.";
+ return false;
+ }
+ if (Hdr->BlockSize != *Marker) {
+ Err << "End of block size marker (" << *Marker << ") "
+ << "and BlockSize (" << Hdr->BlockSize << ") don't match.";
+ return false;
+ }
+ }
+
+ if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) {
+ Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != "
+ << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")";
+ return false;
+ } else if (!LastHdr && !Hdr->PrevAllocated) {
+ Err << "The first header should have PrevAllocated true.";
+ return false;
+ }
+
+ // Remember the last header.
+ LastHdr = Hdr;
+ }
+ }
+
+ // All invariants are preserved.
+ return true;
+}
+
+JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
+ return new DefaultJITMemoryManager();
+}
+
+// Allocate memory for code in 512K slabs.
+const size_t DefaultJITMemoryManager::DefaultCodeSlabSize = 512 * 1024;
+
+// Allocate globals and stubs in slabs of 64K. (probably 16 pages)
+const size_t DefaultJITMemoryManager::DefaultSlabSize = 64 * 1024;
+
+// Waste at most 16K at the end of each bump slab. (probably 4 pages)
+const size_t DefaultJITMemoryManager::DefaultSizeThreshold = 16 * 1024;
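
A minimal, self-contained sketch of the four-region slab layout that the DefaultJITMemoryManager constructor above sets up. The Header struct here is a stand-in for MemoryRangeHeader/FreeRangeHeader (no free-list links or end-of-block size markers), and a plain malloc'd buffer stands in for the RWX code slab, so this only illustrates the boundary-tag bookkeeping, not the real allocator.

#include <cstdio>
#include <cstdlib>
#include <stdint.h>

// Simplified stand-in for MemoryRangeHeader/FreeRangeHeader: just the
// boundary-tag bits and the block size, no free-list links or end markers.
struct Header {
  uintptr_t ThisAllocated : 1;
  uintptr_t PrevAllocated : 1;
  uintptr_t BlockSize     : sizeof(uintptr_t)*8 - 2;
};

int main() {
  const size_t SlabSize = 4096;
  char *Base = (char*)std::malloc(SlabSize);
  if (!Base) return 1;

  // Block #3: allocated header at the very end, so a scan never walks
  // past the slab.
  Header *Mem3 = (Header*)(Base + SlabSize) - 1;
  Mem3->ThisAllocated = 1;
  Mem3->PrevAllocated = 0;
  Mem3->BlockSize = sizeof(Header);

  // Block #2: a tiny free block, so the free list is never empty.
  Header *Mem2 = (Header*)((char*)Mem3 - 2*sizeof(Header));
  Mem2->ThisAllocated = 0;
  Mem2->PrevAllocated = 1;
  Mem2->BlockSize = 2*sizeof(Header);

  // Block #1: a tiny allocated block, so #2 can never be coalesced away.
  Header *Mem1 = (Header*)Mem2 - 1;
  Mem1->ThisAllocated = 1;
  Mem1->PrevAllocated = 0;
  Mem1->BlockSize = sizeof(Header);

  // Block #0: the rest of the slab, free, where function bodies would go.
  Header *Mem0 = (Header*)Base;
  Mem0->ThisAllocated = 0;
  Mem0->PrevAllocated = 1;   // Pretend, so we never look before the slab.
  Mem0->BlockSize = (char*)Mem1 - (char*)Mem0;

  // Walk the slab front to back via the size fields, as CheckInvariants does.
  for (Header *H = Mem0; (char*)H < Base + SlabSize;
       H = (Header*)((char*)H + H->BlockSize))
    std::printf("block at +%4ld: size %4lu, %s\n", (long)((char*)H - Base),
                (unsigned long)H->BlockSize,
                H->ThisAllocated ? "allocated" : "free");

  std::free(Base);
  return 0;
}
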
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
new file mode 100644
index 000000000000..9a9ed6d33484
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -0,0 +1,192 @@
+//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that calls into OProfile to tell
+// it about JITted functions. For now, we only record function names and sizes,
+// but eventually we'll also record line number information.
+//
+// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
+// definition of the interface we're using.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h"
+#include <stddef.h>
+using namespace llvm;
+
+#if USE_OPROFILE
+
+#include <opagent.h>
+
+namespace {
+
+class OProfileJITEventListener : public JITEventListener {
+ op_agent_t Agent;
+public:
+ OProfileJITEventListener();
+ ~OProfileJITEventListener();
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+OProfileJITEventListener::OProfileJITEventListener()
+ : Agent(op_open_agent()) {
+ if (Agent == NULL) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Connected to OProfile agent.\n");
+ }
+}
+
+OProfileJITEventListener::~OProfileJITEventListener() {
+ if (Agent != NULL) {
+ if (op_close_agent(Agent) == -1) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
+ << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
+ }
+ }
+}
+
+class FilenameCache {
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into oprofile. Use an AssertingVH rather than a ValueMap because we
+ // shouldn't be modifying any MDNodes while this map is alive.
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+
+ public:
+ const char *getFilename(MDNode *Scope) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
+ Filename = DIScope(Scope).getFilename();
+ }
+ return Filename.c_str();
+ }
+};
+
+static debug_line_info LineStartToOProfileFormat(
+ const MachineFunction &MF, FilenameCache &Filenames,
+ uintptr_t Address, DebugLoc Loc) {
+ debug_line_info Result;
+ Result.vma = Address;
+ Result.lineno = Loc.getLine();
+ Result.filename = Filenames.getFilename(
+ Loc.getScope(MF.getFunction()->getContext()));
+ DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+ << Result.filename << ":" << Result.lineno << "\n");
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void OProfileJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
+ if (op_write_native_code(Agent, F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnStart, FnSize) == -1) {
+ DEBUG(dbgs() << "Failed to tell OProfile about native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ return;
+ }
+
+ if (!Details.LineStarts.empty()) {
+ // Now we convert the line number information from the address/DebugLoc
+ // format in Details to the address/filename/lineno format that OProfile
+ // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
+ // line numbers for addresses above 4G.
+ FilenameCache Filenames;
+ std::vector<debug_line_info> LineInfo;
+ LineInfo.reserve(1 + Details.LineStarts.size());
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ // If we have debug info for the function itself, use that as the line
+ // number of the first several instructions. Otherwise, after filling
+ // LineInfo, we'll adjust the address of the first line number to point at
+ // the start of the function.
+ debug_line_info line_info;
+ line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
+ line_info.lineno = FunctionDI.getLineNumber();
+ line_info.filename = Filenames.getFilename(FirstLocScope);
+ LineInfo.push_back(line_info);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+ I = Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames, I->Address, I->Loc));
+ }
+
+    // In case the function didn't have line info of its own, adjust the first
+    // line info's address to point at the start of the function.
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
+
+ if (op_write_debug_line_info(Agent, FnStart,
+ LineInfo.size(), &*LineInfo.begin()) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about line numbers for native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ }
+ }
+}
+
+// Removes the being-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ assert(FnStart && "Invalid function pointer");
+ if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about unload of native function at "
+ << FnStart << "\n");
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *createOProfileJITEventListener() {
+ return new OProfileJITEventListener;
+}
+}
+
+#else // USE_OPROFILE
+
+namespace llvm {
+// By defining this to return NULL, we can let clients call it unconditionally,
+// even if they haven't configured with the OProfile libraries.
+JITEventListener *createOProfileJITEventListener() {
+ return NULL;
+}
+} // namespace llvm
+
+#endif // USE_OPROFILE
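
A hedged sketch of how a JIT client might attach this listener, assuming createOProfileJITEventListener() is declared in llvm/ExecutionEngine/JITEventListener.h and ExecutionEngine::RegisterJITEventListener() is available, as in LLVM of this vintage. The caller keeps ownership of the listener and must keep it alive while the engine uses it.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
using namespace llvm;

// Attach the OProfile listener to an existing engine. The factory returns
// NULL when LLVM was configured without OProfile support, so this degrades
// to a no-op in that case.
static JITEventListener *attachOProfileListener(ExecutionEngine &EE) {
  JITEventListener *Listener = createOProfileJITEventListener();
  if (Listener)
    EE.RegisterJITEventListener(Listener);
  return Listener;
}
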
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
new file mode 100644
index 000000000000..f7ed176fef78
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMCJIT
+ MCJIT.cpp
+ TargetSelect.cpp
+ )
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp
new file mode 100644
index 000000000000..e431c848d630
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp
@@ -0,0 +1,161 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If a function call occurs to an external function, the JIT is designed to use
+// the dynamic loader interface to find a function to call. This is useful for
+// calling system calls and library functions that are not available in LLVM.
+// Some system calls, however, need to be handled specially. For this reason,
+// we intercept some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* The stat functions are redirected to __xstat with a version number. On
+ * x86-64, linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make
+ * 'stat' available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled()) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+    // We should not invoke the parent's ctors/dtors from a generated main()!
+    // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+    // callee's (e.g. tools/lli) copy, invoking the wrong, duplicated ctors
+    // (and registering the wrong callee's dtors with atexit(3)). We expect
+    // ExecutionEngine::runStaticConstructorsDestructors() to be called before
+    // ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+    // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // and has an asm specifier, try again without the underscore.
+ if (Name[0] == 1 && NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+ }
+
+  // If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
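
A small stand-alone sketch of the atexit interception pattern above: handlers registered through the intercepted entry point are stashed in a vector and run in LIFO order before the real exit(3) would be called. The names my_atexit and my_exit_prologue are illustrative stand-ins for jit_atexit and runAtExitHandlers, not part of the JIT.

#include <cstdio>
#include <vector>

static std::vector<void (*)()> Handlers;

// What jit_atexit does: record the handler and report success like atexit(3).
static int my_atexit(void (*Fn)()) {
  Handlers.push_back(Fn);
  return 0;
}

// What runAtExitHandlers does: run handlers in reverse registration order,
// the same order exit(3) would use.
static void my_exit_prologue() {
  while (!Handlers.empty()) {
    void (*Fn)() = Handlers.back();
    Handlers.pop_back();
    Fn();
  }
}

static void first()  { std::puts("registered first, runs last"); }
static void second() { std::puts("registered second, runs first"); }

int main() {
  my_atexit(first);
  my_atexit(second);
  my_exit_prologue();  // jit_exit() does this before calling exit(Status).
  return 0;
}
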
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
new file mode 100644
index 000000000000..4475f4d5c0d8
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -0,0 +1,221 @@
+//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "MCJITMemoryManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { MCJIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInMCJIT() {
+}
+
+ExecutionEngine *MCJIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo())
+ return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel,
+ GVsWithCode);
+
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+}
+
+MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
+ RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode)
+ : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
+
+ PM.add(new TargetData(*TM->getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) {
+ report_fatal_error("Target does not support MC emission!");
+ }
+
+ // Initialize passes.
+ // FIXME: When we support multiple modules, we'll want to move the code
+ // gen and finalization out of the constructor here and do it more
+ // on-demand as part of getPointerToFunction().
+ PM.run(*M);
+ // Flush the output buffer so the SmallVector gets its data.
+ OS.flush();
+
+ // Load the object into the dynamic linker.
+ // FIXME: It would be nice to avoid making yet another copy.
+ MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(),
+ Buffer.size()));
+ if (Dyld.loadObject(MB))
+ report_fatal_error(Dyld.getErrorString());
+ // Resolve any relocations.
+ Dyld.resolveRelocations();
+}
+
+MCJIT::~MCJIT() {
+ delete MemMgr;
+}
+
+void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
+ report_fatal_error("not yet implemented");
+ return 0;
+}
+
+void *MCJIT::getPointerToFunction(Function *F) {
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ // FIXME: Should we be using the mangler for this? Probably.
+ StringRef BaseName = F->getName();
+ if (BaseName[0] == '\1')
+ return (void*)Dyld.getSymbolAddress(BaseName.substr(1));
+ return (void*)Dyld.getSymbolAddress((TM->getMCAsmInfo()->getGlobalPrefix()
+ + BaseName).str());
+}
+
+void *MCJIT::recompileAndRelinkFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+void MCJIT::freeMachineCodeForFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+GenericValue MCJIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 &&
+ FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+  // Next, handle the cases where no arguments are passed.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ return rv;
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ assert("Full-featured argument passing not supported yet!");
+ return GenericValue();
+}
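
An illustrative sketch of the common-`main' dispatch in MCJIT::runFunction above: the void* obtained for a function is cast through intptr_t to a typed function pointer and called directly. fake_main is a stand-in for JITted code; in the real path the pointer comes from getPointerToFunction().

#include <cstdio>
#include <stdint.h>

// A native function standing in for JITted code.
static int fake_main(int argc, char **argv) {
  std::printf("argc = %d, argv[0] = %s\n", argc, argv[0]);
  return argc;
}

int main() {
  void *FPtr = (void*)(intptr_t)&fake_main;

  // The int(int, char**) "common main prototype" case from runFunction:
  // cast through intptr_t to silence pedantic function-pointer warnings,
  // then call directly.
  int (*PF)(int, char **) = (int (*)(int, char **))(intptr_t)FPtr;

  char prog[] = "demo";
  char *argv[] = { prog, 0 };
  int rv = PF(1, argv);
  std::printf("returned %d\n", rv);
  return 0;
}
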
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
new file mode 100644
index 000000000000..b64c21a97360
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -0,0 +1,91 @@
+//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+
+#include "llvm/PassManager.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+// FIXME: This makes all kinds of horrible assumptions for the time being,
+// like only having one module, not needing to worry about multi-threading,
+// blah blah. Purely in get-it-up-and-limping mode for now.
+
+class MCJIT : public ExecutionEngine {
+ MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
+ RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode);
+
+ TargetMachine *TM;
+ MCContext *Ctx;
+ RTDyldMemoryManager *MemMgr;
+
+  // FIXME: These may need to be moved to a separate 'jitstate' member, as the
+  // non-MC JIT does for multithreading and such. Just keep them here for now.
+ PassManager PM;
+ Module *M;
+ // FIXME: This really doesn't belong here.
+ SmallVector<char, 4096> Buffer; // Working buffer into which we JIT.
+ raw_svector_ostream OS;
+
+ RuntimeDyld Dyld;
+
+public:
+ ~MCJIT();
+
+ /// @name ExecutionEngine interface implementation
+ /// @{
+
+ virtual void *getPointerToBasicBlock(BasicBlock *BB);
+
+ virtual void *getPointerToFunction(Function *F);
+
+ virtual void *recompileAndRelinkFunction(Function *F);
+
+ virtual void freeMachineCodeForFunction(Function *F);
+
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+ /// @}
+ /// @name (Private) Registration Interfaces
+ /// @{
+
+ static void Register() {
+ MCJITCtor = createJIT;
+ }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
new file mode 100644
index 000000000000..40bc031a0771
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -0,0 +1,69 @@
+//===- MCJITMemoryManager.h - Definition for the Memory Manager -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+
+#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include <assert.h>
+
+namespace llvm {
+
+// The MCJIT memory manager is a layer between the standard JITMemoryManager
+// and the RuntimeDyld interface that maps objects, by name, onto their
+// matching LLVM IR counterparts in the module(s) being compiled.
+class MCJITMemoryManager : public RTDyldMemoryManager {
+ JITMemoryManager *JMM;
+
+ // FIXME: Multiple modules.
+ Module *M;
+public:
+ MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : JMM(jmm), M(m) {}
+
+  // Allocate Size bytes, or more, for the named function. Return a pointer
+  // to the allocated memory and update Size to reflect how much memory was
+  // actually allocated.
+ uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
+ // FIXME: This should really reference the MCAsmInfo to get the global
+ // prefix.
+ if (Name[0] == '_') ++Name;
+ Function *F = M->getFunction(Name);
+    // Some ObjC names have a prefixed \01 in the IR. If we failed to find
+    // the symbol and it follows the ObjC naming convention (starts with "-"),
+    // try prepending a \01 and see if we can find it that way.
+ if (!F && Name[0] == '-')
+ F = M->getFunction((Twine("\1") + Name).str());
+ assert(F && "No matching function in JIT IR Module!");
+ return JMM->startFunctionBody(F, Size);
+ }
+
+ // Mark the end of the function, including how much of the allocated
+ // memory was actually used.
+ void endFunctionBody(const char *Name, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ // FIXME: This should really reference the MCAsmInfo to get the global
+ // prefix.
+ if (Name[0] == '_') ++Name;
+ Function *F = M->getFunction(Name);
+    // Some ObjC names have a prefixed \01 in the IR. If we failed to find
+    // the symbol and it follows the ObjC naming convention (starts with "-"),
+    // try prepending a \01 and see if we can find it that way.
+ if (!F && Name[0] == '-')
+ F = M->getFunction((Twine("\1") + Name).str());
+ assert(F && "No matching function in JIT IR Module!");
+ JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+ }
+
+};
+
+} // End llvm namespace
+
+#endif
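
A sketch of the name normalization used by both methods above, pulled out as a hypothetical string-only helper: strip the assumed '_' global prefix, and for ObjC-style method names (leading '-') also produce a '\1'-prefixed fallback to try. candidateIRNames is not part of the real interface; the real code queries the Module directly with these candidates.

#include <cstdio>
#include <string>

// Produce the IR-level names to try against Module::getFunction for an
// emitted symbol name.
static void candidateIRNames(const char *Name, std::string &First,
                             std::string &Fallback) {
  if (Name[0] == '_')      // Strip the assumed '_' global prefix.
    ++Name;
  First = Name;            // First candidate: the name as-is.
  Fallback.clear();
  if (Name[0] == '-')      // ObjC-style method: also try a \1 prefix.
    Fallback = std::string("\1") + Name;
}

int main() {
  std::string First, Fallback;
  candidateIRNames("_-[MyClass doWork:]", First, Fallback);
  std::printf("first candidate: %s\n", First.c_str());
  if (!Fallback.empty())
    std::printf("fallback has a leading \\1 byte, length %lu\n",
                (unsigned long)Fallback.size());
  return 0;
}
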
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile
new file mode 100644
index 000000000000..967efbc0efa4
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCJIT
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
new file mode 100644
index 000000000000..9e53f8757ec0
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMRuntimeDyld
+ RuntimeDyld.cpp
+ )
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile
new file mode 100644
index 000000000000..5d6f26d950fe
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/RuntimeDyld/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMRuntimeDyld
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
new file mode 100644
index 000000000000..33dd70502798
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -0,0 +1,95 @@
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "RuntimeDyldImpl.h"
+using namespace llvm;
+using namespace llvm::object;
+
+// Empty out-of-line virtual destructors to serve as the key functions.
+RTDyldMemoryManager::~RTDyldMemoryManager() {}
+RuntimeDyldImpl::~RuntimeDyldImpl() {}
+
+namespace llvm {
+
+void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
+ uint8_t *EndAddress) {
+ // Allocate memory for the function via the memory manager.
+ uintptr_t Size = EndAddress - StartAddress + 1;
+ uintptr_t AllocSize = Size;
+ uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
+  assert(AllocSize >= Size &&
+         "Memory manager failed to allocate enough memory!");
+ // Copy the function payload into the memory block.
+ memcpy(Mem, StartAddress, Size);
+ MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
+ // Remember where we put it.
+ Functions[Name] = sys::MemoryBlock(Mem, Size);
+  // Default the assigned address for this symbol to wherever we just
+  // allocated it.
+ SymbolTable[Name] = Mem;
+ DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
+}
+
+// Resolve the relocations for all symbols we currently know about.
+void RuntimeDyldImpl::resolveRelocations() {
+ // Just iterate over the symbols in our symbol table and assign their
+ // addresses.
+ StringMap<uint8_t*>::iterator i = SymbolTable.begin();
+ StringMap<uint8_t*>::iterator e = SymbolTable.end();
+ for (;i != e; ++i)
+ reassignSymbolAddress(i->getKey(), i->getValue());
+}
+
+//===----------------------------------------------------------------------===//
+// RuntimeDyld class implementation
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+ Dyld = 0;
+ MM = mm;
+}
+
+RuntimeDyld::~RuntimeDyld() {
+ delete Dyld;
+}
+
+bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
+ if (!Dyld) {
+ if (RuntimeDyldMachO::isKnownFormat(InputBuffer))
+ Dyld = new RuntimeDyldMachO(MM);
+ else
+ report_fatal_error("Unknown object format!");
+ } else {
+ if(!Dyld->isCompatibleFormat(InputBuffer))
+ report_fatal_error("Incompatible object format!");
+ }
+
+ return Dyld->loadObject(InputBuffer);
+}
+
+void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+ return Dyld->getSymbolAddress(Name);
+}
+
+void RuntimeDyld::resolveRelocations() {
+ Dyld->resolveRelocations();
+}
+
+void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+ Dyld->reassignSymbolAddress(Name, Addr);
+}
+
+StringRef RuntimeDyld::getErrorString() {
+ return Dyld->getErrorString();
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
new file mode 100644
index 000000000000..bcdfb04801a5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -0,0 +1,152 @@
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the implementations of runtime dynamic linker facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_IMPL_H
+#define LLVM_RUNTIME_DYLD_IMPL_H
+
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+class RuntimeDyldImpl {
+protected:
+ unsigned CPUType;
+ unsigned CPUSubtype;
+
+ // The MemoryManager to load objects into.
+ RTDyldMemoryManager *MemMgr;
+
+ // FIXME: This all assumes we're dealing with external symbols for anything
+ // explicitly referenced. I.e., we can index by name and things
+ // will work out. In practice, this may not be the case, so we
+ // should find a way to effectively generalize.
+
+  // For each function, we have a MemoryBlock of its instruction data.
+ StringMap<sys::MemoryBlock> Functions;
+
+ // Master symbol table. As modules are loaded and external symbols are
+ // resolved, their addresses are stored here.
+ StringMap<uint8_t*> SymbolTable;
+
+ bool HasError;
+ std::string ErrorStr;
+
+ // Set the error state and record an error string.
+ bool Error(const Twine &Msg) {
+ ErrorStr = Msg.str();
+ HasError = true;
+ return true;
+ }
+
+ void extractFunction(StringRef Name, uint8_t *StartAddress,
+ uint8_t *EndAddress);
+
+public:
+ RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
+
+ virtual ~RuntimeDyldImpl();
+
+ virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+
+ void *getSymbolAddress(StringRef Name) {
+ // FIXME: Just look up as a function for now. Overly simple of course.
+ // Work in progress.
+ return SymbolTable.lookup(Name);
+ }
+
+ void resolveRelocations();
+
+ virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0;
+
+ // Is the linker in an error state?
+ bool hasError() { return HasError; }
+
+ // Mark the error condition as handled and continue.
+ void clearError() { HasError = false; }
+
+ // Get the error message.
+ StringRef getErrorString() { return ErrorStr; }
+
+ virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
+};
+
+
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (e.g., because the JIT recompiled the function),
+  // the relocations get re-resolved.
+ struct RelocationEntry {
+ std::string Target; // Object this relocation is contained in.
+ uint64_t Offset; // Offset into the object for the relocation.
+ uint32_t Data; // Second word of the raw macho relocation entry.
+ int64_t Addend; // Addend encoded in the instruction itself, if any.
+ bool isResolved; // Has this relocation been resolved previously?
+
+ RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
+ : Target(t), Offset(offset), Data(data), Addend(addend),
+ isResolved(false) {}
+ };
+ typedef SmallVector<RelocationEntry, 4> RelocationList;
+ StringMap<RelocationList> Relocations;
+
+ // FIXME: Also keep a map of all the relocations contained in an object. Use
+ // this to dynamically answer whether all of the relocations in it have
+ // been resolved or not.
+
+ bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+ bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+ bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+
+ bool loadSegment32(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+ bool loadSegment64(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+
+public:
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool loadObject(MemoryBuffer *InputBuffer);
+
+ void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+ static bool isKnownFormat(const MemoryBuffer *InputBuffer);
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
+ return isKnownFormat(InputBuffer);
+  }
+};
+
+} // end namespace llvm
+
+
+#endif
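
A simplified model of the Relocations bookkeeping declared above: a map from source symbol to the list of sites that embed its address, re-walked whenever the symbol is assigned a new address. std::map stands in for StringMap, and the sketch only prints the sites that would be patched; the real code rewrites the instruction bytes via resolveRelocation.

#include <cstdio>
#include <map>
#include <string>
#include <vector>
#include <stdint.h>

// One fixup site: the function whose body contains it, and the byte offset
// of the fixup within that function.
struct Reloc {
  std::string Target;
  uint64_t Offset;
};

typedef std::map<std::string, std::vector<Reloc> > RelocMap;

// On reassignment, walk the source symbol's relocation list and re-apply
// each fixup (here we just report what would be patched).
static void reassign(const RelocMap &Relocs, const std::string &Sym,
                     uint64_t NewAddr) {
  RelocMap::const_iterator It = Relocs.find(Sym);
  if (It == Relocs.end())
    return;
  for (size_t i = 0, e = It->second.size(); i != e; ++i)
    std::printf("patch %s+%llu with new address 0x%llx of %s\n",
                It->second[i].Target.c_str(),
                (unsigned long long)It->second[i].Offset,
                (unsigned long long)NewAddr, Sym.c_str());
}

int main() {
  RelocMap Relocs;
  Reloc R;
  R.Target = "caller";       // caller+12 holds the address of "callee".
  R.Offset = 12;
  Relocs["callee"].push_back(R);
  reassign(Relocs, "callee", 0x1000);  // The JIT moved/recompiled "callee".
  return 0;
}
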
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
new file mode 100644
index 000000000000..623e9b2acca3
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -0,0 +1,524 @@
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "RuntimeDyldImpl.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+bool RuntimeDyldMachO::
+resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+ unsigned Type, unsigned Size) {
+ // This just dispatches to the proper target specific routine.
+ switch (CPUType) {
+ default: assert(0 && "Unsupported CPU type!");
+ case mach::CTM_x86_64:
+ return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
+ isPCRel, Type, Size);
+ case mach::CTM_ARM:
+ return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
+ isPCRel, Type, Size);
+ }
+ llvm_unreachable("");
+}
+
+bool RuntimeDyldMachO::
+resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
+ bool isPCRel, unsigned Type,
+ unsigned Size) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel)
+ // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+ // address. Is that expected? Only for branches, perhaps?
+ Value -= Address + 4;
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_X86_64_Unsigned:
+ case macho::RIT_X86_64_Branch: {
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)Address;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ return false;
+ }
+ case macho::RIT_X86_64_Signed:
+ case macho::RIT_X86_64_GOTLoad:
+ case macho::RIT_X86_64_GOT:
+ case macho::RIT_X86_64_Subtractor:
+ case macho::RIT_X86_64_Signed1:
+ case macho::RIT_X86_64_Signed2:
+ case macho::RIT_X86_64_Signed4:
+ case macho::RIT_X86_64_TLV:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
+
+bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
+ bool isPCRel, unsigned Type,
+ unsigned Size) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel) {
+ Value -= Address;
+ // ARM PCRel relocations have an effective-PC offset of two instructions
+ // (four bytes in Thumb mode, 8 bytes in ARM mode).
+ // FIXME: For now, assume ARM mode.
+ Value -= 8;
+ }
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ llvm_unreachable("Invalid relocation type!");
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)Address;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ break;
+ }
+ case macho::RIT_ARM_Branch24Bit: {
+ // Mask the value into the target address. We know instructions are
+ // 32-bit aligned, so we can do it all at once.
+ uint32_t *p = (uint32_t*)Address;
+ // The low two bits of the value are not encoded.
+ Value >>= 2;
+ // Mask the value to 24 bits.
+ Value &= 0xffffff;
+ // FIXME: If the destination is a Thumb function (and the instruction
+ // is a non-predicated BL instruction), we need to change it to a BLX
+ // instruction instead.
+
+ // Insert the value into the instruction.
+ *p = (*p & ~0xffffff) | Value;
+ break;
+ }
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ case macho::RIT_ARM_ThumbBranch32Bit:
+ case macho::RIT_ARM_Half:
+ case macho::RIT_ARM_HalfDifference:
+ case macho::RIT_Pair:
+ case macho::RIT_Difference:
+ case macho::RIT_ARM_LocalDifference:
+ case macho::RIT_ARM_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
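
A worked sketch of the arithmetic in the RIT_ARM_Branch24Bit case above, combined with the PC-relative adjustment done before the switch: the displacement is measured from PC+8, shifted right by two, masked to 24 bits, and merged into the low 24 bits of the branch instruction. The BL opcode word and the addresses are illustrative values, not taken from the JIT.

#include <cassert>
#include <cstdio>
#include <stdint.h>

// Encode an ARM-mode branch displacement the way the code above does.
static uint32_t encodeBranch24(uint32_t Insn, uintptr_t Address,
                               uintptr_t Target) {
  uintptr_t Value = Target - Address;  // PC-relative.
  Value -= 8;                          // ARM-mode effective PC is PC+8.
  Value >>= 2;                         // Instructions are 4-byte aligned.
  Value &= 0xffffff;                   // Keep the 24-bit immediate.
  return (Insn & ~0xffffffu) | (uint32_t)Value;
}

int main() {
  // BL with a zero immediate (condition AL) as the raw instruction word.
  uint32_t BL = 0xEB000000;
  uint32_t Patched = encodeBranch24(BL, /*Address=*/0x1000, /*Target=*/0x1010);
  // Displacement = 0x1010 - 0x1000 - 8 = 8 bytes = 2 instructions.
  assert((Patched & 0xffffff) == 2);
  std::printf("patched instruction: 0x%08x\n", (unsigned)Patched);
  return 0;
}
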
+
+bool RuntimeDyldMachO::
+loadSegment32(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+ InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
+ Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
+ if (!SegmentLC)
+ return Error("unable to load segment load command");
+
+ for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
+ InMemoryStruct<macho::Section> Sect;
+ Obj->ReadSection(*SegmentLCI, SectNum, Sect);
+ if (!Sect)
+ return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+ // FIXME: For the time being, we're only loading text segments.
+ if (Sect->Flags != 0x80000400)
+ continue;
+
+ // Address and names of symbols in the section.
+ typedef std::pair<uint64_t, StringRef> SymbolEntry;
+ SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the symbol names, whether defined in this section or not.
+    // Used when we're dealing with relocation entries.
+ SmallVector<StringRef, 64> SymbolNames;
+ for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+ InMemoryStruct<macho::SymbolTableEntry> STE;
+ Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
+ if (!STE)
+ return Error("unable to read symbol: '" + Twine(i) + "'");
+ if (STE->SectionIndex > SegmentLC->NumSections)
+ return Error("invalid section index for symbol: '" + Twine(i) + "'");
+ // Get the symbol name.
+ StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+ SymbolNames.push_back(Name);
+
+ // Just skip symbols not defined in this section.
+ if ((unsigned)STE->SectionIndex - 1 != SectNum)
+ continue;
+
+ // FIXME: Check the symbol type and flags.
+ if (STE->Type != 0xF) // external, defined in this section.
+ continue;
+ // Flags == 0x8 marks a thumb function for ARM, which is fine as it
+ // doesn't require any special handling here.
+ if (STE->Flags != 0x0 && STE->Flags != 0x8)
+ continue;
+
+ // Remember the symbol.
+ Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+ DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+ (Sect->Address + STE->Value) << "\n");
+ }
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // If there weren't any functions, skip the section (odd, but possible).
+ if (!Symbols.size())
+ continue;
+
+ // Extract the function data.
+ uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
+ SegmentLC->FileSize).data();
+ for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+ uint64_t StartOffset = Sect->Address + Symbols[i].first;
+ uint64_t EndOffset = Symbols[i + 1].first - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ }
+    // Handle the last symbol separately, since its end address is calculated
+    // differently: there is no next symbol to reference.
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+ uint64_t EndOffset = Sect->Size - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[Symbols.size()-1].second,
+ Base + StartOffset, Base + EndOffset);
+
+ // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+ if (RE->Word0 & macho::RF_Scattered)
+ return Error("NOT YET IMPLEMENTED: scattered relocations.");
+ // Word0 of the relocation is the offset into the section where the
+ // relocation should be applied. We need to translate that into an
+ // offset into a function since that's our atom.
+ uint32_t Offset = RE->Word0;
+ // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in the section is almost always going
+ // to be very small (usually just one), so until we have use cases
+ // where that's not true, just use a trivial linear search.
+ unsigned SymbolNum;
+ unsigned NumSymbols = Symbols.size();
+ assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+ "No symbol containing relocation!");
+ for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+ if (Symbols[SymbolNum + 1].first > Offset)
+ break;
+ // Adjust the offset to be relative to the symbol.
+ Offset -= Symbols[SymbolNum].first;
+ // Get the name of the symbol containing the relocation.
+ StringRef TargetName = SymbolNames[SymbolNum];
+
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ // Figure out the source symbol of the relocation. If isExtern is true,
+ // this relocation references the symbol table, otherwise it references
+ // a section in the same object, numbered from 1 through NumSections
+ // (SectionBases is [0, NumSections-1]).
+ // FIXME: Some targets (ARM) use internal relocations even for
+ // externally visible symbols, if the definition is in the same
+ // file as the reference. We need to convert those back to by-name
+ // references. We can resolve the address based on the section
+ // offset and see if we have a symbol at that address. If we do,
+ // use that; otherwise, puke.
+ if (!isExtern)
+ return Error("Internal relocations not supported.");
+ uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+ StringRef SourceName = SymbolNames[SourceNum];
+
+ // FIXME: Get the relocation addend from the target address.
+
+ // Now store the relocation information. Associate it with the source
+ // symbol.
+ Relocations[SourceName].push_back(RelocationEntry(TargetName,
+ Offset,
+ RE->Word1,
+ 0 /*Addend*/));
+ DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+ << " from '" << SourceName << "(Word1: "
+ << format("0x%x", RE->Word1) << ")\n");
+ }
+ }
+ return false;
+}
+
+
+bool RuntimeDyldMachO::
+loadSegment64(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+ InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
+ Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
+ if (!Segment64LC)
+ return Error("unable to load segment load command");
+
+ for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
+ InMemoryStruct<macho::Section64> Sect;
+ Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
+ if (!Sect)
+ return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+ // FIXME: For the time being, we're only loading text segments.
+ if (Sect->Flags != 0x80000400)
+ continue;
+
+ // Address and names of symbols in the section.
+ typedef std::pair<uint64_t, StringRef> SymbolEntry;
+ SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the symbol names, whether defined in this section or not.
+    // Used when we're dealing with relocation entries.
+ SmallVector<StringRef, 64> SymbolNames;
+ for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+ InMemoryStruct<macho::Symbol64TableEntry> STE;
+ Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
+ if (!STE)
+ return Error("unable to read symbol: '" + Twine(i) + "'");
+ if (STE->SectionIndex > Segment64LC->NumSections)
+ return Error("invalid section index for symbol: '" + Twine(i) + "'");
+ // Get the symbol name.
+ StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+ SymbolNames.push_back(Name);
+
+ // Just skip symbols not defined in this section.
+ if ((unsigned)STE->SectionIndex - 1 != SectNum)
+ continue;
+
+ // FIXME: Check the symbol type and flags.
+ if (STE->Type != 0xF) // external, defined in this section.
+ continue;
+ if (STE->Flags != 0x0)
+ continue;
+
+ // Remember the symbol.
+ Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+ DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+ (Sect->Address + STE->Value) << "\n");
+ }
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // If there weren't any functions, skip the section (odd, but possible).
+ if (!Symbols.size())
+ continue;
+
+ // Extract the function data.
+ uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
+ Segment64LC->FileSize).data();
+ for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+ uint64_t StartOffset = Sect->Address + Symbols[i].first;
+ uint64_t EndOffset = Symbols[i + 1].first - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ }
+    // Handle the last symbol separately, since its end address is calculated
+    // differently: there is no next symbol to reference.
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+ uint64_t EndOffset = Sect->Size - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[Symbols.size()-1].second,
+ Base + StartOffset, Base + EndOffset);
+
+ // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+ if (RE->Word0 & macho::RF_Scattered)
+ return Error("NOT YET IMPLEMENTED: scattered relocations.");
+ // Word0 of the relocation is the offset into the section where the
+ // relocation should be applied. We need to translate that into an
+ // offset into a function since that's our atom.
+ uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in the section is almost always
+      // very small (usually just one); until we have use cases where that's
+      // not true, just use a trivial linear search.
+ unsigned SymbolNum;
+ unsigned NumSymbols = Symbols.size();
+ assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+ "No symbol containing relocation!");
+ for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+ if (Symbols[SymbolNum + 1].first > Offset)
+ break;
+ // Adjust the offset to be relative to the symbol.
+ Offset -= Symbols[SymbolNum].first;
+ // Get the name of the symbol containing the relocation.
+ StringRef TargetName = SymbolNames[SymbolNum];
+
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ // Figure out the source symbol of the relocation. If isExtern is true,
+ // this relocation references the symbol table, otherwise it references
+ // a section in the same object, numbered from 1 through NumSections
+ // (SectionBases is [0, NumSections-1]).
+ if (!isExtern)
+ return Error("Internal relocations not supported.");
+ uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+ StringRef SourceName = SymbolNames[SourceNum];
+
+ // FIXME: Get the relocation addend from the target address.
+
+ // Now store the relocation information. Associate it with the source
+ // symbol.
+ Relocations[SourceName].push_back(RelocationEntry(TargetName,
+ Offset,
+ RE->Word1,
+ 0 /*Addend*/));
+ DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "' (Word1: "
+ << format("0x%x", RE->Word1) << ")\n");
+ }
+ }
+ return false;
+}
+
+bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
+ // If the linker is in an error state, don't do anything.
+ if (hasError())
+ return true;
+ // Load the Mach-O wrapper object.
+ std::string ErrorStr;
+ OwningPtr<MachOObject> Obj(
+ MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
+ if (!Obj)
+ return Error("unable to load object: '" + ErrorStr + "'");
+
+ // Get the CPU type information from the header.
+ const macho::Header &Header = Obj->getHeader();
+
+ // FIXME: Error checking that the loaded object is compatible with
+ // the system we're running on.
+ CPUType = Header.CPUType;
+ CPUSubtype = Header.CPUSubtype;
+
+ // Validate that the load commands match what we expect.
+ const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+ *DysymtabLCI = 0;
+ for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+ const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+ switch (LCI.Command.Type) {
+ case macho::LCT_Segment:
+ case macho::LCT_Segment64:
+ if (SegmentLCI)
+ return Error("unexpected input object (multiple segments)");
+ SegmentLCI = &LCI;
+ break;
+ case macho::LCT_Symtab:
+ if (SymtabLCI)
+ return Error("unexpected input object (multiple symbol tables)");
+ SymtabLCI = &LCI;
+ break;
+ case macho::LCT_Dysymtab:
+ if (DysymtabLCI)
+ return Error("unexpected input object (multiple symbol tables)");
+ DysymtabLCI = &LCI;
+ break;
+ default:
+      return Error("unexpected input object (unexpected load command)");
+ }
+ }
+
+ if (!SymtabLCI)
+ return Error("no symbol table found in object");
+ if (!SegmentLCI)
+    return Error("no segment load command found in object");
+
+ // Read and register the symbol table data.
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+ Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+ if (!SymtabLC)
+ return Error("unable to load symbol table load command");
+ Obj->RegisterStringTable(*SymtabLC);
+
+ // Read the dynamic link-edit information, if present (not present in static
+ // objects).
+ if (DysymtabLCI) {
+ InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+ Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+ if (!DysymtabLC)
+      return Error("unable to load dynamic link-edit load command");
+
+ // FIXME: We don't support anything interesting yet.
+// if (DysymtabLC->LocalSymbolsIndex != 0)
+// return Error("NOT YET IMPLEMENTED: local symbol entries");
+// if (DysymtabLC->ExternalSymbolsIndex != 0)
+// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+ }
+
+ // Load the segment load command.
+ if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+ if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+ return true;
+ } else {
+ if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+ return true;
+ }
+
+ return false;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+ // Assign the address in our symbol table.
+ SymbolTable[Name] = Addr;
+
+ RelocationList &Relocs = Relocations[Name];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ RelocationEntry &RE = Relocs[i];
+ uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
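+    // RE.Data is the raw Word1 of the original relocation entry. Assuming
+    // the usual non-scattered relocation_info layout, bit 24 is r_pcrel,
+    // bits 25-26 are r_length (so the access size is 1 << r_length bytes)
+    // and bits 28-31 are r_type, which is what the extractions below rely on.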
+ bool isPCRel = (RE.Data >> 24) & 1;
+ unsigned Type = (RE.Data >> 28) & 0xf;
+ unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+ DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+ << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+                 << " from '" << Name << "' (" << format("%p", Addr) << ")"
+                 << " (" << (isPCRel ? "pcrel" : "absolute")
+ << ", type: " << Type << ", Size: " << Size << ").\n");
+
+ resolveRelocation(Target, Addr, isPCRel, Type, Size);
+ RE.isResolved = true;
+ }
+}
+
+bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+ StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
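+  // These are the raw Mach-O header magics: FEEDFACE/CEFAEDFE for 32-bit
+  // images and FEEDFACF/CFFAEDFE for 64-bit images, covering both byte
+  // orders in which the header may appear.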
+ if (Magic == "\xFE\xED\xFA\xCE") return true;
+ if (Magic == "\xCE\xFA\xED\xFE") return true;
+ if (Magic == "\xFE\xED\xFA\xCF") return true;
+ if (Magic == "\xCF\xFA\xED\xFE") return true;
+ return false;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
new file mode 100644
index 000000000000..f51aff3603b8
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -0,0 +1,90 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x. Clients
+// should initialize targets prior to calling createJIT.
+//
+//===----------------------------------------------------------------------===//
+
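+// A typical client calls something like llvm::InitializeNativeTarget()
+// before constructing the EngineBuilder, so that the registry consulted
+// below has at least the host target registered.
+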
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch. Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *EngineBuilder::selectTarget(Module *Mod,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs,
+ std::string *ErrorStr) {
+ Triple TheTriple(Mod->getTargetTriple());
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getHostTriple());
+
+ // Adjust the triple to match what the user requested.
+ const Target *TheTarget = 0;
+ if (!MArch.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (MArch == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ *ErrorStr = "No available targets are compatible with this -march, "
+ "see -version for the available targets.\n";
+ return 0;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // module/host triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
+ std::string Error;
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+ if (TheTarget == 0) {
+ if (ErrorStr)
+ *ErrorStr = Error;
+ return 0;
+ }
+ }
+
+ if (!TheTarget->hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host you are"
+           << " running on. If bad things happen, please choose a different "
+ << "-march switch.\n";
+ }
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (!MAttrs.empty()) {
+ SubtargetFeatures Features;
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
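+  // The resulting string is SubtargetFeatures' comma-separated "+attr" form,
+  // e.g. something like "+sse2,+cmov" for an x86 host.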
+
+ // Allocate a target...
+ TargetMachine *Target =
+ TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr);
+ assert(Target && "Could not allocate target machine!");
+ return Target;
+}
diff --git a/contrib/llvm/lib/Linker/LinkArchives.cpp b/contrib/llvm/lib/Linker/LinkArchives.cpp
new file mode 100644
index 000000000000..2c4ed7fdc17a
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkArchives.cpp
@@ -0,0 +1,198 @@
+//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Config/config.h"
+#include <memory>
+#include <set>
+using namespace llvm;
+
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved to
+/// each other but aren't currently (thus we need to treat it as resolved).
+///
+/// Inputs:
+/// M - The module in which to find undefined symbols.
+///
+/// Outputs:
+/// UndefinedSymbols - A set of C++ strings containing the name of all
+/// undefined symbols.
+///
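+/// For example, a module that declares printf but never defines it (and
+/// that lacks a main) would come back with both "printf" and "main" in
+/// UndefinedSymbols.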
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+ std::set<std::string> DefinedSymbols;
+ UndefinedSymbols.clear();
+
+ // If the program doesn't define a main, try pulling one in from a .a file.
+ // This is needed for programs where the main function is defined in an
+  // archive, such as f2c'd programs.
+ Function *Main = M->getFunction("main");
+ if (Main == 0 || Main->isDeclaration())
+ UndefinedSymbols.insert("main");
+
+ for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+ if (I->hasName()) {
+ if (I->isDeclaration())
+ UndefinedSymbols.insert(I->getName());
+ else if (!I->hasLocalLinkage()) {
+ assert(!I->hasDLLImportLinkage()
+ && "Found dllimported non-external symbol!");
+ DefinedSymbols.insert(I->getName());
+ }
+ }
+
+ for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ if (I->hasName()) {
+ if (I->isDeclaration())
+ UndefinedSymbols.insert(I->getName());
+ else if (!I->hasLocalLinkage()) {
+ assert(!I->hasDLLImportLinkage()
+ && "Found dllimported non-external symbol!");
+ DefinedSymbols.insert(I->getName());
+ }
+ }
+
+ for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ if (I->hasName())
+ DefinedSymbols.insert(I->getName());
+
+ // Prune out any defined symbols from the undefined symbols set...
+ for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
+ I != UndefinedSymbols.end(); )
+ if (DefinedSymbols.count(*I))
+ UndefinedSymbols.erase(I++); // This symbol really is defined!
+ else
+ ++I; // Keep this symbol in the undefined symbols list
+}
+
+/// LinkInArchive - opens an archive library and links in all objects that
+/// provide symbols that are currently undefined.
+///
+/// Inputs:
+/// Filename - The pathname of the archive.
+///
+/// Return Value:
+/// TRUE - An error occurred.
+/// FALSE - No errors.
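+///
+/// The implementation loops: it asks the archive for modules defining the
+/// currently undefined symbols, links those modules in, recomputes the
+/// undefined set, and stops once a pass resolves nothing new.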
+bool
+Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
+ // Make sure this is an archive file we're dealing with
+ if (!Filename.isArchive())
+ return error("File '" + Filename.str() + "' is not an archive.");
+
+ // Open the archive file
+ verbose("Linking archive file '" + Filename.str() + "'");
+
+ // Find all of the symbols currently undefined in the bitcode program.
+ // If all the symbols are defined, the program is complete, and there is
+ // no reason to link in any archive files.
+ std::set<std::string> UndefinedSymbols;
+ GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+ if (UndefinedSymbols.empty()) {
+ verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
+ return false; // No need to link anything in!
+ }
+
+ std::string ErrMsg;
+ std::auto_ptr<Archive> AutoArch (
+ Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg));
+
+ Archive* arch = AutoArch.get();
+
+ if (!arch)
+ return error("Cannot read archive '" + Filename.str() +
+ "': " + ErrMsg);
+ if (!arch->isBitcodeArchive()) {
+ is_native = true;
+ return false;
+ }
+ is_native = false;
+
+ // Save a set of symbols that are not defined by the archive. Since we're
+ // entering a loop, there's no point searching for these multiple times. This
+ // variable is used to "set_subtract" from the set of undefined symbols.
+ std::set<std::string> NotDefinedByArchive;
+
+ // Save the current set of undefined symbols, because we may have to make
+ // multiple passes over the archive:
+ std::set<std::string> CurrentlyUndefinedSymbols;
+
+ do {
+ CurrentlyUndefinedSymbols = UndefinedSymbols;
+
+ // Find the modules we need to link into the target module. Note that arch
+ // keeps ownership of these modules and may return the same Module* from a
+ // subsequent call.
+ std::set<Module*> Modules;
+ if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
+ return error("Cannot find symbols in '" + Filename.str() +
+ "': " + ErrMsg);
+
+ // If we didn't find any more modules to link this time, we are done
+ // searching this archive.
+ if (Modules.empty())
+ break;
+
+ // Any symbols remaining in UndefinedSymbols after
+ // findModulesDefiningSymbols are ones that the archive does not define. So
+ // we add them to the NotDefinedByArchive variable now.
+ NotDefinedByArchive.insert(UndefinedSymbols.begin(),
+ UndefinedSymbols.end());
+
+ // Loop over all the Modules that we got back from the archive
+ for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end();
+ I != E; ++I) {
+
+ // Get the module we must link in.
+ std::string moduleErrorMsg;
+ Module* aModule = *I;
+ if (aModule != NULL) {
+ if (aModule->MaterializeAll(&moduleErrorMsg))
+ return error("Could not load a module: " + moduleErrorMsg);
+
+ verbose(" Linking in module: " + aModule->getModuleIdentifier());
+
+ // Link it in
+ if (LinkInModule(aModule, &moduleErrorMsg))
+ return error("Cannot link in module '" +
+ aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
+ }
+ }
+
+ // Get the undefined symbols from the aggregate module. This recomputes the
+ // symbols we still need after the new modules have been linked in.
+ GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+ // At this point we have two sets of undefined symbols: UndefinedSymbols
+ // which holds the undefined symbols from all the modules, and
+ // NotDefinedByArchive which holds symbols we know the archive doesn't
+ // define. There's no point searching for symbols that we won't find in the
+ // archive so we subtract these sets.
+ set_subtract(UndefinedSymbols, NotDefinedByArchive);
+
+ // If there's no symbols left, no point in continuing to search the
+ // archive.
+ if (UndefinedSymbols.empty())
+ break;
+ } while (CurrentlyUndefinedSymbols != UndefinedSymbols);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Linker/LinkItems.cpp b/contrib/llvm/lib/Linker/LinkItems.cpp
new file mode 100644
index 000000000000..52a0d175a5cd
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkItems.cpp
@@ -0,0 +1,241 @@
+//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+// LinkItems - This function is the main entry point into linking. It takes a
+// list of LinkItems which indicates the order the files should be linked and
+// how each file should be treated (plain file or with library search). The
+// function only links bitcode and produces a result list of items that are
+// native objects.
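+//
+// Each LinkItem is essentially a (name, is-library) pair: library items are
+// resolved by name through LinkInLibrary, while plain items are treated as
+// file paths and handed to LinkInFile.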
+bool
+Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
+ // Clear the NativeItems just in case
+ NativeItems.clear();
+
+ // For each linkage item ...
+ for (ItemList::const_iterator I = Items.begin(), E = Items.end();
+ I != E; ++I) {
+ if (I->second) {
+ // Link in the library suggested.
+ bool is_native = false;
+ if (LinkInLibrary(I->first, is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(*I);
+ } else {
+ // Link in the file suggested
+ bool is_native = false;
+ if (LinkInFile(sys::Path(I->first), is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(*I);
+ }
+ }
+
+ // At this point we have processed all the link items provided to us. Since
+ // we have an aggregated module at this point, the dependent libraries in
+ // that module should also be aggregated with duplicates eliminated. This is
+ // now the time to process the dependent libraries to resolve any remaining
+ // symbols.
+ bool is_native;
+ for (Module::lib_iterator I = Composite->lib_begin(),
+ E = Composite->lib_end(); I != E; ++I) {
+ if(LinkInLibrary(*I, is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(std::make_pair(*I, true));
+ }
+
+ return false;
+}
+
+
+/// LinkInLibrary - links one library into the composite module.
+///
+bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) {
+ is_native = false;
+ // Determine where this library lives.
+ sys::Path Pathname = FindLib(Lib);
+ if (Pathname.isEmpty())
+ return error("Cannot find library '" + Lib.str() + "'");
+
+ // If its an archive, try to link it in
+ std::string Magic;
+ Pathname.getMagicNumber(Magic, 64);
+ switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
+ default: llvm_unreachable("Bad file type identification");
+ case sys::Unknown_FileType:
+ return warning("Supposed library '" + Lib.str() + "' isn't a library.");
+
+ case sys::Bitcode_FileType:
+ // LLVM ".so" file.
+ if (LinkInFile(Pathname, is_native))
+ return true;
+ break;
+
+ case sys::Archive_FileType:
+ if (LinkInArchive(Pathname, is_native))
+ return error("Cannot link archive '" + Pathname.str() + "'");
+ break;
+
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::COFF_FileType:
+ is_native = true;
+ break;
+ }
+ return false;
+}
+
+/// LinkInLibraries - takes the specified library files and links them into the
+/// main bitcode object file.
+///
+/// Inputs:
+/// Libraries - The list of libraries to link into the module.
+///
+/// Return value:
+/// FALSE - No error.
+/// TRUE - Error.
+///
+bool Linker::LinkInLibraries(const std::vector<std::string> &Libraries) {
+
+ // Process the set of libraries we've been provided.
+ bool is_native = false;
+ for (unsigned i = 0; i < Libraries.size(); ++i)
+ if (LinkInLibrary(Libraries[i], is_native))
+ return true;
+
+ // At this point we have processed all the libraries provided to us. Since
+ // we have an aggregated module at this point, the dependent libraries in
+ // that module should also be aggregated with duplicates eliminated. This is
+ // now the time to process the dependent libraries to resolve any remaining
+ // symbols.
+ const Module::LibraryListType& DepLibs = Composite->getLibraries();
+ for (Module::LibraryListType::const_iterator I = DepLibs.begin(),
+ E = DepLibs.end(); I != E; ++I)
+ if (LinkInLibrary(*I, is_native))
+ return true;
+
+ return false;
+}
+
+/// LinkInFile - opens a bitcode file and links in all objects which
+/// provide symbols that are currently undefined.
+///
+/// Inputs:
+/// File - The pathname of the bitcode file.
+///
+/// Outputs:
+/// ErrorMessage - A C++ string detailing what error occurred, if any.
+///
+/// Return Value:
+/// TRUE - An error occurred.
+/// FALSE - No errors.
+///
+bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
+ is_native = false;
+
+ // Check for a file of name "-", which means "read standard input"
+ if (File.str() == "-") {
+ std::auto_ptr<Module> M;
+ OwningPtr<MemoryBuffer> Buffer;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(Buffer))) {
+ if (!Buffer->getBufferSize()) {
+ Error = "standard input is empty";
+ } else {
+ M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error));
+ if (M.get())
+ if (!LinkInModule(M.get(), &Error))
+ return false;
+ }
+ }
+ return error("Cannot link stdin: " + ec.message());
+ }
+
+ // Determine what variety of file it is.
+ std::string Magic;
+ if (!File.getMagicNumber(Magic, 64))
+ return error("Cannot find linker input '" + File.str() + "'");
+
+ switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
+ default: llvm_unreachable("Bad file type identification");
+ case sys::Unknown_FileType:
+ return warning("Ignoring file '" + File.str() +
+                   "' because it does not contain bitcode.");
+
+ case sys::Archive_FileType:
+ // A user may specify an ar archive without -l, perhaps because it
+ // is not installed as a library. Detect that and link the archive.
+ if (LinkInArchive(File, is_native))
+ return true;
+ break;
+
+ case sys::Bitcode_FileType: {
+ verbose("Linking bitcode file '" + File.str() + "'");
+ std::auto_ptr<Module> M(LoadObject(File));
+ if (M.get() == 0)
+ return error("Cannot load file '" + File.str() + "': " + Error);
+ if (LinkInModule(M.get(), &Error))
+ return error("Cannot link file '" + File.str() + "': " + Error);
+
+ verbose("Linked in file '" + File.str() + "'");
+ break;
+ }
+
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::COFF_FileType:
+ is_native = true;
+ break;
+ }
+ return false;
+}
+
+/// LinkInFiles - takes a module and a list of files and links them all together.
+/// It locates the file either in the current directory, as its absolute
+/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH.
+///
+/// Inputs:
+/// Files - A vector of sys::Path indicating the LLVM bitcode filenames
+/// to be linked. The names can refer to a mixture of pure LLVM
+/// bitcode files and archive (ar) formatted files.
+///
+/// Return value:
+/// FALSE - No errors.
+/// TRUE - Some error occurred.
+///
+bool Linker::LinkInFiles(const std::vector<sys::Path> &Files) {
+ bool is_native;
+ for (unsigned i = 0; i < Files.size(); ++i)
+ if (LinkInFile(Files[i], is_native))
+ return true;
+ return false;
+}
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
new file mode 100644
index 000000000000..55aa9bf18887
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -0,0 +1,968 @@
+//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM module linker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TypeMap implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TypeMapTy : public ValueMapTypeRemapper {
+ /// MappedTypes - This is a mapping from a source type to a destination type
+ /// to use.
+ DenseMap<Type*, Type*> MappedTypes;
+
+ /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic,
+ /// we speculatively add types to MappedTypes, but keep track of them here in
+ /// case we need to roll back.
+ SmallVector<Type*, 16> SpeculativeTypes;
+
+ /// DefinitionsToResolve - This is a list of non-opaque structs in the source
+ /// module that are mapped to an opaque struct in the destination module.
+ SmallVector<StructType*, 16> DefinitionsToResolve;
+public:
+
+ /// addTypeMapping - Indicate that the specified type in the destination
+ /// module is conceptually equivalent to the specified type in the source
+ /// module.
+ void addTypeMapping(Type *DstTy, Type *SrcTy);
+
+ /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+ /// module from a type definition in the source module.
+ void linkDefinedTypeBodies();
+
+ /// get - Return the mapped type to use for the specified input type from the
+ /// source module.
+ Type *get(Type *SrcTy);
+
+ FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+
+private:
+ Type *getImpl(Type *T);
+ /// remapType - Implement the ValueMapTypeRemapper interface.
+ Type *remapType(Type *SrcTy) {
+ return get(SrcTy);
+ }
+
+ bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
+};
+}
+
+void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry) return;
+
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return;
+ }
+
+ // Check to see if these types are recursively isomorphic and establish a
+ // mapping between them if so.
+ if (!areTypesIsomorphic(DstTy, SrcTy)) {
+ // Oops, they aren't isomorphic. Just discard this request by rolling out
+ // any speculative mappings we've established.
+ for (unsigned i = 0, e = SpeculativeTypes.size(); i != e; ++i)
+ MappedTypes.erase(SpeculativeTypes[i]);
+ }
+ SpeculativeTypes.clear();
+}
+
+/// areTypesIsomorphic - Recursively walk this pair of types, returning true
+/// if they are isomorphic, false if they are not.
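+///
+/// Recursive types work because SrcTy -> DstTy is recorded speculatively in
+/// MappedTypes before the subelements are visited; e.g. matching
+/// "%a = type { %a*, i32 }" against "%b = type { %b*, i32 }" terminates when
+/// the self-referential pointer hits that speculative entry.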
+bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
+ // Two types with differing kinds are clearly not isomorphic.
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) return false;
+
+ // If we have an entry in the MappedTypes table, then we have our answer.
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry)
+ return Entry == DstTy;
+
+ // Two identical types are clearly isomorphic. Remember this
+ // non-speculatively.
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return true;
+ }
+
+ // Okay, we have two types with identical kinds that we haven't seen before.
+
+ // If this is an opaque struct type, special case it.
+ if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
+ // Mapping an opaque type to any struct, just keep the dest struct.
+ if (SSTy->isOpaque()) {
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+ return true;
+ }
+
+ // Mapping a non-opaque source type to an opaque dest. Keep the dest, but
+ // fill it in later. This doesn't need to be speculative.
+ if (cast<StructType>(DstTy)->isOpaque()) {
+ Entry = DstTy;
+ DefinitionsToResolve.push_back(SSTy);
+ return true;
+ }
+ }
+
+ // If the number of subtypes disagree between the two types, then we fail.
+ if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
+ return false;
+
+ // Fail if any of the extra properties (e.g. array size) of the type disagree.
+ if (isa<IntegerType>(DstTy))
+ return false; // bitwidth disagrees.
+ if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+ if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+ return false;
+ } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+ if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+ return false;
+ } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+ StructType *SSTy = cast<StructType>(SrcTy);
+ if (DSTy->isAnonymous() != SSTy->isAnonymous() ||
+ DSTy->isPacked() != SSTy->isPacked())
+ return false;
+ } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+ if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+ return false;
+ } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+    if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+ return false;
+ }
+
+ // Otherwise, we speculate that these two types will line up and recursively
+ // check the subelements.
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+
+ for (unsigned i = 0, e = SrcTy->getNumContainedTypes(); i != e; ++i)
+ if (!areTypesIsomorphic(DstTy->getContainedType(i),
+ SrcTy->getContainedType(i)))
+ return false;
+
+ // If everything seems to have lined up, then everything is great.
+ return true;
+}
+
+/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+/// module from a type definition in the source module.
+void TypeMapTy::linkDefinedTypeBodies() {
+ SmallVector<Type*, 16> Elements;
+ SmallString<16> TmpName;
+
+ // Note that processing entries in this loop (calling 'get') can add new
+ // entries to the DefinitionsToResolve vector.
+ while (!DefinitionsToResolve.empty()) {
+ StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+ StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+
+ // TypeMap is a many-to-one mapping, if there were multiple types that
+ // provide a body for DstSTy then previous iterations of this loop may have
+ // already handled it. Just ignore this case.
+ if (!DstSTy->isOpaque()) continue;
+ assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
+
+ // Map the body of the source type over to a new body for the dest type.
+ Elements.resize(SrcSTy->getNumElements());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ Elements[i] = getImpl(SrcSTy->getElementType(i));
+
+ DstSTy->setBody(Elements, SrcSTy->isPacked());
+
+    // If DstSTy has no name or has a longer name than SrcSTy, then viciously
+    // steal SrcSTy's name.
+ if (!SrcSTy->hasName()) continue;
+ StringRef SrcName = SrcSTy->getName();
+
+ if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) {
+ TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end());
+ SrcSTy->setName("");
+ DstSTy->setName(TmpName.str());
+ TmpName.clear();
+ }
+ }
+}
+
+
+/// get - Return the mapped type to use for the specified input type from the
+/// source module.
+Type *TypeMapTy::get(Type *Ty) {
+ Type *Result = getImpl(Ty);
+
+ // If this caused a reference to any struct type, resolve it before returning.
+ if (!DefinitionsToResolve.empty())
+ linkDefinedTypeBodies();
+ return Result;
+}
+
+/// getImpl - This is the recursive version of get().
+Type *TypeMapTy::getImpl(Type *Ty) {
+ // If we already have an entry for this type, return it.
+ Type **Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If this is not a named struct type, then just map all of the elements and
+ // then rebuild the type from inside out.
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) {
+    // If there are no element types to map, then the type maps to itself.
+    // This is true for the anonymous {} struct, things like 'float',
+    // integers, etc.
+ if (Ty->getNumContainedTypes() == 0)
+ return *Entry = Ty;
+
+ // Remap all of the elements, keeping track of whether any of them change.
+ bool AnyChange = false;
+ SmallVector<Type*, 4> ElementTypes;
+ ElementTypes.resize(Ty->getNumContainedTypes());
+ for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i) {
+ ElementTypes[i] = getImpl(Ty->getContainedType(i));
+ AnyChange |= ElementTypes[i] != Ty->getContainedType(i);
+ }
+
+ // If we found our type while recursively processing stuff, just use it.
+ Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If all of the element types mapped directly over, then the type is usable
+ // as-is.
+ if (!AnyChange)
+ return *Entry = Ty;
+
+ // Otherwise, rebuild a modified type.
+ switch (Ty->getTypeID()) {
+ default: assert(0 && "unknown derived type to remap");
+ case Type::ArrayTyID:
+ return *Entry = ArrayType::get(ElementTypes[0],
+ cast<ArrayType>(Ty)->getNumElements());
+ case Type::VectorTyID:
+ return *Entry = VectorType::get(ElementTypes[0],
+ cast<VectorType>(Ty)->getNumElements());
+ case Type::PointerTyID:
+ return *Entry = PointerType::get(ElementTypes[0],
+ cast<PointerType>(Ty)->getAddressSpace());
+ case Type::FunctionTyID:
+ return *Entry = FunctionType::get(ElementTypes[0],
+ ArrayRef<Type*>(ElementTypes).slice(1),
+ cast<FunctionType>(Ty)->isVarArg());
+ case Type::StructTyID:
+ // Note that this is only reached for anonymous structs.
+ return *Entry = StructType::get(Ty->getContext(), ElementTypes,
+ cast<StructType>(Ty)->isPacked());
+ }
+ }
+
+ // Otherwise, this is an unmapped named struct. If the struct can be directly
+ // mapped over, just use it as-is. This happens in a case when the linked-in
+ // module has something like:
+ // %T = type {%T*, i32}
+ // @GV = global %T* null
+ // where T does not exist at all in the destination module.
+ //
+ // The other case we watch for is when the type is not in the destination
+  // module, but has to be rebuilt because it refers to something that
+ // is already mapped. For example, if the destination module has:
+ // %A = type { i32 }
+ // and the source module has something like
+ // %A' = type { i32 }
+ // %B = type { %A'* }
+ // @GV = global %B* null
+ // then we want to create a new type: "%B = type { %A*}" and have it take the
+ // pristine "%B" name from the source module.
+ //
+ // To determine which case this is, we have to recursively walk the type graph
+ // speculating that we'll be able to reuse it unmodified. Only if this is
+ // safe would we map the entire thing over. Because this is an optimization,
+ // and is not required for the prettiness of the linked module, we just skip
+ // it and always rebuild a type here.
+ StructType *STy = cast<StructType>(Ty);
+
+ // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque())
+ return *Entry = STy;
+
+ // Otherwise we create a new type and resolve its body later. This will be
+ // resolved by the top level of get().
+ DefinitionsToResolve.push_back(STy);
+ return *Entry = StructType::createNamed(STy->getContext(), "");
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ModuleLinker implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// ModuleLinker - This is an implementation class for the LinkModules
+ /// function, which is the entrypoint for this file.
+ class ModuleLinker {
+ Module *DstM, *SrcM;
+
+ TypeMapTy TypeMap;
+
+ /// ValueMap - Mapping of values from what they used to be in Src, to what
+ /// they are now in DstM. ValueToValueMapTy is a ValueMap, which involves
+ /// some overhead due to the use of Value handles which the Linker doesn't
+ /// actually need, but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
+
+ struct AppendingVarInfo {
+ GlobalVariable *NewGV; // New aggregate global in dest module.
+ Constant *DstInit; // Old initializer from dest module.
+ Constant *SrcInit; // Old initializer from src module.
+ };
+
+ std::vector<AppendingVarInfo> AppendingVars;
+
+ public:
+ std::string ErrorMsg;
+
+ ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { }
+
+ bool run();
+
+ private:
+ /// emitError - Helper method for setting a message and returning an error
+ /// code.
+ bool emitError(const Twine &Message) {
+ ErrorMsg = Message.str();
+ return true;
+ }
+
+ /// getLinkageResult - This analyzes the two global values and determines
+ /// what the result will look like in the destination module.
+ bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+
+ /// getLinkedToGlobal - Given a global in the source module, return the
+ /// global in the destination module that is being linked to, if any.
+ GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+ // If the source has no name it can't link. If it has local linkage,
+ // there is no name match-up going on.
+ if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise see if we have a match in the destination module's symtab.
+ GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+ if (DGV == 0) return 0;
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise, we do in fact link to the destination global.
+ return DGV;
+ }
+
+ void computeTypeMapping();
+
+ bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
+ bool linkGlobalProto(GlobalVariable *SrcGV);
+ bool linkFunctionProto(Function *SrcF);
+ bool linkAliasProto(GlobalAlias *SrcA);
+
+ void linkAppendingVarInit(const AppendingVarInfo &AVI);
+ void linkGlobalInits();
+ void linkFunctionBody(Function *Dst, Function *Src);
+ void linkAliasBodies();
+ void linkNamedMDNodes();
+ };
+}
+
+
+
+/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
+/// in the symbol table. This is good for all clients except for us. Go
+/// through the trouble to force this back.
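+///
+/// For example, if the destination module already holds an unrelated "foo",
+/// a newly created global that asked for "foo" gets a uniqued name instead;
+/// the swap below hands "foo" back to the global we care about and leaves
+/// the conflicting value with a freshly uniqued name.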
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+ // If the global doesn't force its name or if it already has the right name,
+ // there is nothing for us to do.
+ if (GV->hasLocalLinkage() || GV->getName() == Name)
+ return;
+
+ Module *M = GV->getParent();
+
+ // If there is a conflict, rename the conflict.
+ if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
+ GV->takeName(ConflictGV);
+ ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+ assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
+ } else {
+ GV->setName(Name); // Force the name back
+ }
+}
+
+/// CopyGVAttributes - copy additional attributes (those not needed to construct
+/// a GlobalValue) from the SrcGV to the DestGV.
+static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
+ // Use the maximum alignment, rather than just copying the alignment of SrcGV.
+ unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
+ DestGV->copyAttributesFrom(SrcGV);
+ DestGV->setAlignment(Alignment);
+
+ forceRenaming(DestGV, SrcGV->getName());
+}
+
+/// getLinkageResult - This analyzes the two global values and determines what
+/// the result will look like in the destination module. In particular, it
+/// computes the resultant linkage type, computes whether the global in the
+/// source should be copied over to the destination (replacing the existing
+/// one), and computes whether this linkage is an error or not. It also performs
+/// visibility checks: we cannot link together two symbols with different
+/// visibilities.
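+///
+/// Roughly: a definition beats a declaration, a strong definition beats a
+/// weak one, and two strong definitions of the same symbol are reported as
+/// multiply defined.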
+bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT,
+ bool &LinkFromSrc) {
+ assert(Dest && "Must have two globals being queried");
+ assert(!Src->hasLocalLinkage() &&
+ "If Src has internal linkage, Dest shouldn't be set!");
+
+ bool SrcIsDeclaration = Src->isDeclaration();
+ bool DestIsDeclaration = Dest->isDeclaration();
+
+ if (SrcIsDeclaration) {
+    // If Src is external or if both Src & Dest are external, just link the
+    // external globals; we aren't adding anything.
+ if (Src->hasDLLImportLinkage()) {
+ // If one of GVs has DLLImport linkage, result should be dllimport'ed.
+ if (DestIsDeclaration) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ }
+ } else if (Dest->hasExternalWeakLinkage()) {
+ // If the Dest is weak, use the source linkage.
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (DestIsDeclaration && !Dest->hasDLLImportLinkage()) {
+ // If Dest is external but Src is not:
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else if (Src->isWeakForLinker()) {
+ // At this point we know that Dest has LinkOnce, External*, Weak, Common,
+ // or DLL* linkage.
+ if (Dest->hasExternalWeakLinkage() ||
+ Dest->hasAvailableExternallyLinkage() ||
+ (Dest->hasLinkOnceLinkage() &&
+ (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (Dest->isWeakForLinker()) {
+ // At this point we know that Src has External* or DLL* linkage.
+ if (Src->hasExternalWeakLinkage()) {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ } else {
+ LinkFromSrc = true;
+ LT = GlobalValue::ExternalLinkage;
+ }
+ } else {
+ assert((Dest->hasExternalLinkage() || Dest->hasDLLImportLinkage() ||
+ Dest->hasDLLExportLinkage() || Dest->hasExternalWeakLinkage()) &&
+ (Src->hasExternalLinkage() || Src->hasDLLImportLinkage() ||
+ Src->hasDLLExportLinkage() || Src->hasExternalWeakLinkage()) &&
+ "Unexpected linkage type!");
+ return emitError("Linking globals named '" + Src->getName() +
+ "': symbol multiply defined!");
+ }
+
+ // Check visibility
+ if (Src->getVisibility() != Dest->getVisibility() &&
+ !SrcIsDeclaration && !DestIsDeclaration &&
+ !Src->hasAvailableExternallyLinkage() &&
+ !Dest->hasAvailableExternallyLinkage())
+ return emitError("Linking globals named '" + Src->getName() +
+ "': symbols have different visibilities!");
+ return false;
+}
+
+/// computeTypeMapping - Loop over all of the linked values to compute type
+/// mappings. For example, if we link "extern Foo *x" and "Foo *x = NULL", then
+/// we have two struct types 'Foo' but one got renamed when the module was
+/// loaded into the same LLVMContext.
+void ModuleLinker::computeTypeMapping() {
+ // Incorporate globals.
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ GlobalValue *DGV = getLinkedToGlobal(I);
+ if (DGV == 0) continue;
+
+ if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ continue;
+ }
+
+ // Unify the element type of appending arrays.
+ ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+ ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
+ TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+ }
+
+ // Incorporate functions.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
+ if (GlobalValue *DGV = getLinkedToGlobal(I))
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ }
+
+ // Don't bother incorporating aliases, they aren't generally typed well.
+
+ // Now that we have discovered all of the type equivalences, get a body for
+ // any 'opaque' types in the dest module that are now resolved.
+ TypeMap.linkDefinedTypeBodies();
+}
+
+/// linkAppendingVarProto - If there were any appending global variables, link
+/// them together now. Return true on error.
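+///
+/// Appending linkage is what arrays such as @llvm.global_ctors use: the two
+/// arrays are concatenated into a single new global of the combined length.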
+bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+ GlobalVariable *SrcGV) {
+
+ if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+ return emitError("Linking globals named '" + SrcGV->getName() +
+ "': can only link appending global with another appending global!");
+
+ ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+ ArrayType *SrcTy =
+ cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+ Type *EltTy = DstTy->getElementType();
+
+  // Check to see that the two arrays agree on type.
+ if (EltTy != SrcTy->getElementType())
+ return emitError("Appending variables with different element types!");
+ if (DstGV->isConstant() != SrcGV->isConstant())
+ return emitError("Appending variables linked with different const'ness!");
+
+ if (DstGV->getAlignment() != SrcGV->getAlignment())
+ return emitError(
+        "Appending variables with different alignment cannot be linked!");
+
+ if (DstGV->getVisibility() != SrcGV->getVisibility())
+ return emitError(
+        "Appending variables with different visibility cannot be linked!");
+
+ if (DstGV->getSection() != SrcGV->getSection())
+ return emitError(
+        "Appending variables with different section names cannot be linked!");
+
+ uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
+ ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+ // Create the new global variable.
+ GlobalVariable *NG =
+ new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
+ DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV,
+ DstGV->isThreadLocal(),
+ DstGV->getType()->getAddressSpace());
+
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NG, DstGV);
+
+ AppendingVarInfo AVI;
+ AVI.NewGV = NG;
+ AVI.DstInit = DstGV->getInitializer();
+ AVI.SrcInit = SrcGV->getInitializer();
+ AppendingVars.push_back(AVI);
+
+ // Replace any uses of the two global variables with uses of the new
+ // global.
+ ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+ DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+ DstGV->eraseFromParent();
+
+ // Zap the initializer in the source variable so we don't try to link it.
+ SrcGV->setInitializer(0);
+ SrcGV->setLinkage(GlobalValue::ExternalLinkage);
+ return false;
+}
+
+/// linkGlobalProto - Loop through the global variables in the src module and
+/// merge them into the dest module.
+bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
+ GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+ if (DGV) {
+ // Concatenation of appending linkage variables is magic and handled later.
+ if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
+
+ // Determine whether linkage of these two globals follows the source
+ // module's definition or the destination module's definition.
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc))
+ return true;
+
+ // If we're not linking from the source, then keep the definition that we
+ // have.
+ if (!LinkFromSrc) {
+ // Special case for const propagation.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+ if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
+ DGVar->setConstant(true);
+
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
+
+ // Destroy the source global's initializer (and convert it to a prototype)
+ // so that we don't attempt to copy it over when processing global
+ // initializers.
+ SGV->setInitializer(0);
+ SGV->setLinkage(GlobalValue::ExternalLinkage);
+ return false;
+ }
+ }
+
+ // No linking to be performed or linking from the source: simply create an
+ // identical version of the symbol over in the dest module... the
+ // initializer will be filled in later by LinkGlobalInits.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
+ SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+ SGV->getName(), /*insertbefore*/0,
+ SGV->isThreadLocal(),
+ SGV->getType()->getAddressSpace());
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NewDGV, SGV);
+
+ if (DGV) {
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = NewDGV;
+ return false;
+}
+
+/// linkFunctionProto - Link the function in the source module into the
+/// destination module if needed, setting up mapping information.
+bool ModuleLinker::linkFunctionProto(Function *SF) {
+ GlobalValue *DGV = getLinkedToGlobal(SF);
+
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+ return true;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+
+ // Remove the body from the source module so we don't attempt to remap it.
+ SF->deleteBody();
+ return false;
+ }
+ }
+
+ // If there is no linkage to be performed or we are linking from the source,
+ // bring SF over.
+ Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
+ SF->getLinkage(), SF->getName(), DstM);
+ CopyGVAttributes(NewDF, SF);
+
+ if (DGV) {
+ // Any uses of DF need to change to NewDF, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SF] = NewDF;
+ return false;
+}
+
+/// linkAliasProto - Set up prototypes for any aliases that come over from the
+/// source module.
+bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
+ GlobalValue *DGV = getLinkedToGlobal(SGA);
+
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc))
+ return true;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
+
+ // Remove the body from the source module so we don't attempt to remap it.
+ SGA->setAliasee(0);
+ return false;
+ }
+ }
+
+ // If there is no linkage to be performed or we're linking from the source,
+ // bring over SGA.
+ GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
+ SGA->getLinkage(), SGA->getName(),
+ /*aliasee*/0, DstM);
+ CopyGVAttributes(NewDA, SGA);
+
+ if (DGV) {
+ // Any uses of DGV need to change to NewDA, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SGA] = NewDA;
+ return false;
+}
+
+void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
+ // Merge the initializer.
+ SmallVector<Constant*, 16> Elements;
+ if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ Elements.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(AVI.DstInit));
+ ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType());
+ Type *EltTy = DstAT->getElementType();
+ Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy));
+ }
+
+ Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
+ if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ Elements.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(SrcInit));
+ ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType());
+ Type *EltTy = SrcAT->getElementType();
+ Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy));
+ }
+ ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
+ AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
+}
+
+
+// linkGlobalInits - Update the initializers in the Dest module now that all
+// globals that may be referenced are in Dest.
+void ModuleLinker::linkGlobalInits() {
+ // Loop over all of the globals in the src module, mapping them over as we go
+ for (Module::const_global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ if (!I->hasInitializer()) continue; // Only process initialized GV's.
+
+ // Grab destination global variable.
+ GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
+ // Figure out what the initializer looks like in the dest module.
+ DGV->setInitializer(MapValue(I->getInitializer(), ValueMap,
+ RF_None, &TypeMap));
+ }
+}
+
+// linkFunctionBody - Copy the source function over into the dest function and
+// fix up references to values. At this point we know that Dest is an external
+// function, and that Src is not.
+void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
+ assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
+
+ // Go through and convert function arguments over, remembering the mapping.
+ Function::arg_iterator DI = Dst->arg_begin();
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I, ++DI) {
+ DI->setName(I->getName()); // Copy the name over.
+
+ // Add a mapping to our mapping.
+ ValueMap[I] = DI;
+ }
+
+ // Splice the body of the source function into the dest function.
+ Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
+
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions.
+ for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
+
+ // There is no need to map the arguments anymore.
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I)
+ ValueMap.erase(I);
+}
+
+
+void ModuleLinker::linkAliasBodies() {
+ for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
+ I != E; ++I)
+ if (Constant *Aliasee = I->getAliasee()) {
+ GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
+ DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
+ }
+}
+
+/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
+/// module.
+void ModuleLinker::linkNamedMDNodes() {
+ for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
+ E = SrcM->named_metadata_end(); I != E; ++I) {
+ NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap,
+ RF_None, &TypeMap));
+ }
+}
+
+bool ModuleLinker::run() {
+ assert(DstM && "Null Destination module");
+ assert(SrcM && "Null Source Module");
+
+ // Inherit the target data from the source module if the destination module
+ // doesn't have one already.
+ if (DstM->getDataLayout().empty() && !SrcM->getDataLayout().empty())
+ DstM->setDataLayout(SrcM->getDataLayout());
+
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM->setTargetTriple(SrcM->getTargetTriple());
+
+ if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() &&
+ SrcM->getDataLayout() != DstM->getDataLayout())
+ errs() << "WARNING: Linking two modules of different data layouts!\n";
+ if (!SrcM->getTargetTriple().empty() &&
+ DstM->getTargetTriple() != SrcM->getTargetTriple()) {
+ errs() << "WARNING: Linking two modules of different target triples: ";
+ if (!SrcM->getModuleIdentifier().empty())
+ errs() << SrcM->getModuleIdentifier() << ": ";
+ errs() << "'" << SrcM->getTargetTriple() << "' and '"
+ << DstM->getTargetTriple() << "'\n";
+ }
+
+ // Append the module inline asm string.
+ if (!SrcM->getModuleInlineAsm().empty()) {
+ if (DstM->getModuleInlineAsm().empty())
+ DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm());
+ else
+ DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+
+ SrcM->getModuleInlineAsm());
+ }
+
+ // Update the destination module's dependent libraries list with the libraries
+ // from the source module. There's no opportunity for duplicates here as the
+ // Module ensures that duplicate insertions are discarded.
+ for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end();
+ SI != SE; ++SI)
+ DstM->addLibrary(*SI);
+
+ // If the source library's module id is in the dependent library list of the
+ // destination library, remove it since that module is now linked in.
+ StringRef ModuleId = SrcM->getModuleIdentifier();
+ if (!ModuleId.empty())
+ DstM->removeLibrary(sys::path::stem(ModuleId));
+
+
+ // Loop over all of the linked values to compute type mappings.
+ computeTypeMapping();
+
+ // Remap all of the named mdnodes in Src into the DstM module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ linkNamedMDNodes();
+
+ // Insert all of the globals in src into the DstM module... without linking
+ // initializers (which could refer to functions not yet mapped over).
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I)
+ if (linkGlobalProto(I))
+ return true;
+
+ // Link the functions together between the two modules, without doing function
+ // bodies... this just adds external function prototypes to DstM. We do this
+ // so that when we begin processing function bodies, all of the global values
+ // that may be referenced are available in our ValueMap.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
+ if (linkFunctionProto(I))
+ return true;
+
+ // If there were any aliases, link them now.
+ for (Module::alias_iterator I = SrcM->alias_begin(),
+ E = SrcM->alias_end(); I != E; ++I)
+ if (linkAliasProto(I))
+ return true;
+
+ for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
+ linkAppendingVarInit(AppendingVars[i]);
+
+ // Update the initializers in the DstM module now that all globals that may
+ // be referenced are in DstM.
+ linkGlobalInits();
+
+ // Link in the function bodies that are defined in the source module into
+ // DstM.
+ for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
+ if (SF->isDeclaration()) continue; // No body if function is external.
+
+ linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ }
+
+ // Resolve all uses of aliases with aliasees.
+ linkAliasBodies();
+
+ // Now that all of the types from the source are used, resolve any structs
+ // copied over to the dest that didn't exist there.
+ TypeMap.linkDefinedTypeBodies();
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// LinkModules entrypoint.
+//===----------------------------------------------------------------------===//
+
+// LinkModules - This function links two modules together, with the resulting
+// Dest module modified to be the composite of the two input modules. If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem. Upon failure, the Dest module could be in a modified state, and
+// shouldn't be relied on to be consistent.
+bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+ ModuleLinker TheLinker(Dest, Src);
+ if (TheLinker.run()) {
+ if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp
new file mode 100644
index 000000000000..fba91da5ddd1
--- /dev/null
+++ b/contrib/llvm/lib/Linker/Linker.cpp
@@ -0,0 +1,174 @@
+//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic Linker functionality that all usages will need.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+Linker::Linker(StringRef progname, StringRef modname,
+ LLVMContext& C, unsigned flags):
+ Context(C),
+ Composite(new Module(modname, C)),
+ LibPaths(),
+ Flags(flags),
+ Error(),
+ ProgramName(progname) { }
+
+Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
+ Context(aModule->getContext()),
+ Composite(aModule),
+ LibPaths(),
+ Flags(flags),
+ Error(),
+ ProgramName(progname) { }
+
+Linker::~Linker() {
+ delete Composite;
+}
+
+bool
+Linker::error(StringRef message) {
+ Error = message;
+ if (!(Flags&QuietErrors))
+ errs() << ProgramName << ": error: " << message << "\n";
+ return true;
+}
+
+bool
+Linker::warning(StringRef message) {
+ Error = message;
+ if (!(Flags&QuietWarnings))
+ errs() << ProgramName << ": warning: " << message << "\n";
+ return false;
+}
+
+void
+Linker::verbose(StringRef message) {
+ if (Flags&Verbose)
+ errs() << " " << message << "\n";
+}
+
+void
+Linker::addPath(const sys::Path& path) {
+ LibPaths.push_back(path);
+}
+
+void
+Linker::addPaths(const std::vector<std::string>& paths) {
+ for (unsigned i = 0, e = paths.size(); i != e; ++i)
+ LibPaths.push_back(sys::Path(paths[i]));
+}
+
+void
+Linker::addSystemPaths() {
+ sys::Path::GetBitcodeLibraryPaths(LibPaths);
+ LibPaths.insert(LibPaths.begin(),sys::Path("./"));
+}
+
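+// releaseModule - Hand ownership of the composite module back to the caller
+// and reset the linker's state.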
+Module*
+Linker::releaseModule() {
+ Module* result = Composite;
+ LibPaths.clear();
+ Error.clear();
+ Composite = 0;
+ Flags = 0;
+ return result;
+}
+
+// LoadObject - Read in and parse the bitcode file named by FN and return the
+// module it contains (wrapped in an auto_ptr), or auto_ptr<Module>() and set
+// Error if an error occurs.
+std::auto_ptr<Module>
+Linker::LoadObject(const sys::Path &FN) {
+ std::string ParseErrorMessage;
+ Module *Result = 0;
+
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer))
+ ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": "
+ + ec.message();
+ else
+ Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
+ if (Result)
+ return std::auto_ptr<Module>(Result);
+ Error = "Bitcode file '" + FN.str() + "' could not be loaded";
+ if (ParseErrorMessage.size())
+ Error += ": " + ParseErrorMessage;
+ return std::auto_ptr<Module>();
+}
+
+// IsLibrary - Determine if "Name" is a library in "Directory". Return
+// a non-empty sys::Path if it is found, an empty one otherwise.
+static inline sys::Path IsLibrary(StringRef Name,
+ const sys::Path &Directory) {
+
+ sys::Path FullPath(Directory);
+
+ // Try the libX.a form
+ FullPath.appendComponent(("lib" + Name).str());
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+
+ // Try the libX.bca form
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("bca");
+ if (FullPath.isArchive())
+ return FullPath;
+
+ // Try the libX.so (or .dylib) form
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix(sys::Path::GetDLLSuffix());
+ if (FullPath.isDynamicLibrary()) // Native shared library?
+ return FullPath;
+ if (FullPath.isBitcodeFile()) // .so file containing bitcode?
+ return FullPath;
+
+ // Not found .. fall through
+
+ // Indicate that the library was not found in the directory.
+ FullPath.clear();
+ return FullPath;
+}
+
+/// FindLib - Try to convert Filename into the name of a file that we can open,
+/// if it does not already name a file we can open, by first trying to open
+/// Filename, then libFilename.[suffix] for each of a set of several common
+/// library suffixes, in each of the directories in LibPaths. Returns an empty
+/// Path if no matching file can be found.
+///
+sys::Path
+Linker::FindLib(StringRef Filename) {
+ // Determine if the pathname can be found as it stands.
+ sys::Path FilePath(Filename);
+ if (FilePath.canRead() &&
+ (FilePath.isArchive() || FilePath.isDynamicLibrary()))
+ return FilePath;
+
+ // Iterate over the directories in Paths to see if we can find the library
+ // there.
+ for (unsigned Index = 0; Index != LibPaths.size(); ++Index) {
+ sys::Path Directory(LibPaths[Index]);
+ sys::Path FullPath = IsLibrary(Filename, Directory);
+ if (!FullPath.isEmpty())
+ return FullPath;
+ }
+ return sys::Path();
+}
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
new file mode 100644
index 000000000000..59e1b8eb8105
--- /dev/null
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -0,0 +1,1730 @@
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ELF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFObjectWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/Statistic.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+#include "../Target/ARM/ARMFixupKinds.h"
+
+#include <vector>
+using namespace llvm;
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "reloc-info"
+
+bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI =
+ Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
+ switch (Variant) {
+ default:
+ return false;
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_PLT:
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_GOTOFF:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_TLSLD:
+ return true;
+ }
+}
+
+ELFObjectWriter::~ELFObjectWriter()
+{}
+
+// Emit the ELF header.
+void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
+ // ELF Header
+ // ----------
+ //
+ // Note
+ // ----
+ // The WriteWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter.
+
+ Write8(0x7f); // e_ident[EI_MAG0]
+ Write8('E'); // e_ident[EI_MAG1]
+ Write8('L'); // e_ident[EI_MAG2]
+ Write8('F'); // e_ident[EI_MAG3]
+
+ Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+
+ // e_ident[EI_DATA]
+ Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+
+ Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ // e_ident[EI_OSABI]
+ switch (TargetObjectWriter->getOSType()) {
+ case Triple::FreeBSD: Write8(ELF::ELFOSABI_FREEBSD); break;
+ case Triple::Linux: Write8(ELF::ELFOSABI_LINUX); break;
+ default: Write8(ELF::ELFOSABI_NONE); break;
+ }
+ Write8(0); // e_ident[EI_ABIVERSION]
+
+ WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
+
+ Write16(ELF::ET_REL); // e_type
+
+ Write16(TargetObjectWriter->getEMachine()); // e_machine = target
+
+ Write32(ELF::EV_CURRENT); // e_version
+ WriteWord(0); // e_entry, no entry point in .o file
+ WriteWord(0); // e_phoff, no program header for .o
+ WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+
+ // e_flags = whatever the target wants
+ WriteEFlags();
+
+ // e_ehsize = ELF header size
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+
+ Write16(0); // e_phentsize = prog header entry size
+ Write16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+
+ // e_shnum = # of section header ents
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(0);
+ else
+ Write16(NumberOfSections);
+
+ // e_shstrndx = Section # of '.shstrtab'
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(ELF::SHN_XINDEX);
+ else
+ Write16(ShstrtabIndex);
+}
+
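+// WriteSymbolEntry - Emit a single symbol table entry in Elf32_Sym or
+// Elf64_Sym layout, routing section indices >= SHN_LORESERVE through the
+// .symtab_shndx fragment when one is provided.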
+void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name,
+ uint8_t info, uint64_t value,
+ uint64_t size, uint8_t other,
+ uint32_t shndx,
+ bool Reserved) {
+ if (ShndxF) {
+ if (shndx >= ELF::SHN_LORESERVE && !Reserved)
+ String32(*ShndxF, shndx);
+ else
+ String32(*ShndxF, 0);
+ }
+
+ uint16_t Index = (shndx >= ELF::SHN_LORESERVE && !Reserved) ?
+ uint16_t(ELF::SHN_XINDEX) : shndx;
+
+ if (is64Bit()) {
+ String32(*SymtabF, name); // st_name
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ String64(*SymtabF, value); // st_value
+ String64(*SymtabF, size); // st_size
+ } else {
+ String32(*SymtabF, name); // st_name
+ String32(*SymtabF, value); // st_value
+ String32(*SymtabF, size); // st_size
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ }
+}
+
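+// SymbolValue - Compute the st_value to emit for a symbol: common symbols use
+// their alignment, absolute variables their evaluated constant, and symbols
+// with a fragment their offset within the section (plus one for Thumb
+// functions); everything else gets zero.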
+uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
+ const MCAsmLayout &Layout) {
+ if (Data.isCommon() && Data.isExternal())
+ return Data.getCommonAlignment();
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.isAbsolute() && Symbol.isVariable()) {
+ if (const MCExpr *Value = Symbol.getVariableValue()) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, Layout))
+ return (uint64_t)IntValue;
+ }
+ }
+
+ if (!Symbol.isInSection())
+ return 0;
+
+
+ if (Data.getFragment()) {
+ if (Data.getFlags() & ELF_Other_ThumbFunc)
+ return Layout.getSymbolOffset(&Data)+1;
+ else
+ return Layout.getSymbolOffset(&Data);
+ }
+
+ return 0;
+}
+
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // The presence of symbol versions causes undefined symbols and
+ // versions declared with @@@ to be renamed.
+
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Alias = it->getSymbol();
+ const MCSymbol &Symbol = Alias.AliasedSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ // Not an alias.
+ if (&Symbol == &Alias)
+ continue;
+
+ StringRef AliasName = Alias.getName();
+ size_t Pos = AliasName.find('@');
+ if (Pos == StringRef::npos)
+ continue;
+
+ // Aliases defined with .symver copy the binding from the symbol they alias.
+ // This is the first place we are able to copy this information.
+ it->setExternal(SD.isExternal());
+ MCELF::SetBinding(*it, MCELF::GetBinding(SD));
+
+ StringRef Rest = AliasName.substr(Pos);
+ if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
+ continue;
+
+ // FIXME: produce a better error message.
+ if (Symbol.isUndefined() && Rest.startswith("@@") &&
+ !Rest.startswith("@@@"))
+ report_fatal_error("A @@ version cannot be undefined");
+
+ Renames.insert(std::make_pair(&Symbol, &Alias));
+ }
+}
+
+void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &OrigData = *MSD.SymbolData;
+ MCSymbolData &Data =
+ Layout.getAssembler().getSymbolData(OrigData.getSymbol().AliasedSymbol());
+
+ bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
+ Data.getSymbol().isVariable();
+
+ uint8_t Binding = MCELF::GetBinding(OrigData);
+ uint8_t Visibility = MCELF::GetVisibility(OrigData);
+ uint8_t Type = MCELF::GetType(Data);
+
+ uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
+ uint8_t Other = Visibility;
+
+ uint64_t Value = SymbolValue(Data, Layout);
+ uint64_t Size = 0;
+
+ assert(!(Data.isCommon() && !Data.isExternal()));
+
+ const MCExpr *ESize = Data.getSize();
+ if (ESize) {
+ int64_t Res;
+ if (!ESize->EvaluateAsAbsolute(Res, Layout))
+ report_fatal_error("Size expression must be absolute.");
+ Size = Res;
+ }
+
+ // Write out the symbol table entry
+ WriteSymbolEntry(SymtabF, ShndxF, MSD.StringIndex, Info, Value,
+ Size, Other, MSD.SectionIndex, IsReserved);
+}
+
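+// WriteSymbolTable - Emit the symbol table: the null entry first, then local
+// symbols, one STT_SECTION entry per regular section, and finally the
+// external and undefined symbols.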
+void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap) {
+ // The string table must be emitted first because we need the index
+ // into the string table for all the symbol names.
+ assert(StringTable.size() && "Missing string table");
+
+ // FIXME: Make sure the start of the symbol table is aligned.
+
+ // The first entry is the undefined symbol entry.
+ WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
+
+ // Write the symbol table entries.
+ LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = LocalSymbolData[i];
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ }
+
+ // Write out a symbol table entry for each regular section.
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
+ ++i) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(i->getSection());
+ if (Section.getType() == ELF::SHT_RELA ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_STRTAB ||
+ Section.getType() == ELF::SHT_SYMTAB)
+ continue;
+ WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
+ ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = ExternalSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ assert(((Data.getFlags() & ELF_STB_Global) ||
+ (Data.getFlags() & ELF_STB_Weak)) &&
+ "External symbol requires STB_GLOBAL or STB_WEAK flag");
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = UndefinedSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+ LastLocalSymbolIndex++;
+ }
+}
+
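+// SymbolToReloc - Pick the symbol a relocation should refer to: either the
+// (possibly renamed) symbol itself, or defer to the target-specific
+// ExplicitRelSym hook, which may choose a section-relative relocation instead.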
+const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ const MCSymbol *Renamed = Renames.lookup(&Symbol);
+ const MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ if (ASymbol.isUndefined()) {
+ if (Renamed)
+ return Renamed;
+ return &ASymbol;
+ }
+
+ if (SD.isExternal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(ASymbol.getSection());
+ const SectionKind secKind = Section.getKind();
+
+ if (secKind.isBSS())
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+
+ if (secKind.isThreadLocal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ const MCSectionELF &Sec2 =
+ static_cast<const MCSectionELF&>(F.getParent()->getSection());
+
+ if (&Sec2 != &Section &&
+ (Kind == MCSymbolRefExpr::VK_PLT ||
+ Kind == MCSymbolRefExpr::VK_GOTPCREL ||
+ Kind == MCSymbolRefExpr::VK_GOTOFF)) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ if (Section.getFlags() & ELF::SHF_MERGE) {
+ if (Target.getConstant() == 0)
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+
+}
+
+
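+// RecordRelocation - Translate a fixup into an ELFRelocationEntry: work out
+// the symbol (or section) to relocate against, fold any A-B difference into
+// the value, and queue the entry on the fragment's parent section.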
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ int64_t Addend = 0;
+ int Index = 0;
+ int64_t Value = Target.getConstant();
+ const MCSymbol *RelocSymbol = NULL;
+
+ bool IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ if (!Target.isAbsolute()) {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ RelocSymbol = SymbolToReloc(Asm, Target, *Fragment, Fixup, IsPCRel);
+
+ if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+ const MCSymbol &SymbolB = RefB->getSymbol();
+ MCSymbolData &SDB = Asm.getSymbolData(SymbolB);
+ IsPCRel = true;
+
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&SDB);
+
+ // Offset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ Value += b - a;
+ }
+
+ if (!RelocSymbol) {
+ MCSymbolData &SD = Asm.getSymbolData(ASymbol);
+ MCFragment *F = SD.getFragment();
+
+ Index = F->getParent()->getOrdinal() + 1;
+
+ // Offset of the symbol in the section
+ Value += Layout.getSymbolOffset(&SD);
+ } else {
+ if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref)
+ WeakrefUsedInReloc.insert(RelocSymbol);
+ else
+ UsedInReloc.insert(RelocSymbol);
+ Index = -1;
+ }
+ Addend = Value;
+ // 64-bit targets carry the addend in the relocation entry, so zero the
+ // in-place value; 32-bit targets (e.g. i386) keep the addend in place.
+ if (is64Bit())
+ Value = 0;
+ }
+
+ FixedValue = Value;
+ unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
+ (RelocSymbol != 0), Addend);
+
+ uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
+ Fixup.getOffset();
+
+ if (!hasRelocationAddend())
+ Addend = 0;
+ ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
+
+
+uint64_t
+ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+ return SD.getIndex();
+}
+
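+// isInSymtab - Decide whether a symbol should be emitted into the symbol
+// table, based on how it is used, whether it was renamed, and its binding
+// and visibility.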
+bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
+ const MCSymbolData &Data,
+ bool Used, bool Renamed) {
+ if (Data.getFlags() & ELF_Other_Weakref)
+ return false;
+
+ if (Used)
+ return true;
+
+ if (Renamed)
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
+ return true;
+
+ const MCSymbol &A = Symbol.AliasedSymbol();
+ if (Symbol.isVariable() && !A.isVariable() && A.isUndefined())
+ return false;
+
+ bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL;
+ if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
+ return false;
+
+ if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined())
+ return false;
+
+ if (Symbol.isTemporary())
+ return false;
+
+ return true;
+}
+
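+// isLocal - Decide whether a symbol should be emitted with local binding.
+// External symbols are never local; undefined references are local only when
+// they are group signatures not otherwise used in a relocation.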
+bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc) {
+ if (Data.isExternal())
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) {
+ if (isSignature && !isUsedInReloc)
+ return true;
+
+ return false;
+ }
+
+ return true;
+}
+
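+// ComputeIndexMap - Assign section header indices: SHT_GROUP sections come
+// first, followed by the remaining sections, each immediately followed by its
+// relocation section (if any).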
+void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_RELA)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ const MCSectionELF *RelSection = RelMap.lookup(&Section);
+ if (RelSection)
+ SectionIndexMap[RelSection] = Index++;
+ }
+}
+
+void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections) {
+ // FIXME: Is this the correct place to do this?
+ // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
+ if (NeedsGOT) {
+ llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
+ MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
+ Data.setExternal(true);
+ MCELF::SetBinding(Data, ELF::STB_GLOBAL);
+ }
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // FIXME: We could optimize suffixes in strtab in the same way we
+ // optimize them in shstrtab.
+
+ // Add the data for the symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ bool Used = UsedInReloc.count(&Symbol);
+ bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
+ bool isSignature = RevGroupMap.count(&Symbol);
+
+ if (!isInSymtab(Asm, *it,
+ Used || WeakrefUsed || isSignature,
+ Renames.count(&Symbol)))
+ continue;
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ // Undefined symbols are global, but this is the first place we are able
+ // to set that binding.
+ bool Local = isLocal(*it, isSignature, Used);
+ if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) {
+ MCSymbolData &SD = Asm.getSymbolData(RefSymbol);
+ MCELF::SetBinding(*it, ELF::STB_GLOBAL);
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ }
+
+ if (RefSymbol.isUndefined() && !Used && WeakrefUsed)
+ MCELF::SetBinding(*it, ELF::STB_WEAK);
+
+ if (it->isCommon()) {
+ assert(!Local);
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ } else if (Symbol.isAbsolute() || RefSymbol.isVariable()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ } else if (RefSymbol.isUndefined()) {
+ if (isSignature && !Used)
+ MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]);
+ else
+ MSD.SectionIndex = ELF::SHN_UNDEF;
+ } else {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(RefSymbol.getSection());
+ MSD.SectionIndex = SectionIndexMap.lookup(&Section);
+ if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+ NeedsSymtabShndx = true;
+ assert(MSD.SectionIndex && "Invalid section index!");
+ }
+
+ // The @@@ in symbol version is replaced with @ in undefined symbols and
+ // @@ in defined ones.
+ StringRef Name = Symbol.getName();
+ SmallString<32> Buf;
+
+ size_t Pos = Name.find("@@@");
+ if (Pos != StringRef::npos) {
+ Buf += Name.substr(0, Pos);
+ unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
+ Buf += Name.substr(Pos + Skip);
+ Name = Buf;
+ }
+
+ uint64_t &Entry = StringIndexMap[Name];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Name;
+ StringTable += '\x00';
+ }
+ MSD.StringIndex = Entry;
+ if (MSD.SectionIndex == ELF::SHN_UNDEF)
+ UndefinedSymbolData.push_back(MSD);
+ else if (Local)
+ LocalSymbolData.push_back(MSD);
+ else
+ ExternalSymbolData.push_back(MSD);
+ }
+
+ // Symbols are required to be in lexicographic order.
+ array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
+ array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices. Local symbols must come before all other
+ // symbols with non-local bindings.
+ unsigned Index = 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+
+ Index += NumRegularSections;
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+}
+
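+// CreateRelocationSections - Create a .rel or .rela section for every section
+// that has relocations recorded against it, and remember the pairing in
+// RelMap.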
+void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ RelMapTy &RelMap) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ if (Relocations[&SD].empty())
+ continue;
+
+ MCContext &Ctx = Asm.getContext();
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const StringRef SectionName = Section.getSectionName();
+ std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
+
+ unsigned EntrySize;
+ if (hasRelocationAddend())
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+
+ const MCSectionELF *RelaSection =
+ Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
+ ELF::SHT_RELA : ELF::SHT_REL, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
+ RelMap[&Section] = RelaSection;
+ Asm.getOrCreateSectionData(*RelaSection);
+ }
+}
+
+void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const MCSectionELF *RelaSection = RelMap.lookup(&Section);
+ if (!RelaSection)
+ continue;
+ MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
+ RelaSD.setAlignment(is64Bit() ? 8 : 4);
+
+ MCDataFragment *F = new MCDataFragment(&RelaSD);
+ WriteRelocationsFragment(Asm, F, &*it);
+ }
+}
+
+void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
+ Write32(Name); // sh_name: index into string table
+ Write32(Type); // sh_type
+ WriteWord(Flags); // sh_flags
+ WriteWord(Address); // sh_addr
+ WriteWord(Offset); // sh_offset
+ WriteWord(Size); // sh_size
+ Write32(Link); // sh_link
+ Write32(Info); // sh_info
+ WriteWord(Alignment); // sh_addralign
+ WriteWord(EntrySize); // sh_entsize
+}
+
+void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
+ // sort by the r_offset just like gnu as does
+ array_pod_sort(Relocs.begin(), Relocs.end());
+
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ ELFRelocationEntry entry = Relocs[e - i - 1];
+
+ if (entry.Index < 0)
+ entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
+ else if (entry.Index)
+ entry.Index += LocalSymbolData.size();
+ if (is64Bit()) {
+ String64(*F, entry.r_offset);
+
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(entry.Index, entry.Type);
+ String64(*F, ERE64.r_info);
+
+ if (hasRelocationAddend())
+ String64(*F, entry.r_addend);
+ } else {
+ String32(*F, entry.r_offset);
+
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(entry.Index, entry.Type);
+ String32(*F, ERE32.r_info);
+
+ if (hasRelocationAddend())
+ String32(*F, entry.r_addend);
+ }
+ }
+}
+
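+// compareBySuffix - Order sections so that names sharing a suffix end up next
+// to each other, letting .shstrtab reuse the tail of a previous entry (e.g.
+// ".text" reuses the tail of ".rel.text").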
+static int compareBySuffix(const void *a, const void *b) {
+ const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a);
+ const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b);
+ const StringRef &NameA = secA->getSectionName();
+ const StringRef &NameB = secB->getSectionName();
+ const unsigned sizeA = NameA.size();
+ const unsigned sizeB = NameB.size();
+ const unsigned len = std::min(sizeA, sizeB);
+ for (unsigned int i = 0; i < len; ++i) {
+ char ca = NameA[sizeA - i - 1];
+ char cb = NameB[sizeB - i - 1];
+ if (ca != cb)
+ return cb - ca;
+ }
+
+ return sizeB - sizeA;
+}
+
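+// CreateMetadataSections - Create the .shstrtab, .symtab (plus .symtab_shndx
+// when needed) and .strtab sections, fill in their contents, and record their
+// section indices.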
+void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ MCContext &Ctx = Asm.getContext();
+ MCDataFragment *F;
+
+ unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+
+ // We construct .shstrtab, .symtab and .strtab in this order to match gnu as.
+ const MCSectionELF *ShstrtabSection =
+ Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+
+ const MCSectionELF *SymtabSection =
+ Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
+ MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+ SymtabSD.setAlignment(is64Bit() ? 8 : 4);
+
+ MCSectionData *SymtabShndxSD = NULL;
+
+ if (NeedsSymtabShndx) {
+ const MCSectionELF *SymtabShndxSection =
+ Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0,
+ SectionKind::getReadOnly(), 4, "");
+ SymtabShndxSD = &Asm.getOrCreateSectionData(*SymtabShndxSection);
+ SymtabShndxSD->setAlignment(4);
+ }
+
+ const MCSectionELF *StrtabSection;
+ StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
+ StrtabSD.setAlignment(1);
+
+ ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+ ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection);
+ SymbolTableIndex = SectionIndexMap.lookup(SymtabSection);
+ StringTableIndex = SectionIndexMap.lookup(StrtabSection);
+
+ // Symbol table
+ F = new MCDataFragment(&SymtabSD);
+ MCDataFragment *ShndxF = NULL;
+ if (NeedsSymtabShndx) {
+ ShndxF = new MCDataFragment(SymtabShndxSD);
+ }
+ WriteSymbolTable(F, ShndxF, Asm, Layout, SectionIndexMap);
+
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
+
+ F = new MCDataFragment(&ShstrtabSD);
+
+ std::vector<const MCSectionELF*> Sections;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ Sections.push_back(&Section);
+ }
+ array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);
+
+ // Section header string table.
+ //
+ // The first entry of a string table holds a null character so skip
+ // section 0.
+ uint64_t Index = 1;
+ F->getContents() += '\x00';
+
+ for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
+ const MCSectionELF &Section = *Sections[I];
+
+ StringRef Name = Section.getSectionName();
+ if (I != 0) {
+ StringRef PreviousName = Sections[I - 1]->getSectionName();
+ if (PreviousName.endswith(Name)) {
+ SectionStringTableIndex[&Section] = Index - Name.size() - 1;
+ continue;
+ }
+ }
+ // Remember the index into the string table so we can write it
+ // into the sh_name field of the section header table.
+ SectionStringTableIndex[&Section] = Index;
+
+ Index += Name.size() + 1;
+ F->getContents() += Name;
+ F->getContents() += '\x00';
+ }
+}
+
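+// CreateIndexedSections - Create the .note.GNU-stack section if requested and
+// build the SHT_GROUP sections: one group per signature symbol, each listing
+// the indices of its member sections.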
+void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ // Create the .note.GNU-stack section if needed.
+ MCContext &Ctx = Asm.getContext();
+ if (Asm.getNoExecStack()) {
+ const MCSectionELF *GnuStackSection =
+ Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ Asm.getOrCreateSectionData(*GnuStackSection);
+ }
+
+ // Build the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+
+ const MCSymbol *SignatureSymbol = Section.getGroup();
+ Asm.getOrCreateSymbolData(*SignatureSymbol);
+ const MCSectionELF *&Group = RevGroupMap[SignatureSymbol];
+ if (!Group) {
+ Group = Ctx.CreateELFGroupSection();
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ Data.setAlignment(4);
+ MCDataFragment *F = new MCDataFragment(&Data);
+ String32(*F, ELF::GRP_COMDAT);
+ }
+ GroupMap[Group] = SignatureSymbol;
+ }
+
+ ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+ // Add sections to the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+ const MCSectionELF *Group = RevGroupMap[Section.getGroup()];
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ // FIXME: we could use the previous fragment
+ MCDataFragment *F = new MCDataFragment(&Data);
+ unsigned Index = SectionIndexMap.lookup(&Section);
+ String32(*F, Index);
+ }
+}
+
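+// WriteSection - Emit one section header entry, computing sh_link and sh_info
+// from the section type.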
+void ELFObjectWriter::WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size,
+ uint64_t Alignment,
+ const MCSectionELF &Section) {
+ uint64_t sh_link = 0;
+ uint64_t sh_info = 0;
+
+ switch(Section.getType()) {
+ case ELF::SHT_DYNAMIC:
+ sh_link = SectionStringTableIndex[&Section];
+ sh_info = 0;
+ break;
+
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ const MCSectionELF *SymtabSection;
+ const MCSectionELF *InfoSection;
+ SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB,
+ 0,
+ SectionKind::getReadOnly());
+ sh_link = SectionIndexMap.lookup(SymtabSection);
+ assert(sh_link && ".symtab not found");
+
+ // Remove ".rel" and ".rela" prefixes.
+ unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+ StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+ InfoSection = Asm.getContext().getELFSection(SectionName,
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ sh_info = SectionIndexMap.lookup(InfoSection);
+ break;
+ }
+
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ sh_link = StringTableIndex;
+ sh_info = LastLocalSymbolIndex;
+ break;
+
+ case ELF::SHT_SYMTAB_SHNDX:
+ sh_link = SymbolTableIndex;
+ break;
+
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_STRTAB:
+ case ELF::SHT_NOBITS:
+ case ELF::SHT_NOTE:
+ case ELF::SHT_NULL:
+ case ELF::SHT_ARM_ATTRIBUTES:
+ case ELF::SHT_INIT_ARRAY:
+ case ELF::SHT_FINI_ARRAY:
+ case ELF::SHT_PREINIT_ARRAY:
+ case ELF::SHT_X86_64_UNWIND:
+ // Nothing to do.
+ break;
+
+ case ELF::SHT_GROUP: {
+ sh_link = SymbolTableIndex;
+ sh_info = GroupSymbolIndex;
+ break;
+ }
+
+ default:
+ assert(0 && "FIXME: sh_type value not supported!");
+ break;
+ }
+
+ WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
+ Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
+ Alignment, Section.getEntrySize());
+}
+
+bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) {
+ return SD.getOrdinal() == ~UINT32_C(0) &&
+ !SD.getSection().isVirtualSection();
+}
+
+uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) {
+ uint64_t Ret = 0;
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ Ret += cast<MCDataFragment>(F).getContents().size();
+ }
+ return Ret;
+}
+
+uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionFileSize(&SD);
+}
+
+uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionAddressSize(&SD);
+}
+
+void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section) {
+ uint64_t FileOff = OS.tell();
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+ WriteZeros(Padding);
+ FileOff += Padding;
+
+ FileOff += GetSectionFileSize(Layout, SD);
+
+ if (IsELFMetaDataSection(SD)) {
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ WriteBytes(cast<MCDataFragment>(F).getContents().str());
+ }
+ } else {
+ Asm.WriteSectionData(&SD, Layout);
+ }
+}
+
+void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm,
+ const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap) {
+ const unsigned NumSections = Asm.size() + 1;
+
+ std::vector<const MCSectionELF*> Sections;
+ Sections.resize(NumSections - 1);
+
+ for (SectionIndexMapTy::const_iterator i=
+ SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
+ const std::pair<const MCSectionELF*, uint32_t> &p = *i;
+ Sections[p.second - 1] = p.first;
+ }
+
+ // Null section first.
+ uint64_t FirstSectionSize =
+ NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+ uint32_t FirstSectionLink =
+ ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+ WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+ for (unsigned i = 0; i < NumSections - 1; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ uint32_t GroupSymbolIndex;
+ if (Section.getType() != ELF::SHT_GROUP)
+ GroupSymbolIndex = 0;
+ else
+ GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm,
+ GroupMap.lookup(&Section));
+
+ uint64_t Size = GetSectionAddressSize(Layout, SD);
+
+ WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+ SectionOffsetMap.lookup(&Section), Size,
+ SD.getAlignment(), Section);
+ }
+}
+
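+// ComputeSectionOrder - Build the order used for assigning file offsets and
+// writing section data: group sections first, then regular sections, then
+// relocation sections.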
+void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections) {
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP)
+ Sections.push_back(&Section);
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP &&
+ Section.getType() != ELF::SHT_REL &&
+ Section.getType() != ELF::SHT_RELA)
+ Sections.push_back(&Section);
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_RELA)
+ Sections.push_back(&Section);
+ }
+}
+
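+// WriteObject - Drive the emission of the object file: create relocation,
+// group and metadata sections, compute the symbol table, lay out file
+// offsets, and then write the ELF header, section contents and section
+// header table.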
+void ELFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ GroupMapTy GroupMap;
+ RevGroupMapTy RevGroupMap;
+ SectionIndexMapTy SectionIndexMap;
+
+ unsigned NumUserSections = Asm.size();
+
+ DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
+ CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+ const unsigned NumUserAndRelocSections = Asm.size();
+ CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
+ RevGroupMap, SectionIndexMap, RelMap);
+ const unsigned AllSections = Asm.size();
+ const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections;
+
+ unsigned NumRegularSections = NumUserSections + NumIndexedSections;
+
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap, NumRegularSections);
+
+
+ WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+ CreateMetadataSections(const_cast<MCAssembler&>(Asm),
+ const_cast<MCAsmLayout&>(Layout),
+ SectionIndexMap,
+ RelMap);
+
+ uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
+ uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr);
+ uint64_t FileOff = HeaderSize;
+
+ std::vector<const MCSectionELF*> Sections;
+ ComputeSectionOrder(Asm, Sections);
+ unsigned NumSections = Sections.size();
+ SectionOffsetMapTy SectionOffsetMap;
+ for (unsigned i = 0; i < NumRegularSections + 1; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
+
+ // Get the size of the section in the output file (including padding).
+ FileOff += GetSectionFileSize(Layout, SD);
+ }
+
+ FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
+
+ const unsigned SectionHeaderOffset = FileOff - HeaderSize;
+
+ uint64_t SectionHeaderEntrySize = is64Bit() ?
+ sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr);
+ FileOff += (NumSections + 1) * SectionHeaderEntrySize;
+
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
+
+ // Get the size of the section in the output file (including padding).
+ FileOff += GetSectionFileSize(Layout, SD);
+ }
+
+ // Write out the ELF header ...
+ WriteHeader(SectionHeaderOffset, NumSections + 1);
+
+ // ... then the regular sections ...
+ // The extra "+ 1" is for .shstrtab.
+ for (unsigned i = 0; i < NumRegularSections + 1; ++i)
+ WriteDataSectionData(Asm, Layout, *Sections[i]);
+
+ FileOff = OS.tell();
+ uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+ WriteZeros(Padding);
+
+ // ... then the section header table ...
+ WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
+ SectionOffsetMap);
+
+ FileOff = OS.tell();
+
+ // ... and then the remaining sections ...
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
+ WriteDataSectionData(Asm, Layout, *Sections[i]);
+}
+
+bool
+ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (DataA.getFlags() & ELF_STB_Weak)
+ return false;
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ Asm, DataA, FB,InSet, IsPCRel);
+}
+
+MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ switch (MOTW->getEMachine()) {
+ case ELF::EM_386:
+ case ELF::EM_X86_64:
+ return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_ARM:
+ return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_MBLAZE:
+ return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ default: llvm_unreachable("Unsupported architecture"); break;
+ }
+}
+
+
+/// START OF SUBCLASSES for ELFObjectWriter
+//===- ARMELFObjectWriter -------------------------------------------===//
+
+ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+ARMELFObjectWriter::~ARMELFObjectWriter()
+{}
+
+// FIXME: get the real EABI Version from the Triple.
+void ARMELFObjectWriter::WriteEFlags() {
+ Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
+}
+
+// On ARM, _MergedGlobals and most other symbols get emitted directly,
+// i.e. not as an offset to a section symbol.
+// This code is an approximation of what ARM/gcc does.
+
+STATISTIC(PCRelCount, "Total number of PIC Relocations");
+STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ bool EmitThisSym = false;
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol.getSection());
+ bool InNormalSection = true;
+ unsigned RelocType = 0;
+ RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ DEBUG(
+ const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Kind2;
+ Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
+ MCSymbolRefExpr::VK_None;
+ dbgs() << "considering symbol "
+ << Section.getSectionName() << "/"
+ << Symbol.getName() << "/"
+ << " Rel:" << (unsigned)RelocType
+ << " Kind: " << (int)Kind << "/" << (int)Kind2
+ << " Tmp:"
+ << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
+ << Symbol.isVariable() << "/" << Symbol.isTemporary()
+ << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
+
+ if (IsPCRel) {
+ ++PCRelCount;
+ switch (RelocType) {
+ default:
+ // Most relocation types are emitted as explicit symbols
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = true;
+ break;
+ case ELF::R_ARM_ABS32:
+ // But things get strange with R_ARM_ABS32
+ // In this case, most things that go in .rodata show up
+ // as section relative relocations
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".rodata", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = false;
+ break;
+ }
+ } else {
+ NonPCRelCount++;
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".rodata", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+
+ switch (RelocType) {
+ default: EmitThisSym = true; break;
+ case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ }
+ }
+
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary() && InNormalSection) {
+ return &Symbol;
+ }
+ return NULL;
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ return Type;
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: assert(0 && "Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ assert(0 && "unimplemented");
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_uncondbranch:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ default:
+ Type = ELF::R_ARM_NONE;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier"); break;
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ assert(0 && "Unimplemented");
+ break;
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+//===- MBlazeELFObjectWriter -------------------------------------------===//
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
+}
+
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
+}
+
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case FK_PCRel_4:
+ Type = ELF::R_MICROBLAZE_64_PCREL;
+ break;
+ case FK_PCRel_2:
+ Type = ELF::R_MICROBLAZE_32_PCREL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ((IsRelocWithSymbol || Addend !=0)
+ ? ELF::R_MICROBLAZE_32
+ : ELF::R_MICROBLAZE_64);
+ break;
+ case FK_Data_2:
+ Type = ELF::R_MICROBLAZE_32;
+ break;
+ }
+ }
+ return Type;
+}
+
+//===- X86ELFObjectWriter -------------------------------------------===//
+
+
+X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+X86ELFObjectWriter::~X86ELFObjectWriter()
+{}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ unsigned Type;
+ if (is64Bit()) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
+ case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
+ case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
+
+ case FK_PCRel_8:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC64;
+ break;
+ case X86::reloc_signed_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_riprel_4byte:
+ case FK_PCRel_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_X86_64_PLT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_X86_64_GOTTPOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_X86_64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Type = ELF::R_X86_64_TLSLD;
+ break;
+ }
+ break;
+ case FK_PCRel_2:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC16;
+ break;
+ case FK_PCRel_1:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC8;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_signed_4byte:
+ assert(isInt<32>(Target.getConstant()));
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_32S;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF32;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_X86_64_DTPOFF32;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ // FIXME: Should we avoid selecting reloc_signed_4byte in 32-bit mode
+ // instead?
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_32;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_386_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_386_GOTOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_386_TLS_GD;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_386_TLS_LE_32;
+ break;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ Type = ELF::R_386_TLS_IE;
+ break;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ Type = ELF::R_386_TLS_LE;
+ break;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ Type = ELF::R_386_TLS_GOTIE;
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_386_TLS_LDM;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_386_TLS_LDO_32;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_386_TLS_IE_32;
+ break;
+ }
+ break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ return Type;
+}
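+
+// A few illustrative mappings taken from the switches above: on x86-64 a
+// 4-byte PC-relative fixup with a VK_PLT modifier becomes R_X86_64_PLT32 and
+// a plain FK_Data_8 becomes R_X86_64_64; in 32-bit mode an FK_Data_4 fixup
+// with VK_GOTOFF becomes R_386_GOTOFF and a PC-relative VK_None reference
+// becomes R_386_PC32.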
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.h b/contrib/llvm/lib/MC/ELFObjectWriter.h
new file mode 100644
index 000000000000..7593099fb447
--- /dev/null
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.h
@@ -0,0 +1,414 @@
+//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ELF object file writer and its target-specific
+// subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_ELFOBJECTWRITER_H
+#define LLVM_MC_ELFOBJECTWRITER_H
+
+#include "MCELF.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+
+class MCSection;
+class MCDataFragment;
+class MCSectionELF;
+
+class ELFObjectWriter : public MCObjectWriter {
+ protected:
+
+ static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
+ static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
+ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ bool Used, bool Renamed);
+ static bool isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc);
+ static bool IsELFMetaDataSection(const MCSectionData &SD);
+ static uint64_t DataSectionSize(const MCSectionData &SD);
+ static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+ static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section);
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+ /// ELFSymbolData - Helper struct holding some precomputed information
+ /// about a symbol.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+ return true;
+ if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+ return false;
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// @name Relocation Data
+ /// @{
+
+ struct ELFRelocationEntry {
+ // Make these fields big enough for both 32-bit and 64-bit ELF.
+ uint64_t r_offset;
+ int Index;
+ unsigned Type;
+ const MCSymbol *Symbol;
+ uint64_t r_addend;
+
+ ELFRelocationEntry()
+ : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
+
+ ELFRelocationEntry(uint64_t RelocOffset, int Idx,
+ unsigned RelType, const MCSymbol *Sym,
+ uint64_t Addend)
+ : r_offset(RelocOffset), Index(Idx), Type(RelType),
+ Symbol(Sym), r_addend(Addend) {}
+
+ // Sort entries in descending order of r_offset.
+ bool operator<(const ELFRelocationEntry &RE) const {
+ return RE.r_offset < r_offset;
+ }
+ };
+
+ /// The target specific ELF writer instance.
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+ SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+ SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+ DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ bool NeedsGOT;
+
+ bool NeedsSymtabShndx;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+ // This holds the .symtab section index.
+ unsigned SymbolTableIndex;
+
+ unsigned ShstrtabIndex;
+
+
+ virtual const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ // For arch-specific emission of explicit reloc symbol
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return NULL;
+ }
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool hasRelocationAddend() const {
+ return TargetObjectWriter->hasRelocationAddend();
+ }
+
+ public:
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS, bool IsLittleEndian)
+ : MCObjectWriter(_OS, IsLittleEndian),
+ TargetObjectWriter(MOTW),
+ NeedsGOT(false), NeedsSymtabShndx(false) {
+ }
+
+ virtual ~ELFObjectWriter();
+
+ void WriteWord(uint64_t W) {
+ if (is64Bit())
+ Write64(W);
+ else
+ Write32(W);
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String8(MCDataFragment &F, uint8_t Value) {
+ char buf[1];
+ buf[0] = Value;
+ F.getContents() += StringRef(buf, 1);
+ }
+
+ void String16(MCDataFragment &F, uint16_t Value) {
+ char buf[2];
+ if (isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ F.getContents() += StringRef(buf, 2);
+ }
+
+ void String32(MCDataFragment &F, uint32_t Value) {
+ char buf[4];
+ if (isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ F.getContents() += StringRef(buf, 4);
+ }
+
+ void String64(MCDataFragment &F, uint64_t Value) {
+ char buf[8];
+ if (isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ F.getContents() += StringRef(buf, 8);
+ }
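+
+ // For example, on a little-endian writer String32(F, 1) appends the bytes
+ // 01 00 00 00 to the fragment, while on a big-endian writer it appends
+ // 00 00 00 01.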
+
+ virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+ /// Default e_flags = 0
+ virtual void WriteEFlags() { Write32(0); }
+
+ virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint32_t shndx,
+ bool Reserved);
+
+ virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+ virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
+
+ virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ // Map from a group section to the signature symbol
+ typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+ // Map from a signature symbol to the group section
+ typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+ // Map from a section to the section with the relocations
+ typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
+ // Map from a section to its offset
+ typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+
+ /// ComputeSymbolTable - Compute the symbol table data.
+ ///
+ /// Fills in the StringTable member and the LocalSymbolData,
+ /// ExternalSymbolData and UndefinedSymbolData lists for the given
+ /// assembler and section index map.
+ virtual void ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections);
+
+ virtual void ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ RelMapTy &RelMap);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap);
+
+ virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ // Create the sections that show up in the symbol table. Currently
+ // those are the .note.GNU-stack section and the group sections.
+ virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap);
+
+ void ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections);
+
+ virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ virtual void WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD);
+
+ virtual bool
+ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ virtual void WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ const MCSectionELF &Section);
+
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) = 0;
+ };
+
+ //===- X86ELFObjectWriter -------------------------------------------===//
+
+ class X86ELFObjectWriter : public ELFObjectWriter {
+ public:
+ X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~X86ELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
+
+
+ //===- ARMELFObjectWriter -------------------------------------------===//
+
+ class ARMELFObjectWriter : public ELFObjectWriter {
+ public:
+ // FIXME: MCAssembler can't yet return the Subtarget.
+ enum { DefaultEABIVersion = 0x05000000U };
+
+ ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~ARMELFObjectWriter();
+
+ virtual void WriteEFlags();
+ protected:
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ private:
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const;
+
+ };
+
+ //===- MBlazeELFObjectWriter -------------------------------------------===//
+
+ class MBlazeELFObjectWriter : public ELFObjectWriter {
+ public:
+ MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~MBlazeELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
new file mode 100644
index 000000000000..502b60b0edf4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -0,0 +1,137 @@
+//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Dwarf.h"
+#include <cctype>
+#include <cstring>
+using namespace llvm;
+
+MCAsmInfo::MCAsmInfo() {
+ PointerSize = 4;
+ IsLittleEndian = true;
+ StackGrowsUp = false;
+ HasSubsectionsViaSymbols = false;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = false;
+ MaxInstLength = 4;
+ PCSymbol = "$";
+ SeparatorString = ";";
+ CommentColumn = 40;
+ CommentString = "#";
+ LabelSuffix = ":";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".";
+ LinkerPrivateGlobalPrefix = "";
+ InlineAsmStart = "APP";
+ InlineAsmEnd = "NO_APP";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = true;
+ ZeroDirective = "\t.zero\t";
+ AsciiDirective = "\t.ascii\t";
+ AscizDirective = "\t.asciz\t";
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = "\t.quad\t";
+ SunStyleELFSectionSwitchSyntax = false;
+ UsesELFSectionDirectiveForBSS = false;
+ AlignDirective = "\t.align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+ GPRel32Directive = 0;
+ GlobalDirective = "\t.globl\t";
+ HasSetDirective = true;
+ HasAggressiveSymbolFolding = true;
+ HasLCOMMDirective = false;
+ COMMDirectiveAlignmentIsInBytes = true;
+ HasDotTypeDotSizeDirective = true;
+ HasSingleParameterDotFile = true;
+ HasNoDeadStrip = false;
+ HasSymbolResolver = false;
+ WeakRefDirective = 0;
+ WeakDefDirective = 0;
+ LinkOnceDirective = 0;
+ HiddenVisibilityAttr = MCSA_Hidden;
+ HiddenDeclarationVisibilityAttr = MCSA_Hidden;
+ ProtectedVisibilityAttr = MCSA_Protected;
+ HasLEB128 = false;
+ SupportsDebugInformation = false;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfUsesInlineInfoSection = false;
+ DwarfRequiresRelocationForSectionOffset = true;
+ DwarfSectionOffsetDirective = 0;
+ DwarfUsesLabelOffsetForRanges = true;
+ DwarfRegNumForCFI = false;
+ HasMicrosoftFastStdCallMangling = false;
+
+ AsmTransCBE = 0;
+}
+
+MCAsmInfo::~MCAsmInfo() {
+}
+
+
+unsigned MCAsmInfo::getULEB128Size(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
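+
+// For example, getULEB128Size(0) is 1 and getULEB128Size(624485) is 3, since
+// 624485 (0x98765) encodes as the ULEB128 bytes 0xE5 0x8E 0x26.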
+
+unsigned MCAsmInfo::getSLEB128Size(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
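+
+// For example, getSLEB128Size(-2) is 1 (the single byte 0x7E), while
+// getSLEB128Size(127) is 2 (0xFF 0x00), because bit 6 of the final 7-bit
+// group must match the sign of the value.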
+
+const MCExpr *
+MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ return getExprForFDESymbol(Sym, Encoding, Streamer);
+}
+
+const MCExpr *
+MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ if (!(Encoding & dwarf::DW_EH_PE_pcrel))
+ return MCSymbolRefExpr::Create(Sym, Streamer.getContext());
+
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
+ MCSymbol *PCSym = Context.CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
+ return MCBinaryExpr::CreateSub(Res, PC, Context);
+}
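+
+// In the pcrel case above the returned expression is effectively
+// "Sym - <temporary label emitted at the current position>", i.e. the FDE
+// field is encoded relative to the point where it is emitted.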
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
new file mode 100644
index 000000000000..7fc7d7abb232
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -0,0 +1,37 @@
+//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on COFF-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+MCAsmInfoCOFF::MCAsmInfoCOFF() {
+ GlobalPrefix = "_";
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasLCOMMDirective = true;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ LinkOnceDirective = "\t.linkonce discard\n";
+
+ // Doesn't support visibility:
+ HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid;
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+ HasMicrosoftFastStdCallMangling = true;
+}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
new file mode 100644
index 000000000000..5851cb0391d8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -0,0 +1,61 @@
+//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+MCAsmInfoDarwin::MCAsmInfoDarwin() {
+ // Common settings for all Darwin targets.
+ // Syntax:
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LinkerPrivateGlobalPrefix = "l";
+ AllowQuotesInName = true;
+ HasSingleParameterDotFile = false;
+ HasSubsectionsViaSymbols = true;
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ InlineAsmStart = " InlineAsm Start";
+ InlineAsmEnd = " InlineAsm End";
+
+ // Directives:
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ HasMachoZeroFillDirective = true; // Uses .zerofill
+ HasMachoTBSSDirective = true; // Uses .tbss
+ HasStaticCtorDtorReferenceInStaticMode = true;
+
+ // FIXME: Darwin 10 and newer don't need this.
+ LinkerRequiresNonEmptyDwarfLines = true;
+
+ // FIXME: Change this once MC is the system assembler.
+ HasAggressiveSymbolFolding = false;
+
+ HiddenVisibilityAttr = MCSA_PrivateExtern;
+ HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+ // Doesn't support protected visibility.
+ ProtectedVisibilityAttr = MCSA_Global;
+
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = true;
+
+ DwarfRequiresRelocationForSectionOffset = false;
+ DwarfUsesLabelOffsetForRanges = false;
+}
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
new file mode 100644
index 000000000000..d5d08e8f69fb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -0,0 +1,1256 @@
+//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+class MCAsmStreamer : public MCStreamer {
+protected:
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+private:
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+ OwningPtr<TargetAsmBackend> AsmBackend;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+ unsigned UseLoc : 1;
+ unsigned UseCFI : 1;
+
+ enum EHSymbolFlags { EHGlobal = 1,
+ EHWeakDefinition = 1 << 1,
+ EHPrivateExtern = 1 << 2 };
+ DenseMap<const MCSymbol*, unsigned> FlagMap;
+
+ bool needsSet(const MCExpr *Value);
+
+ void EmitRegisterName(int64_t Register);
+
+public:
+ MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ MCInstPrinter *printer, MCCodeEmitter *emitter,
+ TargetAsmBackend *asmbackend,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+ CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+ ~MCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+
+ virtual void InitSections() {
+ // FIXME: This is Mach-O-specific, but the testsuite
+ // expects this.
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+ virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace = 0);
+
+ virtual void EmitULEB128Value(const MCExpr *Value);
+
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName);
+
+ virtual void EmitCFISections(bool EH, bool Debug);
+ virtual void EmitCFIStartProc();
+ virtual void EmitCFIEndProc();
+ virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
+ virtual void EmitCFIDefCfaOffset(int64_t Offset);
+ virtual void EmitCFIDefCfaRegister(int64_t Register);
+ virtual void EmitCFIOffset(int64_t Register, int64_t Offset);
+ virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+ virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+ virtual void EmitCFIRememberState();
+ virtual void EmitCFIRestoreState();
+ virtual void EmitCFISameValue(int64_t Register);
+ virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
+ virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+
+ virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
+ virtual void EmitWin64EHEndProc();
+ virtual void EmitWin64EHStartChained();
+ virtual void EmitWin64EHEndChained();
+ virtual void EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except);
+ virtual void EmitWin64EHHandlerData();
+ virtual void EmitWin64EHPushReg(unsigned Register);
+ virtual void EmitWin64EHSetFrame(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHAllocStack(unsigned Size);
+ virtual void EmitWin64EHSaveReg(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHSaveXMM(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHPushFrame(bool Code);
+ virtual void EmitWin64EHEndProlog();
+
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Personality);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
+
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+/// AddComment - Add a comment that can be emitted to the generated .s
+/// file if applicable as a QoI issue to make the output of the compiler
+/// more readable. This only affects the MCAsmStreamer, and only when
+/// verbose assembly output is enabled.
+void MCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
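+
+// Illustrative use (with the default MCAsmInfo comment settings defined
+// above, i.e. comment string "#" and comment column 40): a call such as
+// Streamer.AddComment("frame setup") makes the next emitted line end with
+// "# frame setup" padded out to column 40.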
+
+void MCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
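+
+// For example, truncateToSize(0x1234, 1) yields 0x34 and
+// truncateToSize(-1, 2) yields 0xFFFF.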
+
+void MCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+ Section->PrintSwitchToSection(MAI, OS);
+}
+
+void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+ if (UseCFI)
+ return;
+
+ unsigned Flags = FlagMap.lookup(Symbol);
+
+ if (Flags & EHGlobal)
+ EmitSymbolAttribute(EHSymbol, MCSA_Global);
+ if (Flags & EHWeakDefinition)
+ EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+ if (Flags & EHPrivateExtern)
+ EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
+void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCStreamer::EmitLabel(Symbol);
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: assert(0 && "Invalid flag!");
+ case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
+ case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
+ case MCAF_Code16: OS << "\t.code\t16"; break;
+ case MCAF_Code32: OS << "\t.code\t32"; break;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // This needs to emit to a temporary string to get properly quoted
+ // MCSymbols when they have spaces in them.
+ OS << "\t.thumb_func";
+ // Only Mach-O hasSubsectionsViaSymbols()
+ if (MAI.hasSubsectionsViaSymbols())
+ OS << '\t' << *Func;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ EmitIntValue(dwarf::DW_CFA_advance_loc4, 1);
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ AddrDelta = ForceExpAbs(AddrDelta);
+ EmitValue(AddrDelta, 4);
+}
+
+
+void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ switch (Attribute) {
+ case MCSA_Invalid: assert(0 && "Invalid symbol attribute");
+ case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function
+ case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC
+ case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object
+ case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object
+ case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common
+ case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype
+ case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
+ assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
+ OS << "\t.type\t" << *Symbol << ','
+ << ((MAI.getCommentString()[0] != '@') ? '@' : '%');
+ switch (Attribute) {
+ default: assert(0 && "Unknown ELF .type");
+ case MCSA_ELF_TypeFunction: OS << "function"; break;
+ case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
+ case MCSA_ELF_TypeObject: OS << "object"; break;
+ case MCSA_ELF_TypeTLS: OS << "tls_object"; break;
+ case MCSA_ELF_TypeCommon: OS << "common"; break;
+ case MCSA_ELF_TypeNoType: OS << "no_type"; break;
+ case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
+ }
+ EmitEOL();
+ return;
+ case MCSA_Global: // .globl/.global
+ OS << MAI.getGlobalDirective();
+ FlagMap[Symbol] |= EHGlobal;
+ break;
+ case MCSA_Hidden: OS << "\t.hidden\t"; break;
+ case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
+ case MCSA_Internal: OS << "\t.internal\t"; break;
+ case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break;
+ case MCSA_Local: OS << "\t.local\t"; break;
+ case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break;
+ case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
+ case MCSA_PrivateExtern:
+ OS << "\t.private_extern\t";
+ FlagMap[Symbol] |= EHPrivateExtern;
+ break;
+ case MCSA_Protected: OS << "\t.protected\t"; break;
+ case MCSA_Reference: OS << "\t.reference\t"; break;
+ case MCSA_Weak: OS << "\t.weak\t"; break;
+ case MCSA_WeakDefinition:
+ OS << "\t.weak_definition\t";
+ FlagMap[Symbol] |= EHWeakDefinition;
+ break;
+ // .weak_reference
+ case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break;
+ case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
+ }
+
+ OS << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ OS << ".desc" << ' ' << *Symbol << ',' << DescValue;
+ EmitEOL();
+}
+
+void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ OS << "\t.def\t " << *Symbol << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ OS << "\t.scl\t" << StorageClass << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolType(int Type) {
+ OS << "\t.type\t" << Type << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EndCOFFSymbolDef() {
+ OS << "\t.endef";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ assert(MAI.hasDotTypeDotSizeDirective());
+ OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
+}
+
+void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ OS << "\t.comm\t" << *Symbol << ',' << Size;
+ if (ByteAlignment != 0) {
+ if (MAI.getCOMMDirectiveAlignmentIsInBytes())
+ OS << ',' << ByteAlignment;
+ else
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ EmitEOL();
+}
+
+/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+///
+/// @param Symbol - The common symbol to emit.
+/// @param Size - The size of the common symbol.
+void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!");
+ OS << "\t.lcomm\t" << *Symbol << ',' << Size;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ // Note: a .zerofill directive does not switch sections.
+ OS << ".zerofill ";
+
+ // This is a mach-o specific directive.
+ const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
+ OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
+
+ if (Symbol != NULL) {
+ OS << ',' << *Symbol << ',' << Size;
+ if (ByteAlignment != 0)
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ EmitEOL();
+}
+
+// .tbss sym, size, align
+// This assumes the symbol has already been mangled from the original name,
+// e.g. _a.
+void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ assert(Symbol != NULL && "Symbol shouldn't be NULL!");
+ // Instead of using the Section we'll just use the shortcut.
+ // This is a mach-o specific directive and section.
+ OS << ".tbss " << *Symbol << ", " << Size;
+
+ // Output align if we have it. We default to 1 so don't bother printing
+ // that.
+ if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
+
+ EmitEOL();
+}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
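+
+// For example, PrintQuotedString escapes a double quote as \", a newline as
+// \n, and a non-printable byte such as 0x01 as the octal escape \001, and
+// wraps the whole string in double quotes.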
+
+
+void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it; otherwise
+ // use .ascii.
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+}
+
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitULEB128IntValue(IntValue);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitSLEB128IntValue(IntValue);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
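+
+// For example, with the default ZeroDirective above ("\t.zero\t"),
+// EmitFill(8, 0, 0) emits a single ".zero 8" directive, and a non-zero fill
+// value such as 0x90 is appended as ".zero 8,144".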
+
+void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
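+
+// For example, with the default MCAsmInfo settings above (AlignDirective
+// "\t.align\t", AlignmentIsInBytes true) EmitValueToAlignment(16) emits
+// ".align 16"; a target with AlignmentIsInBytes false (e.g. the Darwin
+// settings above) emits ".align 4"; and a non-power-of-two request such as
+// 12 falls through to ".balign 12, 0".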
+
+void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // Emit with a text fill value.
+ EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(),
+ 1, MaxBytesToEmit);
+}
+
+void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ // FIXME: Verify that Offset is associated with the current section.
+ OS << ".org " << *Offset << ", " << (unsigned) Value;
+ EmitEOL();
+}
+
+
+void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
+ if (UseLoc) {
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ }
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator,
+ StringRef FileName) {
+ this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator, FileName);
+ if (!UseLoc)
+ return;
+
+ OS << "\t.loc\t" << FileNo << " " << Line << " " << Column;
+ if (Flags & DWARF2_FLAG_BASIC_BLOCK)
+ OS << " basic_block";
+ if (Flags & DWARF2_FLAG_PROLOGUE_END)
+ OS << " prologue_end";
+ if (Flags & DWARF2_FLAG_EPILOGUE_BEGIN)
+ OS << " epilogue_begin";
+
+ unsigned OldFlags = getContext().getCurrentDwarfLoc().getFlags();
+ if ((Flags & DWARF2_FLAG_IS_STMT) != (OldFlags & DWARF2_FLAG_IS_STMT)) {
+ OS << " is_stmt ";
+
+ if (Flags & DWARF2_FLAG_IS_STMT)
+ OS << "1";
+ else
+ OS << "0";
+ }
+
+ if (Isa)
+ OS << "isa " << Isa;
+ if (Discriminator)
+ OS << "discriminator " << Discriminator;
+
+ if (IsVerboseAsm) {
+ OS.PadToColumn(MAI.getCommentColumn());
+ OS << MAI.getCommentString() << ' ' << FileName << ':'
+ << Line << ':' << Column;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
+ MCStreamer::EmitCFISections(EH, Debug);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_sections ";
+ if (EH) {
+ OS << ".eh_frame";
+ if (Debug)
+ OS << ", .debug_frame";
+ } else if (Debug) {
+ OS << ".debug_frame";
+ }
+
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIStartProc() {
+ MCStreamer::EmitCFIStartProc();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_startproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIEndProc() {
+ MCStreamer::EmitCFIEndProc();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_endproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegisterName(int64_t Register) {
+ if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
+ const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+ unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true);
+ InstPrinter->printRegName(OS, LLVMRegister);
+ } else {
+ OS << Register;
+ }
+}
+
+void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ MCStreamer::EmitCFIDefCfa(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ MCStreamer::EmitCFIDefCfaOffset(Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa_offset " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ MCStreamer::EmitCFIDefCfaRegister(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa_register ";
+ EmitRegisterName(Register);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ this->MCStreamer::EmitCFIOffset(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_offset ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ MCStreamer::EmitCFIPersonality(Sym, Encoding);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ MCStreamer::EmitCFILsda(Sym, Encoding);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRememberState() {
+ MCStreamer::EmitCFIRememberState();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_remember_state";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRestoreState() {
+ MCStreamer::EmitCFIRestoreState();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_restore_state";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISameValue(int64_t Register) {
+ MCStreamer::EmitCFISameValue(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_same_value ";
+ EmitRegisterName(Register);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+ MCStreamer::EmitCFIRelOffset(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_rel_offset ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+ MCStreamer::EmitCFIAdjustCfaOffset(Adjustment);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_adjust_cfa_offset " << Adjustment;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+ MCStreamer::EmitWin64EHStartProc(Symbol);
+
+ OS << ".seh_proc " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndProc() {
+ MCStreamer::EmitWin64EHEndProc();
+
+ OS << "\t.seh_endproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHStartChained() {
+ MCStreamer::EmitWin64EHStartChained();
+
+ OS << "\t.seh_startchained";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndChained() {
+ MCStreamer::EmitWin64EHEndChained();
+
+ OS << "\t.seh_endchained";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ MCStreamer::EmitWin64EHHandler(Sym, Unwind, Except);
+
+ OS << "\t.seh_handler " << *Sym;
+ if (Unwind)
+ OS << ", @unwind";
+ if (Except)
+ OS << ", @except";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // Switch sections. Don't call SwitchSection directly, because that will
+ // cause the section switch to be visible in the emitted assembly.
+ // We only do this so the section switch that terminates the handler
+ // data block is visible.
+ MCWin64EHUnwindInfo *CurFrame = getCurrentW64UnwindInfo();
+ StringRef suffix = MCWin64EHUnwindEmitter::GetSectionSuffix(CurFrame->Function);
+ const MCSection *xdataSect =
+ getContext().getTargetAsmInfo().getWin64EHTableSection(suffix);
+ if (xdataSect)
+ SwitchSectionNoChange(xdataSect);
+
+ OS << "\t.seh_handlerdata";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) {
+ MCStreamer::EmitWin64EHPushReg(Register);
+
+ OS << "\t.seh_pushreg " << Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSetFrame(Register, Offset);
+
+ OS << "\t.seh_setframe " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) {
+ MCStreamer::EmitWin64EHAllocStack(Size);
+
+ OS << "\t.seh_stackalloc " << Size;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSaveReg(Register, Offset);
+
+ OS << "\t.seh_savereg " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSaveXMM(Register, Offset);
+
+ OS << "\t.seh_savexmm " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) {
+ MCStreamer::EmitWin64EHPushFrame(Code);
+
+ OS << "\t.seh_pushframe";
+ if (Code)
+ OS << " @code";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndProlog(void) {
+ MCStreamer::EmitWin64EHEndProlog();
+
+ OS << "\t.seh_endprologue";
+ EmitEOL();
+}
+
+void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
+ raw_ostream &OS = GetCommentOS();
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ Emitter->EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // If we are showing fixups, create symbolic markers in the encoded
+ // representation. We do this by making a per-bit map to the fixup item index,
+ // then trying to display it as nicely as possible.
+ SmallVector<uint8_t, 64> FixupMap;
+ FixupMap.resize(Code.size() * 8);
+ for (unsigned i = 0, e = Code.size() * 8; i != e; ++i)
+ FixupMap[i] = 0;
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
+ for (unsigned j = 0; j != Info.TargetSize; ++j) {
+ unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j;
+ assert(Index < Code.size() * 8 && "Invalid offset in fixup!");
+ FixupMap[Index] = 1 + i;
+ }
+ }
+
+ // FIXME: Note the fixup comments for Thumb2 are completely bogus since the
+ // high order halfword of a 32-bit Thumb2 instruction is emitted first.
+ OS << "encoding: [";
+ for (unsigned i = 0, e = Code.size(); i != e; ++i) {
+ if (i)
+ OS << ',';
+
+ // See if all bits are the same map entry.
+ uint8_t MapEntry = FixupMap[i * 8 + 0];
+ for (unsigned j = 1; j != 8; ++j) {
+ if (FixupMap[i * 8 + j] == MapEntry)
+ continue;
+
+ MapEntry = uint8_t(~0U);
+ break;
+ }
+
+ if (MapEntry != uint8_t(~0U)) {
+ if (MapEntry == 0) {
+ OS << format("0x%02x", uint8_t(Code[i]));
+ } else {
+ if (Code[i]) {
+ // FIXME: Some of the 8 bits require a fixup.
+ OS << format("0x%02x", uint8_t(Code[i])) << '\''
+ << char('A' + MapEntry - 1) << '\'';
+ } else
+ OS << char('A' + MapEntry - 1);
+ }
+ } else {
+ // Otherwise, write out in binary.
+ OS << "0b";
+ for (unsigned j = 8; j--;) {
+ unsigned Bit = (Code[i] >> j) & 1;
+
+ unsigned FixupBit;
+ if (getContext().getAsmInfo().isLittleEndian())
+ FixupBit = i * 8 + j;
+ else
+ FixupBit = i * 8 + (7-j);
+
+ if (uint8_t MapEntry = FixupMap[FixupBit]) {
+ assert(Bit == 0 && "Encoder wrote into fixed up bit!");
+ OS << char('A' + MapEntry - 1);
+ } else
+ OS << Bit;
+ }
+ }
+ }
+ OS << "]\n";
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
+ OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
+ << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
+ }
+}
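+
+// Illustrative note, not part of the upstream code: the per-bit FixupMap used
+// above yields encoding comments of roughly the following shape (instruction,
+// bytes and fixup kind are hypothetical):
+//
+//   addl %eax, sym(%rip)    ## encoding: [0x01,0x05,A,A,A,A]
+//                           ##   fixup A - offset: 2, value: sym, kind: reloc_riprel_4byte
+//
+// A byte whose eight bits all belong to one fixup prints as that fixup's
+// letter; a byte mixing fixed bits and fixup bits falls back to binary, with
+// the fixup bits shown as letters (e.g. 0b0101AAAA).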
+
+void MCAsmStreamer::EmitFnStart() {
+ OS << "\t.fnstart";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitFnEnd() {
+ OS << "\t.fnend";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCantUnwind() {
+ OS << "\t.cantunwind";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitHandlerData() {
+ OS << "\t.handlerdata";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter->printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter->printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter->printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter->printRegName(OS, RegList[i]);
+ }
+
+ OS << "}";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS);
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void MCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void MCAsmStreamer::Finish() {
+ // Dump out the dwarf file & directory tables and line tables.
+ if (getContext().hasDwarfFiles() && !UseLoc)
+ MCDwarfFileTable::Emit(this);
+
+ if (!UseCFI)
+ EmitFrames(false);
+}
+
+MCStreamer *llvm::createAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI, MCInstPrinter *IP,
+ MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ bool ShowInst) {
+ return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
+ IP, CE, TAB, ShowInst);
+}
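+
+// Illustrative sketch, not part of the upstream code: one way a client might
+// drive createAsmStreamer(). Ctx, IP, CE and TAB are assumed to have been
+// obtained from a TargetRegistry lookup elsewhere; all names here are
+// hypothetical.
+//
+//   formatted_raw_ostream FOS(llvm::outs());
+//   OwningPtr<MCStreamer> Streamer(
+//       llvm::createAsmStreamer(Ctx, FOS, /*isVerboseAsm=*/true,
+//                               /*useLoc=*/true, /*useCFI=*/true,
+//                               IP, CE, TAB, /*ShowInst=*/false));
+//   Streamer->InitSections();
+//   Streamer->EmitIntValue(42, 4, /*AddrSpace=*/0);
+//   Streamer->Finish();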
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
new file mode 100644
index 000000000000..527a63caaee5
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -0,0 +1,977 @@
+//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "assembler"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+namespace {
+namespace stats {
+STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
+STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(FragmentLayouts, "Number of fragment layouts");
+STATISTIC(ObjectBytes, "Number of emitted object file bytes");
+STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
+STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
+}
+}
+
+// FIXME FIXME FIXME: There are a number of places in this file where we convert
+// what is a 64-bit assembler value used for computation into a value in the
+// object file, which may truncate it. We should detect that truncation where
+// invalid and report errors back.
+
+/* *** */
+
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+ : Assembler(Asm), LastValidFragment()
+ {
+ // Compute the section layout order. Virtual sections must go last.
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (!it->getSection().isVirtualSection())
+ SectionOrder.push_back(&*it);
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (it->getSection().isVirtualSection())
+ SectionOrder.push_back(&*it);
+}
+
+bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
+ const MCSectionData &SD = *F->getParent();
+ const MCFragment *LastValid = LastValidFragment.lookup(&SD);
+ if (!LastValid)
+ return false;
+ assert(LastValid->getParent() == F->getParent());
+ return F->getLayoutOrder() <= LastValid->getLayoutOrder();
+}
+
+void MCAsmLayout::Invalidate(MCFragment *F) {
+ // If this fragment wasn't already up-to-date, we don't need to do anything.
+ if (!isFragmentUpToDate(F))
+ return;
+
+ // Otherwise, reset the last valid fragment to this fragment.
+ const MCSectionData &SD = *F->getParent();
+ LastValidFragment[&SD] = F;
+}
+
+void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+ MCSectionData &SD = *F->getParent();
+
+ MCFragment *Cur = LastValidFragment[&SD];
+ if (!Cur)
+ Cur = &*SD.begin();
+ else
+ Cur = Cur->getNextNode();
+
+ // Advance the layout position until the fragment is up-to-date.
+ while (!isFragmentUpToDate(F)) {
+ const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
+ Cur = Cur->getNextNode();
+ }
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+ EnsureValid(F);
+ assert(F->Offset != ~UINT64_C(0) && "Address not set!");
+ return F->Offset;
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, *this))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Offset = Target.getConstant();
+ if (Target.getSymA())
+ Offset += getSymbolOffset(&Assembler.getSymbolData(
+ Target.getSymA()->getSymbol()));
+ if (Target.getSymB())
+ Offset -= getSymbolOffset(&Assembler.getSymbolData(
+ Target.getSymB()->getSymbol()));
+ return Offset;
+ }
+
+ assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
+ return getFragmentOffset(SD->getFragment()) + SD->getOffset();
+}
+
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
+ // The size is the last fragment's end offset.
+ const MCFragment &F = SD->getFragmentList().back();
+ return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
+ // Virtual sections have no file size.
+ if (SD->getSection().isVirtualSection())
+ return 0;
+
+ // Otherwise, the file size is the same as the address space size.
+ return getSectionAddressSize(SD);
+}
+
+/* *** */
+
+MCFragment::MCFragment() : Kind(FragmentType(~0)) {
+}
+
+MCFragment::~MCFragment() {
+}
+
+MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
+ : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0))
+{
+ if (Parent)
+ Parent->getFragmentList().push_back(this);
+}
+
+/* *** */
+
+MCSectionData::MCSectionData() : Section(0) {}
+
+MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
+ : Section(&_Section),
+ Ordinal(~UINT32_C(0)),
+ Alignment(1),
+ HasInstructions(false)
+{
+ if (A)
+ A->getSectionList().push_back(this);
+}
+
+/* *** */
+
+MCSymbolData::MCSymbolData() : Symbol(0) {}
+
+MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
+ uint64_t _Offset, MCAssembler *A)
+ : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
+ IsExternal(false), IsPrivateExtern(false),
+ CommonSize(0), SymbolSize(0), CommonAlign(0),
+ Flags(0), Index(0)
+{
+ if (A)
+ A->getSymbolList().push_back(this);
+}
+
+/* *** */
+
+MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+ MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+ raw_ostream &OS_)
+ : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
+ OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false)
+{
+}
+
+MCAssembler::~MCAssembler() {
+}
+
+bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
+ // Non-temporary labels should always be visible to the linker.
+ if (!Symbol.isTemporary())
+ return true;
+
+ // Absolute temporary labels are never visible.
+ if (!Symbol.isInSection())
+ return false;
+
+ // Otherwise, check if the section requires symbols even for temporary labels.
+ return getBackend().doesSectionRequireSymbols(Symbol.getSection());
+}
+
+const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
+ // Linker visible symbols define atoms.
+ if (isSymbolLinkerVisible(SD->getSymbol()))
+ return SD;
+
+ // Absolute and undefined symbols have no defining atom.
+ if (!SD->getFragment())
+ return 0;
+
+ // Non-linker visible symbols in sections which can't be atomized have no
+ // defining atom.
+ if (!getBackend().isSectionAtomizable(
+ SD->getFragment()->getParent()->getSection()))
+ return 0;
+
+ // Otherwise, return the atom for the containing fragment.
+ return SD->getFragment()->getAtom();
+}
+
+bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value) const {
+ ++stats::EvaluateFixup;
+
+ if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
+ report_fatal_error("expected relocatable expression");
+
+ bool IsPCRel = Backend.getFixupKindInfo(
+ Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
+
+ bool IsResolved;
+ if (IsPCRel) {
+ if (Target.getSymB()) {
+ IsResolved = false;
+ } else if (!Target.getSymA()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol &SA = A->getSymbol();
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ SA.AliasedSymbol().isUndefined()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolData &DataA = getSymbolData(SA);
+ IsResolved =
+ getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
+ *DF, false, true);
+ }
+ }
+ } else {
+ IsResolved = Target.isAbsolute();
+ }
+
+ Value = Target.getConstant();
+
+ bool IsThumb = false;
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value += Layout.getSymbolOffset(&getSymbolData(Sym));
+ if (isThumbFunc(&Sym))
+ IsThumb = true;
+ }
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
+ }
+
+
+ bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
+ assert((ShouldAlignPC ? IsPCRel : true) &&
+ "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
+
+ if (IsPCRel) {
+ uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
+
+ // A number of ARM fixups in Thumb mode require that the effective PC
+ // address be determined as the 32-bit aligned version of the actual offset.
+ if (ShouldAlignPC) Offset &= ~0x3;
+ Value -= Offset;
+ }
+
+ // ARM fixups based from a thumb function address need to have the low
+ // bit set. The actual value is always at least 16-bit aligned, so the
+ // low bit is normally clear and available for use as an ISA flag for
+ // interworking.
+ if (IsThumb)
+ Value |= 1;
+
+ return IsResolved;
+}
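+
+// Worked example (illustrative only, all values hypothetical): for a
+// PC-relative fixup at offset 1 of a fragment laid out at offset 0x10,
+// referencing a defined symbol laid out at offset 0x40 with a constant
+// addend of -4, the code above computes
+//
+//   Value = -4 + 0x40          // constant plus the SymA offset
+//         - (0x10 + 1)         // minus (fragment offset + fixup offset)
+//         = 0x2b
+//
+// IsResolved is then true only if the writer can show the result is
+// independent of where the object is finally loaded.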
+
+uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+ const MCFragment &F) const {
+ switch (F.getKind()) {
+ case MCFragment::FT_Data:
+ return cast<MCDataFragment>(F).getContents().size();
+ case MCFragment::FT_Fill:
+ return cast<MCFillFragment>(F).getSize();
+ case MCFragment::FT_Inst:
+ return cast<MCInstFragment>(F).getInstSize();
+
+ case MCFragment::FT_LEB:
+ return cast<MCLEBFragment>(F).getContents().size();
+
+ case MCFragment::FT_Align: {
+ const MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ unsigned Offset = Layout.getFragmentOffset(&AF);
+ unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+ if (Size > AF.getMaxBytesToEmit())
+ return 0;
+ return Size;
+ }
+
+ case MCFragment::FT_Org: {
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
+ int64_t TargetLocation;
+ if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
+ report_fatal_error("expected assembly-time absolute expression");
+
+ // FIXME: We need a way to communicate this error.
+ uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+ int64_t Size = TargetLocation - FragmentOffset;
+ if (Size < 0 || Size >= 0x40000000)
+ report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
+ "' (at offset '" + Twine(FragmentOffset) + "')");
+ return Size;
+ }
+
+ case MCFragment::FT_Dwarf:
+ return cast<MCDwarfLineAddrFragment>(F).getContents().size();
+ case MCFragment::FT_DwarfFrame:
+ return cast<MCDwarfCallFrameFragment>(F).getContents().size();
+ }
+
+ assert(0 && "invalid fragment kind");
+ return 0;
+}
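+
+// Worked example (illustrative only): an FT_Align fragment requesting 8-byte
+// alignment at layout offset 5 gets OffsetToAlignment(5, 8) == 3 bytes of
+// padding. If its MaxBytesToEmit were 2, the required padding (3) exceeds the
+// limit and the case above reports a size of 0 instead.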
+
+void MCAsmLayout::LayoutFragment(MCFragment *F) {
+ MCFragment *Prev = F->getPrevNode();
+
+ // We should never try to recompute something which is up-to-date.
+ assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
+ // We should never try to compute the fragment layout if its predecessor
+ // isn't up-to-date.
+ assert((!Prev || isFragmentUpToDate(Prev)) &&
+ "Attempt to compute fragment before its predecessor!");
+
+ ++stats::FragmentLayouts;
+
+ // Compute fragment offset and size.
+ uint64_t Offset = 0;
+ if (Prev)
+ Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+
+ F->Offset = Offset;
+ LastValidFragment[F->getParent()] = F;
+}
+
+/// WriteFragmentData - Write the \arg F data to the output file.
+static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F) {
+ MCObjectWriter *OW = &Asm.getWriter();
+ uint64_t Start = OW->getStream().tell();
+ (void) Start;
+
+ ++stats::EmittedFragments;
+
+ // FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+ switch (F.getKind()) {
+ case MCFragment::FT_Align: {
+ MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ uint64_t Count = FragmentSize / AF.getValueSize();
+
+ assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+ // FIXME: This error shouldn't actually occur (the front end should emit
+ // multiple .align directives to enforce the semantics it wants), but is
+ // severe enough that we want to report it. How to handle this?
+ if (Count * AF.getValueSize() != FragmentSize)
+ report_fatal_error("undefined .align directive, value size '" +
+ Twine(AF.getValueSize()) +
+ "' is not a divisor of padding size '" +
+ Twine(FragmentSize) + "'");
+
+ // If we are aligning with nops, ask the target to emit the right nop
+ // sequence to fill the Count bytes; it is a fatal error if it cannot.
+ // Otherwise, fall through and fill with Value/ValueSize below.
+ if (AF.hasEmitNops()) {
+ if (!Asm.getBackend().WriteNopData(Count, OW))
+ report_fatal_error("unable to write nop sequence of " +
+ Twine(Count) + " bytes");
+ break;
+ }
+
+ // Otherwise, write out in multiples of the value size.
+ for (uint64_t i = 0; i != Count; ++i) {
+ switch (AF.getValueSize()) {
+ default:
+ assert(0 && "Invalid size!");
+ case 1: OW->Write8 (uint8_t (AF.getValue())); break;
+ case 2: OW->Write16(uint16_t(AF.getValue())); break;
+ case 4: OW->Write32(uint32_t(AF.getValue())); break;
+ case 8: OW->Write64(uint64_t(AF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Data: {
+ MCDataFragment &DF = cast<MCDataFragment>(F);
+ assert(FragmentSize == DF.getContents().size() && "Invalid size!");
+ OW->WriteBytes(DF.getContents().str());
+ break;
+ }
+
+ case MCFragment::FT_Fill: {
+ MCFillFragment &FF = cast<MCFillFragment>(F);
+
+ assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+ for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
+ switch (FF.getValueSize()) {
+ default:
+ assert(0 && "Invalid size!");
+ case 1: OW->Write8 (uint8_t (FF.getValue())); break;
+ case 2: OW->Write16(uint16_t(FF.getValue())); break;
+ case 4: OW->Write32(uint32_t(FF.getValue())); break;
+ case 8: OW->Write64(uint64_t(FF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Inst: {
+ MCInstFragment &IF = cast<MCInstFragment>(F);
+ OW->WriteBytes(StringRef(IF.getCode().begin(), IF.getCode().size()));
+ break;
+ }
+
+ case MCFragment::FT_LEB: {
+ MCLEBFragment &LF = cast<MCLEBFragment>(F);
+ OW->WriteBytes(LF.getContents().str());
+ break;
+ }
+
+ case MCFragment::FT_Org: {
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+ for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
+ OW->Write8(uint8_t(OF.getValue()));
+
+ break;
+ }
+
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
+ OW->WriteBytes(OF.getContents().str());
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
+ OW->WriteBytes(CF.getContents().str());
+ break;
+ }
+ }
+
+ assert(OW->getStream().tell() - Start == FragmentSize);
+}
+
+void MCAssembler::WriteSectionData(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ // Ignore virtual sections.
+ if (SD->getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+
+ // Check that contents are only things legal inside a virtual section.
+ for (MCSectionData::const_iterator it = SD->begin(),
+ ie = SD->end(); it != ie; ++it) {
+ switch (it->getKind()) {
+ default:
+ assert(0 && "Invalid fragment in virtual section!");
+ case MCFragment::FT_Data: {
+ // Check that we aren't trying to write non-zero contents (or fixups)
+ // into a virtual section. This is to support clients which use standard
+ // directives to fill the contents of virtual sections.
+ MCDataFragment &DF = cast<MCDataFragment>(*it);
+ assert(DF.fixup_begin() == DF.fixup_end() &&
+ "Cannot have fixups in virtual section!");
+ for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+ assert(DF.getContents()[i] == 0 &&
+ "Invalid data value for virtual section!");
+ break;
+ }
+ case MCFragment::FT_Align:
+ // Check that we aren't trying to write a non-zero value into a virtual
+ // section.
+ assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+ !cast<MCAlignFragment>(it)->getValue()) &&
+ "Invalid align in virtual section!");
+ break;
+ case MCFragment::FT_Fill:
+ assert(!cast<MCFillFragment>(it)->getValueSize() &&
+ "Invalid fill in virtual section!");
+ break;
+ }
+ }
+
+ return;
+ }
+
+ uint64_t Start = getWriter().getStream().tell();
+ (void) Start;
+
+ for (MCSectionData::const_iterator it = SD->begin(),
+ ie = SD->end(); it != ie; ++it)
+ WriteFragmentData(*this, Layout, *it);
+
+ assert(getWriter().getStream().tell() - Start ==
+ Layout.getSectionAddressSize(SD));
+}
+
+
+uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+ MCFragment &F,
+ const MCFixup &Fixup) {
+ // Evaluate the fixup.
+ MCValue Target;
+ uint64_t FixedValue;
+ if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ // The fixup was unresolved, we need a relocation. Inform the object
+ // writer of the relocation, and give it an opportunity to adjust the
+ // fixup value if need be.
+ getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
+ }
+ return FixedValue;
+ }
+
+void MCAssembler::Finish() {
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - pre-layout\n--\n";
+ dump(); });
+
+ // Create the layout object.
+ MCAsmLayout Layout(*this);
+
+ // Create dummy fragments and assign section ordinals.
+ unsigned SectionIndex = 0;
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ // Create dummy fragments to eliminate any empty sections; this simplifies
+ // layout.
+ if (it->getFragmentList().empty())
+ new MCDataFragment(it);
+
+ it->setOrdinal(SectionIndex++);
+ }
+
+ // Assign layout order indices to sections and fragments.
+ for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
+ MCSectionData *SD = Layout.getSectionOrder()[i];
+ SD->setLayoutOrder(i);
+
+ unsigned FragmentIndex = 0;
+ for (MCSectionData::iterator it2 = SD->begin(),
+ ie2 = SD->end(); it2 != ie2; ++it2)
+ it2->setLayoutOrder(FragmentIndex++);
+ }
+
+ // Layout until everything fits.
+ while (LayoutOnce(Layout))
+ continue;
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - post-relaxation\n--\n";
+ dump(); });
+
+ // Finalize the layout, including fragment lowering.
+ FinishLayout(Layout);
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - final-layout\n--\n";
+ dump(); });
+
+ uint64_t StartOffset = OS.tell();
+
+ // Allow the object writer a chance to perform post-layout binding (for
+ // example, to set the index fields in the symbol data).
+ getWriter().ExecutePostLayoutBinding(*this, Layout);
+
+ // Evaluate and apply the fixups, generating relocation entries as necessary.
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
+ if (DF) {
+ for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+ ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+ getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+ DF->getContents().size(), FixedValue);
+ }
+ }
+ MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+ if (IF) {
+ for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
+ ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+ getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+ IF->getCode().size(), FixedValue);
+ }
+ }
+ }
+ }
+
+ // Write the object file.
+ getWriter().WriteObject(*this, Layout);
+
+ stats::ObjectBytes += OS.tell() - StartOffset;
+}
+
+bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCFragment *DF,
+ const MCAsmLayout &Layout) const {
+ if (getRelaxAll())
+ return true;
+
+ // If we cannot resolve the fixup value, it requires relaxation.
+ MCValue Target;
+ uint64_t Value;
+ if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+ return true;
+
+ // Otherwise, relax if the value is too big for a (signed) i8.
+ //
+ // FIXME: This is target dependent!
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+ const MCAsmLayout &Layout) const {
+ // If this inst doesn't ever need relaxation, ignore it. This occurs when we
+ // are intentionally pushing out inst fragments, or because we relaxed a
+ // previous instruction to one that doesn't need relaxation.
+ if (!getBackend().MayNeedRelaxation(IF->getInst()))
+ return false;
+
+ for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
+ ie = IF->fixup_end(); it != ie; ++it)
+ if (FixupNeedsRelaxation(*it, IF, Layout))
+ return true;
+
+ return false;
+}
+
+bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+ MCInstFragment &IF) {
+ if (!FragmentNeedsRelaxation(&IF, Layout))
+ return false;
+
+ ++stats::RelaxedInstructions;
+
+ // FIXME-PERF: We could immediately lower out instructions if we can tell
+ // they are fully resolved, to avoid retesting on later passes.
+
+ // Relax the fragment.
+
+ MCInst Relaxed;
+ getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+
+ // Encode the new instruction.
+ //
+ // FIXME-PERF: If it matters, we could let the target do this. It can
+ // probably do so more efficiently in many cases.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+ VecOS.flush();
+
+ // Update the instruction fragment.
+ IF.setInst(Relaxed);
+ IF.getCode() = Code;
+ IF.getFixups().clear();
+ // FIXME: Eliminate copy.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ IF.getFixups().push_back(Fixups[i]);
+
+ return true;
+}
+
+bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+ int64_t Value = 0;
+ uint64_t OldSize = LF.getContents().size();
+ bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = LF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ if (LF.isSigned())
+ MCObjectWriter::EncodeSLEB128(Value, OSE);
+ else
+ MCObjectWriter::EncodeULEB128(Value, OSE);
+ OSE.flush();
+ return OldSize != LF.getContents().size();
+}
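+
+// Illustrative note, not part of the upstream code: LEB relaxation must be
+// iterated because the encoded size feeds back into layout. For example,
+//
+//   MCObjectWriter::EncodeULEB128(127, OS);  // 1 byte:  0x7f
+//   MCObjectWriter::EncodeULEB128(128, OS);  // 2 bytes: 0x80 0x01
+//
+// so if relaxing other fragments pushes the evaluated value across such a
+// boundary, RelaxLEB() reports a size change and LayoutSectionOnce() runs
+// another pass over the section.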
+
+bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ int64_t LineDelta;
+ LineDelta = DF.getLineDelta();
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+ MCDwarfCallFrameFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+ MCSectionData &SD) {
+ MCFragment *FirstInvalidFragment = NULL;
+ // Scan for fragments that need relaxation.
+ for (MCSectionData::iterator it2 = SD.begin(),
+ ie2 = SD.end(); it2 != ie2; ++it2) {
+ // Check if this is a fragment that needs relaxation.
+ bool relaxedFrag = false;
+ switch(it2->getKind()) {
+ default:
+ break;
+ case MCFragment::FT_Inst:
+ relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ break;
+ case MCFragment::FT_Dwarf:
+ relaxedFrag = RelaxDwarfLineAddr(Layout,
+ *cast<MCDwarfLineAddrFragment>(it2));
+ break;
+ case MCFragment::FT_DwarfFrame:
+ relaxedFrag =
+ RelaxDwarfCallFrameFragment(Layout,
+ *cast<MCDwarfCallFrameFragment>(it2));
+ break;
+ case MCFragment::FT_LEB:
+ relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ break;
+ }
+ // Update the layout, and remember that we relaxed.
+ if (relaxedFrag && !FirstInvalidFragment)
+ FirstInvalidFragment = it2;
+ }
+ if (FirstInvalidFragment) {
+ Layout.Invalidate(FirstInvalidFragment);
+ return true;
+ }
+ return false;
+}
+
+bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+ ++stats::RelaxationSteps;
+
+ bool WasRelaxed = false;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+ while(LayoutSectionOnce(Layout, SD))
+ WasRelaxed = true;
+ }
+
+ return WasRelaxed;
+}
+
+void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
+ }
+}
+
+// Debugging methods
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+ OS << "<MCFixup" << " Offset:" << AF.getOffset()
+ << " Value:" << *AF.getValue()
+ << " Kind:" << AF.getKind() << ">";
+ return OS;
+}
+
+}
+
+void MCFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<";
+ switch (getKind()) {
+ case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+ case MCFragment::FT_Data: OS << "MCDataFragment"; break;
+ case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
+ case MCFragment::FT_Inst: OS << "MCInstFragment"; break;
+ case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
+ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+ case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+ case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
+ }
+
+ OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+ << " Offset:" << Offset << ">";
+
+ switch (getKind()) {
+ case MCFragment::FT_Align: {
+ const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+ if (AF->hasEmitNops())
+ OS << " (emit nops)";
+ OS << "\n ";
+ OS << " Alignment:" << AF->getAlignment()
+ << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+ << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+ break;
+ }
+ case MCFragment::FT_Data: {
+ const MCDataFragment *DF = cast<MCDataFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = DF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+
+ if (!DF->getFixups().empty()) {
+ OS << ",\n ";
+ OS << " Fixups:[";
+ for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+ ie = DF->fixup_end(); it != ie; ++it) {
+ if (it != DF->fixup_begin()) OS << ",\n ";
+ OS << *it;
+ }
+ OS << "]";
+ }
+ break;
+ }
+ case MCFragment::FT_Fill: {
+ const MCFillFragment *FF = cast<MCFillFragment>(this);
+ OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+ << " Size:" << FF->getSize();
+ break;
+ }
+ case MCFragment::FT_Inst: {
+ const MCInstFragment *IF = cast<MCInstFragment>(this);
+ OS << "\n ";
+ OS << " Inst:";
+ IF->getInst().dump_pretty(OS);
+ break;
+ }
+ case MCFragment::FT_Org: {
+ const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+ OS << "\n ";
+ OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+ break;
+ }
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << OF->getAddrDelta()
+ << " LineDelta:" << OF->getLineDelta();
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << CF->getAddrDelta();
+ break;
+ }
+ case MCFragment::FT_LEB: {
+ const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+ OS << "\n ";
+ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+ break;
+ }
+ }
+ OS << ">";
+}
+
+void MCSectionData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSectionData";
+ OS << " Alignment:" << getAlignment() << " Fragments:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>";
+}
+
+void MCSymbolData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSymbolData Symbol:" << getSymbol()
+ << " Fragment:" << getFragment() << " Offset:" << getOffset()
+ << " Flags:" << getFlags() << " Index:" << getIndex();
+ if (isCommon())
+ OS << " (common, size:" << getCommonSize()
+ << " align: " << getCommonAlignment() << ")";
+ if (isExternal())
+ OS << " (external)";
+ if (isPrivateExtern())
+ OS << " (private extern)";
+ OS << ">";
+}
+
+void MCAssembler::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCAssembler\n";
+ OS << " Sections:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "],\n";
+ OS << " Symbols:[";
+
+ for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+ if (it != symbol_begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>\n";
+}
diff --git a/contrib/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
new file mode 100644
index 000000000000..c122763b2fe5
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
@@ -0,0 +1,18 @@
+//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCCodeEmitter.h"
+
+using namespace llvm;
+
+MCCodeEmitter::MCCodeEmitter() {
+}
+
+MCCodeEmitter::~MCCodeEmitter() {
+}
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
new file mode 100644
index 000000000000..8faa72ecb4e8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -0,0 +1,315 @@
+//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCLabel.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+
+
+MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) :
+ MAI(mai), TAI(tai),
+ Allocator(), Symbols(Allocator), UsedNames(Allocator),
+ NextUniqueID(0),
+ CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
+ AllowTemporaryLabels(true) {
+ MachOUniquingMap = 0;
+ ELFUniquingMap = 0;
+ COFFUniquingMap = 0;
+
+ SecureLogFile = getenv("AS_SECURE_LOG_FILE");
+ SecureLog = 0;
+ SecureLogUsed = false;
+
+ DwarfLocSeen = false;
+}
+
+MCContext::~MCContext() {
+ // NOTE: The symbols are all allocated out of a bump pointer allocator,
+ // so we don't need to free them here.
+
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)MachOUniquingMap;
+ delete (ELFUniqueMapTy*)ELFUniquingMap;
+ delete (COFFUniqueMapTy*)COFFUniquingMap;
+
+ // If the stream for the .secure_log_unique directive was created, free it.
+ delete (raw_ostream*)SecureLog;
+
+ delete TAI;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Manipulation
+//===----------------------------------------------------------------------===//
+
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
+ assert(!Name.empty() && "Normal symbols cannot be unnamed!");
+
+ // Do the lookup and get the entire StringMapEntry. We want access to the
+ // key if we are creating the entry.
+ StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
+ MCSymbol *Sym = Entry.getValue();
+
+ if (Sym)
+ return Sym;
+
+ Sym = CreateSymbol(Name);
+ Entry.setValue(Sym);
+ return Sym;
+}
+
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+ // Determine whether this is an assembler temporary or normal label, if used.
+ bool isTemporary = false;
+ if (AllowTemporaryLabels)
+ isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+
+ StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
+ if (NameEntry->getValue()) {
+ assert(isTemporary && "Cannot rename non temporary symbols");
+ SmallString<128> NewName = Name;
+ do {
+ NewName.resize(Name.size());
+ raw_svector_ostream(NewName) << NextUniqueID++;
+ NameEntry = &UsedNames.GetOrCreateValue(NewName);
+ } while (NameEntry->getValue());
+ }
+ NameEntry->setValue(true);
+
+ // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
+ // to the copy of the string that is embedded in the UsedNames entry.
+ MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary);
+
+ return Result;
+}
+
+MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+ SmallString<128> NameSV;
+ Name.toVector(NameSV);
+ return GetOrCreateSymbol(NameSV.str());
+}
+
+MCSymbol *MCContext::CreateTempSymbol() {
+ SmallString<128> NameSV;
+ raw_svector_ostream(NameSV)
+ << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
+ return CreateSymbol(NameSV);
+}
+
+unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->incInstance();
+}
+
+unsigned MCContext::GetInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->getInstance();
+}
+
+MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(NextInstance(LocalLabelVal)));
+}
+MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
+ int bORf) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(GetInstance(LocalLabelVal) + bORf));
+}
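+
+// Illustrative note, not part of the upstream code: assuming a private prefix
+// of ".L", the second definition of the directional label "1:" creates the
+// symbol named ".L1" "\x02" "2". A backward reference "1b" (bORf == 0) then
+// resolves to that same instance, while a forward reference "1f" (bORf == 1)
+// names instance 3, i.e. the definition that the next "1:" will create.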
+
+MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
+ return Symbols.lookup(Name);
+}
+
+//===----------------------------------------------------------------------===//
+// Section Management
+//===----------------------------------------------------------------------===//
+
+const MCSectionMachO *MCContext::
+getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind) {
+
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section; if so, this should
+ // be diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (MachOUniquingMap == 0)
+ MachOUniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind);
+}
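+
+// Illustrative note, not part of the upstream code: a hypothetical call such
+// as
+//
+//   Ctx.getMachOSection("__TEXT", "__text",
+//                       MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+//                       0, SectionKind::getText());
+//
+// always returns the same MCSectionMachO* for the key "__TEXT,__text", even
+// if a later call passes different attributes; such mismatches are left for
+// the client to diagnose.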
+
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ return getELFSection(Section, Type, Flags, Kind, 0, "");
+}
+
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind, unsigned EntrySize, StringRef Group) {
+ if (ELFUniquingMap == 0)
+ ELFUniquingMap = new ELFUniqueMapTy();
+ ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ // Possibly refine the entry size first.
+ if (!EntrySize) {
+ EntrySize = MCSectionELF::DetermineEntrySize(Kind);
+ }
+
+ MCSymbol *GroupSym = NULL;
+ if (!Group.empty())
+ GroupSym = GetOrCreateSymbol(Group);
+
+ MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
+ Kind, EntrySize, GroupSym);
+ Entry.setValue(Result);
+ return Result;
+}
+
+const MCSectionELF *MCContext::CreateELFGroupSection() {
+ MCSectionELF *Result =
+ new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
+ SectionKind::getReadOnly(), 4, NULL);
+ return Result;
+}
+
+const MCSection *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ int Selection,
+ SectionKind Kind) {
+ if (COFFUniquingMap == 0)
+ COFFUniquingMap = new COFFUniqueMapTy();
+ COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
+ Characteristics,
+ Selection, Kind);
+
+ Entry.setValue(Result);
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Management
+//===----------------------------------------------------------------------===//
+
+/// GetDwarfFile - takes a file name and number to place in the dwarf file and
+/// directory tables. If the file number has already been allocated, it is an
+/// error and zero is returned; the client reports the error. Otherwise the
+/// allocated file number is returned. The file numbers may be in any order.
+unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+ // TODO: a FileNumber of zero says to use the next available file number.
+ // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
+ // to not be less than one. This needs to be changed to be not less than zero.
+
+ // Make space for this FileNumber in the MCDwarfFiles vector if needed.
+ if (FileNumber >= MCDwarfFiles.size()) {
+ MCDwarfFiles.resize(FileNumber + 1);
+ } else {
+ MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+ if (ExistingFile)
+ // It is an error to see the same number used more than once.
+ return 0;
+ }
+
+ // Get the new MCDwarfFile slot for this FileNumber.
+ MCDwarfFile *&File = MCDwarfFiles[FileNumber];
+
+ // Separate the directory part from the basename of the FileName.
+ std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+
+ // Find or make an entry in the MCDwarfDirs vector for this Directory.
+ StringRef Name;
+ unsigned DirIndex;
+ // Capture directory name.
+ if (Slash.second.empty()) {
+ Name = Slash.first;
+ DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ } else {
+ StringRef Directory = Slash.first;
+ Name = Slash.second;
+ for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) {
+ if (Directory == MCDwarfDirs[DirIndex])
+ break;
+ }
+ if (DirIndex >= MCDwarfDirs.size()) {
+ char *Buf = static_cast<char *>(Allocate(Directory.size()));
+ memcpy(Buf, Directory.data(), Directory.size());
+ MCDwarfDirs.push_back(StringRef(Buf, Directory.size()));
+ }
+ // The DirIndex is one-based, as a DirIndex of 0 is used for FileNames with
+ // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that directory
+ // names are stored at MCDwarfDirs[DirIndex-1], whereas FileNames are stored
+ // at MCDwarfFiles[FileNumber].Name.
+ DirIndex++;
+ }
+
+ // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
+ // vector.
+ char *Buf = static_cast<char *>(Allocate(Name.size()));
+ memcpy(Buf, Name.data(), Name.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+
+ // return the allocated FileNumber.
+ return FileNumber;
+}
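+
+// Worked example (illustrative only, paths hypothetical): the directive
+//
+//   .file 2 "/usr/src/foo.c"
+//
+// reaches GetDwarfFile("/usr/src/foo.c", 2). The rsplit('/') above yields the
+// directory "/usr/src" and the basename "foo.c". If "/usr/src" is new it is
+// appended to MCDwarfDirs (say at vector index 0), and MCDwarfFiles[2] is set
+// to {Name: "foo.c", DirIndex: 1}, since DirIndex 0 is reserved for file names
+// with no directory component.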
+
+/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
+/// currently is assigned and false otherwise.
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
+ if (FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+ return false;
+
+ return MCDwarfFiles[FileNumber] != 0;
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler.cpp
new file mode 100644
index 000000000000..08096906462f
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler.cpp
@@ -0,0 +1,14 @@
+//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+using namespace llvm;
+
+MCDisassembler::~MCDisassembler() {
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
new file mode 100644
index 000000000000..5480b4b12b2c
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -0,0 +1,164 @@
+//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Disassembler.h"
+#include "llvm-c/Disassembler.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h" // FIXME.
+#include "llvm/Target/TargetMachine.h" // FIXME.
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Support/MemoryObject.h"
+
+namespace llvm {
+class Target;
+} // namespace llvm
+using namespace llvm;
+
+// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic
+// disassembly is supported by passing a block of information in the DisInfo
+// parameter and specifying the TagType and callback functions as described in
+// the header llvm-c/Disassembler.h. The pointer to the block and the
+// functions can all be passed as NULL. If successful, this returns a
+// disassembler context. If not, it returns NULL.
+//
+LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
+ int TagType, LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
+ // Initialize targets and assembly printers/parsers.
+ llvm::InitializeAllTargetInfos();
+ // FIXME: We shouldn't need to initialize the Target(Machine)s.
+ llvm::InitializeAllTargets();
+ llvm::InitializeAllMCAsmInfos();
+ llvm::InitializeAllAsmPrinters();
+ llvm::InitializeAllAsmParsers();
+ llvm::InitializeAllDisassemblers();
+
+ // Get the target.
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+ assert(TheTarget && "Unable to create target!");
+
+ // Get the assembler info needed to setup the MCContext.
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
+ assert(MAI && "Unable to create target asm info!");
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ std::string CPU;
+
+ // FIXME: We shouldn't need to do this (and link in codegen).
+ // When we split this out, we should do it in a way that makes
+ // it straightforward to switch subtargets on the fly.
+ TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU,
+ FeaturesStr);
+ assert(TM && "Unable to create target machine!");
+
+ // Get the target assembler info needed to setup the context.
+ const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
+ assert(tai && "Unable to create target assembler!");
+
+ // Set up the MCContext for creating symbols and MCExpr's.
+ MCContext *Ctx = new MCContext(*MAI, tai);
+ assert(Ctx && "Unable to create MCContext!");
+
+ // Set up disassembler.
+ MCDisassembler *DisAsm = TheTarget->createMCDisassembler();
+ assert(DisAsm && "Unable to create disassembler!");
+ DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx);
+
+ // Set up the instruction printer.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
+ *MAI);
+ assert(IP && "Unable to create instruction printer!");
+
+ LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
+ GetOpInfo, SymbolLookUp,
+ TheTarget, MAI, TM, tai, Ctx,
+ DisAsm, IP);
+ assert(DC && "Allocation failure!");
+ return DC;
+}
+
+//
+// LLVMDisasmDispose() disposes of the disassembler specified by the context.
+//
+void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ delete DC;
+}
+
+namespace {
+//
+// The memory object created by LLVMDisasmInstruction().
+//
+class DisasmMemoryObject : public MemoryObject {
+ uint8_t *Bytes;
+ uint64_t Size;
+ uint64_t BasePC;
+public:
+ DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) :
+ Bytes(bytes), Size(size), BasePC(basePC) {}
+
+ uint64_t getBase() const { return BasePC; }
+ uint64_t getExtent() const { return Size; }
+
+ int readByte(uint64_t Addr, uint8_t *Byte) const {
+ if (Addr - BasePC >= Size)
+ return -1;
+ *Byte = Bytes[Addr - BasePC];
+ return 0;
+ }
+};
+} // end anonymous namespace
+
+//
+// LLVMDisasmInstruction() disassembles a single instruction using the
+// disassembler context specified in the parameter DC. The bytes of the
+// instruction are specified in the parameter Bytes, which must contain at
+// least BytesSize bytes. The instruction is at the address specified by the
+// PC parameter. If a valid instruction can be disassembled, its string is
+// returned indirectly in OutString, whose size is specified in the parameter
+// OutStringSize. This function returns the number of bytes in the
+// instruction, or zero if there was no valid instruction. If this function
+// returns zero, the caller must decide how many bytes to step over (by
+// emitting a .byte, .long, etc.) to continue.
+//
+size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
+ uint64_t BytesSize, uint64_t PC, char *OutString,
+ size_t OutStringSize){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
+ DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC);
+
+ uint64_t Size;
+ MCInst Inst;
+ const MCDisassembler *DisAsm = DC->getDisAsm();
+ MCInstPrinter *IP = DC->getIP();
+ if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls()))
+ return 0;
+
+ SmallVector<char, 64> InsnStr;
+ raw_svector_ostream OS(InsnStr);
+ IP->printInst(&Inst, OS);
+ OS.flush();
+
+ assert(OutStringSize != 0 && "Output buffer cannot be zero size");
+ size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
+ std::memcpy(OutString, InsnStr.data(), OutputSize);
+ OutString[OutputSize] = '\0'; // Terminate string.
+
+ return Size;
+}
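+
+// Illustrative sketch, not part of the upstream code: a minimal client of the
+// C API implemented in this file. The byte buffer and triple are hypothetical.
+//
+//   uint8_t Bytes[] = { 0x55, 0x48, 0x89, 0xe5 };
+//   char Text[64];
+//   LLVMDisasmContextRef DC =
+//       LLVMCreateDisasm("x86_64-unknown-unknown", 0, 0, 0, 0);
+//   uint64_t Idx = 0;
+//   while (Idx < sizeof(Bytes)) {
+//     size_t Len = LLVMDisasmInstruction(DC, Bytes + Idx, sizeof(Bytes) - Idx,
+//                                        /*PC=*/Idx, Text, sizeof(Text));
+//     if (!Len)
+//       break;  // Caller picks how many bytes to skip past the bad encoding.
+//     Idx += Len;
+//   }
+//   LLVMDisasmDispose(DC);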
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h
new file mode 100644
index 000000000000..f0ec42a017a4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h
@@ -0,0 +1,96 @@
+//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Disassembly library's disassembler
+// context. The disassembler is responsible for producing strings for
+// individual instructions according to a given architecture and disassembly
+// syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_DISASSEMBLER_H
+#define LLVM_MC_DISASSEMBLER_H
+
+#include "llvm-c/Disassembler.h"
+#include <string>
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+class TargetAsmInfo;
+class MCContext;
+class MCAsmInfo;
+class MCDisassembler;
+class MCInstPrinter;
+class Target;
+class TargetMachine;
+
+//
+// This is the disassembler context returned by LLVMCreateDisasm().
+//
+class LLVMDisasmContext {
+private:
+ //
+ // The passed parameters when the disassembler context is created.
+ //
+ // The TripleName for this disassembler.
+ std::string TripleName;
+ // The pointer to the caller's block of symbolic information.
+ void *DisInfo;
+ // The Triple specific symbolic information type returned by GetOpInfo.
+ int TagType;
+ // The function to get the symbolic information for operands.
+ LLVMOpInfoCallback GetOpInfo;
+ // The function to look up a symbol name.
+ LLVMSymbolLookupCallback SymbolLookUp;
+ //
+ // The objects created and saved by LLVMCreateDisasm() then used by
+ // LLVMDisasmInstruction().
+ //
+ // The LLVM target corresponding to the disassembler.
+ // FIXME: using llvm::OwningPtr<const llvm::Target> causes a malloc error
+ // when this LLVMDisasmContext is deleted.
+ const Target *TheTarget;
+ // The assembly information for the target architecture.
+ llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
+ // The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TM;
+ // The target assembler information for the target architecture.
+ // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc
+ // error when this LLVMDisasmContext is deleted.
+ const TargetAsmInfo *Tai;
+ // The assembly context for creating symbols and MCExprs.
+ llvm::OwningPtr<const llvm::MCContext> Ctx;
+ // The disassembler for the target architecture.
+ llvm::OwningPtr<const llvm::MCDisassembler> DisAsm;
+ // The instruction printer for the target architecture.
+ llvm::OwningPtr<llvm::MCInstPrinter> IP;
+
+public:
+ LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
+ LLVMOpInfoCallback getOpInfo,
+ LLVMSymbolLookupCallback symbolLookUp,
+ const Target *theTarget, const MCAsmInfo *mAI,
+ llvm::TargetMachine *tM, const TargetAsmInfo *tai,
+ llvm::MCContext *ctx, const MCDisassembler *disAsm,
+ MCInstPrinter *iP) : TripleName(tripleName),
+ DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
+ SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) {
+ TM.reset(tM);
+ MAI.reset(mAI);
+ Ctx.reset(ctx);
+ DisAsm.reset(disAsm);
+ IP.reset(iP);
+ }
+ const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
+ MCInstPrinter *getIP() { return IP.get(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
new file mode 100644
index 000000000000..bdd99afe1ae4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -0,0 +1,419 @@
+//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions according
+// to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+struct TripleMap {
+ Triple::ArchType Arch;
+ const char *String;
+};
+
+static struct TripleMap triplemap[] = {
+ { Triple::x86, "i386-unknown-unknown" },
+ { Triple::x86_64, "x86_64-unknown-unknown" },
+ { Triple::arm, "arm-unknown-unknown" },
+ { Triple::thumb, "thumb-unknown-unknown" },
+ { Triple::InvalidArch, NULL, }
+};
+
+/// tripleFromArch - Returns the target triple string corresponding to a given
+/// architecture, or NULL if the architecture is not supported
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const char *tripleFromArch(Triple::ArchType arch) {
+ unsigned int infoIndex;
+
+ for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+ if (arch == triplemap[infoIndex].Arch)
+ return triplemap[infoIndex].String;
+ }
+
+ return NULL;
+}
+
+/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
+/// for the desired assembly syntax, suitable for passing to
+/// Target::createMCInstPrinter()
+///
+/// @arg arch - The target architecture
+/// @arg syntax - The assembly syntax, as an EDDisassembler::AssemblySyntax value
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+ EDDisassembler::AssemblySyntax syntax) {
+ switch (syntax) {
+ default:
+ return -1;
+ // Mappings below from X86AsmPrinter.cpp
+ case EDDisassembler::kEDAssemblySyntaxX86ATT:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 0;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxX86Intel:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 1;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxARMUAL:
+ if (arch == Triple::arm || arch == Triple::thumb)
+ return 0;
+ else
+ return -1;
+ }
+}
+
+void EDDisassembler::initialize() {
+ if (sInitialized)
+ return;
+
+ sInitialized = true;
+
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllMCAsmInfos();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+ InitializeAllDisassemblers();
+}
+
+#undef BRINGUP_TARGET
+
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+ AssemblySyntax syntax) {
+ CPUKey key;
+ key.Arch = arch;
+ key.Syntax = syntax;
+
+ EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+ if (i != sDisassemblers.end()) {
+ return i->second;
+ } else {
+ EDDisassembler* sdd = new EDDisassembler(key);
+ if (!sdd->valid()) {
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd;
+
+ return sdd;
+ }
+
+ return NULL;
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
+ return getDisassembler(Triple(str).getArch(), syntax);
+}
+
+EDDisassembler::EDDisassembler(CPUKey &key) :
+ Valid(false),
+ HasSemantics(false),
+ ErrorStream(nulls()),
+ Key(key) {
+ const char *triple = tripleFromArch(key.Arch);
+
+ if (!triple)
+ return;
+
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+
+ if (LLVMSyntaxVariant < 0)
+ return;
+
+ std::string tripleString(triple);
+ std::string errorString;
+
+ Tgt = TargetRegistry::lookupTarget(tripleString,
+ errorString);
+
+ if (!Tgt)
+ return;
+
+ std::string CPU;
+ std::string featureString;
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
+ featureString));
+
+ const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
+
+ if (!registerInfo)
+ return;
+
+ initMaps(*registerInfo);
+
+ AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
+
+ if (!AsmInfo)
+ return;
+
+ Disassembler.reset(Tgt->createMCDisassembler());
+
+ if (!Disassembler)
+ return;
+
+ InstInfos = Disassembler->getEDInfo();
+
+ InstString.reset(new std::string);
+ InstStream.reset(new raw_string_ostream(*InstString));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+
+ if (!InstPrinter)
+ return;
+
+ GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
+ SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+ SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
+
+ initMaps(*TargetMachine->getRegisterInfo());
+
+ Valid = true;
+}
+
+EDDisassembler::~EDDisassembler() {
+ if (!valid())
+ return;
+}
+
+namespace {
+ /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
+ /// as provided by the ED interface. See MemoryObject.
+ class EDMemoryObject : public llvm::MemoryObject {
+ private:
+ EDByteReaderCallback Callback;
+ void *Arg;
+ public:
+ EDMemoryObject(EDByteReaderCallback callback,
+ void *arg) : Callback(callback), Arg(arg) { }
+ ~EDMemoryObject() { }
+ uint64_t getBase() const { return 0x0; }
+ uint64_t getExtent() const { return (uint64_t)-1; }
+ int readByte(uint64_t address, uint8_t *ptr) const {
+ if (!Callback)
+ return -1;
+
+ if (Callback(ptr, address, Arg))
+ return -1;
+
+ return 0;
+ }
+ };
+}
+
+EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ EDMemoryObject memoryObject(byteReader, arg);
+
+ MCInst* inst = new MCInst;
+ uint64_t byteSize;
+
+ if (!Disassembler->getInstruction(*inst,
+ byteSize,
+ memoryObject,
+ address,
+ ErrorStream)) {
+ delete inst;
+ return NULL;
+ } else {
+ const llvm::EDInstInfo *thisInstInfo = NULL;
+
+ if (InstInfos) {
+ thisInstInfo = &InstInfos[inst->getOpcode()];
+ }
+
+ EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
+ return sdInst;
+ }
+}
+
+void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
+ unsigned numRegisters = registerInfo.getNumRegs();
+ unsigned registerIndex;
+
+ for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
+ const char* registerName = registerInfo.get(registerIndex).Name;
+
+ RegVec.push_back(registerName);
+ RegRMap[registerName] = registerIndex;
+ }
+
+ switch (Key.Arch) {
+ default:
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
+ stackPointers.insert(registerIDWithName("SP"));
+ stackPointers.insert(registerIDWithName("ESP"));
+ stackPointers.insert(registerIDWithName("RSP"));
+
+ programCounters.insert(registerIDWithName("IP"));
+ programCounters.insert(registerIDWithName("EIP"));
+ programCounters.insert(registerIDWithName("RIP"));
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ stackPointers.insert(registerIDWithName("SP"));
+
+ programCounters.insert(registerIDWithName("PC"));
+ break;
+ }
+}
+
+const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
+ if (registerID >= RegVec.size())
+ return NULL;
+ else
+ return RegVec[registerID].c_str();
+}
+
+unsigned EDDisassembler::registerIDWithName(const char *name) const {
+ regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
+ if (iter == RegRMap.end())
+ return 0;
+ else
+ return (*iter).second;
+}
+
+bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
+ return (stackPointers.find(registerID) != stackPointers.end());
+}
+
+bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
+ return (programCounters.find(registerID) != programCounters.end());
+}
+
+int EDDisassembler::printInst(std::string &str, MCInst &inst) {
+ PrinterMutex.acquire();
+
+ InstPrinter->printInst(&inst, *InstStream);
+ InstStream->flush();
+ str = *InstString;
+ InstString->clear();
+
+ PrinterMutex.release();
+
+ return 0;
+}
+
+static void diag_handler(const SMDiagnostic &diag,
+ void *context)
+{
+ if (context) {
+ EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
+ diag.Print("", disassembler->ErrorStream);
+ }
+}
+
+int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
+ SmallVectorImpl<AsmToken> &tokens,
+ const std::string &str) {
+ int ret = 0;
+
+ switch (Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::arm:
+ case Triple::thumb:
+ break;
+ }
+
+ const char *cStr = str.c_str();
+ MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
+
+ StringRef instName;
+ SMLoc instLoc;
+
+ SourceMgr sourceMgr;
+ sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
+ sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
+ MCContext context(*AsmInfo, NULL);
+ OwningPtr<MCStreamer> streamer(createNullStreamer(context));
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ context, *streamer,
+ *AsmInfo));
+
+ StringRef triple = tripleFromArch(Key.Arch);
+ OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
+ *genericParser));
+
+ AsmToken OpcodeToken = genericParser->Lex();
+ AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
+
+ if (OpcodeToken.is(AsmToken::Identifier)) {
+ instName = OpcodeToken.getString();
+ instLoc = OpcodeToken.getLoc();
+
+ if (NextToken.isNot(AsmToken::Eof) &&
+ TargetParser->ParseInstruction(instName, instLoc, operands))
+ ret = -1;
+ } else {
+ ret = -1;
+ }
+
+ ParserMutex.acquire();
+
+ if (!ret) {
+ GenericAsmLexer->setBuffer(buf);
+
+ while (SpecificAsmLexer->Lex(),
+ SpecificAsmLexer->isNot(AsmToken::Eof) &&
+ SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
+ if (SpecificAsmLexer->is(AsmToken::Error)) {
+ ret = -1;
+ break;
+ }
+ tokens.push_back(SpecificAsmLexer->getTok());
+ }
+ }
+
+ ParserMutex.release();
+
+ return ret;
+}
+
+int EDDisassembler::llvmSyntaxVariant() const {
+ return LLVMSyntaxVariant;
+}
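As a usage sketch for the registry and createInst() path implemented above, the helper below serves bytes to the disassembler from a plain memory buffer through an EDByteReaderCallback and prints the resulting instruction. It is illustrative only; BufferRegion, bufferByteReader and disassembleOne are not names from the sources, and the sketch assumes it is compiled alongside the EDDisassembler.h and EDInst.h headers in this directory.

#include "EDDisassembler.h"
#include "EDInst.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical byte source: a fixed in-memory region starting at Base.
struct BufferRegion {
  const uint8_t *Bytes;
  uint64_t Base;
  uint64_t Size;
};

static int bufferByteReader(uint8_t *byte, uint64_t address, void *arg) {
  const BufferRegion *R = static_cast<const BufferRegion *>(arg);
  if (address < R->Base || address >= R->Base + R->Size)
    return -1;                       // out of range; non-zero signals failure
  *byte = R->Bytes[address - R->Base];
  return 0;
}

void disassembleOne(const uint8_t *bytes, uint64_t size, uint64_t addr) {
  EDDisassembler::initialize();      // one-time back-end and registry setup
  EDDisassembler *dis =
      EDDisassembler::getDisassembler(Triple::x86_64,
                                      EDDisassembler::kEDAssemblySyntaxX86ATT);
  if (!dis)
    return;

  BufferRegion region = { bytes, addr, size };
  EDInst *inst = dis->createInst(bufferByteReader, addr, &region);
  if (!inst)
    return;

  const char *text = 0;
  if (!inst->getString(text))
    errs() << text;                  // stringify() already appends a newline
  delete inst;                       // the EDInst owns the wrapped MCInst
}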
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
new file mode 100644
index 000000000000..11d69c151cf9
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
@@ -0,0 +1,269 @@
+//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// disassembler class. The disassembler is responsible for vending individual
+// instructions according to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+class MCSubtargetInfo;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+ typedef enum {
+ /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86Intel = 0,
+ /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
+ } AssemblySyntax;
+
+
+ ////////////////////
+ // Static members //
+ ////////////////////
+
+ /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+ /// pair
+ struct CPUKey {
+ /// The architecture type
+ llvm::Triple::ArchType Arch;
+
+ /// The assembly syntax
+ AssemblySyntax Syntax;
+
+ /// operator== - Equality operator
+ bool operator==(const CPUKey &key) const {
+ return (Arch == key.Arch &&
+ Syntax == key.Syntax);
+ }
+
+ /// operator< - Less-than operator
+ bool operator<(const CPUKey &key) const {
+ return ((Arch < key.Arch) ||
+ ((Arch == key.Arch) && Syntax < (key.Syntax)));
+ }
+ };
+
+ typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+ /// True if the disassembler registry has been initialized; false if not
+ static bool sInitialized;
+ /// A map from disassembler specifications to disassemblers. Populated
+ /// lazily.
+ static DisassemblerMap_t sDisassemblers;
+
+ /// getDisassembler - Returns the specified disassembler, or NULL on failure
+ ///
+ /// @arg arch - The desired architecture
+ /// @arg syntax - The desired disassembly syntax
+ static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+ AssemblySyntax syntax);
+
+ /// getDisassembler - Returns the disassembler for a given target triple
+ /// string and assembly syntax, or NULL on failure
+ ///
+ /// @arg str - The string representation of the architecture triple, e.g.,
+ /// "x86_64-apple-darwin"
+ /// @arg syntax - The disassembly syntax for the required disassembler
+ static EDDisassembler *getDisassembler(llvm::StringRef str,
+ AssemblySyntax syntax);
+
+ /// initialize - Initializes the disassembler registry and the LLVM backend
+ static void initialize();
+
+ ////////////////////////
+ // Per-object members //
+ ////////////////////////
+
+ /// True only if the object has been successfully initialized
+ bool Valid;
+ /// True if the disassembler can provide semantic information
+ bool HasSemantics;
+
+ /// The stream to write errors to
+ llvm::raw_ostream &ErrorStream;
+
+ /// The architecture/syntax pair for the current architecture
+ CPUKey Key;
+ /// The LLVM target corresponding to the disassembler
+ const llvm::Target *Tgt;
+ /// The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
+ /// The assembly information for the target architecture
+ llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The disassembler for the target architecture
+ llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+ /// The output string for the instruction printer; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<std::string> InstString;
+ /// The output stream for the disassembler; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+ /// The instruction printer for the target architecture; must be guarded with
+ /// PrinterMutex when printing
+ llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+ /// The mutex that guards the instruction printer's printing functions, which
+ /// use a shared stream
+ llvm::sys::Mutex PrinterMutex;
+ /// The array of instruction information provided by the TableGen backend for
+ /// the target architecture
+ const llvm::EDInstInfo *InstInfos;
+ /// The lexers used when tokenizing instruction strings: a target-independent
+ /// generic lexer and a target-specific one
+ llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+ llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ /// The guard for the above
+ llvm::sys::Mutex ParserMutex;
+ /// The LLVM number used for the target disassembly syntax variant
+ int LLVMSyntaxVariant;
+
+ typedef std::vector<std::string> regvec_t;
+ typedef std::map<std::string, unsigned> regrmap_t;
+
+ /// A vector of registers for quick mapping from LLVM register IDs to names
+ regvec_t RegVec;
+ /// A map of registers for quick mapping from register names to LLVM IDs
+ regrmap_t RegRMap;
+
+ /// A set of register IDs for aliases of the stack pointer for the current
+ /// architecture
+ std::set<unsigned> stackPointers;
+ /// A set of register IDs for aliases of the program counter for the current
+ /// architecture
+ std::set<unsigned> programCounters;
+
+ /// Constructor - initializes a disassembler for the given architecture and
+ /// syntax, creating all the objects it needs along the way
+ ///
+ /// @arg key - the architecture and disassembly syntax for the
+ /// disassembler
+ EDDisassembler(CPUKey& key);
+
+ /// valid - reports whether the constructor completed successfully.
+ bool valid() {
+ return Valid;
+ }
+
+ /// hasSemantics - reports whether the disassembler can provide operands and
+ /// tokens.
+ bool hasSemantics() {
+ return HasSemantics;
+ }
+
+ ~EDDisassembler();
+
+ /// createInst - creates and returns an instruction given a callback and
+ /// memory address, or NULL on failure
+ ///
+ /// @arg byteReader - A callback function that provides machine code bytes
+ /// @arg address - The address of the first byte of the instruction,
+ /// suitable for passing to byteReader
+ /// @arg arg - An opaque argument for byteReader
+ EDInst *createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg);
+
+ /// initMaps - initializes regVec and regRMap using the provided register
+ /// info
+ ///
+ /// @arg registerInfo - the register information to use as a source
+ void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+ /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+ /// register for a given register ID, or NULL on failure
+ ///
+ /// @arg registerID - the ID of the register to be queried
+ const char *nameWithRegisterID(unsigned registerID) const;
+ /// registerIDWithName - Returns the ID of a register for a given register
+ /// name, or 0 if no register with that name is known
+ ///
+ /// @arg name - The name of the register
+ unsigned registerIDWithName(const char *name) const;
+
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsStackPointer(unsigned registerID);
+ /// registerIsProgramCounter - reports whether a register ID is an alias for
+ /// the program counter register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsProgramCounter(unsigned registerID);
+
+ /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+ /// otherwise
+ ///
+ /// @arg str - A reference to a string which is filled in with the string
+ /// representation of the instruction
+ /// @arg inst - A reference to the MCInst to be printed
+ int printInst(std::string& str,
+ llvm::MCInst& inst);
+
+ /// parseInst - extracts operands and tokens from a string for use in
+ /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+ ///
+ /// @arg operands - A reference to a vector that will be filled in with the
+ /// parsed operands
+ /// @arg tokens - A reference to a vector that will be filled in with the
+ /// tokens
+ /// @arg str - The string representation of the instruction
+ int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+ llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+ const std::string &str);
+
+ /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+ int llvmSyntaxVariant() const;
+};
+
+} // end namespace llvm
+
+#endif
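The register maps documented above (built by initMaps()) can be queried directly. A small illustrative sketch, using register names that the x86 and ARM cases of initMaps() themselves insert; dumpPointerRegs is not a name from the sources:

#include "EDDisassembler.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: report the stack-pointer and program-counter aliases
// known to an already-constructed disassembler.
void dumpPointerRegs(llvm::EDDisassembler &dis) {
  unsigned sp = dis.registerIDWithName("SP");   // 0 when the name is unknown
  unsigned pc = dis.registerIDWithName("PC");
  if (dis.registerIsStackPointer(sp))
    llvm::errs() << "stack pointer: " << dis.nameWithRegisterID(sp) << "\n";
  if (dis.registerIsProgramCounter(pc))
    llvm::errs() << "program counter: " << dis.nameWithRegisterID(pc) << "\n";
}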
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h b/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h
new file mode 100644
index 000000000000..e43ad1635246
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h
@@ -0,0 +1,84 @@
+//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINFO_H
+#define LLVM_EDINFO_H
+
+enum {
+ EDIS_MAX_OPERANDS = 13,
+ EDIS_MAX_SYNTAXES = 2
+};
+
+enum OperandTypes {
+ kOperandTypeNone,
+ kOperandTypeImmediate,
+ kOperandTypeRegister,
+ kOperandTypeX86Memory,
+ kOperandTypeX86EffectiveAddress,
+ kOperandTypeX86PCRelative,
+ kOperandTypeARMBranchTarget,
+ kOperandTypeARMSoReg,
+ kOperandTypeARMSoImm,
+ kOperandTypeARMRotImm,
+ kOperandTypeARMSoImm2Part,
+ kOperandTypeARMPredicate,
+ kOperandTypeAddrModeImm12,
+ kOperandTypeLdStSOReg,
+ kOperandTypeARMAddrMode2,
+ kOperandTypeARMAddrMode2Offset,
+ kOperandTypeARMAddrMode3,
+ kOperandTypeARMAddrMode3Offset,
+ kOperandTypeARMAddrMode4,
+ kOperandTypeARMAddrMode5,
+ kOperandTypeARMAddrMode6,
+ kOperandTypeARMAddrMode6Offset,
+ kOperandTypeARMAddrMode7,
+ kOperandTypeARMAddrModePC,
+ kOperandTypeARMRegisterList,
+ kOperandTypeARMDPRRegisterList,
+ kOperandTypeARMSPRRegisterList,
+ kOperandTypeARMTBAddrMode,
+ kOperandTypeThumbITMask,
+ kOperandTypeThumbAddrModeRegS1,
+ kOperandTypeThumbAddrModeRegS2,
+ kOperandTypeThumbAddrModeRegS4,
+ kOperandTypeThumbAddrModeImmS1,
+ kOperandTypeThumbAddrModeImmS2,
+ kOperandTypeThumbAddrModeImmS4,
+ kOperandTypeThumbAddrModeRR,
+ kOperandTypeThumbAddrModeSP,
+ kOperandTypeThumbAddrModePC,
+ kOperandTypeThumb2AddrModeReg,
+ kOperandTypeThumb2SoReg,
+ kOperandTypeThumb2SoImm,
+ kOperandTypeThumb2AddrModeImm8,
+ kOperandTypeThumb2AddrModeImm8Offset,
+ kOperandTypeThumb2AddrModeImm12,
+ kOperandTypeThumb2AddrModeSoReg,
+ kOperandTypeThumb2AddrModeImm8s4,
+ kOperandTypeThumb2AddrModeImm8s4Offset
+};
+
+enum OperandFlags {
+ kOperandFlagSource = 0x1,
+ kOperandFlagTarget = 0x2
+};
+
+enum InstructionTypes {
+ kInstructionTypeNone,
+ kInstructionTypeMove,
+ kInstructionTypeBranch,
+ kInstructionTypePush,
+ kInstructionTypePop,
+ kInstructionTypeCall,
+ kInstructionTypeReturn
+};
+
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
new file mode 100644
index 000000000000..6057e169e347
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
@@ -0,0 +1,212 @@
+//===-- EDInst.cpp - LLVM Enhanced Disassembler ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's instruction class.
+// The instruction is responsible for vending the string representation,
+// individual tokens, and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDInst.h"
+#include "EDDisassembler.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+EDInst::EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *info) :
+ Disassembler(disassembler),
+ Inst(inst),
+ ThisInstInfo(info),
+ ByteSize(byteSize),
+ BranchTarget(-1),
+ MoveSource(-1),
+ MoveTarget(-1) {
+ // ThisInstInfo may be null when no EDInstInfo table is available.
+ OperandOrder = ThisInstInfo ?
+     ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()] : NULL;
+}
+
+EDInst::~EDInst() {
+ unsigned int index;
+ unsigned int numOperands = Operands.size();
+
+ for (index = 0; index < numOperands; ++index)
+ delete Operands[index];
+
+ unsigned int numTokens = Tokens.size();
+
+ for (index = 0; index < numTokens; ++index)
+ delete Tokens[index];
+
+ delete Inst;
+}
+
+uint64_t EDInst::byteSize() {
+ return ByteSize;
+}
+
+int EDInst::stringify() {
+ if (StringifyResult.valid())
+ return StringifyResult.result();
+
+ if (Disassembler.printInst(String, *Inst))
+ return StringifyResult.setResult(-1);
+
+ String.push_back('\n');
+
+ return StringifyResult.setResult(0);
+}
+
+int EDInst::getString(const char*& str) {
+ if (stringify())
+ return -1;
+
+ str = String.c_str();
+
+ return 0;
+}
+
+unsigned EDInst::instID() {
+ return Inst->getOpcode();
+}
+
+bool EDInst::isBranch() {
+ if (ThisInstInfo)
+ return
+ ThisInstInfo->instructionType == kInstructionTypeBranch ||
+ ThisInstInfo->instructionType == kInstructionTypeCall;
+ else
+ return false;
+}
+
+bool EDInst::isMove() {
+ if (ThisInstInfo)
+ return ThisInstInfo->instructionType == kInstructionTypeMove;
+ else
+ return false;
+}
+
+int EDInst::parseOperands() {
+ if (ParseResult.valid())
+ return ParseResult.result();
+
+ if (!ThisInstInfo)
+ return ParseResult.setResult(-1);
+
+ unsigned int opIndex;
+ unsigned int mcOpIndex = 0;
+
+ for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
+ if (isBranch() &&
+ (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
+ BranchTarget = opIndex;
+ }
+ else if (isMove()) {
+ if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
+ MoveSource = opIndex;
+ else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
+ MoveTarget = opIndex;
+ }
+
+ EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
+
+ Operands.push_back(operand);
+ }
+
+ return ParseResult.setResult(0);
+}
+
+int EDInst::branchTargetID() {
+ if (parseOperands())
+ return -1;
+ return BranchTarget;
+}
+
+int EDInst::moveSourceID() {
+ if (parseOperands())
+ return -1;
+ return MoveSource;
+}
+
+int EDInst::moveTargetID() {
+ if (parseOperands())
+ return -1;
+ return MoveTarget;
+}
+
+int EDInst::numOperands() {
+ if (parseOperands())
+ return -1;
+ return Operands.size();
+}
+
+int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
+ if (parseOperands())
+ return -1;
+
+ if (index >= Operands.size())
+ return -1;
+
+ operand = Operands[index];
+ return 0;
+}
+
+int EDInst::tokenize() {
+ if (TokenizeResult.valid())
+ return TokenizeResult.result();
+
+ if (ThisInstInfo == NULL)
+ return TokenizeResult.setResult(-1);
+
+ if (stringify())
+ return TokenizeResult.setResult(-1);
+
+ return TokenizeResult.setResult(EDToken::tokenize(Tokens,
+ String,
+ OperandOrder,
+ Disassembler));
+
+}
+
+int EDInst::numTokens() {
+ if (tokenize())
+ return -1;
+ return Tokens.size();
+}
+
+int EDInst::getToken(EDToken *&token, unsigned int index) {
+ if (tokenize())
+ return -1;
+ if (index >= Tokens.size())
+ return -1;
+ token = Tokens[index];
+ return 0;
+}
+
+#ifdef __BLOCKS__
+int EDInst::visitTokens(EDTokenVisitor_t visitor) {
+ if (tokenize())
+ return -1;
+
+ tokvec_t::iterator iter;
+
+ for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
+ int ret = visitor(*iter);
+ if (ret == 1)
+ return 0;
+ if (ret != 0)
+ return -1;
+ }
+
+ return 0;
+}
+#endif
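To show how the lazily computed results above are consumed, here is an illustrative walk over an EDInst's operands and tokens; describe() is not a name from the sources, and the sketch assumes it is built next to these files:

#include "EDInst.h"
#include "EDOperand.h"
#include "EDToken.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void describe(EDInst &inst) {
  int numOps = inst.numOperands();      // triggers parseOperands(); -1 on error
  for (int i = 0; i < numOps; ++i) {
    EDOperand *op = 0;
    if (!inst.getOperand(op, i))
      errs() << "operand " << i << (op->isRegister() ? " (register)\n" : "\n");
  }

  int numToks = inst.numTokens();       // triggers tokenize(); -1 on error
  for (int i = 0; i < numToks; ++i) {
    EDToken *tok = 0;
    if (!inst.getToken(tok, i))
      errs() << "token: " << tok->string() << "\n";
  }
}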
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
new file mode 100644
index 000000000000..ceb9505028de
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
@@ -0,0 +1,182 @@
+//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// instruction class. The instruction is responsible for vending the string
+// representation, individual tokens and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINST_H
+#define LLVM_EDINST_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+ class MCInst;
+ struct EDInstInfo;
+ struct EDToken;
+ struct EDDisassembler;
+ struct EDOperand;
+
+#ifdef __BLOCKS__
+ typedef int (^EDTokenVisitor_t)(EDToken *token);
+#endif
+
+/// CachedResult - Encapsulates the result of a function along with the validity
+/// of that result, so that slow functions don't need to run twice
+struct CachedResult {
+ /// True if the result has been obtained by executing the function
+ bool Valid;
+ /// The result last obtained from the function
+ int Result;
+
+ /// Constructor - Initializes an invalid result
+ CachedResult() : Valid(false) { }
+ /// valid - Returns true if the result has been obtained by executing the
+ /// function and false otherwise
+ bool valid() { return Valid; }
+ /// result - Returns the result of the function or an undefined value if
+ /// valid() is false
+ int result() { return Result; }
+ /// setResult - Sets the result of the function and declares it valid
+ /// returning the result (so that setResult() can be called from inside a
+ /// return statement)
+ /// @arg result - The result of the function
+ int setResult(int result) { Result = result; Valid = true; return result; }
+};
+
+/// EDInst - Encapsulates a single instruction, which can be queried for its
+/// string representation, as well as its operands and tokens
+struct EDInst {
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+ /// The containing MCInst
+ llvm::MCInst *Inst;
+ /// The instruction information provided by TableGen for this instruction
+ const llvm::EDInstInfo *ThisInstInfo;
+ /// The number of bytes for the machine code representation of the instruction
+ uint64_t ByteSize;
+
+ /// The result of the stringify() function
+ CachedResult StringifyResult;
+ /// The string representation of the instruction
+ std::string String;
+ /// The order in which operands from the InstInfo's operand information appear
+ /// in String
+ const char* OperandOrder;
+
+ /// The result of the parseOperands() function
+ CachedResult ParseResult;
+ typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
+ /// The instruction's operands
+ opvec_t Operands;
+ /// The operand corresponding to the target, if the instruction is a branch
+ int BranchTarget;
+ /// The operand corresponding to the source, if the instruction is a move
+ int MoveSource;
+ /// The operand corresponding to the target, if the instruction is a move
+ int MoveTarget;
+
+ /// The result of the tokenize() function
+ CachedResult TokenizeResult;
+ typedef std::vector<EDToken*> tokvec_t;
+ /// The instruction's tokens
+ tokvec_t Tokens;
+
+ /// Constructor - initializes an instruction given the output of the LLVM
+ /// C++ disassembler
+ ///
+ /// @arg inst - The MCInst, which will now be owned by this object
+ /// @arg byteSize - The size of the consumed instruction, in bytes
+ /// @arg disassembler - The parent disassembler
+ /// @arg instInfo - The instruction information produced by the table
+ /// generator for this instruction
+ EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *instInfo);
+ ~EDInst();
+
+ /// byteSize - returns the number of bytes consumed by the machine code
+ /// representation of the instruction
+ uint64_t byteSize();
+ /// instID - returns the LLVM instruction ID of the instruction
+ unsigned instID();
+
+ /// stringify - populates the String member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int stringify();
+ /// getString - retrieves a pointer to the string representation of the
+ /// instruction, returning 0 on success or -1 otherwise
+ ///
+ /// @arg str - A reference to a pointer that, on success, is set to point to
+ /// the string representation of the instruction; this string is still owned
+ /// by the instruction and will be deleted when the instruction is destroyed
+ int getString(const char *&str);
+
+ /// isBranch - Returns true if the instruction is a branch
+ bool isBranch();
+ /// isMove - Returns true if the instruction is a move
+ bool isMove();
+
+ /// parseOperands - populates the Operands member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int parseOperands();
+ /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+ /// the target operand if the instruction is a branch, or -1 otherwise
+ int branchTargetID();
+ /// moveSourceID - returns the ID of the source operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveSourceID();
+ /// moveTargetID - returns the ID of the target operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveTargetID();
+
+ /// numOperands - returns the number of operands available to retrieve, or -1
+ /// on error
+ int numOperands();
+ /// getOperand - retrieves an operand from the instruction's operand list by
+ /// index, returning 0 on success or -1 on error
+ ///
+ /// @arg operand - A reference whose target is pointed at the operand on
+ /// success, although the operand is still owned by the EDInst
+ /// @arg index - The index of the operand in the instruction
+ int getOperand(EDOperand *&operand, unsigned int index);
+
+ /// tokenize - populates the Tokens member of the instruction, returning 0 on
+ /// success or -1 otherwise
+ int tokenize();
+ /// numTokens - returns the number of tokens in the instruction, or -1 on
+ /// error
+ int numTokens();
+ /// getToken - retrieves a token from the instruction's token list by index,
+ /// returning 0 on success or -1 on error
+ ///
+ /// @arg token - A reference whose target is pointed at the token on success,
+ /// although the token is still owned by the EDInst
+ /// @arg index - The index of the token in the instruction
+ int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+ /// visitTokens - Visits each token in turn and applies a block to it,
+ /// returning 0 if all tokens are visited and/or the block signals
+ /// termination by returning 1; returns -1 on error
+ ///
+ /// @arg visitor - The visitor block to apply to all tokens.
+ int visitTokens(EDTokenVisitor_t visitor);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
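The CachedResult helper above captures a simple memoization idiom: run a slow step once, then replay its status on later calls, exactly as stringify(), parseOperands() and tokenize() do. A minimal self-contained sketch of the same pattern, with hypothetical Analyzer/analyze/analyzeSlowly names:

#include "EDInst.h"   // for llvm::CachedResult

struct Analyzer {
  llvm::CachedResult AnalyzeResult;

  // Stand-in for an expensive step that should only ever run once.
  int analyzeSlowly() { return 0; }

  int analyze() {
    if (AnalyzeResult.valid())                 // later calls are free
      return AnalyzeResult.result();
    return AnalyzeResult.setResult(analyzeSlowly());
  }
};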
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
new file mode 100644
index 000000000000..6a4e56ff72c4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
@@ -0,0 +1,315 @@
+//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's operand class. The
+// operand is responsible for allowing evaluation given a particular register
+// context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDOperand.h"
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+EDOperand::EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex) :
+ Disassembler(disassembler),
+ Inst(inst),
+ OpIndex(opIndex),
+ MCOpIndex(mcOpIndex) {
+ unsigned int numMCOperands = 0;
+
+ if (Disassembler.Key.Arch == Triple::x86 ||
+ Disassembler.Key.Arch == Triple::x86_64) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ break;
+ case kOperandTypeImmediate:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeRegister:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeX86Memory:
+ numMCOperands = 5;
+ break;
+ case kOperandTypeX86EffectiveAddress:
+ numMCOperands = 4;
+ break;
+ case kOperandTypeX86PCRelative:
+ numMCOperands = 1;
+ break;
+ }
+ }
+ else if (Disassembler.Key.Arch == Triple::arm ||
+ Disassembler.Key.Arch == Triple::thumb) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ case kOperandTypeARMRegisterList:
+ case kOperandTypeARMDPRRegisterList:
+ case kOperandTypeARMSPRRegisterList:
+ break;
+ case kOperandTypeImmediate:
+ case kOperandTypeRegister:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMRotImm:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeARMSoImm2Part:
+ case kOperandTypeARMPredicate:
+ case kOperandTypeThumbITMask:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeARMTBAddrMode:
+ case kOperandTypeThumb2AddrModeImm8s4Offset:
+ case kOperandTypeARMAddrMode7:
+ case kOperandTypeThumb2AddrModeReg:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeThumb2SoReg:
+ case kOperandTypeAddrModeImm12:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumbAddrModeImmS1:
+ case kOperandTypeThumbAddrModeImmS2:
+ case kOperandTypeThumbAddrModeImmS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumbAddrModePC:
+ numMCOperands = 2;
+ break;
+ case kOperandTypeARMSoReg:
+ case kOperandTypeLdStSOReg:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumbAddrModeRegS1:
+ case kOperandTypeThumbAddrModeRegS2:
+ case kOperandTypeThumbAddrModeRegS4:
+ case kOperandTypeARMAddrMode6Offset:
+ numMCOperands = 3;
+ break;
+ case kOperandTypeARMAddrMode6:
+ numMCOperands = 4;
+ break;
+ }
+ }
+
+ mcOpIndex += numMCOperands;
+}
+
+EDOperand::~EDOperand() {
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg) {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (Disassembler.Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeX86PCRelative:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t ripVal;
+
+ // TODO fix how we do this
+
+ if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+ return -1;
+
+ result = ripVal + displacement;
+ return 0;
+ }
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86EffectiveAddress:
+ {
+ unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+ unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+
+ uint64_t addr = 0;
+
+ unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+
+ if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) {
+ unsigned fsID = Disassembler.registerIDWithName("FS");
+ unsigned gsID = Disassembler.registerIDWithName("GS");
+
+ if (segmentReg == fsID ||
+ segmentReg == gsID) {
+ uint64_t segmentBase;
+ if (!callback(&segmentBase, segmentReg, arg))
+ addr += segmentBase;
+ }
+ }
+
+ if (baseReg) {
+ uint64_t baseVal;
+ if (callback(&baseVal, baseReg, arg))
+ return -1;
+ addr += baseVal;
+ }
+
+ if (indexReg) {
+ uint64_t indexVal;
+ if (callback(&indexVal, indexReg, arg))
+ return -1;
+ addr += (scaleAmount * indexVal);
+ }
+
+ addr += displacement;
+
+ result = addr;
+ return 0;
+ }
+ } // switch (operandType)
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ if (!Inst.Inst->getOperand(MCOpIndex).isImm())
+ return -1;
+
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ if (!Inst.Inst->getOperand(MCOpIndex).isReg())
+ return -1;
+
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeARMBranchTarget:
+ {
+ if (!Inst.Inst->getOperand(MCOpIndex).isImm())
+ return -1;
+
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t pcVal;
+
+ if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+ return -1;
+
+ result = pcVal + displacement;
+ return 0;
+ }
+ }
+ break;
+ }
+
+ return -1;
+}
+
+int EDOperand::isRegister() {
+ return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister);
+}
+
+unsigned EDOperand::regVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getReg();
+}
+
+int EDOperand::isImmediate() {
+ return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate);
+}
+
+uint64_t EDOperand::immediateVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getImm();
+}
+
+int EDOperand::isMemory() {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (operandType) {
+ default:
+ return 0;
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86PCRelative:
+ case kOperandTypeX86EffectiveAddress:
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrMode6:
+ case kOperandTypeARMAddrMode7:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeThumbAddrModeRegS1:
+ case kOperandTypeThumbAddrModeRegS2:
+ case kOperandTypeThumbAddrModeRegS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumb2AddrModeReg:
+ return 1;
+ }
+}
+
+#ifdef __BLOCKS__
+namespace {
+ struct RegisterReaderWrapper {
+ EDOperand::EDRegisterBlock_t regBlock;
+ };
+}
+
+static int readerWrapperCallback(uint64_t *value, unsigned regID, void *arg) {
+ RegisterReaderWrapper *wrapper = (RegisterReaderWrapper *)arg;
+ return wrapper->regBlock(value, regID);
+}
+
+int EDOperand::evaluate(uint64_t &result, EDRegisterBlock_t regBlock) {
+ RegisterReaderWrapper wrapper;
+ wrapper.regBlock = regBlock;
+ return evaluate(result, readerWrapperCallback, (void*)&wrapper);
+}
+#endif
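As a worked example of the evaluate() arithmetic above for an x86 memory operand: with base register RBX = 0x1000, index register RCX = 0x10, a scale of 4, a displacement of 8 and no segment override, the computed address is 0x1000 + 4*0x10 + 8 = 0x1048. A register-reader callback backed by a small map might look like this illustrative sketch; mapRegisterReader and addressOf are not names from the sources:

#include "EDOperand.h"
#include "llvm/Support/DataTypes.h"
#include <map>

static int mapRegisterReader(uint64_t *value, unsigned regID, void *arg) {
  const std::map<unsigned, uint64_t> &regs =
      *static_cast<const std::map<unsigned, uint64_t> *>(arg);
  std::map<unsigned, uint64_t>::const_iterator it = regs.find(regID);
  if (it == regs.end())
    return -1;                 // unreadable register: evaluate() will fail too
  *value = it->second;
  return 0;
}

// Returns the evaluated address, or 0 if the operand could not be evaluated.
uint64_t addressOf(llvm::EDOperand &op, std::map<unsigned, uint64_t> &regs) {
  uint64_t result = 0;
  if (op.evaluate(result, mapRegisterReader, &regs))
    return 0;
  return result;
}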
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
new file mode 100644
index 000000000000..50260ec965a6
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
@@ -0,0 +1,91 @@
+//===-- EDOperand.h - LLVM Enhanced Disassembler ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// operand class. The operand is responsible for allowing evaluation given a
+// particular register context.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDOPERAND_H
+#define LLVM_EDOPERAND_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+struct EDDisassembler;
+struct EDInst;
+
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+ void* arg);
+
+
+/// EDOperand - Encapsulates a single operand, which can be evaluated by the
+/// client
+struct EDOperand {
+ /// The parent disassembler
+ const EDDisassembler &Disassembler;
+ /// The parent instruction
+ const EDInst &Inst;
+
+ /// The index of the operand in the EDInst
+ unsigned int OpIndex;
+ /// The index of the first component of the operand in the MCInst
+ unsigned int MCOpIndex;
+
+ /// Constructor - Initializes an EDOperand
+ ///
+ /// @arg disassembler - The disassembler responsible for the operand
+ /// @arg inst - The instruction containing this operand
+ /// @arg opIndex - The index of the operand in inst
+ /// @arg mcOpIndex - The index of the operand in the original MCInst
+ EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex);
+ ~EDOperand();
+
+ /// evaluate - Computes the numeric value of an operand to the extent possible,
+ /// returning 0 on success or -1 if there was some problem (such as a
+ /// register not being readable)
+ ///
+ /// @arg result - A reference whose target is filled in with the value of
+ /// the operand (the address if it is a memory operand)
+ /// @arg callback - A function to call to obtain register values
+ /// @arg arg - An opaque argument to pass to callback
+ int evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg);
+
+ /// isRegister - Returns 1 if the operand is a register or 0 otherwise
+ int isRegister();
+ /// regVal - Returns the register value.
+ unsigned regVal();
+
+ /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
+ int isImmediate();
+ /// immediateVal - Returns the immediate value.
+ uint64_t immediateVal();
+
+ /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
+ int isMemory();
+
+#ifdef __BLOCKS__
+ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
+ /// evaluate - Like evaluate for a callback, but uses a block instead
+ int evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp
new file mode 100644
index 000000000000..de770b41ef35
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp
@@ -0,0 +1,210 @@
+//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembler library's token class. The
+// token is responsible for vending information about the token, such as its
+// type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDToken.h"
+#include "EDDisassembler.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+EDToken::EDToken(StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler) :
+ Disassembler(disassembler),
+ Str(str),
+ Type(type),
+ LocalType(localType),
+ OperandID(-1) {
+}
+
+EDToken::~EDToken() {
+}
+
+void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
+ Type = kTokenLiteral;
+ LiteralSign = sign;
+ LiteralAbsoluteValue = absoluteValue;
+}
+
+void EDToken::makeRegister(unsigned registerID) {
+ Type = kTokenRegister;
+ RegisterID = registerID;
+}
+
+void EDToken::setOperandID(int operandID) {
+ OperandID = operandID;
+}
+
+enum EDToken::tokenType EDToken::type() const {
+ return Type;
+}
+
+uint64_t EDToken::localType() const {
+ return LocalType;
+}
+
+StringRef EDToken::string() const {
+ return Str;
+}
+
+int EDToken::operandID() const {
+ return OperandID;
+}
+
+int EDToken::literalSign() const {
+ if (Type != kTokenLiteral)
+ return -1;
+ return (LiteralSign ? 1 : 0);
+}
+
+int EDToken::literalAbsoluteValue(uint64_t &value) const {
+ if (Type != kTokenLiteral)
+ return -1;
+ value = LiteralAbsoluteValue;
+ return 0;
+}
+
+int EDToken::registerID(unsigned &registerID) const {
+ if (Type != kTokenRegister)
+ return -1;
+ registerID = RegisterID;
+ return 0;
+}
+
+int EDToken::tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler) {
+ SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
+ SmallVector<AsmToken, 10> asmTokens;
+
+ if (disassembler.parseInst(parsedOperands, asmTokens, str))
+ return -1;
+
+ SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
+ unsigned int operandIndex;
+ SmallVectorImpl<AsmToken>::iterator tokenIterator;
+
+ operandIterator = parsedOperands.begin();
+ operandIndex = 0;
+
+ bool readOpcode = false;
+
+ const char *wsPointer = asmTokens.begin()->getLoc().getPointer();
+
+ for (tokenIterator = asmTokens.begin();
+ tokenIterator != asmTokens.end();
+ ++tokenIterator) {
+ SMLoc tokenLoc = tokenIterator->getLoc();
+
+ const char *tokenPointer = tokenLoc.getPointer();
+
+ if (tokenPointer > wsPointer) {
+ unsigned long wsLength = tokenPointer - wsPointer;
+
+ EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
+ EDToken::kTokenWhitespace,
+ 0,
+ disassembler);
+
+ tokens.push_back(whitespaceToken);
+ }
+
+ wsPointer = tokenPointer + tokenIterator->getString().size();
+
+ while (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >
+ (*operandIterator)->getEndLoc().getPointer()) {
+ ++operandIterator;
+ ++operandIndex;
+ }
+
+ EDToken *token;
+
+ switch (tokenIterator->getKind()) {
+ case AsmToken::Identifier:
+ if (!readOpcode) {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenOpcode,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ readOpcode = true;
+ break;
+ }
+ // any identifier that isn't an opcode is mere punctuation; so we fall
+ // through
+ default:
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenPunctuation,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ break;
+ case AsmToken::Integer:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ int64_t intVal = tokenIterator->getIntVal();
+
+ if (intVal < 0)
+ token->makeLiteral(true, -intVal);
+ else
+ token->makeLiteral(false, intVal);
+ break;
+ }
+ case AsmToken::Register:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ token->makeRegister((unsigned)tokenIterator->getRegVal());
+ break;
+ }
+ }
+
+ if (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >=
+ (*operandIterator)->getStartLoc().getPointer()) {
+ /// operandIndex == 0 means the operand is the instruction (which the
+ /// AsmParser treats as an operand but edis does not). We therefore skip
+ /// operandIndex == 0 and subtract 1 from all other operand indices.
+
+ if (operandIndex > 0)
+ token->setOperandID(operandOrder[operandIndex - 1]);
+ }
+
+ tokens.push_back(token);
+ }
+
+ // Free any parsed operands.
+ for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i)
+ delete parsedOperands[i];
+
+ return 0;
+}
+
+int EDToken::getString(const char*& buf) {
+ if (PermStr.length() == 0) {
+ PermStr = Str.str();
+ }
+ buf = PermStr.c_str();
+ return 0;
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
new file mode 100644
index 000000000000..ba467078686a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
@@ -0,0 +1,139 @@
+//===-- EDToken.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's token
+// class. The token is responsible for vending information about the token,
+// such as its type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDTOKEN_H
+#define LLVM_EDTOKEN_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+struct EDDisassembler;
+
+/// EDToken - Encapsulates a single token, which can provide a string
+/// representation of itself or interpret itself in various ways, depending
+/// on the token type.
+struct EDToken {
+ enum tokenType {
+ kTokenWhitespace,
+ kTokenOpcode,
+ kTokenLiteral,
+ kTokenRegister,
+ kTokenPunctuation
+ };
+
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+
+ /// The token's string representation
+ llvm::StringRef Str;
+ /// The token's string representation, but in a form suitable for export
+ std::string PermStr;
+ /// The type of the token, as exposed through the external API
+ enum tokenType Type;
+ /// The type of the token, as recorded by the syntax-specific tokenizer
+ uint64_t LocalType;
+ /// The operand corresponding to the token, or -1 if the token is not
+ /// part of an operand.
+ int OperandID;
+
+ /// The sign if the token is a literal (1 if negative, 0 otherwise)
+ bool LiteralSign;
+ /// The absolute value if the token is a literal
+ uint64_t LiteralAbsoluteValue;
+ /// The LLVM register ID if the token is a register name
+ unsigned RegisterID;
+
+ /// Constructor - Initializes an EDToken with the information common to all
+ /// tokens
+ ///
+ /// @arg str - The string corresponding to the token
+ /// @arg type - The token's type as exposed through the public API
+ /// @arg localType - The token's type as recorded by the tokenizer
+ /// @arg disassembler - The disassembler responsible for the token
+ EDToken(llvm::StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler);
+
+ /// makeLiteral - Adds the information specific to a literal
+ /// @arg sign - The sign of the literal (1 if negative, 0
+ /// otherwise)
+ ///
+ /// @arg absoluteValue - The absolute value of the literal
+ void makeLiteral(bool sign, uint64_t absoluteValue);
+ /// makeRegister - Adds the information specific to a register
+ ///
+ /// @arg registerID - The LLVM register ID
+ void makeRegister(unsigned registerID);
+
+ /// setOperandID - Links the token to a numbered operand
+ ///
+ /// @arg operandID - The operand ID to link to
+ void setOperandID(int operandID);
+
+ ~EDToken();
+
+ /// type - Returns the public type of the token
+ enum tokenType type() const;
+ /// localType - Returns the tokenizer-specific type of the token
+ uint64_t localType() const;
+ /// string - Returns the string representation of the token
+ llvm::StringRef string() const;
+ /// operandID - Returns the operand ID of the token
+ int operandID() const;
+
+ /// literalSign - Returns the sign of the token
+ /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
+ int literalSign() const;
+ /// literalAbsoluteValue - Retrieves the absolute value of the token, and
+ /// returns -1 if the token is not a literal
+ /// @arg value - A reference to a value that is filled in with the absolute
+ /// value, if it is valid
+ int literalAbsoluteValue(uint64_t &value) const;
+ /// registerID - Retrieves the register ID of the token, and returns -1 if the
+ /// token is not a register
+ ///
+ /// @arg registerID - A reference to a value that is filled in with the
+ /// register ID, if it is valid
+ int registerID(unsigned &registerID) const;
+
+ /// tokenize - Tokenizes a string using the platform- and syntax-specific
+ /// tokenizer, and returns 0 on success (-1 on failure)
+ ///
+ /// @arg tokens - A vector that will be filled in with pointers to
+ /// allocated tokens
+ /// @arg str - The string, as output by the AsmPrinter
+ /// @arg operandOrder - The order of the operands from the operandFlags array
+ /// as they appear in str
+ /// @arg disassembler - The disassembler for the desired target and
+ /// assembly syntax
+ static int tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler);
+
+ /// getString - Sets a character pointer to point at the token's string,
+ /// returning 0 on success (-1 on failure)
+ /// @arg buf - A reference to a pointer that is set to point to the string.
+ /// The string is still owned by the token.
+ int getString(const char*& buf);
+};
+
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
new file mode 100644
index 000000000000..ad86db13d510
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -0,0 +1,1086 @@
+//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+// Given a special op, return the address skip amount (in units of
+ // DWARF2_LINE_MIN_INSN_LENGTH).
+#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
+
+// The maximum address skip amount that can be encoded with a special op.
+#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
+
+// First special line opcode - leave room for the standard opcodes.
+// Note: If you want to change this, you'll have to update the
+// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().
+#define DWARF2_LINE_OPCODE_BASE 13
+
+// Minimum line offset in a special line info. opcode. This value
+// was chosen to give a reasonable range of values.
+#define DWARF2_LINE_BASE -5
+
+// Range of line offsets in a special line info. opcode.
+#define DWARF2_LINE_RANGE 14
+
+// Define the architecture-dependent minimum instruction length (in bytes).
+ // It is better for this value to be too small than too big.
+#define DWARF2_LINE_MIN_INSN_LENGTH 1
+
+ // Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 (which is the current setting),
+// this routine is a nop and will be optimized away.
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) {
+ if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
+ return AddrDelta;
+ if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
+ // TODO: report this error, but really only once.
+ ;
+ }
+ return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH;
+}
+
+//
+ // This is called when an instruction is assembled into the specified section.
+ // If there is information from the last .loc directive that has not yet had a
+ // line entry made for it, one is made here.
+//
+void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
+ if (!MCOS->getContext().getDwarfLocSeen())
+ return;
+
+ // Create a symbol in the current section for use in the line entry.
+ MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
+ // Set the value of the symbol to use for the MCLineEntry.
+ MCOS->EmitLabel(LineSym);
+
+ // Get the current .loc info saved in the context.
+ const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
+
+ // Create a (local) line entry with the symbol and the current .loc info.
+ MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+ // Clear DwarfLocSeen to indicate that the current .loc info has now been used.
+ MCOS->getContext().ClearDwarfLocSeen();
+
+ // Get the MCLineSection for this section; if one does not exist for this
+ // section, create it.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ MCLineSection *LineSection = MCLineSections.lookup(Section);
+ if (!LineSection) {
+ // Create a new MCLineSection. It will be deleted in MCDwarfFileTable::Emit()
+ // after the dwarf line table has been created from it by iterating through
+ // the MCLineSections DenseMap.
+ LineSection = new MCLineSection;
+ // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+ MCOS->getContext().addMCLineSection(Section, LineSection);
+ }
+
+ // Add the line entry to this section's entries.
+ LineSection->addLineEntry(LineEntry);
+}
+
+//
+ // This helper routine returns an expression of End - Start - IntVal.
+//
+static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
+ const MCSymbol &Start,
+ const MCSymbol &End,
+ int IntVal) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
+ const MCExpr *RHS =
+ MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
+ const MCExpr *Res1 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
+ const MCExpr *Res2 =
+ MCConstantExpr::Create(IntVal, MCOS.getContext());
+ const MCExpr *Res3 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
+ return Res3;
+}
+
+//
+// This emits the Dwarf line table for the specified section from the entries
+// in the LineSection.
+//
+static inline void EmitDwarfLineTable(MCStreamer *MCOS,
+ const MCSection *Section,
+ const MCLineSection *LineSection) {
+ unsigned FileNum = 1;
+ unsigned LastLine = 1;
+ unsigned Column = 0;
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ MCSymbol *LastLabel = NULL;
+
+ // Loop through each MCLineEntry and encode the dwarf line number table.
+ for (MCLineSection::const_iterator
+ it = LineSection->getMCLineEntries()->begin(),
+ ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) {
+
+ if (FileNum != it->getFileNum()) {
+ FileNum = it->getFileNum();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
+ MCOS->EmitULEB128IntValue(FileNum);
+ }
+ if (Column != it->getColumn()) {
+ Column = it->getColumn();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
+ MCOS->EmitULEB128IntValue(Column);
+ }
+ if (Isa != it->getIsa()) {
+ Isa = it->getIsa();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
+ MCOS->EmitULEB128IntValue(Isa);
+ }
+ if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
+ Flags = it->getFlags();
+ MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
+ }
+ if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
+ if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
+ if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
+
+ int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine;
+ MCSymbol *Label = it->getLabel();
+
+ // At this point we want to emit/create the sequence to encode the delta in
+ // line numbers and the increment of the address between the previous Label
+ // and the current Label.
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ asmInfo.getPointerSize());
+
+ LastLine = it->getLine();
+ LastLabel = Label;
+ }
+
+ // Emit a DW_LNE_end_sequence for the end of the section.
+ // Using the Section pointer, create a temporary label at the end of the
+ // section, and use that label together with LastLabel to compute the address
+ // delta. Use INT64_MAX as the line delta, which is the signal that this is
+ // actually a DW_LNE_end_sequence.
+
+ // Switch to the section to be able to create a symbol at its end.
+ MCOS->SwitchSection(Section);
+
+ MCContext &context = MCOS->getContext();
+ // Create a symbol at the end of the section.
+ MCSymbol *SectionEnd = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the end of the section.
+ MCOS->EmitLabel(SectionEnd);
+
+ // Switch back to the dwarf line section.
+ MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
+ asmInfo.getPointerSize());
+}
+
+//
+// This emits the Dwarf file and the line tables.
+//
+void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ // Switch to the section where the table will be emitted into.
+ MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+ // Create a symbol at the beginning of this section.
+ MCSymbol *LineStartSym = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the section.
+ MCOS->EmitLabel(LineStartSym);
+
+ // Create a symbol for the end of the section (to be set when we get there).
+ MCSymbol *LineEndSym = context.CreateTempSymbol();
+
+ // The first 4 bytes are the total length of the information for this
+ // compilation unit (not including these 4 bytes for the length).
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
+ 4);
+
+ // The next 2 bytes are the version, which is Dwarf 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // Create a symbol for the end of the prologue (to be set when we get there).
+ MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
+
+ // The next 4 bytes are the length of the prologue: the distance from the
+ // start of the section to the end of the prologue, not including the 4 bytes
+ // for the total length, the 2 bytes for the version, or these 4 bytes for
+ // the length of the prologue itself.
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
+ (4 + 2 + 4)),
+ 4, 0);
+
+ // The parameters of the state machine are next.
+ MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1);
+
+ // Standard opcode lengths
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_isa
+
+ // Put out the directory and file tables.
+
+ // First the directory table.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+ MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+ MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the directory list
+
+ // Second the file table.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+ MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ // the Directory num
+ MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
+ MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
+ MCOS->EmitIntValue(0, 1); // filesize (always 0)
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the file list
+
+ // This is the end of the prologue, so set the value of the symbol at the
+ // end of the prologue (that was used in a previous expression).
+ MCOS->EmitLabel(ProEndSym);
+
+ // Put out the line tables.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ const std::vector<const MCSection *> &MCLineSectionOrder =
+ MCOS->getContext().getMCLineSectionOrder();
+ for (std::vector<const MCSection*>::const_iterator it =
+ MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+ ++it) {
+ const MCSection *Sec = *it;
+ const MCLineSection *Line = MCLineSections.lookup(Sec);
+ EmitDwarfLineTable(MCOS, Sec, Line);
+
+ // Now delete the MCLineSections that were created in MCLineEntry::Make()
+ // and used to emit the line table.
+ delete Line;
+ }
+
+ if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines()
+ && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) {
+ // The darwin9 linker has a bug (see PR8715). For 32-bit architectures
+ // it requires:
+ // total_length >= prologue_length + 10
+ // We are 4 bytes short, since we have total_length = 51 and
+ // prologue_length = 45
+
+ // The regular end_sequence should be sufficient.
+ MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
+ }
+
+ // This is the end of the section, so set the value of the symbol at the end
+ // of this section (that was used in a previous expression).
+ MCOS->EmitLabel(LineEndSym);
+}
+
+/// Utility function to write the encoding to an object writer.
+void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ OW->WriteBytes(OS.str());
+}
+
+/// Utility function to emit the encoding to a streamer.
+void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+ /// Utility function to encode a Dwarf LineDelta/AddrDelta pair.
+void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS) {
+ uint64_t Temp, Opcode;
+ bool NeedCopy = false;
+
+ // Scale the address delta by the minimum instruction length.
+ AddrDelta = ScaleAddrDelta(AddrDelta);
+
+ // A LineDelta of INT64_MAX is a signal that this is actually a
+ // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the
+ // end_sequence to emit the matrix entry.
+ if (LineDelta == INT64_MAX) {
+ if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ else {
+ OS << char(dwarf::DW_LNS_advance_pc);
+ MCObjectWriter::EncodeULEB128(AddrDelta, OS);
+ }
+ OS << char(dwarf::DW_LNS_extended_op);
+ OS << char(1);
+ OS << char(dwarf::DW_LNE_end_sequence);
+ return;
+ }
+
+ // Bias the line delta by the base.
+ Temp = LineDelta - DWARF2_LINE_BASE;
+
+ // If the line increment is out of range of a special opcode, we must encode
+ // it with DW_LNS_advance_line.
+ if (Temp >= DWARF2_LINE_RANGE) {
+ OS << char(dwarf::DW_LNS_advance_line);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeSLEB128(LineDelta, OSE);
+ OS << OSE.str();
+
+ LineDelta = 0;
+ Temp = 0 - DWARF2_LINE_BASE;
+ NeedCopy = true;
+ }
+
+ // Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
+ if (LineDelta == 0 && AddrDelta == 0) {
+ OS << char(dwarf::DW_LNS_copy);
+ return;
+ }
+
+ // Bias the opcode by the special opcode base.
+ Temp += DWARF2_LINE_OPCODE_BASE;
+
+ // Avoid overflow when addr_delta is large.
+ if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
+ // Try using a special opcode.
+ Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(Opcode);
+ return;
+ }
+
+ // Try using DW_LNS_const_add_pc followed by special op.
+ Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ OS << char(Opcode);
+ return;
+ }
+ }
+
+ // Otherwise use DW_LNS_advance_pc.
+ OS << char(dwarf::DW_LNS_advance_pc);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
+ OS << OSE.str();
+
+ if (NeedCopy)
+ OS << char(dwarf::DW_LNS_copy);
+ else
+ OS << char(Temp);
+}
+
+void MCDwarfFile::print(raw_ostream &OS) const {
+ OS << '"' << getName() << '"';
+}
+
+void MCDwarfFile::dump() const {
+ print(dbgs());
+}
+
+static int getDataAlignmentFactor(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int size = asmInfo.getPointerSize();
+ if (asmInfo.isStackGrowthDirectionUp())
+ return size;
+ else
+ return -size;
+}
+
+static unsigned getSizeForEncoding(MCStreamer &streamer,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ unsigned format = symbolEncoding & 0x0f;
+ switch (format) {
+ default:
+ assert(0 && "Unknown Encoding");
+ case dwarf::DW_EH_PE_absptr:
+ case dwarf::DW_EH_PE_signed:
+ return context.getAsmInfo().getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ case dwarf::DW_EH_PE_sdata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ case dwarf::DW_EH_PE_sdata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ case dwarf::DW_EH_PE_sdata8:
+ return 8;
+ }
+}
+
+static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding, const char *comment = 0) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
+ symbolEncoding,
+ streamer);
+ unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
+ streamer.EmitAbsValue(v, size);
+}
+
+static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol,
+ symbolEncoding,
+ streamer);
+ unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ streamer.EmitValue(v, size);
+}
+
+static const MachineLocation TranslateMachineLocation(
+ const TargetAsmInfo &TAI,
+ const MachineLocation &Loc) {
+ unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
+ MachineLocation::VirtualFP :
+ unsigned(TAI.getDwarfRegNum(Loc.getReg(), true));
+ const MachineLocation &NewLoc = Loc.isReg() ?
+ MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
+ return NewLoc;
+}
+
+namespace {
+ class FrameEmitterImpl {
+ int CFAOffset;
+ int CIENum;
+ bool UsingCFI;
+ bool IsEH;
+ const MCSymbol *SectionStart;
+ public:
+ FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart) :
+ CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
+ SectionStart(sectionStart) {
+ }
+
+ /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+ /// we're successful, return 'true'. Otherwise, return 'false' and the caller
+ /// will emit the normal CIE and FDE.
+ bool EmitCompactUnwind(MCStreamer &streamer,
+ const MCDwarfFrameInfo &frame);
+
+ const MCSymbol &EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ unsigned lsdaEncoding);
+ MCSymbol *EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame);
+ void EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel);
+ void EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr);
+ };
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+ StringRef Prefix) {
+ if (Streamer.isVerboseAsm()) {
+ const char *EncStr = 0;
+ switch (Encoding) {
+ default: EncStr = "<unknown encoding>";
+ case dwarf::DW_EH_PE_absptr: EncStr = "absptr";
+ case dwarf::DW_EH_PE_omit: EncStr = "omit";
+ case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel";
+ case dwarf::DW_EH_PE_udata4: EncStr = "udata4";
+ case dwarf::DW_EH_PE_udata8: EncStr = "udata8";
+ case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4";
+ case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+ EncStr = "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+ EncStr = "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+ EncStr = "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+ EncStr = "indirect pcrel sdata8";
+ }
+
+ Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+ }
+
+ Streamer.EmitIntValue(Encoding, 1);
+}
+
+void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr) {
+ int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ switch (Instr.getOperation()) {
+ case MCCFIInstruction::Move:
+ case MCCFIInstruction::RelMove: {
+ const MachineLocation &Dst = Instr.getDestination();
+ const MachineLocation &Src = Instr.getSource();
+ const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove;
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
+ } else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") +
+ Twine(Src.getReg()));
+ Streamer.EmitULEB128IntValue(Src.getReg());
+ }
+
+ if (IsRelative)
+ CFAOffset += Src.getOffset();
+ else
+ CFAOffset = -Src.getOffset();
+
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+ return;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg()));
+ Streamer.EmitULEB128IntValue(Dst.getReg());
+ return;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset = Offset / dataAlignmentFactor;
+
+ if (Offset < 0) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitSLEB128IntValue(Offset);
+ } else if (Reg < 64) {
+ if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
+ Twine(Reg) + ")");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitULEB128IntValue(Offset);
+ } else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitULEB128IntValue(Offset);
+ }
+ return;
+ }
+ case MCCFIInstruction::Remember:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
+ Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
+ return;
+ case MCCFIInstruction::Restore:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
+ return;
+ case MCCFIInstruction::SameValue: {
+ unsigned Reg = Instr.getDestination().getReg();
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
+ Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ return;
+ }
+ }
+ llvm_unreachable("Unhandled case in switch");
+}
+
+ /// EmitCFIInstructions - Emit frame instructions to describe the layout of
+ /// the frame.
+void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel) {
+ for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
+ const MCCFIInstruction &Instr = Instrs[i];
+ MCSymbol *Label = Instr.getLabel();
+ // Throw out move if the label is invalid.
+ if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+ // Advance row if new location.
+ if (BaseLabel && Label) {
+ MCSymbol *ThisSym = Label;
+ if (ThisSym != BaseLabel) {
+ if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
+ streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+ BaseLabel = ThisSym;
+ }
+ }
+
+ EmitCFIInstruction(streamer, Instr);
+ }
+}
+
+/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
+ /// successful, return 'true'. Otherwise, return 'false' and the caller will
+ /// emit the normal CIE and FDE.
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+ const MCDwarfFrameInfo &Frame) {
+#if 1
+ return false;
+#else
+ MCContext &Context = Streamer.getContext();
+ const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ // range-start range-length compact-unwind-enc personality-func lsda
+ // _foo LfooEnd-_foo 0x00000023 0 0
+ // _bar LbarEnd-_bar 0x00000025 __gxx_personality except_tab1
+ //
+ // .section __LD,__compact_unwind,regular,debug
+ //
+ // # compact unwind for _foo
+ // .quad _foo
+ // .set L1,LfooEnd-_foo
+ // .long L1
+ // .long 0x01010001
+ // .quad 0
+ // .quad 0
+ //
+ // # compact unwind for _bar
+ // .quad _bar
+ // .set L2,LbarEnd-_bar
+ // .long L2
+ // .long 0x01020011
+ // .quad __gxx_personality
+ // .quad except_tab1
+
+ uint32_t Encoding =
+ TAI.getCompactUnwindEncoding(Frame.Instructions,
+ getDataAlignmentFactor(Streamer), IsEH);
+ if (!Encoding) return false;
+
+ // The encoding needs to know we have an LSDA.
+ if (Frame.Lsda)
+ Encoding |= 0x40000000;
+
+ Streamer.SwitchSection(TAI.getCompactUnwindSection());
+
+ // Range Start
+ unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
+ if (VerboseAsm) Streamer.AddComment("Range Start");
+ Streamer.EmitSymbolValue(Frame.Function, Size);
+
+ // Range Length
+ const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
+ *Frame.End, 0);
+ if (VerboseAsm) Streamer.AddComment("Range Length");
+ Streamer.EmitAbsValue(Range, 4);
+
+ // Compact Encoding
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
+ if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") +
+ Twine(llvm::utohexstr(Encoding)));
+ Streamer.EmitIntValue(Encoding, Size);
+
+ // Personality Function
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
+ if (VerboseAsm) Streamer.AddComment("Personality Function");
+ if (Frame.Personality)
+ Streamer.EmitSymbolValue(Frame.Personality, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No personality fn
+
+ // LSDA
+ Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
+ if (VerboseAsm) Streamer.AddComment("LSDA");
+ if (Frame.Lsda)
+ Streamer.EmitSymbolValue(Frame.Lsda, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No LSDA
+
+ return true;
+#endif
+}
+
+const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ unsigned lsdaEncoding) {
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
+
+ MCSymbol *sectionStart;
+ if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH)
+ sectionStart = context.CreateTempSymbol();
+ else
+ sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
+
+ streamer.EmitLabel(sectionStart);
+ CIENum++;
+
+ MCSymbol *sectionEnd = context.CreateTempSymbol();
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
+ *sectionEnd, 4);
+ if (verboseAsm) streamer.AddComment("CIE Length");
+ streamer.EmitAbsValue(Length, 4);
+
+ // CIE ID
+ unsigned CIE_ID = IsEH ? 0 : -1;
+ if (verboseAsm) streamer.AddComment("CIE ID Tag");
+ streamer.EmitIntValue(CIE_ID, 4);
+
+ // Version
+ if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
+ streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
+
+ // Augmentation String
+ SmallString<8> Augmentation;
+ if (IsEH) {
+ if (verboseAsm) streamer.AddComment("CIE Augmentation");
+ Augmentation += "z";
+ if (personality)
+ Augmentation += "P";
+ if (lsda)
+ Augmentation += "L";
+ Augmentation += "R";
+ streamer.EmitBytes(Augmentation.str(), 0);
+ }
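+ // Null terminator for the augmentation string (an empty string when not EH).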
+ streamer.EmitIntValue(0, 1);
+
+ // Code Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
+ streamer.EmitULEB128IntValue(1);
+
+ // Data Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
+ streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+
+ // Return Address Register
+ if (verboseAsm) streamer.AddComment("CIE Return Address Column");
+ streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true));
+
+ // Augmentation Data Length (optional)
+
+ unsigned augmentationLength = 0;
+ if (IsEH) {
+ if (personality) {
+ // Personality Encoding
+ augmentationLength += 1;
+ // Personality
+ augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+ }
+ if (lsda)
+ augmentationLength += 1;
+ // Encoding of the FDE pointers
+ augmentationLength += 1;
+
+ if (verboseAsm) streamer.AddComment("Augmentation Size");
+ streamer.EmitULEB128IntValue(augmentationLength);
+
+ // Augmentation Data (optional)
+ if (personality) {
+ // Personality Encoding
+ EmitEncodingByte(streamer, personalityEncoding,
+ "Personality Encoding");
+ // Personality
+ if (verboseAsm) streamer.AddComment("Personality");
+ EmitPersonality(streamer, *personality, personalityEncoding);
+ }
+
+ if (lsda)
+ EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
+
+ // Encoding of the FDE pointers
+ EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI),
+ "FDE Encoding");
+ }
+
+ // Initial Instructions
+
+ const std::vector<MachineMove> &Moves = TAI.getInitialFrameState();
+ std::vector<MCCFIInstruction> Instructions;
+
+ for (int i = 0, n = Moves.size(); i != n; ++i) {
+ MCSymbol *Label = Moves[i].getLabel();
+ const MachineLocation &Dst =
+ TranslateMachineLocation(TAI, Moves[i].getDestination());
+ const MachineLocation &Src =
+ TranslateMachineLocation(TAI, Moves[i].getSource());
+ MCCFIInstruction Inst(Label, Dst, Src);
+ Instructions.push_back(Inst);
+ }
+
+ EmitCFIInstructions(streamer, Instructions, NULL);
+
+ // Padding
+ streamer.EmitValueToAlignment(IsEH
+ ? 4 : context.getAsmInfo().getPointerSize());
+
+ streamer.EmitLabel(sectionEnd);
+ return *sectionStart;
+}
+
+MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame) {
+ MCContext &context = streamer.getContext();
+ MCSymbol *fdeStart = context.CreateTempSymbol();
+ MCSymbol *fdeEnd = context.CreateTempSymbol();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
+
+ if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) {
+ MCSymbol *EHSym =
+ context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
+ streamer.EmitEHSymAttributes(frame.Function, EHSym);
+ streamer.EmitLabel(EHSym);
+ }
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+ if (verboseAsm) streamer.AddComment("FDE Length");
+ streamer.EmitAbsValue(Length, 4);
+
+ streamer.EmitLabel(fdeStart);
+
+ // CIE Pointer
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ if (IsEH) {
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
+ 0);
+ if (verboseAsm) streamer.AddComment("FDE CIE Offset");
+ streamer.EmitAbsValue(offset, 4);
+ } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) {
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
+ cieStart, 0);
+ streamer.EmitAbsValue(offset, 4);
+ } else {
+ streamer.EmitSymbolValue(&cieStart, 4);
+ }
+
+ unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned size = getSizeForEncoding(streamer, fdeEncoding);
+
+ // PC Begin
+ unsigned PCBeginEncoding = IsEH ? fdeEncoding :
+ (unsigned)dwarf::DW_EH_PE_absptr;
+ unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding);
+ EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location");
+
+ // PC Range
+ const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
+ *frame.End, 0);
+ if (verboseAsm) streamer.AddComment("FDE address range");
+ streamer.EmitAbsValue(Range, size);
+
+ if (IsEH) {
+ // Augmentation Data Length
+ unsigned augmentationLength = 0;
+
+ if (frame.Lsda)
+ augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+
+ if (verboseAsm) streamer.AddComment("Augmentation size");
+ streamer.EmitULEB128IntValue(augmentationLength);
+
+ // Augmentation Data
+ if (frame.Lsda)
+ EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
+ "Language Specific Data Area");
+ }
+
+ // Call Frame Instructions
+
+ EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+
+ // Padding
+ streamer.EmitValueToAlignment(PCBeginSize);
+
+ return fdeEnd;
+}
+
+namespace {
+ struct CIEKey {
+ static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); }
+ static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); }
+
+ CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
+ unsigned LsdaEncoding_) : Personality(Personality_),
+ PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_) {
+ }
+ const MCSymbol* Personality;
+ unsigned PersonalityEncoding;
+ unsigned LsdaEncoding;
+ };
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<CIEKey> {
+ static CIEKey getEmptyKey() {
+ return CIEKey::getEmptyKey();
+ }
+ static CIEKey getTombstoneKey() {
+ return CIEKey::getTombstoneKey();
+ }
+ static unsigned getHashValue(const CIEKey &Key) {
+ FoldingSetNodeID ID;
+ ID.AddPointer(Key.Personality);
+ ID.AddInteger(Key.PersonalityEncoding);
+ ID.AddInteger(Key.LsdaEncoding);
+ return ID.ComputeHash();
+ }
+ static bool isEqual(const CIEKey &LHS,
+ const CIEKey &RHS) {
+ return LHS.Personality == RHS.Personality &&
+ LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+ LHS.LsdaEncoding == RHS.LsdaEncoding;
+ }
+ };
+}
+
+void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
+ bool UsingCFI,
+ bool IsEH) {
+ MCContext &Context = Streamer.getContext();
+ const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ const MCSection &Section = IsEH ? *TAI.getEHFrameSection() :
+ *TAI.getDwarfFrameSection();
+ Streamer.SwitchSection(&Section);
+ MCSymbol *SectionStart = Context.CreateTempSymbol();
+ Streamer.EmitLabel(SectionStart);
+
+ MCSymbol *FDEEnd = NULL;
+ DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+ FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart);
+
+ const MCSymbol *DummyDebugKey = NULL;
+ for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+ if (IsEH && TAI.getCompactUnwindSection() &&
+ Emitter.EmitCompactUnwind(Streamer, Frame)) {
+ FDEEnd = NULL;
+ continue;
+ }
+
+ CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
+ Frame.LsdaEncoding);
+ const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
+ if (!CIEStart)
+ CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
+ Frame.PersonalityEncoding, Frame.Lsda,
+ Frame.LsdaEncoding);
+
+ FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+
+ if (i != n - 1)
+ Streamer.EmitLabel(FDEEnd);
+ }
+
+ Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+ if (FDEEnd)
+ Streamer.EmitLabel(FDEEnd);
+}
+
+void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
+ Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
+ raw_ostream &OS) {
+ // FIXME: Assumes the code alignment factor is 1.
+ if (AddrDelta == 0) {
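+   // A zero address delta requires no advance opcode; emit nothing.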
+ } else if (isUIntN(6, AddrDelta)) {
+ uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
+ OS << Opcode;
+ } else if (isUInt<8>(AddrDelta)) {
+ OS << uint8_t(dwarf::DW_CFA_advance_loc1);
+ OS << uint8_t(AddrDelta);
+ } else if (isUInt<16>(AddrDelta)) {
+ // FIXME: check what is the correct behavior on a big endian machine.
+ OS << uint8_t(dwarf::DW_CFA_advance_loc2);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ } else {
+ // FIXME: check what is the correct behavior on a big endian machine.
+ assert(isUInt<32>(AddrDelta));
+ OS << uint8_t(dwarf::DW_CFA_advance_loc4);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ OS << uint8_t((AddrDelta >> 16) & 0xff);
+ OS << uint8_t((AddrDelta >> 24) & 0xff);
+
+ }
+}
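For reference, the special-opcode arithmetic used by MCDwarfLineAddr::Encode above can be reproduced in a standalone sketch. It mirrors the constants defined at the top of MCDwarf.cpp (opcode base 13, line base -5, line range 14); the helper name and the sample deltas are illustrative only, not part of LLVM.

    #include <cstdint>
    #include <cstdio>

    // Constants mirroring the #defines at the top of MCDwarf.cpp.
    static const int      LineOpcodeBase = 13;  // DWARF2_LINE_OPCODE_BASE
    static const int      LineBase       = -5;  // DWARF2_LINE_BASE
    static const unsigned LineRange      = 14;  // DWARF2_LINE_RANGE

    // Returns the one-byte special opcode for (LineDelta, AddrDelta), or -1 when
    // the pair is out of range and Encode() falls back to standard DW_LNS_* opcodes.
    static int specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
      int64_t AdjustedLine = LineDelta - LineBase;        // bias by the line base
      if (AdjustedLine < 0 || AdjustedLine >= (int64_t)LineRange)
        return -1;                                        // needs DW_LNS_advance_line
      uint64_t Opcode = LineOpcodeBase + AdjustedLine + AddrDelta * LineRange;
      return Opcode <= 255 ? (int)Opcode : -1;            // must fit in one byte
    }

    int main() {
      std::printf("%d\n", specialOpcode(1, 2));   // 13 + (1 - (-5)) + 2*14 = 47
      std::printf("%d\n", specialOpcode(1, 100)); // too large: prints -1
      return 0;
    }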
diff --git a/contrib/llvm/lib/MC/MCELF.cpp b/contrib/llvm/lib/MC/MCELF.cpp
new file mode 100644
index 000000000000..2c3f8e8f7866
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELF.cpp
@@ -0,0 +1,72 @@
+//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCELF.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+namespace llvm {
+
+void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+ SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+
+unsigned MCELF::GetBinding(const MCSymbolData &SD) {
+ uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ return Binding;
+}
+
+void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
+ SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
+}
+
+unsigned MCELF::GetType(const MCSymbolData &SD) {
+ uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS);
+ return Type;
+}
+
+void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0x3 << ELF_STV_Shift);
+ SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
+
+unsigned MCELF::GetVisibility(MCSymbolData &SD) {
+ unsigned Visibility =
+ (SD.getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ return Visibility;
+}
+
+}
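The MCELF helpers above all use the same clear-then-insert bit-field pattern on MCSymbolData's flags word. The standalone sketch below shows only that pattern; the shift values are placeholders, since the real ELF_STT/STB/STV shifts live in MCELFSymbolFlags.h, which is not part of this diff.

    #include <cassert>
    #include <cstdint>

    // Placeholder shift values; the real constants are in MCELFSymbolFlags.h.
    static const unsigned TypeShift       = 0;  // holds an ELF::STT_* value
    static const unsigned BindingShift    = 4;  // holds an ELF::STB_* value
    static const unsigned VisibilityShift = 8;  // holds an ELF::STV_* value

    // Clear the field, then OR in the new value, as MCELF::Set* does.
    static uint32_t setField(uint32_t Flags, unsigned Shift, uint32_t Mask,
                             uint32_t Value) {
      return (Flags & ~(Mask << Shift)) | (Value << Shift);
    }

    // Shift down and mask, as the MCELF::Get* accessors do.
    static uint32_t getField(uint32_t Flags, unsigned Shift, uint32_t Mask) {
      return (Flags >> Shift) & Mask;
    }

    int main() {
      uint32_t Flags = 0;
      Flags = setField(Flags, BindingShift, 0xf, 2 /* ELF::STB_WEAK */);
      Flags = setField(Flags, TypeShift, 0xf, 1 /* ELF::STT_OBJECT */);
      assert(getField(Flags, BindingShift, 0xf) == 2);
      assert(getField(Flags, TypeShift, 0xf) == 1);
      (void)VisibilityShift; // the visibility field (mask 0x3) works the same way
      return 0;
    }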
diff --git a/contrib/llvm/lib/MC/MCELF.h b/contrib/llvm/lib/MC/MCELF.h
new file mode 100644
index 000000000000..e08f1e65429a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELF.h
@@ -0,0 +1,35 @@
+//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some support functions used by the ELF Streamer and
+// ObjectWriter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELF_H
+#define LLVM_MC_MCELF_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCSymbolData;
+
+class MCELF {
+ public:
+ static void SetBinding(MCSymbolData &SD, unsigned Binding);
+ static unsigned GetBinding(const MCSymbolData &SD);
+ static void SetType(MCSymbolData &SD, unsigned Type);
+ static unsigned GetType(const MCSymbolData &SD);
+ static void SetVisibility(MCSymbolData &SD, unsigned Visibility);
+ static unsigned GetVisibility(MCSymbolData &SD);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
new file mode 100644
index 000000000000..12a02a9e9740
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -0,0 +1,23 @@
+//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
+ Triple::OSType OSType_,
+ uint16_t EMachine_,
+ bool HasRelocationAddend_)
+ : OSType(OSType_), EMachine(EMachine_),
+ HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {
+}
+
+MCELFObjectTargetWriter::~MCELFObjectTargetWriter() {
+}
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
new file mode 100644
index 000000000000..49340edbed5e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -0,0 +1,386 @@
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCELFStreamer.h"
+#include "MCELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+void MCELFStreamer::InitSections() {
+ // This emulates the behavior of GNU as. It makes the output easier to
+ // compare, since the major sections are emitted in the same order.
+ SetSectionText();
+ SetSectionData();
+ SetSectionBss();
+ SetSectionText();
+}
+
+void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCObjectStreamer::EmitLabel(Symbol);
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol->getSection());
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ if (Section.getFlags() & ELF::SHF_TLS)
+ MCELF::SetType(SD, ELF::STT_TLS);
+}
+
+void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // no-op here.
+ case MCAF_Code32: return; // no-op here.
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ }
+
+ assert(0 && "invalid assembler flag!");
+}
+
+void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+}
+
+void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCELFStreamer::ChangeSection(const MCSection *Section) {
+ const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
+ if (Grp)
+ getAssembler().getOrCreateSymbolData(*Grp);
+ this->MCObjectStreamer::ChangeSection(Section);
+}
+
+void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ MCSymbolData &AliasSD = getAssembler().getOrCreateSymbolData(*Alias);
+ AliasSD.setFlags(AliasSD.getFlags() | ELF_Other_Weakref);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Symbol, getContext());
+ Alias->setVariableValue(Value);
+}
+
+void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol, note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to be desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more well
+ // defined.
+ switch (Attribute) {
+ case MCSA_LazyReference:
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ case MCSA_SymbolResolver:
+ case MCSA_PrivateExtern:
+ case MCSA_WeakDefinition:
+ case MCSA_WeakDefAutoPrivate:
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_IndirectSymbol:
+ assert(0 && "Invalid symbol attribute for ELF!");
+ break;
+
+ case MCSA_ELF_TypeGnuUniqueObject:
+ // Ignore for now.
+ break;
+
+ case MCSA_Global:
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_WeakReference:
+ case MCSA_Weak:
+ MCELF::SetBinding(SD, ELF::STB_WEAK);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_Local:
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_ELF_TypeFunction:
+ MCELF::SetType(SD, ELF::STT_FUNC);
+ break;
+
+ case MCSA_ELF_TypeObject:
+ MCELF::SetType(SD, ELF::STT_OBJECT);
+ break;
+
+ case MCSA_ELF_TypeTLS:
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+
+ case MCSA_ELF_TypeCommon:
+ MCELF::SetType(SD, ELF::STT_COMMON);
+ break;
+
+ case MCSA_ELF_TypeNoType:
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ break;
+
+ case MCSA_Protected:
+ MCELF::SetVisibility(SD, ELF::STV_PROTECTED);
+ break;
+
+ case MCSA_Hidden:
+ MCELF::SetVisibility(SD, ELF::STV_HIDDEN);
+ break;
+
+ case MCSA_Internal:
+ MCELF::SetVisibility(SD, ELF::STV_INTERNAL);
+ break;
+ }
+}
+
+void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ if (!BindingExplicitlySet.count(Symbol)) {
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ }
+
+ MCELF::SetType(SD, ELF::STT_OBJECT);
+
+ if (MCELF::GetBinding(SD) == ELF::STB_LOCAL) {
+ const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+ Symbol->setSection(*Section);
+
+ struct LocalCommon L = {&SD, Size, ByteAlignment};
+ LocalCommons.push_back(L);
+ } else {
+ SD.setCommon(Size, ByteAlignment);
+ }
+
+ SD.setSize(MCConstantExpr::Create(Size, getContext()));
+}
+
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ // FIXME: Should this be caught and done earlier?
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ // FIXME: ByteAlignment is not meaningful here, but EmitCommonSymbol requires a value.
+ EmitCommonSymbol(Symbol, Size, 1);
+}
+
+void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+// Add a symbol for the file name of this module. This is the second
+// entry in the module's symbol table (the first being the null symbol).
+void MCELFStreamer::EmitFileDirective(StringRef Filename) {
+ MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
+ Symbol->setSection(*getCurrentSection());
+ Symbol->setAbsolute();
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
+}
+
+void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+ switch (expr->getKind()) {
+ case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+ fixSymbolsInTLSFixups(be->getLHS());
+ fixSymbolsInTLSFixups(be->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+ switch (symRef.getKind()) {
+ default:
+ return;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_TLSLD:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ break;
+ }
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+ break;
+ }
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+ this->MCObjectStreamer::EmitInstToFragment(Inst);
+ MCInstFragment &F = *cast<MCInstFragment>(getCurrentFragment());
+
+ for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i)
+ fixSymbolsInTLSFixups(F.getFixups()[i].getValue());
+}
+
+void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ fixSymbolsInTLSFixups(Fixups[i].getValue());
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCELFStreamer::Finish() {
+ EmitFrames(true);
+
+ for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
+ e = LocalCommons.end();
+ i != e; ++i) {
+ MCSymbolData *SD = i->SD;
+ uint64_t Size = i->Size;
+ unsigned ByteAlignment = i->ByteAlignment;
+ const MCSymbol &Symbol = SD->getSymbol();
+ const MCSection &Section = Symbol.getSection();
+
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section);
+ new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD->setFragment(F);
+
+ // Update the maximum alignment of the section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+ }
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool NoExecStack) {
+ MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+}
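In MCELFStreamer::Finish above, each local common symbol is laid out in .bss with an MCAlignFragment followed by an MCFillFragment. The self-contained sketch below shows only the equivalent offset arithmetic (round up to the alignment, then reserve the size); the sample sizes and alignments are illustrative, not LLVM API.

    #include <cstdint>
    #include <cstdio>

    // Round Offset up to the next multiple of Align (what MCAlignFragment achieves).
    static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      // Hypothetical local common symbols: {Size, ByteAlignment}.
      struct Common { uint64_t Size, Align; } LocalCommons[] = {{3, 1}, {8, 8}, {2, 4}};
      uint64_t BssSize = 0;
      for (const Common &LC : LocalCommons) {
        uint64_t SymbolOffset = alignTo(BssSize, LC.Align); // align fragment
        BssSize = SymbolOffset + LC.Size;                   // fill fragment reserves Size
        std::printf("offset %llu size %llu\n",
                    (unsigned long long)SymbolOffset, (unsigned long long)LC.Size);
      }
      std::printf("total .bss size %llu\n", (unsigned long long)BssSize);
      return 0;
    }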
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.h b/contrib/llvm/lib/MC/MCELFStreamer.h
new file mode 100644
index 000000000000..855e7e9ca60f
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFStreamer.h
@@ -0,0 +1,140 @@
+//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSTREAMER_H
+#define LLVM_MC_MCELFSTREAMER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+class MCELFStreamer : public MCObjectStreamer {
+public:
+ MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ MCAssembler *Assembler)
+ : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
+
+
+ ~MCELFStreamer() {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+
+ virtual void Finish();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+ struct LocalCommon {
+ MCSymbolData *SD;
+ uint64_t Size;
+ unsigned ByteAlignment;
+ };
+ std::vector<LocalCommon> LocalCommons;
+
+ SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+ /// @}
+ void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+ }
+
+ void SetSectionData() {
+ SetSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionText() {
+ SetSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionBss() {
+ SetSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+};
+
+} // end llvm namespace
+
+#endif
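
The private SetSectionData/SetSectionText/SetSectionBss helpers above all reduce to getELFSection() plus SwitchSection(); the same pattern works from client code for any other ELF section. A small sketch, assuming SectionKind::getReadOnly() and llvm/Support/ELF.h are available in this tree:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionELF.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/ELF.h"
    using namespace llvm;

    // Switch an object streamer to .rodata, mirroring SetSectionData() above.
    static void switchToRodata(MCStreamer &S) {
      S.SwitchSection(S.getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
                                                   ELF::SHF_ALLOC,
                                                   SectionKind::getReadOnly()));
    }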
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
new file mode 100644
index 000000000000..fcf1aabb5a86
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -0,0 +1,605 @@
+//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcexpr"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+namespace {
+namespace stats {
+STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
+}
+}
+
+void MCExpr::print(raw_ostream &OS) const {
+ switch (getKind()) {
+ case MCExpr::Target:
+ return cast<MCTargetExpr>(this)->PrintImpl(OS);
+ case MCExpr::Constant:
+ OS << cast<MCConstantExpr>(*this).getValue();
+ return;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
+ const MCSymbol &Sym = SRE.getSymbol();
+ // Parenthesize names that start with $ so that they don't look like
+ // absolute names.
+ bool UseParens = Sym.getName()[0] == '$';
+
+ if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_HA16 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_LO16) {
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ UseParens = true;
+ }
+
+ if (UseParens)
+ OS << '(' << Sym << ')';
+ else
+ OS << Sym;
+
+ if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF)
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_LO16)
+ OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+
+ return;
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
+ switch (UE.getOpcode()) {
+ default: assert(0 && "Invalid opcode!");
+ case MCUnaryExpr::LNot: OS << '!'; break;
+ case MCUnaryExpr::Minus: OS << '-'; break;
+ case MCUnaryExpr::Not: OS << '~'; break;
+ case MCUnaryExpr::Plus: OS << '+'; break;
+ }
+ OS << *UE.getSubExpr();
+ return;
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
+ OS << *BE.getLHS();
+ } else {
+ OS << '(' << *BE.getLHS() << ')';
+ }
+
+ switch (BE.getOpcode()) {
+ default: assert(0 && "Invalid opcode!");
+ case MCBinaryExpr::Add:
+ // Print "X-42" instead of "X+-42".
+ if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+ if (RHSC->getValue() < 0) {
+ OS << RHSC->getValue();
+ return;
+ }
+ }
+
+ OS << '+';
+ break;
+ case MCBinaryExpr::And: OS << '&'; break;
+ case MCBinaryExpr::Div: OS << '/'; break;
+ case MCBinaryExpr::EQ: OS << "=="; break;
+ case MCBinaryExpr::GT: OS << '>'; break;
+ case MCBinaryExpr::GTE: OS << ">="; break;
+ case MCBinaryExpr::LAnd: OS << "&&"; break;
+ case MCBinaryExpr::LOr: OS << "||"; break;
+ case MCBinaryExpr::LT: OS << '<'; break;
+ case MCBinaryExpr::LTE: OS << "<="; break;
+ case MCBinaryExpr::Mod: OS << '%'; break;
+ case MCBinaryExpr::Mul: OS << '*'; break;
+ case MCBinaryExpr::NE: OS << "!="; break;
+ case MCBinaryExpr::Or: OS << '|'; break;
+ case MCBinaryExpr::Shl: OS << "<<"; break;
+ case MCBinaryExpr::Shr: OS << ">>"; break;
+ case MCBinaryExpr::Sub: OS << '-'; break;
+ case MCBinaryExpr::Xor: OS << '^'; break;
+ }
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+ OS << *BE.getRHS();
+ } else {
+ OS << '(' << *BE.getRHS() << ')';
+ }
+ return;
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+}
+
+void MCExpr::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+/* *** */
+
+const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+ const MCExpr *RHS, MCContext &Ctx) {
+ return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+}
+
+const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) MCUnaryExpr(Opc, Expr);
+}
+
+const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+ return new (Ctx) MCConstantExpr(Value);
+}
+
+/* *** */
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+ VariantKind Kind,
+ MCContext &Ctx) {
+ return new (Ctx) MCSymbolRefExpr(Sym, Kind);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+ MCContext &Ctx) {
+ return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
+}
+
+StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
+ switch (Kind) {
+ default:
+ case VK_Invalid: return "<<invalid>>";
+ case VK_None: return "<<none>>";
+
+ case VK_GOT: return "GOT";
+ case VK_GOTOFF: return "GOTOFF";
+ case VK_GOTPCREL: return "GOTPCREL";
+ case VK_GOTTPOFF: return "GOTTPOFF";
+ case VK_INDNTPOFF: return "INDNTPOFF";
+ case VK_NTPOFF: return "NTPOFF";
+ case VK_GOTNTPOFF: return "GOTNTPOFF";
+ case VK_PLT: return "PLT";
+ case VK_TLSGD: return "TLSGD";
+ case VK_TLSLD: return "TLSLD";
+ case VK_TLSLDM: return "TLSLDM";
+ case VK_TPOFF: return "TPOFF";
+ case VK_DTPOFF: return "DTPOFF";
+ case VK_TLVP: return "TLVP";
+ case VK_ARM_PLT: return "(PLT)";
+ case VK_ARM_GOT: return "(GOT)";
+ case VK_ARM_GOTOFF: return "(GOTOFF)";
+ case VK_ARM_TPOFF: return "(tpoff)";
+ case VK_ARM_GOTTPOFF: return "(gottpoff)";
+ case VK_ARM_TLSGD: return "(tlsgd)";
+ case VK_PPC_TOC: return "toc";
+ case VK_PPC_DARWIN_HA16: return "ha16";
+ case VK_PPC_DARWIN_LO16: return "lo16";
+ case VK_PPC_GAS_HA16: return "ha";
+ case VK_PPC_GAS_LO16: return "l";
+ }
+}
+
+MCSymbolRefExpr::VariantKind
+MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
+ return StringSwitch<VariantKind>(Name)
+ .Case("GOT", VK_GOT)
+ .Case("got", VK_GOT)
+ .Case("GOTOFF", VK_GOTOFF)
+ .Case("gotoff", VK_GOTOFF)
+ .Case("GOTPCREL", VK_GOTPCREL)
+ .Case("gotpcrel", VK_GOTPCREL)
+ .Case("GOTTPOFF", VK_GOTTPOFF)
+ .Case("gottpoff", VK_GOTTPOFF)
+ .Case("INDNTPOFF", VK_INDNTPOFF)
+ .Case("indntpoff", VK_INDNTPOFF)
+ .Case("NTPOFF", VK_NTPOFF)
+ .Case("ntpoff", VK_NTPOFF)
+ .Case("GOTNTPOFF", VK_GOTNTPOFF)
+ .Case("gotntpoff", VK_GOTNTPOFF)
+ .Case("PLT", VK_PLT)
+ .Case("plt", VK_PLT)
+ .Case("TLSGD", VK_TLSGD)
+ .Case("tlsgd", VK_TLSGD)
+ .Case("TLSLD", VK_TLSLD)
+ .Case("tlsld", VK_TLSLD)
+ .Case("TLSLDM", VK_TLSLDM)
+ .Case("tlsldm", VK_TLSLDM)
+ .Case("TPOFF", VK_TPOFF)
+ .Case("tpoff", VK_TPOFF)
+ .Case("DTPOFF", VK_DTPOFF)
+ .Case("dtpoff", VK_DTPOFF)
+ .Case("TLVP", VK_TLVP)
+ .Case("tlvp", VK_TLVP)
+ .Default(VK_Invalid);
+}
+
+/* *** */
+
+void MCTargetExpr::Anchor() {}
+
+/* *** */
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+ return EvaluateAsAbsolute(Res, 0, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout,
+ const SectionAddrMap &Addrs) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+ return EvaluateAsAbsolute(Res, &Asm, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs) const {
+ MCValue Value;
+
+ // Fast path constants.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) {
+ Res = CE->getValue();
+ return true;
+ }
+
+ // FIXME: The use of InSet = Addrs is a hack. Setting InSet causes us to
+ // absolutize differences across sections, and that is what the MachO writer
+ // uses Addrs for.
+ bool IsRelocatable =
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs);
+
+ // Record the current value.
+ Res = Value.getConstant();
+
+ return IsRelocatable && Value.isAbsolute();
+}
+
+/// \brief Helper method for \see EvaluateSymbolicAdd().
+static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCSymbolRefExpr *&A,
+ const MCSymbolRefExpr *&B,
+ int64_t &Addend) {
+ if (!A || !B)
+ return;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+
+ if (SA.isUndefined() || SB.isUndefined())
+ return;
+
+ if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+ return;
+
+ MCSymbolData &AD = Asm->getSymbolData(SA);
+ MCSymbolData &BD = Asm->getSymbolData(SB);
+
+ if (AD.getFragment() == BD.getFragment()) {
+ Addend += (AD.getOffset() - BD.getOffset());
+
+ // Pointers to Thumb symbols need to have their low-bit set to allow
+ // for interworking.
+ if (Asm->isThumbFunc(&SA))
+ Addend |= 1;
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+ return;
+ }
+
+ if (!Layout)
+ return;
+
+ const MCSectionData &SecA = *AD.getFragment()->getParent();
+ const MCSectionData &SecB = *BD.getFragment()->getParent();
+
+ if ((&SecA != &SecB) && !Addrs)
+ return;
+
+ // Eagerly evaluate.
+ Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) -
+ Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol())));
+ if (Addrs && (&SecA != &SecB))
+ Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+}
+
+/// \brief Evaluate the result of an add between (conceptually) two MCValues.
+///
+/// This routine conceptually attempts to construct an MCValue:
+/// Result = (Result_A - Result_B + Result_Cst)
+/// from two MCValue's LHS and RHS where
+/// Result = LHS + RHS
+/// and
+/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+///
+/// This routine attempts to aggressively fold the operands such that the result
+/// is representable in an MCValue, but may not always succeed.
+///
+/// \returns True on success, false if the result is not representable in an
+/// MCValue.
+
+/// NOTE: It is really important to have both the Asm and Layout arguments.
+/// They might look redundant, but this function can be used before layout
+/// is done (see the object streamer for example) and having the Asm argument
+/// lets us avoid relaxations early.
+static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCValue &LHS, const MCSymbolRefExpr *RHS_A,
+ const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
+ MCValue &Res) {
+ // FIXME: This routine (and other evaluation parts) is *incredibly* sloppy
+ // about dealing with modifiers. This will ultimately bite us, one day.
+ const MCSymbolRefExpr *LHS_A = LHS.getSymA();
+ const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+ int64_t LHS_Cst = LHS.getConstant();
+
+ // Fold the result constant immediately.
+ int64_t Result_Cst = LHS_Cst + RHS_Cst;
+
+ assert((!Layout || Asm) &&
+ "Must have an assembler object if layout is given!");
+
+ // If we have a layout, we can fold resolved differences.
+ if (Asm) {
+ // First, fold out any differences which are fully resolved. By
+ // reassociating terms in
+ // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+ // we have the four possible differences:
+ // (LHS_A - LHS_B),
+ // (LHS_A - RHS_B),
+ // (RHS_A - LHS_B),
+ // (RHS_A - RHS_B).
+ // Since we are attempting to be as aggressive as possible about folding, we
+ // attempt to evaluate each possible alternative.
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B,
+ Result_Cst);
+ }
+
+ // We can't represent the addition or subtraction of two symbols.
+ if ((LHS_A && RHS_A) || (LHS_B && RHS_B))
+ return false;
+
+ // At this point, we have at most one additive symbol and one subtractive
+ // symbol -- find them.
+ const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
+ const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
+
+ // If we have a negated symbol, then we must also have a non-negated
+ // symbol in order to encode the expression.
+ if (B && !A)
+ return false;
+
+ Res = MCValue::get(A, B, Result_Cst);
+ return true;
+}
+
+bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsRelocatableImpl(Res, &Layout.getAssembler(), &Layout,
+ 0, false);
+}
+
+bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet) const {
+ ++stats::MCExprEvaluate;
+
+ switch (getKind()) {
+ case Target:
+ return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
+
+ case Constant:
+ Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
+ return true;
+
+ case SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+ const MCSymbol &Sym = SRE->getSymbol();
+
+ // Evaluate recursively if this is a variable.
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ bool Ret = Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm,
+ Layout,
+ Addrs,
+ true);
+ // If we failed to simplify this to a constant, let the target
+ // handle it.
+ if (Ret && !Res.getSymA() && !Res.getSymB())
+ return true;
+ }
+
+ Res = MCValue::get(SRE, 0, 0);
+ return true;
+ }
+
+ case Unary: {
+ const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
+ MCValue Value;
+
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
+ Addrs, InSet))
+ return false;
+
+ switch (AUE->getOpcode()) {
+ case MCUnaryExpr::LNot:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(!Value.getConstant());
+ break;
+ case MCUnaryExpr::Minus:
+ /// -(a - b + const) ==> (b - a - const)
+ if (Value.getSymA() && !Value.getSymB())
+ return false;
+ Res = MCValue::get(Value.getSymB(), Value.getSymA(),
+ -Value.getConstant());
+ break;
+ case MCUnaryExpr::Not:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(~Value.getConstant());
+ break;
+ case MCUnaryExpr::Plus:
+ Res = Value;
+ break;
+ }
+
+ return true;
+ }
+
+ case Binary: {
+ const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
+ MCValue LHSValue, RHSValue;
+
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
+ Addrs, InSet) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
+ Addrs, InSet))
+ return false;
+
+ // We only support a few operations on non-constant expressions; handle
+ // those first.
+ if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
+ switch (ABE->getOpcode()) {
+ default:
+ return false;
+ case MCBinaryExpr::Sub:
+ // Negate RHS and add.
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
+ RHSValue.getSymB(), RHSValue.getSymA(),
+ -RHSValue.getConstant(),
+ Res);
+
+ case MCBinaryExpr::Add:
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
+ RHSValue.getSymA(), RHSValue.getSymB(),
+ RHSValue.getConstant(),
+ Res);
+ }
+ }
+
+ // FIXME: We need target hooks for the evaluation. It may be limited in
+ // width, and gas defines the result of comparisons and right shifts
+ // differently from Apple as.
+ int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
+ int64_t Result = 0;
+ switch (ABE->getOpcode()) {
+ case MCBinaryExpr::Add: Result = LHS + RHS; break;
+ case MCBinaryExpr::And: Result = LHS & RHS; break;
+ case MCBinaryExpr::Div: Result = LHS / RHS; break;
+ case MCBinaryExpr::EQ: Result = LHS == RHS; break;
+ case MCBinaryExpr::GT: Result = LHS > RHS; break;
+ case MCBinaryExpr::GTE: Result = LHS >= RHS; break;
+ case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
+ case MCBinaryExpr::LOr: Result = LHS || RHS; break;
+ case MCBinaryExpr::LT: Result = LHS < RHS; break;
+ case MCBinaryExpr::LTE: Result = LHS <= RHS; break;
+ case MCBinaryExpr::Mod: Result = LHS % RHS; break;
+ case MCBinaryExpr::Mul: Result = LHS * RHS; break;
+ case MCBinaryExpr::NE: Result = LHS != RHS; break;
+ case MCBinaryExpr::Or: Result = LHS | RHS; break;
+ case MCBinaryExpr::Shl: Result = LHS << RHS; break;
+ case MCBinaryExpr::Shr: Result = LHS >> RHS; break;
+ case MCBinaryExpr::Sub: Result = LHS - RHS; break;
+ case MCBinaryExpr::Xor: Result = LHS ^ RHS; break;
+ }
+
+ Res = MCValue::get(Result);
+ return true;
+ }
+ }
+
+ assert(0 && "Invalid assembly expression kind!");
+ return false;
+}
+
+const MCSection *MCExpr::FindAssociatedSection() const {
+ switch (getKind()) {
+ case Target:
+ // We never look through target specific expressions.
+ return cast<MCTargetExpr>(this)->FindAssociatedSection();
+
+ case Constant:
+ return MCSymbol::AbsolutePseudoSection;
+
+ case SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+ const MCSymbol &Sym = SRE->getSymbol();
+
+ if (Sym.isDefined())
+ return &Sym.getSection();
+
+ return 0;
+ }
+
+ case Unary:
+ return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection();
+
+ case Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
+ const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
+ const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+
+ // If either section is absolute, return the other.
+ if (LHS_S == MCSymbol::AbsolutePseudoSection)
+ return RHS_S;
+ if (RHS_S == MCSymbol::AbsolutePseudoSection)
+ return LHS_S;
+
+ // Otherwise, return the first non-null section.
+ return LHS_S ? LHS_S : RHS_S;
+ }
+ }
+
+ assert(0 && "Invalid assembly expression kind!");
+ return 0;
+}
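
A short sketch of the folding path implemented above: an expression such as (end - start) + 8 stays relocatable until both symbols have symbol data in the same fragment (or a layout is available), at which point AttemptToFoldSymbolOffsetDifference collapses the difference and EvaluateAsAbsolute returns a plain constant. The symbol names are illustrative; Asm stands for an assembler that has already emitted both labels.

    #include "llvm/MC/MCAssembler.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    using namespace llvm;

    static bool foldSizePlus8(MCContext &Ctx, const MCAssembler &Asm,
                              int64_t &Res) {
      const MCExpr *Start =
          MCSymbolRefExpr::Create("start", MCSymbolRefExpr::VK_None, Ctx);
      const MCExpr *End =
          MCSymbolRefExpr::Create("end", MCSymbolRefExpr::VK_None, Ctx);
      const MCExpr *Diff = MCBinaryExpr::Create(MCBinaryExpr::Sub, End, Start, Ctx);
      const MCExpr *Expr = MCBinaryExpr::Create(
          MCBinaryExpr::Add, Diff, MCConstantExpr::Create(8, Ctx), Ctx);
      // True only when the difference is fully resolved (same fragment, or a
      // layout that pins both symbols); Res then holds (end - start) + 8.
      return Expr->EvaluateAsAbsolute(Res, Asm);
    }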
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
new file mode 100644
index 000000000000..4cb628b395c3
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -0,0 +1,66 @@
+//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCOperand ";
+ if (!isValid())
+ OS << "INVALID";
+ else if (isReg())
+ OS << "Reg:" << getReg();
+ else if (isImm())
+ OS << "Imm:" << getImm();
+ else if (isExpr()) {
+ OS << "Expr:(" << *getExpr() << ")";
+ } else
+ OS << "UNDEFINED";
+ OS << ">";
+}
+
+void MCOperand::dump() const {
+ print(dbgs(), 0);
+ dbgs() << "\n";
+}
+
+void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCInst " << getOpcode();
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << " ";
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
+ const MCInstPrinter *Printer,
+ StringRef Separator) const {
+ OS << "<MCInst #" << getOpcode();
+
+ // Show the instruction opcode name if we have access to a printer.
+ if (Printer)
+ OS << ' ' << Printer->getOpcodeName(getOpcode());
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << Separator;
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+void MCInst::dump() const {
+ print(dbgs(), 0);
+ dbgs() << "\n";
+}
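
For reference, a minimal sketch of the kind of object these printers consume. The opcode and register numbers are target-specific enum values, so the literals below are placeholders; setOpcode/addOperand/CreateReg/CreateImm are assumed from MCInst.h in this tree.

    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void buildAndPrint(raw_ostream &OS) {
      MCInst Inst;
      Inst.setOpcode(1);                         // placeholder target opcode
      Inst.addOperand(MCOperand::CreateReg(3));  // placeholder register number
      Inst.addOperand(MCOperand::CreateImm(42));
      Inst.print(OS, 0);  // e.g. "<MCInst 1 <MCOperand Reg:3> <MCOperand Imm:42>>"
      OS << '\n';
    }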
diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp
new file mode 100644
index 000000000000..81a939ffaaef
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp
@@ -0,0 +1,25 @@
+//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+MCInstPrinter::~MCInstPrinter() {
+}
+
+/// getOpcodeName - Return the name of the specified opcode enum (e.g.
+/// "MOV32ri") or empty if we can't resolve it.
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return "";
+}
+
+void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ assert(0 && "Target should implement this");
+}
diff --git a/contrib/llvm/lib/MC/MCLabel.cpp b/contrib/llvm/lib/MC/MCLabel.cpp
new file mode 100644
index 000000000000..9c0fc92e6c05
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCLabel.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCLabel::print(raw_ostream &OS) const {
+ OS << '"' << getInstance() << '"';
+}
+
+void MCLabel::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
new file mode 100644
index 000000000000..309752ec5f02
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
@@ -0,0 +1,249 @@
+//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class MCLoggingStreamer : public MCStreamer {
+ llvm::OwningPtr<MCStreamer> Child;
+
+ raw_ostream &OS;
+
+public:
+ MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS)
+ : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {}
+
+ void LogCall(const char *Function) {
+ OS << Function << "\n";
+ }
+
+ void LogCall(const char *Function, const Twine &Message) {
+ OS << Function << ": " << Message << "\n";
+ }
+
+ virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); }
+
+ virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); }
+
+ virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); }
+
+ virtual void AddComment(const Twine &T) {
+ LogCall("AddComment", T);
+ return Child->AddComment(T);
+ }
+
+ virtual void AddBlankLine() {
+ LogCall("AddBlankLine");
+ return Child->AddBlankLine();
+ }
+
+ virtual void ChangeSection(const MCSection *Section) {
+ LogCall("ChangeSection");
+ return Child->ChangeSection(Section);
+ }
+
+ virtual void InitSections() {
+ LogCall("InitSections");
+ return Child->InitSections();
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ LogCall("EmitLabel");
+ return Child->EmitLabel(Symbol);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ LogCall("EmitAssemblerFlag");
+ return Child->EmitAssemblerFlag(Flag);
+ }
+
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ LogCall("EmitThumbFunc");
+ return Child->EmitThumbFunc(Func);
+ }
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ LogCall("EmitAssignment");
+ return Child->EmitAssignment(Symbol, Value);
+ }
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ LogCall("EmitWeakReference");
+ return Child->EmitWeakReference(Alias, Symbol);
+ }
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ LogCall("EmitDwarfAdvanceLineAddr");
+ return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ PointerSize);
+ }
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ LogCall("EmitSymbolAttribute");
+ return Child->EmitSymbolAttribute(Symbol, Attribute);
+ }
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ LogCall("EmitSymbolDesc");
+ return Child->EmitSymbolDesc(Symbol, DescValue);
+ }
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ LogCall("BeginCOFFSymbolDef");
+ return Child->BeginCOFFSymbolDef(Symbol);
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ LogCall("EmitCOFFSymbolStorageClass");
+ return Child->EmitCOFFSymbolStorageClass(StorageClass);
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ LogCall("EmitCOFFSymbolType");
+ return Child->EmitCOFFSymbolType(Type);
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ LogCall("EndCOFFSymbolDef");
+ return Child->EndCOFFSymbolDef();
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ LogCall("EmitELFSize");
+ return Child->EmitELFSize(Symbol, Value);
+ }
+
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ LogCall("EmitCommonSymbol");
+ return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ LogCall("EmitLocalCommonSymbol");
+ return Child->EmitLocalCommonSymbol(Symbol, Size);
+ }
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ LogCall("EmitZerofill");
+ return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ LogCall("EmitTBSSSymbol");
+ return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ LogCall("EmitBytes");
+ return Child->EmitBytes(Data, AddrSpace);
+ }
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace){
+ LogCall("EmitValue");
+ return Child->EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitULEB128Value(const MCExpr *Value) {
+ LogCall("EmitULEB128Value");
+ return Child->EmitULEB128Value(Value);
+ }
+
+ virtual void EmitSLEB128Value(const MCExpr *Value) {
+ LogCall("EmitSLEB128Value");
+ return Child->EmitSLEB128Value(Value);
+ }
+
+ virtual void EmitGPRel32Value(const MCExpr *Value) {
+ LogCall("EmitGPRel32Value");
+ return Child->EmitGPRel32Value(Value);
+ }
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ LogCall("EmitFill");
+ return Child->EmitFill(NumBytes, FillValue, AddrSpace);
+ }
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {
+ LogCall("EmitValueToAlignment");
+ return Child->EmitValueToAlignment(ByteAlignment, Value,
+ ValueSize, MaxBytesToEmit);
+ }
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {
+ LogCall("EmitCodeAlignment");
+ return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit);
+ }
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {
+ LogCall("EmitValueToOffset");
+ return Child->EmitValueToOffset(Offset, Value);
+ }
+
+ virtual void EmitFileDirective(StringRef Filename) {
+ LogCall("EmitFileDirective", "FileName:" + Filename);
+ return Child->EmitFileDirective(Filename);
+ }
+
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ LogCall("EmitDwarfFileDirective",
+ "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
+ return Child->EmitDwarfFileDirective(FileNo, Filename);
+ }
+
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName) {
+ LogCall("EmitDwarfLocDirective",
+ "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
+ " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
+ " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
+ return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator, FileName);
+ }
+
+ virtual void EmitInstruction(const MCInst &Inst) {
+ LogCall("EmitInstruction");
+ return Child->EmitInstruction(Inst);
+ }
+
+ virtual void EmitRawText(StringRef String) {
+ LogCall("EmitRawText", "\"" + String + "\"");
+ return Child->EmitRawText(String);
+ }
+
+ virtual void Finish() {
+ LogCall("Finish");
+ return Child->Finish();
+ }
+
+};
+
+} // end anonymous namespace.
+
+MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) {
+ return new MCLoggingStreamer(Child, OS);
+}
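
A quick sketch of how the logger is meant to be layered: wrap any concrete streamer so each MCStreamer call is traced to a stream before being forwarded. The wrapped streamer is owned by the logger (the OwningPtr above).

    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Inner is any streamer built by the factories in this patch
    // (createELFStreamer, createMachOStreamer, createNullStreamer, ...).
    static MCStreamer *wrapWithLogging(MCStreamer *Inner) {
      return createLoggingStreamer(Inner, errs()); // takes ownership of Inner
    }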
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
new file mode 100644
index 000000000000..1b21249ca321
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -0,0 +1,420 @@
+//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCMachOStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+ MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitFileDirective(StringRef Filename) {
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
+ }
+
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCMachOStreamer::InitSections() {
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+
+}
+
+void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+ MCSymbolData &SD =
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ if (SD.isExternal())
+ EmitSymbolAttribute(EHSymbol, MCSA_Global);
+ if (SD.getFlags() & SF_WeakDefinition)
+ EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+ if (SD.isPrivateExtern())
+ EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ // isSymbolLinkerVisible uses the section.
+ Symbol->setSection(*getCurrentSection());
+ // We have to create a new fragment if this is an atom defining symbol;
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(*Symbol))
+ new MCDataFragment(getCurrentSectionData());
+
+ MCObjectStreamer::EmitLabel(Symbol);
+
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
+ // to clear the weak reference and weak definition bits too, but the
+ // implementation was buggy. For now we just try to match 'as', for
+ // diffability.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+}
+
+void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ // Let the target do whatever target specific stuff it needs to do.
+ getAssembler().getBackend().HandleAssemblerFlag(Flag);
+ // Do any generic stuff we need to do.
+ switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // no-op here.
+ case MCAF_Code32: return; // no-op here.
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ default:
+ llvm_unreachable("invalid assembler flag!");
+ }
+}
+
+void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
+
+ // Remember that the function is a thumb function. Fixup and relocation
+ // values will need to be adjusted.
+ getAssembler().setIsThumbFunc(Symbol);
+
+ // Mark the thumb bit on the symbol.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setFlags(SD.getFlags() | SF_ThumbFunc);
+}
+
+void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol; note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to be desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more
+ // well-defined.
+ switch (Attribute) {
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeFunction:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_ELF_TypeObject:
+ case MCSA_ELF_TypeTLS:
+ case MCSA_ELF_TypeCommon:
+ case MCSA_ELF_TypeNoType:
+ case MCSA_ELF_TypeGnuUniqueObject:
+ case MCSA_IndirectSymbol:
+ case MCSA_Hidden:
+ case MCSA_Internal:
+ case MCSA_Protected:
+ case MCSA_Weak:
+ case MCSA_Local:
+ assert(0 && "Invalid symbol attribute for Mach-O!");
+ break;
+
+ case MCSA_Global:
+ SD.setExternal(true);
+ // This effectively clears the undefined lazy bit, in Darwin 'as', although
+ // it isn't very consistent because it implements this as part of symbol
+ // lookup.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ case MCSA_LazyReference:
+ // FIXME: This requires -dynamic.
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ // Since .reference sets the no dead strip bit, it is equivalent to
+ // .no_dead_strip in practice.
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ break;
+
+ case MCSA_SymbolResolver:
+ SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+ break;
+
+ case MCSA_PrivateExtern:
+ SD.setExternal(true);
+ SD.setPrivateExtern(true);
+ break;
+
+ case MCSA_WeakReference:
+ // FIXME: This requires -dynamic.
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_WeakReference);
+ break;
+
+ case MCSA_WeakDefinition:
+ // FIXME: 'as' enforces that this is defined and global. The manual claims
+ // it has to be in a coalesced section, but this isn't enforced.
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+ break;
+
+ case MCSA_WeakDefAutoPrivate:
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
+ break;
+ }
+}
+
+void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ // Encode the 'desc' value into the lowest implementation defined bits.
+ assert(DescValue == (DescValue & SF_DescFlagsMask) &&
+ "Invalid .desc value!");
+ getAssembler().getOrCreateSymbolData(*Symbol).setFlags(
+ DescValue & SF_DescFlagsMask);
+}
+
+void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setExternal(true);
+ SD.setCommon(Size, ByteAlignment);
+}
+
+void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
+
+ // The symbol may not be present, which only creates the section.
+ if (!Symbol)
+ return;
+
+ // FIXME: Assert that this section has the zerofill type.
+
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // Emit an align fragment if necessary.
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD.setFragment(F);
+
+ Symbol->setSection(*Section);
+
+ // Update the maximum alignment on the zero fill section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+}
+
+// This should always be called with the thread local bss section. Like the
+// .zerofill directive this doesn't actually switch sections on us.
+void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ return;
+}
+
+void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCMachOStreamer::Finish() {
+ EmitFrames(true);
+
+ // We have to set the fragment atom associations so we can relax properly for
+ // Mach-O.
+
+ // First, scan the symbol table to build a lookup table from fragments to
+ // defining symbols.
+ DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
+ for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(),
+ ie = getAssembler().symbol_end(); it != ie; ++it) {
+ if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) &&
+ it->getFragment()) {
+ // An atom defining symbol should never be internal to a fragment.
+ assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!");
+ DefiningSymbolMap[it->getFragment()] = it;
+ }
+ }
+
+ // Set the fragment atom associations by tracking the last seen atom defining
+ // symbol.
+ for (MCAssembler::iterator it = getAssembler().begin(),
+ ie = getAssembler().end(); it != ie; ++it) {
+ MCSymbolData *CurrentAtom = 0;
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
+ CurrentAtom = SD;
+ it2->setAtom(CurrentAtom);
+ }
+ }
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
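
A small sketch of the EmitZerofill path above: reserve zero-initialized, aligned storage for a symbol in a Mach-O zerofill section. MCSectionMachO::S_ZEROFILL is assumed to be the section type constant in this tree, and the segment and symbol names are illustrative.

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionMachO.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    static void reserveBuffer(MCStreamer &S) {
      MCContext &Ctx = S.getContext();
      const MCSection *Bss =
          Ctx.getMachOSection("__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
                              0, SectionKind::getBSS());
      // 64 bytes, 16-byte aligned: emits an align fragment plus a fill fragment.
      S.EmitZerofill(Bss, Ctx.GetOrCreateSymbol("_buffer"), 64, 16);
    }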
diff --git a/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
new file mode 100644
index 000000000000..146cebf01a3a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
@@ -0,0 +1,22 @@
+//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+
+using namespace llvm;
+
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(
+ bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
+ bool UseAggressiveSymbolFolding_)
+ : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
+ UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
+}
+
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
+}
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
new file mode 100644
index 000000000000..9577af010205
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -0,0 +1,104 @@
+//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+namespace {
+
+ class MCNullStreamer : public MCStreamer {
+ public:
+ MCNullStreamer(MCContext &Context) : MCStreamer(Context) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections() {
+ }
+
+ virtual void ChangeSection(const MCSection *Section) {
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void EmitThumbFunc(MCSymbol *Func) {}
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {}
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+ virtual void EmitCOFFSymbolType(int Type) {}
+ virtual void EndCOFFSymbolDef() {}
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {}
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {}
+ virtual void EmitULEB128Value(const MCExpr *Value) {}
+ virtual void EmitSLEB128Value(const MCExpr *Value) {}
+ virtual void EmitGPRel32Value(const MCExpr *Value) {}
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {}
+
+ virtual void EmitFileDirective(StringRef Filename) {}
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ return false;
+ }
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName) {}
+ virtual void EmitInstruction(const MCInst &Inst) {}
+
+ virtual void Finish() {}
+
+ /// @}
+ };
+
+}
+
+MCStreamer *llvm::createNullStreamer(MCContext &Context) {
+ return new MCNullStreamer(Context);
+}
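
The null streamer is the cheapest possible sink: it accepts the full MCStreamer API and discards everything, which is useful for syntax-checking or timing a parse without producing output. A one-line sketch:

    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    static MCStreamer *makeSink(MCContext &Ctx) {
      return createNullStreamer(Ctx);  // every Emit* call becomes a no-op
    }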
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
new file mode 100644
index 000000000000..8635aac00302
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -0,0 +1,254 @@
+//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter_)
+ : MCStreamer(Context),
+ Assembler(new MCAssembler(Context, TAB,
+ *Emitter_, *TAB.createObjectWriter(OS),
+ OS)),
+ CurSectionData(0)
+{
+}
+
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter_,
+ MCAssembler *_Assembler)
+ : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0)
+{
+}
+
+MCObjectStreamer::~MCObjectStreamer() {
+ delete &Assembler->getBackend();
+ delete &Assembler->getEmitter();
+ delete &Assembler->getWriter();
+ delete Assembler;
+}
+
+MCFragment *MCObjectStreamer::getCurrentFragment() const {
+ assert(getCurrentSectionData() && "No current section!");
+
+ if (!getCurrentSectionData()->empty())
+ return &getCurrentSectionData()->getFragmentList().back();
+
+ return 0;
+}
+
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!F)
+ F = new MCDataFragment(getCurrentSectionData());
+ return F;
+}
+
+const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+
+ return Value;
+}
+
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+ EmitIntValue(AbsValue, Size, AddrSpace);
+ return;
+ }
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ Value,
+ MCFixup::getKindForSize(Size, false)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
+ MCStreamer::EmitLabel(Symbol);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // FIXME: This is wasteful; we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists; otherwise, we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitULEB128IntValue(IntValue);
+ return;
+ }
+ Value = ForceExpAbs(Value);
+ new MCLEBFragment(*Value, false, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitSLEB128IntValue(IntValue);
+ return;
+ }
+ Value = ForceExpAbs(Value);
+ new MCLEBFragment(*Value, true, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ report_fatal_error("This file format doesn't support weak aliases.");
+}
+
+void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+
+ CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+}
+
+void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ // Now that a machine instruction has been assembled into this section, make
+ // a line entry for any .loc directive that has been seen.
+ MCLineEntry::Make(this, getCurrentSection());
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
+ return;
+ }
+
+ // Otherwise, if we are relaxing everything, relax the instruction as much as
+ // possible and emit it as data.
+ if (getAssembler().getRelaxAll()) {
+ MCInst Relaxed;
+ getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
+ }
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
+}
+
+void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ SmallString<128> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
+ VecOS.flush();
+ IF->getCode().append(Code.begin(), Code.end());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ if (!LastLabel) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+ return;
+ }
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfLineAddr::Emit(this, LineDelta, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(AddrDelta);
+ new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(AddrDelta);
+ new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+}
+
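+// EmitValueToOffset is used for directives such as .org: when the target
+// offset is already absolute, an MCOrgFragment pads up to it at layout time;
+// otherwise the distance from the current position must itself evaluate to an
+// assembly-time constant, and that many copies of Value are emitted directly.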
+void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ int64_t Res;
+ if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ return;
+ }
+
+ MCSymbol *CurrentPos = getContext().CreateTempSymbol();
+ EmitLabel(CurrentPos);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Ref =
+ MCSymbolRefExpr::Create(CurrentPos, Variant, getContext());
+ const MCExpr *Delta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
+
+ if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
+ report_fatal_error("expected assembly-time absolute expression");
+ EmitFill(Res, Value, 0);
+}
+
+void MCObjectStreamer::Finish() {
+ // Dump out the dwarf file & directory tables and line tables.
+ if (getContext().hasDwarfFiles())
+ MCDwarfFileTable::Emit(this);
+
+ getAssembler().Finish();
+}
diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp
new file mode 100644
index 000000000000..efe9f68ee22b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp
@@ -0,0 +1,80 @@
+//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+MCObjectWriter::~MCObjectWriter() {
+}
+
+/// Utility function to encode a SLEB128 value.
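+/// For example, EncodeSLEB128(-2, OS) emits the single byte 0x7e, while
+/// EncodeSLEB128(64, OS) emits 0xc0 0x00, since bit 6 of the first byte would
+/// otherwise be read back as a sign bit.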
+void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
+ bool More;
+ do {
+ uint8_t Byte = Value & 0x7f;
+ // NOTE: this assumes that this signed shift is an arithmetic right shift.
+ Value >>= 7;
+ More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+ ((Value == -1) && ((Byte & 0x40) != 0))));
+ if (More)
+ Byte |= 0x80; // Mark this byte to indicate that more bytes will follow.
+ OS << char(Byte);
+ } while (More);
+}
+
+/// Utility function to encode a ULEB128 value.
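+/// For example, EncodeULEB128(300, OS) emits the two bytes 0xac 0x02.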
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // Mark this byte to indicate that more bytes will follow.
+ OS << char(Byte);
+ } while (Value != 0);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+ const MCSymbolRefExpr *A,
+ const MCSymbolRefExpr *B,
+ bool InSet) const {
+ // Modified symbol references cannot be resolved.
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ B->getKind() != MCSymbolRefExpr::VK_None)
+ return false;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+ if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+ return false;
+
+ const MCSymbolData &DataA = Asm.getSymbolData(SA);
+ const MCSymbolData &DataB = Asm.getSymbolData(SB);
+
+ return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
+ *DataB.getFragment(),
+ InSet,
+ false);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+ // On ELF and COFF A - B is absolute if A and B are in the same section.
+ return &SecA == &SecB;
+}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
new file mode 100644
index 000000000000..0c1f8f0df774
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -0,0 +1,441 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
+ CurBuf = NULL;
+ CurPtr = NULL;
+}
+
+AsmLexer::~AsmLexer() {
+}
+
+void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
+ CurBuf = buf;
+
+ if (ptr)
+ CurPtr = ptr;
+ else
+ CurPtr = CurBuf->getBufferStart();
+
+ TokStart = 0;
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return AsmToken::Error.
+AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ SetError(SMLoc::getFromPointer(Loc), Msg);
+
+ return AsmToken(AsmToken::Error, StringRef(Loc, 0));
+}
+
+int AsmLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+///
+/// The leading integral digit sequence and dot should have already been
+/// consumed, some or all of the fractional digit sequence *can* have been
+/// consumed.
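+///
+/// For example, "1.5e+10" and ".25" each lex as a single AsmToken::Real token.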
+AsmToken AsmLexer::LexFloatLiteral() {
+ // Skip the fractional digit sequence.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ // Check for exponent; we intentionally accept a slightly wider set of
+ // literals here and rely on the upstream client to reject invalid ones (e.g.,
+ // "1e+").
+ if (*CurPtr == 'e' || *CurPtr == 'E') {
+ ++CurPtr;
+ if (*CurPtr == '-' || *CurPtr == '+')
+ ++CurPtr;
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ }
+
+ return AsmToken(AsmToken::Real,
+ StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) {
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+}
+AsmToken AsmLexer::LexIdentifier() {
+ // Check for floating point literals.
+ if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+ // Disambiguate a .1243foo identifier from a floating literal.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+ return LexFloatLiteral();
+ }
+
+ while (IsIdentifierChar(*CurPtr))
+ ++CurPtr;
+
+ // Handle . as a special case.
+ if (CurPtr == TokStart+1 && TokStart[0] == '.')
+ return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
+
+ return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexSlash: Slash: /
+/// C-Style Comment: /* ... */
+AsmToken AsmLexer::LexSlash() {
+ switch (*CurPtr) {
+ case '*': break; // C style comment.
+ case '/': return ++CurPtr, LexLineComment();
+ default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
+ }
+
+ // C Style comment.
+ ++CurPtr; // skip the star.
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ return LexToken();
+ }
+ }
+}
+
+/// LexLineComment: Comment: #[^\n]*
+/// : //[^\n]*
+AsmToken AsmLexer::LexLineComment() {
+ // FIXME: This is broken if we happen to hit a comment at the end of a file, which
+ // was .included, and which doesn't end with a newline.
+ int CurChar = getNextChar();
+ while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
+}
+
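+// SkipIgnoredIntegerSuffix - Advance past an "LL" or "ULL" suffix on an
+// integer literal; as noted below, the darwin/x86 assembler accepts these
+// suffixes and simply ignores them.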
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
+ if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
+ CurPtr += 2;
+ if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
+ CurPtr += 3;
+}
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
+/// Octal integer: 0[0-7]+
+/// Hex integer: 0x[0-9a-fA-F]+
+/// Decimal integer: [1-9][0-9]*
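+///
+/// For example, "0x1f" lexes to the value 31, "0b101" to 5, and "017" to 15.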
+AsmToken AsmLexer::LexDigit() {
+ // Decimal integer: [1-9][0-9]*
+ if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ // Check for floating point literals.
+ if (*CurPtr == '.' || *CurPtr == 'e') {
+ ++CurPtr;
+ return LexFloatLiteral();
+ }
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ long long Value;
+ if (Result.getAsInteger(10, Value)) {
+ // Allow positive values that are too large to fit into a signed 64-bit
+ // integer, but that do fit in an unsigned one; we just convert them over.
+ unsigned long long UValue;
+ if (Result.getAsInteger(10, UValue))
+ return ReturnError(TokStart, "invalid decimal number");
+ Value = (long long)UValue;
+ }
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+ }
+
+ if (*CurPtr == 'b') {
+ ++CurPtr;
+ // See if we actually have "0b" as part of something like "jmp 0b\n"
+ if (!isdigit(CurPtr[0])) {
+ --CurPtr;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ return AsmToken(AsmToken::Integer, Result, 0);
+ }
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(TokStart, "invalid binary number");
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ long long Value;
+ if (Result.substr(2).getAsInteger(2, Value))
+ return ReturnError(TokStart, "invalid binary number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+ }
+
+ if (*CurPtr == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "invalid hexadecimal number");
+
+ unsigned long long Result;
+ if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
+ return ReturnError(TokStart, "invalid hexadecimal number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ (int64_t)Result);
+ }
+
+ // Must be an octal number, it starts with 0.
+ while (*CurPtr >= '0' && *CurPtr <= '9')
+ ++CurPtr;
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+ long long Value;
+ if (Result.getAsInteger(8, Value))
+ return ReturnError(TokStart, "invalid octal number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+}
+
+/// LexSingleQuote: Integer: 'b'
+AsmToken AsmLexer::LexSingleQuote() {
+ int CurChar = getNextChar();
+
+ if (CurChar == '\\')
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated single quote");
+
+ CurChar = getNextChar();
+
+ if (CurChar != '\'')
+ return ReturnError(TokStart, "single quote way too long");
+
+ // The idea here is that 'c' is basically just an integral
+ // constant.
+ StringRef Res = StringRef(TokStart,CurPtr - TokStart);
+ long long Value;
+
+ if (Res.startswith("\'\\")) {
+ char theChar = Res[2];
+ switch (theChar) {
+ default: Value = theChar; break;
+ case '\'': Value = '\''; break;
+ case 't': Value = '\t'; break;
+ case 'n': Value = '\n'; break;
+ case 'b': Value = '\b'; break;
+ }
+ } else
+ Value = TokStart[1];
+
+ return AsmToken(AsmToken::Integer, Res, Value);
+}
+
+
+/// LexQuote: String: "..."
+AsmToken AsmLexer::LexQuote() {
+ int CurChar = getNextChar();
+ // TODO: does gas allow multiline string constants?
+ while (CurChar != '"') {
+ if (CurChar == '\\') {
+ // Allow \", etc.
+ CurChar = getNextChar();
+ }
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+
+ CurChar = getNextChar();
+ }
+
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+}
+
+StringRef AsmLexer::LexUntilEndOfStatement() {
+ TokStart = CurPtr;
+
+ while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
+ !isAtStatementSeparator(CurPtr) && // End of statement marker.
+ *CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ ++CurPtr;
+ }
+ return StringRef(TokStart, CurPtr-TokStart);
+}
+
+bool AsmLexer::isAtStartOfComment(char Char) {
+ // FIXME: This won't work for multi-character comment indicators like "//".
+ return Char == *MAI.getCommentString();
+}
+
+bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
+ return strncmp(Ptr, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0;
+}
+
+AsmToken AsmLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ if (isAtStartOfComment(CurChar))
+ return LexLineComment();
+ if (isAtStatementSeparator(TokStart)) {
+ CurPtr += strlen(MAI.getSeparatorString()) - 1;
+ return AsmToken(AsmToken::EndOfStatement,
+ StringRef(TokStart, strlen(MAI.getSeparatorString())));
+ }
+
+ switch (CurChar) {
+ default:
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "invalid character in input");
+ case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ case 0:
+ case ' ':
+ case '\t':
+ // Ignore whitespace.
+ return LexToken();
+ case '\n': // FALL THROUGH.
+ case '\r':
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
+ case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
+ case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
+ case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
+ case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
+ case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
+ case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
+ case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
+ case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
+ case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
+ case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
+ case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
+ case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+ case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
+ case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
+ case '=':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
+ case '|':
+ if (*CurPtr == '|')
+ return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
+ case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
+ case '&':
+ if (*CurPtr == '&')
+ return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
+ case '!':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
+ case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
+ case '/': return LexSlash();
+ case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '\'': return LexSingleQuote();
+ case '"': return LexQuote();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+ case '<':
+ switch (*CurPtr) {
+ case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
+ StringRef(TokStart, 2));
+ case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
+ }
+ case '>':
+ switch (*CurPtr) {
+ case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
+ }
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
+ }
+}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
new file mode 100644
index 000000000000..0c181f39611e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -0,0 +1,2731 @@
+//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include <cctype>
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+FatalAssemblerWarnings("fatal-assembler-warnings",
+ cl::desc("Consider warnings as error"));
+
+namespace {
+
+/// \brief Helper class for tracking macro definitions.
+struct Macro {
+ StringRef Name;
+ StringRef Body;
+ std::vector<StringRef> Parameters;
+
+public:
+ Macro(StringRef N, StringRef B, const std::vector<StringRef> &P) :
+ Name(N), Body(B), Parameters(P) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+ /// The macro being instantiated.
+ const Macro *TheMacro;
+
+ /// The macro instantiation with substitutions.
+ MemoryBuffer *Instantiation;
+
+ /// The location of the instantiation.
+ SMLoc InstantiationLoc;
+
+ /// The location where parsing should resume upon instantiation completion.
+ SMLoc ExitLoc;
+
+public:
+ MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ MemoryBuffer *I);
+};
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+ friend class GenericAsmParser;
+
+ AsmParser(const AsmParser &); // DO NOT IMPLEMENT
+ void operator=(const AsmParser &); // DO NOT IMPLEMENT
+private:
+ AsmLexer Lexer;
+ MCContext &Ctx;
+ MCStreamer &Out;
+ const MCAsmInfo &MAI;
+ SourceMgr &SrcMgr;
+ MCAsmParserExtension *GenericParser;
+ MCAsmParserExtension *PlatformParser;
+
+ /// This is the current buffer index we're lexing from as managed by the
+ /// SourceMgr object.
+ int CurBuffer;
+
+ AsmCond TheCondState;
+ std::vector<AsmCond> TheCondStack;
+
+ /// DirectiveMap - This is a table of handlers for directives. Each handler is
+ /// invoked after the directive identifier is read and is responsible for
+ /// parsing and validating the rest of the directive. The handler is passed
+ /// in the directive name and the location of the directive keyword.
+ StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+
+ /// MacroMap - Map of currently defined macros.
+ StringMap<Macro*> MacroMap;
+
+ /// ActiveMacros - Stack of active macro instantiations.
+ std::vector<MacroInstantiation*> ActiveMacros;
+
+ /// Boolean tracking whether macro substitution is enabled.
+ unsigned MacrosEnabled : 1;
+
+ /// Flag tracking whether any errors have been encountered.
+ unsigned HadError : 1;
+
+public:
+ AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI);
+ ~AsmParser();
+
+ virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+ void AddDirectiveHandler(MCAsmParserExtension *Object,
+ StringRef Directive,
+ DirectiveHandler Handler) {
+ DirectiveMap[Directive] = std::make_pair(Object, Handler);
+ }
+
+public:
+ /// @name MCAsmParser Interface
+ /// {
+
+ virtual SourceMgr &getSourceManager() { return SrcMgr; }
+ virtual MCAsmLexer &getLexer() { return Lexer; }
+ virtual MCContext &getContext() { return Ctx; }
+ virtual MCStreamer &getStreamer() { return Out; }
+
+ virtual bool Warning(SMLoc L, const Twine &Msg);
+ virtual bool Error(SMLoc L, const Twine &Msg);
+
+ const AsmToken &Lex();
+
+ bool ParseExpression(const MCExpr *&Res);
+ virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+ /// }
+
+private:
+ void CheckForValidSection();
+
+ bool ParseStatement();
+
+ bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+ bool expandMacro(SmallString<256> &Buf, StringRef Body,
+ const std::vector<StringRef> &Parameters,
+ const std::vector<std::vector<AsmToken> > &A,
+ const SMLoc &L);
+ void HandleMacroExit();
+
+ void PrintMacroInstantiations();
+ void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+ bool ShowLine = true) const {
+ SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
+ }
+
+ /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+ bool EnterIncludeFile(const std::string &Filename);
+
+ /// \brief Reset the current lexer position to that given by \arg Loc. The
+ /// current token is not set; clients should ensure Lex() is called
+ /// subsequently.
+ void JumpToLoc(SMLoc Loc);
+
+ void EatToEndOfStatement();
+
+ /// \brief Parse up to the end of statement and return the contents from the
+ /// current token until the end of the statement; the current token on exit
+ /// will be either the EndOfStatement or EOF.
+ StringRef ParseStringToEndOfStatement();
+
+ bool ParseAssignment(StringRef Name, bool allow_redef);
+
+ bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+ /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \arg Res to the identifier contents.
+ bool ParseIdentifier(StringRef &Res);
+
+ // Directive Parsing.
+
+ // ".ascii", ".asciiz", ".string"
+ bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+ bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
+ bool ParseDirectiveFill(); // ".fill"
+ bool ParseDirectiveSpace(); // ".space"
+ bool ParseDirectiveZero(); // ".zero"
+ bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); // ".set", ".equ", ".equiv"
+ bool ParseDirectiveOrg(); // ".org"
+ // ".align{,32}", ".p2align{,w,l}"
+ bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+ /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+ /// accepts a single symbol (which should be a label or an external).
+ bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+
+ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+ bool ParseDirectiveAbort(); // ".abort"
+ bool ParseDirectiveInclude(); // ".include"
+
+ bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ // ".ifdef" or ".ifndef", depending on expect_defined
+ bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
+ bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
+
+ const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
+};
+
+/// \brief Generic implementations of directive handling, etc., which are
+/// shared (or the default, at least) for all assembler parsers.
+class GenericAsmParser : public MCAsmParserExtension {
+ template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<GenericAsmParser, Handler>);
+ }
+public:
+ GenericAsmParser() {}
+
+ AsmParser &getParser() {
+ return (AsmParser&) this->MCAsmParserExtension::getParser();
+ }
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ // Debugging directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs");
+
+ // CFI directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFISections>(
+ ".cfi_sections");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>(
+ ".cfi_startproc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>(
+ ".cfi_endproc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>(
+ ".cfi_def_cfa");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
+ ".cfi_def_cfa_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>(
+ ".cfi_adjust_cfa_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
+ ".cfi_def_cfa_register");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
+ ".cfi_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>(
+ ".cfi_rel_offset");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value");
+
+ // Macro directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_on");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_off");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
+
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128");
+ }
+
+ bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISections(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveLEB128(StringRef, SMLoc);
+};
+
+}
+
+namespace llvm {
+
+extern MCAsmParserExtension *createDarwinAsmParser();
+extern MCAsmParserExtension *createELFAsmParser();
+extern MCAsmParserExtension *createCOFFAsmParser();
+
+}
+
+enum { DEFAULT_ADDRSPACE = 0 };
+
+AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
+ MCStreamer &_Out, const MCAsmInfo &_MAI)
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
+ GenericParser(new GenericAsmParser), PlatformParser(0),
+ CurBuffer(0), MacrosEnabled(true) {
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+ // Initialize the generic parser.
+ GenericParser->Initialize(*this);
+
+ // Initialize the platform / file format parser.
+ //
+ // FIXME: This is a hack; we need to (majorly) clean up how these objects are
+ // created.
+ if (_MAI.hasMicrosoftFastStdCallMangling()) {
+ PlatformParser = createCOFFAsmParser();
+ PlatformParser->Initialize(*this);
+ } else if (_MAI.hasSubsectionsViaSymbols()) {
+ PlatformParser = createDarwinAsmParser();
+ PlatformParser->Initialize(*this);
+ } else {
+ PlatformParser = createELFAsmParser();
+ PlatformParser->Initialize(*this);
+ }
+}
+
+AsmParser::~AsmParser() {
+ assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+
+ // Destroy any macros.
+ for (StringMap<Macro*>::iterator it = MacroMap.begin(),
+ ie = MacroMap.end(); it != ie; ++it)
+ delete it->getValue();
+
+ delete PlatformParser;
+ delete GenericParser;
+}
+
+void AsmParser::PrintMacroInstantiations() {
+ // Print the active macro instantiation stack.
+ for (std::vector<MacroInstantiation*>::const_reverse_iterator
+ it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
+ PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
+ "note");
+}
+
+bool AsmParser::Warning(SMLoc L, const Twine &Msg) {
+ if (FatalAssemblerWarnings)
+ return Error(L, Msg);
+ PrintMessage(L, Msg, "warning");
+ PrintMacroInstantiations();
+ return false;
+}
+
+bool AsmParser::Error(SMLoc L, const Twine &Msg) {
+ HadError = true;
+ PrintMessage(L, Msg, "error");
+ PrintMacroInstantiations();
+ return true;
+}
+
+bool AsmParser::EnterIncludeFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ CurBuffer = NewBuf;
+
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+ return false;
+}
+
+void AsmParser::JumpToLoc(SMLoc Loc) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+}
+
+const AsmToken &AsmParser::Lex() {
+ const AsmToken *tok = &Lexer.Lex();
+
+ if (tok->is(AsmToken::Eof)) {
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ JumpToLoc(ParentIncludeLoc);
+ tok = &Lexer.Lex();
+ }
+ }
+
+ if (tok->is(AsmToken::Error))
+ Error(Lexer.getErrLoc(), Lexer.getErr());
+
+ return *tok;
+}
+
+bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
+ // Create the initial section, if requested.
+ if (!NoInitialTextSection)
+ Out.InitSections();
+
+ // Prime the lexer.
+ Lex();
+
+ HadError = false;
+ AsmCond StartingCondState = TheCondState;
+
+ // While we have input, parse each statement.
+ while (Lexer.isNot(AsmToken::Eof)) {
+ if (!ParseStatement()) continue;
+
+ // We had an error, validate that one was emitted and recover by skipping to
+ // the next line.
+ assert(HadError && "Parse statement returned an error, but none emitted!");
+ EatToEndOfStatement();
+ }
+
+ if (TheCondState.TheCond != StartingCondState.TheCond ||
+ TheCondState.Ignore != StartingCondState.Ignore)
+ return TokError("unmatched .ifs or .elses");
+
+ // Check to see there are no empty DwarfFile slots.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ if (!MCDwarfFiles[i])
+ TokError("unassigned file number: " + Twine(i) + " for .file directives");
+ }
+
+ // Check to see that all assembler local symbols were actually defined.
+ // Targets that don't do subsections via symbols may not want this, though,
+ // so conservatively exclude them. Only do this if we're finalizing,
+ // as otherwise we won't necessarily have seen everything yet.
+ if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+ const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+ for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+ e = Symbols.end();
+ i != e; ++i) {
+ MCSymbol *Sym = i->getValue();
+ // Variable symbols may not be marked as defined, so check those
+ // explicitly. If we know it's a variable, we have a definition for
+ // the purposes of this check.
+ if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+ // FIXME: We would really like to refer back to where the symbol was
+ // first referenced for a source location. We need to add something
+ // to track that. Currently, we just point to the end of the file.
+ PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined", "error", false);
+ }
+ }
+
+
+ // Finalize the output stream if there are no errors and if the client wants
+ // us to.
+ if (!HadError && !NoFinalize)
+ Out.Finish();
+
+ return HadError;
+}
+
+void AsmParser::CheckForValidSection() {
+ if (!getStreamer().getCurrentSection()) {
+ TokError("expected section directive before assembly directive");
+ Out.SwitchSection(Ctx.getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
+}
+
+/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::EatToEndOfStatement() {
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ // Eat EOL.
+ if (Lexer.is(AsmToken::EndOfStatement))
+ Lex();
+}
+
+StringRef AsmParser::ParseStringToEndOfStatement() {
+ const char *Start = getTok().getLoc().getPointer();
+
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ const char *End = getTok().getLoc().getPointer();
+ return StringRef(Start, End - Start);
+}
+
+/// ParseParenExpr - Parse a paren expression and return it.
+/// NOTE: This assumes the leading '(' has already been consumed.
+///
+/// parenexpr ::= expr)
+///
+bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (ParseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RParen))
+ return TokError("expected ')' in parentheses expression");
+ EndLoc = Lexer.getLoc();
+ Lex();
+ return false;
+}
+
+/// ParseBracketExpr - Parse a bracket expression and return it.
+/// NOTE: This assumes the leading '[' has already been consumed.
+///
+/// bracketexpr ::= expr]
+///
+bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (ParseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RBrac))
+ return TokError("expected ']' in brackets expression");
+ EndLoc = Lexer.getLoc();
+ Lex();
+ return false;
+}
+
+/// ParsePrimaryExpr - Parse a primary expression and return it.
+/// primaryexpr ::= (parenexpr
+/// primaryexpr ::= symbol
+/// primaryexpr ::= number
+/// primaryexpr ::= '.'
+/// primaryexpr ::= ~,+,- primaryexpr
+bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ switch (Lexer.getKind()) {
+ default:
+ return TokError("unknown token in expression");
+ // If we have an error assume that we've already handled it.
+ case AsmToken::Error:
+ return true;
+ case AsmToken::Exclaim:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateLNot(Res, getContext());
+ return false;
+ case AsmToken::Dollar:
+ case AsmToken::String:
+ case AsmToken::Identifier: {
+ EndLoc = Lexer.getLoc();
+
+ StringRef Identifier;
+ if (ParseIdentifier(Identifier))
+ return true;
+
+ // This is a symbol reference.
+ std::pair<StringRef, StringRef> Split = Identifier.split('@');
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
+
+ // Lookup the symbol variant if used.
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ if (Split.first.size() != Identifier.size()) {
+ Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+ if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ Variant = MCSymbolRefExpr::VK_None;
+ return TokError("invalid variant '" + Split.second + "'");
+ }
+ }
+
+ // If this is an absolute variable reference, substitute it now to preserve
+ // semantics in the face of reassignment.
+ if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
+ if (Variant)
+ return Error(EndLoc, "unexpected modifier on variable reference");
+
+ Res = Sym->getVariableValue();
+ return false;
+ }
+
+ // Otherwise create a symbol ref.
+ Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ return false;
+ }
+ case AsmToken::Integer: {
+ SMLoc Loc = getTok().getLoc();
+ int64_t IntVal = getTok().getIntVal();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat token.
+ // Look for 'b' or 'f' following an Integer as a directional label
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ StringRef IDVal = getTok().getString();
+ if (IDVal == "f" || IDVal == "b"){
+ MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+ IDVal == "f" ? 1 : 0);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+ if(IDVal == "b" && Sym->isUndefined())
+ return Error(Loc, "invalid reference to undefined symbol");
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat identifier.
+ }
+ }
+ return false;
+ }
+ case AsmToken::Real: {
+ APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ Lex(); // Eat token.
+ return false;
+ }
+ case AsmToken::Dot: {
+ // This is a '.' reference, which references the current PC. Emit a
+ // temporary label to the streamer and refer to it.
+ MCSymbol *Sym = Ctx.CreateTempSymbol();
+ Out.EmitLabel(Sym);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat identifier.
+ return false;
+ }
+ case AsmToken::LParen:
+ Lex(); // Eat the '('.
+ return ParseParenExpr(Res, EndLoc);
+ case AsmToken::LBrac:
+ if (!PlatformParser->HasBracketExpressions())
+ return TokError("brackets expression not supported on this target");
+ Lex(); // Eat the '['.
+ return ParseBracketExpr(Res, EndLoc);
+ case AsmToken::Minus:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateMinus(Res, getContext());
+ return false;
+ case AsmToken::Plus:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreatePlus(Res, getContext());
+ return false;
+ case AsmToken::Tilde:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateNot(Res, getContext());
+ return false;
+ }
+}
+
+bool AsmParser::ParseExpression(const MCExpr *&Res) {
+ SMLoc EndLoc;
+ return ParseExpression(Res, EndLoc);
+}
+
+const MCExpr *
+AsmParser::ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // if there is exactly one symbol.
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return 0;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
+ TokError("invalid variant on expression '" +
+ getTok().getIdentifier() + "' (already modified)");
+ return E;
+ }
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return 0;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+
+ if (!LHS && !RHS)
+ return 0;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
+/// ParseExpression - Parse an expression and return it.
+///
+/// expr ::= expr &&,|| expr -> lowest.
+/// expr ::= expr |,^,& expr
+/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
+/// expr ::= expr +,- expr
+/// expr ::= expr *,/,%,<<,>> expr -> highest.
+/// expr ::= primaryexpr
+///
+bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ // Parse the expression.
+ Res = 0;
+ if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
+ return true;
+
+ // As a special case, we support 'a op b @ modifier' by rewriting the
+ // expression to include the modifier. This is inefficient, but in general we
+ // expect users to use 'a@modifier op b'.
+ if (Lexer.getKind() == AsmToken::At) {
+ Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("unexpected symbol modifier following '@'");
+
+ MCSymbolRefExpr::VariantKind Variant =
+ MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+ if (Variant == MCSymbolRefExpr::VK_Invalid)
+ return TokError("invalid variant '" + getTok().getIdentifier() + "'");
+
+ const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+ if (!ModifiedRes) {
+ return TokError("invalid modifier '" + getTok().getIdentifier() +
+ "' (no symbols present)");
+ }
+
+ Res = ModifiedRes;
+ Lex();
+ }
+
+ // Try to constant fold it up front, if possible.
+ int64_t Value;
+ if (Res->EvaluateAsAbsolute(Value))
+ Res = MCConstantExpr::Create(Value, getContext());
+
+ return false;
+}
+
+bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ Res = 0;
+ return ParseParenExpr(Res, EndLoc) ||
+ ParseBinOpRHS(1, Res, EndLoc);
+}
+
+bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
+ const MCExpr *Expr;
+
+ SMLoc StartLoc = Lexer.getLoc();
+ if (ParseExpression(Expr))
+ return true;
+
+ if (!Expr->EvaluateAsAbsolute(Res))
+ return Error(StartLoc, "expected absolute expression");
+
+ return false;
+}
+
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind) {
+ switch (K) {
+ default:
+ return 0; // not a binop.
+
+ // Lowest Precedence: &&, ||, @
+ case AsmToken::AmpAmp:
+ Kind = MCBinaryExpr::LAnd;
+ return 1;
+ case AsmToken::PipePipe:
+ Kind = MCBinaryExpr::LOr;
+ return 1;
+
+
+ // Low Precedence: |, &, ^
+ //
+ // FIXME: gas seems to support '!' as an infix operator?
+ case AsmToken::Pipe:
+ Kind = MCBinaryExpr::Or;
+ return 2;
+ case AsmToken::Caret:
+ Kind = MCBinaryExpr::Xor;
+ return 2;
+ case AsmToken::Amp:
+ Kind = MCBinaryExpr::And;
+ return 2;
+
+ // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
+ case AsmToken::EqualEqual:
+ Kind = MCBinaryExpr::EQ;
+ return 3;
+ case AsmToken::ExclaimEqual:
+ case AsmToken::LessGreater:
+ Kind = MCBinaryExpr::NE;
+ return 3;
+ case AsmToken::Less:
+ Kind = MCBinaryExpr::LT;
+ return 3;
+ case AsmToken::LessEqual:
+ Kind = MCBinaryExpr::LTE;
+ return 3;
+ case AsmToken::Greater:
+ Kind = MCBinaryExpr::GT;
+ return 3;
+ case AsmToken::GreaterEqual:
+ Kind = MCBinaryExpr::GTE;
+ return 3;
+
+ // High Intermediate Precedence: +, -
+ case AsmToken::Plus:
+ Kind = MCBinaryExpr::Add;
+ return 4;
+ case AsmToken::Minus:
+ Kind = MCBinaryExpr::Sub;
+ return 4;
+
+ // Highest Precedence: *, /, %, <<, >>
+ case AsmToken::Star:
+ Kind = MCBinaryExpr::Mul;
+ return 5;
+ case AsmToken::Slash:
+ Kind = MCBinaryExpr::Div;
+ return 5;
+ case AsmToken::Percent:
+ Kind = MCBinaryExpr::Mod;
+ return 5;
+ case AsmToken::LessLess:
+ Kind = MCBinaryExpr::Shl;
+ return 5;
+ case AsmToken::GreaterGreater:
+ Kind = MCBinaryExpr::Shr;
+ return 5;
+ }
+}
+
+
+/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
+/// Res contains the LHS of the expression on input.
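+///
+/// For example, given "a + b * c" the '*' (precedence 5) binds more tightly
+/// than the pending '+' (precedence 4), so the RHS is parsed recursively and
+/// the result is a + (b * c).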
+bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
+ SMLoc &EndLoc) {
+ while (1) {
+ MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
+ unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
+
+ // If the next token is lower precedence than we are allowed to eat, return
+ // successfully with what we ate already.
+ if (TokPrec < Precedence)
+ return false;
+
+ Lex();
+
+ // Eat the next primary expression.
+ const MCExpr *RHS;
+ if (ParsePrimaryExpr(RHS, EndLoc)) return true;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ MCBinaryExpr::Opcode Dummy;
+ unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
+ if (TokPrec < NextTokPrec) {
+ if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
+ }
+
+ // Merge LHS and RHS according to operator.
+ Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
+ }
+}
+
+
+
+
+/// ParseStatement:
+/// ::= EndOfStatement
+/// ::= Label* Directive ...Operands... EndOfStatement
+/// ::= Label* Identifier OperandList* EndOfStatement
+bool AsmParser::ParseStatement() {
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Out.AddBlankLine();
+ Lex();
+ return false;
+ }
+
+ // Statements always start with an identifier or are a full line comment.
+ AsmToken ID = getTok();
+ SMLoc IDLoc = ID.getLoc();
+ StringRef IDVal;
+ int64_t LocalLabelVal = -1;
+ // A full line comment is a '#' as the first token.
+ if (Lexer.is(AsmToken::Hash)) {
+ EatToEndOfStatement();
+ return false;
+ }
+
+ // Allow an integer followed by a ':' as a directional local label.
+ if (Lexer.is(AsmToken::Integer)) {
+ LocalLabelVal = getTok().getIntVal();
+ if (LocalLabelVal < 0) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+ else {
+ IDVal = getTok().getString();
+ Lex(); // Consume the integer token to be used as an identifier token.
+ if (Lexer.getKind() != AsmToken::Colon) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ }
+ }
+
+ } else if (Lexer.is(AsmToken::Dot)) {
+ // Treat '.' as a valid identifier in this context.
+ Lex();
+ IDVal = ".";
+
+ } else if (ParseIdentifier(IDVal)) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+
+
+ // Handle conditional assembly here before checking for skipping. We
+ // have to do this so that .endif isn't skipped in a ".if 0" block for
+ // example.
+ if (IDVal == ".if")
+ return ParseDirectiveIf(IDLoc);
+ if (IDVal == ".ifdef")
+ return ParseDirectiveIfdef(IDLoc, true);
+ if (IDVal == ".ifndef" || IDVal == ".ifnotdef")
+ return ParseDirectiveIfdef(IDLoc, false);
+ if (IDVal == ".elseif")
+ return ParseDirectiveElseIf(IDLoc);
+ if (IDVal == ".else")
+ return ParseDirectiveElse(IDLoc);
+ if (IDVal == ".endif")
+ return ParseDirectiveEndIf(IDLoc);
+
+ // If we are in a ".if 0" block, ignore this statement.
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ return false;
+ }
+
+ // FIXME: Recurse on local labels?
+
+ // See what kind of statement we have.
+ switch (Lexer.getKind()) {
+ case AsmToken::Colon: {
+ CheckForValidSection();
+
+ // identifier ':' -> Label.
+ Lex();
+
+ // Diagnose attempt to use '.' as a label.
+ if (IDVal == ".")
+ return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
+
+ // Diagnose attempt to use a variable as a label.
+ //
+ // FIXME: Diagnostics. Note the location of the definition as a label.
+ // FIXME: This doesn't diagnose assignment to a symbol which has been
+ // implicitly marked as external.
+ MCSymbol *Sym;
+ if (LocalLabelVal == -1)
+ Sym = getContext().GetOrCreateSymbol(IDVal);
+ else
+ Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+ if (!Sym->isUndefined() || Sym->isVariable())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Emit the label.
+ Out.EmitLabel(Sym);
+
+ // Consume any end of statement token, if present, to avoid spurious
+ // AddBlankLine() calls.
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Lex();
+ if (Lexer.is(AsmToken::Eof))
+ return false;
+ }
+
+ return ParseStatement();
+ }
+
+ case AsmToken::Equal:
+ // identifier '=' ... -> assignment statement
+ Lex();
+
+ return ParseAssignment(IDVal, true);
+
+ default: // Normal instruction or directive.
+ break;
+ }
+
+ // If macros are enabled, check to see if this is a macro instantiation.
+ if (MacrosEnabled)
+ if (const Macro *M = MacroMap.lookup(IDVal))
+ return HandleMacroEntry(IDVal, IDLoc, M);
+
+ // Otherwise, we have a normal instruction or directive.
+ if (IDVal[0] == '.' && IDVal != ".") {
+ // Assembler features
+ if (IDVal == ".set" || IDVal == ".equ")
+ return ParseDirectiveSet(IDVal, true);
+ if (IDVal == ".equiv")
+ return ParseDirectiveSet(IDVal, false);
+
+ // Data directives
+
+ if (IDVal == ".ascii")
+ return ParseDirectiveAscii(IDVal, false);
+ if (IDVal == ".asciz" || IDVal == ".string")
+ return ParseDirectiveAscii(IDVal, true);
+
+ if (IDVal == ".byte")
+ return ParseDirectiveValue(1);
+ if (IDVal == ".short")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".value")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".2byte")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".long")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".int")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".4byte")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".quad")
+ return ParseDirectiveValue(8);
+ if (IDVal == ".8byte")
+ return ParseDirectiveValue(8);
+ if (IDVal == ".single" || IDVal == ".float")
+ return ParseDirectiveRealValue(APFloat::IEEEsingle);
+ if (IDVal == ".double")
+ return ParseDirectiveRealValue(APFloat::IEEEdouble);
+
+ if (IDVal == ".align") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ if (IDVal == ".align32") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
+ if (IDVal == ".balign")
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
+ if (IDVal == ".balignw")
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
+ if (IDVal == ".balignl")
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
+ if (IDVal == ".p2align")
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
+ if (IDVal == ".p2alignw")
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
+ if (IDVal == ".p2alignl")
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+
+ if (IDVal == ".org")
+ return ParseDirectiveOrg();
+
+ if (IDVal == ".fill")
+ return ParseDirectiveFill();
+ if (IDVal == ".space" || IDVal == ".skip")
+ return ParseDirectiveSpace();
+ if (IDVal == ".zero")
+ return ParseDirectiveZero();
+
+ // Symbol attribute directives
+
+ if (IDVal == ".globl" || IDVal == ".global")
+ return ParseDirectiveSymbolAttribute(MCSA_Global);
+ // ELF only? Should it be here?
+ if (IDVal == ".local")
+ return ParseDirectiveSymbolAttribute(MCSA_Local);
+ if (IDVal == ".hidden")
+ return ParseDirectiveSymbolAttribute(MCSA_Hidden);
+ if (IDVal == ".indirect_symbol")
+ return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
+ if (IDVal == ".internal")
+ return ParseDirectiveSymbolAttribute(MCSA_Internal);
+ if (IDVal == ".lazy_reference")
+ return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
+ if (IDVal == ".no_dead_strip")
+ return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+ if (IDVal == ".symbol_resolver")
+ return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
+ if (IDVal == ".private_extern")
+ return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
+ if (IDVal == ".protected")
+ return ParseDirectiveSymbolAttribute(MCSA_Protected);
+ if (IDVal == ".reference")
+ return ParseDirectiveSymbolAttribute(MCSA_Reference);
+ if (IDVal == ".weak")
+ return ParseDirectiveSymbolAttribute(MCSA_Weak);
+ if (IDVal == ".weak_definition")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
+ if (IDVal == ".weak_reference")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
+ if (IDVal == ".weak_def_can_be_hidden")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+
+ if (IDVal == ".comm" || IDVal == ".common")
+ return ParseDirectiveComm(/*IsLocal=*/false);
+ if (IDVal == ".lcomm")
+ return ParseDirectiveComm(/*IsLocal=*/true);
+
+ if (IDVal == ".abort")
+ return ParseDirectiveAbort();
+ if (IDVal == ".include")
+ return ParseDirectiveInclude();
+
+ if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64")
+ return TokError(Twine(IDVal) + " not supported yet");
+
+ // Look up the handler in the handler table.
+ std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
+ DirectiveMap.lookup(IDVal);
+ if (Handler.first)
+ return (*Handler.second)(Handler.first, IDVal, IDLoc);
+
+ // Target hook for parsing target specific directives.
+ if (!getTargetParser().ParseDirective(ID))
+ return false;
+
+ bool retval = Warning(IDLoc, "ignoring directive for now");
+ EatToEndOfStatement();
+ return retval;
+ }
+
+ CheckForValidSection();
+
+ // Canonicalize the opcode to lower case.
+ SmallString<128> Opcode;
+ for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
+ Opcode.push_back(tolower(IDVal[i]));
+
+ SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+ bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
+ ParsedOperands);
+
+ // Dump the parsed representation, if requested.
+ if (getShowParsedOperands()) {
+ SmallString<256> Str;
+ raw_svector_ostream OS(Str);
+ OS << "parsed instruction: [";
+ for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
+ if (i != 0)
+ OS << ", ";
+ ParsedOperands[i]->print(OS);
+ }
+ OS << "]";
+
+ PrintMessage(IDLoc, OS.str(), "note");
+ }
+
+ // If parsing succeeded, match the instruction.
+ if (!HadError)
+ HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
+ Out);
+
+ // Free any parsed operands.
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+
+ // Don't skip the rest of the line, the instruction parser is responsible for
+ // that.
+ return false;
+}
+
+bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body,
+ const std::vector<StringRef> &Parameters,
+ const std::vector<std::vector<AsmToken> > &A,
+ const SMLoc &L) {
+ raw_svector_ostream OS(Buf);
+ unsigned NParameters = Parameters.size();
+ if (NParameters != 0 && NParameters != A.size())
+ return Error(L, "Wrong number of arguments");
+
+ while (!Body.empty()) {
+ // Scan for the next substitution.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ if (!NParameters) {
+ // This macro has no parameters, look for $0, $1, etc.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' || isdigit(Next))
+ break;
+ } else {
+ // This macro has parameters, look for \foo, \bar, etc.
+ if (Body[Pos] == '\\' && Pos + 1 != End)
+ break;
+ }
+ }
+
+ // Add the prefix.
+ OS << Body.slice(0, Pos);
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ if (!NParameters) {
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ OS << '$';
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ OS << A.size();
+ break;
+
+ // $[0-9] => argument
+ default: {
+ // Missing arguments are ignored.
+ unsigned Index = Body[Pos+1] - '0';
+ if (Index >= A.size())
+ break;
+
+ // Otherwise substitute with the token values, with spaces eliminated.
+ for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+ break;
+ }
+ }
+ Pos += 2;
+ } else {
+ unsigned I = Pos + 1;
+ while (isalnum(Body[I]) && I + 1 != End)
+ ++I;
+
+ const char *Begin = Body.data() + Pos + 1;
+ StringRef Argument(Begin, I - (Pos + 1));
+ unsigned Index = 0;
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index] == Argument)
+ break;
+
+ // FIXME: We should error at the macro definition.
+ if (Index == NParameters)
+ return Error(L, "Parameter not found");
+
+ for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+
+ Pos += 1 + Argument.size();
+ }
+ // Update the scan point.
+ Body = Body.substr(Pos);
+ }
+
+ // We include the .endmacro in the buffer as our cue to exit the macro
+ // instantiation.
+ OS << ".endmacro\n";
+ return false;
+}
+
+MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ MemoryBuffer *I)
+ : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitLoc(EL)
+{
+}
+
+bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
+ const Macro *M) {
+ // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
+ // this, although we should protect against infinite loops.
+ if (ActiveMacros.size() == 20)
+ return TokError("macros cannot be nested more than 20 levels deep");
+
+ // Parse the macro instantiation arguments.
+ std::vector<std::vector<AsmToken> > MacroArguments;
+ MacroArguments.push_back(std::vector<AsmToken>());
+ unsigned ParenLevel = 0;
+ for (;;) {
+ if (Lexer.is(AsmToken::Eof))
+ return TokError("unexpected token in macro instantiation");
+ if (Lexer.is(AsmToken::EndOfStatement))
+ break;
+
+ // If we aren't inside parentheses and this is a comma, start a new token
+ // list.
+ if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+ MacroArguments.push_back(std::vector<AsmToken>());
+ } else {
+ // Adjust the current parentheses level.
+ if (Lexer.is(AsmToken::LParen))
+ ++ParenLevel;
+ else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+ --ParenLevel;
+
+ // Append the token to the current argument list.
+ MacroArguments.back().push_back(getTok());
+ }
+ Lex();
+ }
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ StringRef Body = M->Body;
+
+ if (expandMacro(Buf, Body, M->Parameters, MacroArguments, getTok().getLoc()))
+ return true;
+
+ MemoryBuffer *Instantiation =
+ MemoryBuffer::getMemBufferCopy(Buf.str(), "<instantiation>");
+
+ // Create the macro instantiation object and add to the current macro
+ // instantiation stack.
+ MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+ getTok().getLoc(),
+ Instantiation);
+ ActiveMacros.push_back(MI);
+
+ // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lex();
+
+ return false;
+}
+
+void AsmParser::HandleMacroExit() {
+ // Jump to the EndOfStatement we should return to, and consume it.
+ JumpToLoc(ActiveMacros.back()->ExitLoc);
+ Lex();
+
+ // Pop the instantiation entry.
+ delete ActiveMacros.back();
+ ActiveMacros.pop_back();
+}
+
+static void MarkUsed(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Binary:
+ MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
+ MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+ break;
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ break;
+ case MCExpr::SymbolRef: {
+ static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
+ break;
+ }
+ case MCExpr::Unary:
+ MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+ break;
+ }
+}
+
+bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
+ // FIXME: Use better location, we should use proper tokens.
+ SMLoc EqualLoc = Lexer.getLoc();
+
+ const MCExpr *Value;
+ if (ParseExpression(Value))
+ return true;
+
+ MarkUsed(Value);
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in assignment");
+
+ // Error on assignment to '.'.
+ if (Name == ".") {
+ return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported "
+ "(use '.space' or '.org')."));
+ }
+
+ // Eat the end of statement marker.
+ Lex();
+
+ // Validate that the LHS is allowed to be a variable (either it has not been
+ // used as a symbol, or it is an absolute symbol).
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ // Diagnose assignment to a label.
+ //
+ // FIXME: Diagnostics. Note the location of the definition as a label.
+ // FIXME: Diagnose assignment to protected identifier (e.g., register name).
+ if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+ ; // Allow redefinitions of undefined symbols only used in directives.
+ else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
+ return Error(EqualLoc, "redefinition of '" + Name + "'");
+ else if (!Sym->isVariable())
+ return Error(EqualLoc, "invalid assignment to '" + Name + "'");
+ else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
+ return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
+ Name + "'");
+
+ // Don't count these checks as uses.
+ Sym->setUsed(false);
+ } else
+ Sym = getContext().GetOrCreateSymbol(Name);
+
+ // FIXME: Handle '.'.
+
+ // Do the assignment.
+ Out.EmitAssignment(Sym, Value);
+
+ return false;
+}
+
+/// ParseIdentifier:
+/// ::= identifier
+/// ::= string
+bool AsmParser::ParseIdentifier(StringRef &Res) {
+ // The assembler has relaxed rules for accepting identifiers; in particular,
+ // we allow things like '.globl $foo', which would normally be separate
+ // tokens. At this level we have already lexed, so we cannot (currently)
+ // handle this as a context-dependent token; instead we detect adjacent
+ // tokens and return the combined identifier.
+ if (Lexer.is(AsmToken::Dollar)) {
+ SMLoc DollarLoc = getLexer().getLoc();
+
+ // Consume the dollar sign, and check for a following identifier.
+ Lex();
+ if (Lexer.isNot(AsmToken::Identifier))
+ return true;
+
+ // We have a '$' followed by an identifier, make sure they are adjacent.
+ if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ return true;
+
+ // Construct the joined identifier and consume the token.
+ Res = StringRef(DollarLoc.getPointer(),
+ getTok().getIdentifier().size() + 1);
+ Lex();
+ return false;
+ }
+
+ if (Lexer.isNot(AsmToken::Identifier) &&
+ Lexer.isNot(AsmToken::String))
+ return true;
+
+ Res = getTok().getIdentifier();
+
+ Lex(); // Consume the identifier token.
+
+ return false;
+}
+
+/// ParseDirectiveSet:
+/// ::= .equ identifier ',' expression
+/// ::= .equiv identifier ',' expression
+/// ::= .set identifier ',' expression
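+///
+/// For example (symbol names are illustrative):
+///   .set stack_size, 0x1000
+///   .equ version, 3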
+bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
+ StringRef Name;
+
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier after '" + Twine(IDVal) + "'");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "'");
+ Lex();
+
+ return ParseAssignment(Name, allow_redef);
+}
+
+bool AsmParser::ParseEscapedString(std::string &Data) {
+ assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
+
+ Data = "";
+ StringRef Str = getTok().getStringContents();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] != '\\') {
+ Data += Str[i];
+ continue;
+ }
+
+ // Recognize escaped characters. Note that the escape handling here loosely
+ // follows Darwin 'as'; notably, hex escapes are not supported.
+ ++i;
+ if (i == e)
+ return TokError("unexpected backslash at end of string");
+
+ // Recognize octal sequences.
+ if ((unsigned) (Str[i] - '0') <= 7) {
+ // Consume up to three octal characters.
+ unsigned Value = Str[i] - '0';
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+ }
+ }
+
+ if (Value > 255)
+ return TokError("invalid octal escape sequence (out of range)");
+
+ Data += (unsigned char) Value;
+ continue;
+ }
+
+ // Otherwise recognize individual escapes.
+ switch (Str[i]) {
+ default:
+ // Just reject invalid escape sequences for now.
+ return TokError("invalid escape sequence (unrecognized character)");
+
+ case 'b': Data += '\b'; break;
+ case 'f': Data += '\f'; break;
+ case 'n': Data += '\n'; break;
+ case 'r': Data += '\r'; break;
+ case 't': Data += '\t'; break;
+ case '"': Data += '"'; break;
+ case '\\': Data += '\\'; break;
+ }
+ }
+
+ return false;
+}
+
+/// ParseDirectiveAscii:
+/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
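+///
+/// For example:
+///   .ascii "no terminator"
+///   .asciz "nul-terminated", "strings"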
+bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+ std::string Data;
+ if (ParseEscapedString(Data))
+ return true;
+
+ getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
+ if (ZeroTerminated)
+ getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
+
+ Lex();
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveValue
+/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
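+///
+/// For example (symbol names are illustrative):
+///   .byte 0x2a
+///   .long 1, 2, 3
+///   .quad some_label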
+bool AsmParser::ParseDirectiveValue(unsigned Size) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (ParseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE);
+ } else
+ getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveRealValue
+/// ::= (.single | .double) [ expression (, expression)* ]
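+///
+/// For example:
+///   .single 2.5, -1.0
+///   .double 3.14159, inf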
+bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ // We don't truly support arithmetic on floating point expressions, so we
+ // have to manually parse unary prefixes.
+ bool IsNeg = false;
+ if (getLexer().is(AsmToken::Minus)) {
+ Lex();
+ IsNeg = true;
+ } else if (getLexer().is(AsmToken::Plus))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer) &&
+ getLexer().isNot(AsmToken::Real) &&
+ getLexer().isNot(AsmToken::Identifier))
+ return TokError("unexpected token in directive");
+
+ // Convert to an APFloat.
+ APFloat Value(Semantics);
+ StringRef IDVal = getTok().getString();
+ if (getLexer().is(AsmToken::Identifier)) {
+ if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
+ Value = APFloat::getInf(Semantics);
+ else if (!IDVal.compare_lower("nan"))
+ Value = APFloat::getNaN(Semantics, false, ~0);
+ else
+ return TokError("invalid floating point literal");
+ } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
+ APFloat::opInvalidOp)
+ return TokError("invalid floating point literal");
+ if (IsNeg)
+ Value.changeSign();
+
+ // Consume the numeric token.
+ Lex();
+
+ // Emit the value as an integer.
+ APInt AsInt = Value.bitcastToAPInt();
+ getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveSpace
+/// ::= .space expression [ , expression ]
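+///
+/// For example:
+///   .space 64
+///   .skip 16, 0xff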
+bool AsmParser::ParseDirectiveSpace() {
+ CheckForValidSection();
+
+ int64_t NumBytes;
+ if (ParseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.space' directive");
+ Lex();
+
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.space' directive");
+ }
+
+ Lex();
+
+ if (NumBytes <= 0)
+ return TokError("invalid number of bytes in '.space' directive");
+
+ // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
+ getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveZero
+/// ::= .zero expression
+bool AsmParser::ParseDirectiveZero() {
+ CheckForValidSection();
+
+ int64_t NumBytes;
+ if (ParseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t Val = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (ParseAbsoluteExpression(Val))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zero' directive");
+
+ Lex();
+
+ getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveFill
+/// ::= .fill expression , expression , expression
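+///
+/// For example, to emit four 2-byte values of 0x1234:
+///   .fill 4, 2, 0x1234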
+bool AsmParser::ParseDirectiveFill() {
+ CheckForValidSection();
+
+ int64_t NumValues;
+ if (ParseAbsoluteExpression(NumValues))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
+
+ int64_t FillSize;
+ if (ParseAbsoluteExpression(FillSize))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
+
+ int64_t FillExpr;
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.fill' directive");
+
+ Lex();
+
+ if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
+ return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
+
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveOrg
+/// ::= .org expression [ , expression ]
+bool AsmParser::ParseDirectiveOrg() {
+ CheckForValidSection();
+
+ const MCExpr *Offset;
+ if (ParseExpression(Offset))
+ return true;
+
+ // Parse optional fill expression.
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.org' directive");
+ Lex();
+
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.org' directive");
+ }
+
+ Lex();
+
+ // FIXME: Only limited forms of relocatable expressions are accepted here, it
+ // has to be relative to the current section.
+ getStreamer().EmitValueToOffset(Offset, FillExpr);
+
+ return false;
+}
+
+/// ParseDirectiveAlign
+/// ::= {.align, ...} expression [ , expression [ , expression ]]
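+///
+/// For example (whether '.align' takes a byte count or a power of two is
+/// target dependent; '.balign' and '.p2align' are unambiguous):
+///   .balign 8
+///   .p2align 4, 0, 15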
+bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+ CheckForValidSection();
+
+ SMLoc AlignmentLoc = getLexer().getLoc();
+ int64_t Alignment;
+ if (ParseAbsoluteExpression(Alignment))
+ return true;
+
+ SMLoc MaxBytesLoc;
+ bool HasFillExpr = false;
+ int64_t FillExpr = 0;
+ int64_t MaxBytesToFill = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ // The fill expression can be omitted while specifying a maximum number of
+ // alignment bytes, e.g:
+ // .align 3,,4
+ if (getLexer().isNot(AsmToken::Comma)) {
+ HasFillExpr = true;
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ MaxBytesLoc = getLexer().getLoc();
+ if (ParseAbsoluteExpression(MaxBytesToFill))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+ }
+ }
+
+ Lex();
+
+ if (!HasFillExpr)
+ FillExpr = 0;
+
+ // Compute alignment in bytes.
+ if (IsPow2) {
+ // FIXME: Diagnose overflow.
+ if (Alignment >= 32) {
+ Error(AlignmentLoc, "invalid alignment value");
+ Alignment = 31;
+ }
+
+ Alignment = 1ULL << Alignment;
+ }
+
+ // Diagnose a nonsensical maximum number of bytes to align.
+ if (MaxBytesLoc.isValid()) {
+ if (MaxBytesToFill < 1) {
+ Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
+ "many bytes, ignoring maximum bytes expression");
+ MaxBytesToFill = 0;
+ }
+
+ if (MaxBytesToFill >= Alignment) {
+ Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
+ "has no effect");
+ MaxBytesToFill = 0;
+ }
+ }
+
+ // Check whether we should use optimal code alignment for this .align
+ // directive.
+ bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
+ if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
+ ValueSize == 1 && UseCodeAlign) {
+ getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
+ } else {
+ // FIXME: Target specific behavior about how the "extra" bytes are filled.
+ getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
+ MaxBytesToFill);
+ }
+
+ return false;
+}
+
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
+bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveComm
+/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
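+///
+/// For example (the alignment operand is given as a power of two here, but
+/// some targets take it in bytes; see the handling below):
+///   .comm shared_buf, 256, 4
+///   .lcomm scratch, 64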
+bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+ CheckForValidSection();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+
+ // If this target takes alignments in bytes (not log) validate and convert.
+ if (Lexer.getMAI().getAlignmentIsInBytes()) {
+ if (!isPowerOf2_64(Pow2Alignment))
+ return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
+ Pow2Alignment = Log2_64(Pow2Alignment);
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+ // NOTE: a size of zero for a '.comm' should create an undefined symbol,
+ // but a size of zero for a '.lcomm' creates a BSS symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power-of-2 value; the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // '.lcomm' is equivalent to '.zerofill'.
+ // Create the Symbol as a common or local common with Size and Pow2Alignment
+ if (IsLocal) {
+ getStreamer().EmitZerofill(Ctx.getMachOSection(
+ "__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+ return false;
+ }
+
+ getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+}
+
+/// ParseDirectiveAbort
+/// ::= .abort [... message ...]
+bool AsmParser::ParseDirectiveAbort() {
+ // FIXME: Use loc from directive.
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef Str = ParseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.abort' directive");
+
+ Lex();
+
+ if (Str.empty())
+ Error(Loc, ".abort detected. Assembly stopping.");
+ else
+ Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ // FIXME: Actually abort assembly here.
+
+ return false;
+}
+
+/// ParseDirectiveInclude
+/// ::= .include "filename"
+bool AsmParser::ParseDirectiveInclude() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.include' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncludeLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.include' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to switch the lexer to the included file before consuming the end
+ // of statement to avoid losing it when we switch.
+ if (EnterIncludeFile(Filename)) {
+ Error(IncludeLoc, "Could not find include file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIf
+/// ::= .if expression
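+///
+/// For example (VERSION is assumed to be an absolute symbol defined earlier):
+///   .if VERSION >= 2
+///     .byte 2
+///   .else
+///     .byte 1
+///   .endif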
+bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ }
+ else {
+ int64_t ExprValue;
+ if (ParseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.if' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+ StringRef Name;
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ } else {
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier after '.ifdef'");
+
+ Lex();
+
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+
+ if (expect_defined)
+ TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+ else
+ TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElseIf
+/// ::= .elseif expression
+bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+ " an .elseif");
+ TheCondState.TheCond = AsmCond::ElseIfCond;
+
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet) {
+ TheCondState.Ignore = true;
+ EatToEndOfStatement();
+ }
+ else {
+ int64_t ExprValue;
+ if (ParseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.elseif' directive");
+
+ Lex();
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElse
+/// ::= .else
+bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.else' directive");
+
+ Lex();
+
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+ ".elseif");
+ TheCondState.TheCond = AsmCond::ElseCond;
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet)
+ TheCondState.Ignore = true;
+ else
+ TheCondState.Ignore = false;
+
+ return false;
+}
+
+/// ParseDirectiveEndIf
+/// ::= .endif
+bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.endif' directive");
+
+ Lex();
+
+ if ((TheCondState.TheCond == AsmCond::NoCond) ||
+ TheCondStack.empty())
+ Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
+ ".else");
+ if (!TheCondStack.empty()) {
+ TheCondState = TheCondStack.back();
+ TheCondStack.pop_back();
+ }
+
+ return false;
+}
+
+/// ParseDirectiveFile
+/// ::= .file [number] string
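+///
+/// For example (filenames are illustrative):
+///   .file "input.s"
+///   .file 1 "input.c"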
+bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
+ // FIXME: I'm not sure what this is.
+ int64_t FileNumber = -1;
+ SMLoc FileNumberLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Integer)) {
+ FileNumber = getTok().getIntVal();
+ Lex();
+
+ if (FileNumber < 1)
+ return TokError("file number less than one");
+ }
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.file' directive");
+
+ StringRef Filename = getTok().getString();
+ Filename = Filename.substr(1, Filename.size()-2);
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.file' directive");
+
+ if (FileNumber == -1)
+ getStreamer().EmitFileDirective(Filename);
+ else {
+ if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename))
+ Error(FileNumberLoc, "file number already allocated");
+ }
+
+ return false;
+}
+
+/// ParseDirectiveLine
+/// ::= .line [number]
+bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.line' directive");
+
+ int64_t LineNumber = getTok().getIntVal();
+ (void) LineNumber;
+ Lex();
+
+ // FIXME: Do something with the .line.
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.line' directive");
+
+ return false;
+}
+
+
+/// ParseDirectiveLoc
+/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
+/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
+/// The first number is a file number that must have been previously assigned
+/// with a '.file' directive; the second number is the line number; the
+/// optional third number is a column position (zero if not specified). The
+/// remaining optional items are '.loc' sub-directives.
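+///
+/// For example (assuming file number 1 was assigned by a prior '.file'):
+///   .loc 1 42 0 is_stmt 1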
+bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.loc' directive");
+ int64_t FileNumber = getTok().getIntVal();
+ if (FileNumber < 1)
+ return TokError("file number less than one in '.loc' directive");
+ if (!getContext().isValidDwarfFileNumber(FileNumber))
+ return TokError("unassigned file number in '.loc' directive");
+ Lex();
+
+ int64_t LineNumber = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ LineNumber = getTok().getIntVal();
+ if (LineNumber < 1)
+ return TokError("line number less than one in '.loc' directive");
+ Lex();
+ }
+
+ int64_t ColumnPos = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ ColumnPos = getTok().getIntVal();
+ if (ColumnPos < 0)
+ return TokError("column position less than zero in '.loc' directive");
+ Lex();
+ }
+
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ int64_t Discriminator = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (getParser().ParseIdentifier(Name))
+ return TokError("unexpected token in '.loc' directive");
+
+ if (Name == "basic_block")
+ Flags |= DWARF2_FLAG_BASIC_BLOCK;
+ else if (Name == "prologue_end")
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ else if (Name == "epilogue_begin")
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ else if (Name == "is_stmt") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be the constant 0 or 1.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value == 0)
+ Flags &= ~DWARF2_FLAG_IS_STMT;
+ else if (Value == 1)
+ Flags |= DWARF2_FLAG_IS_STMT;
+ else
+ return Error(Loc, "is_stmt value not 0 or 1");
+ }
+ else {
+ return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+ }
+ }
+ else if (Name == "isa") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be a constant greater or equal to 0.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value < 0)
+ return Error(Loc, "isa number less than zero");
+ Isa = Value;
+ }
+ else {
+ return Error(Loc, "isa number not a constant value");
+ }
+ }
+ else if (Name == "discriminator") {
+ if (getParser().ParseAbsoluteExpression(Discriminator))
+ return true;
+ }
+ else {
+ return Error(Loc, "unknown sub-directive in '.loc' directive");
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ }
+ }
+
+ getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
+ Isa, Discriminator, StringRef());
+
+ return false;
+}
+
+/// ParseDirectiveStabs
+/// ::= .stabs string, number, number, number
+bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ return TokError("unsupported directive '" + Directive + "'");
+}
+
+/// ParseDirectiveCFISections
+/// ::= .cfi_sections section [, section]
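+///
+/// For example:
+///   .cfi_sections .eh_frame, .debug_frame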
+bool GenericAsmParser::ParseDirectiveCFISections(StringRef,
+ SMLoc DirectiveLoc) {
+ StringRef Name;
+ bool EH = false;
+ bool Debug = false;
+
+ if (getParser().ParseIdentifier(Name))
+ return TokError("Expected an identifier");
+
+ if (Name == ".eh_frame")
+ EH = true;
+ else if (Name == ".debug_frame")
+ Debug = true;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getParser().ParseIdentifier(Name))
+ return TokError("Expected an identifier");
+
+ if (Name == ".eh_frame")
+ EH = true;
+ else if (Name == ".debug_frame")
+ Debug = true;
+ }
+
+ getStreamer().EmitCFISections(EH, Debug);
+
+ return false;
+}
+
+/// ParseDirectiveCFIStartProc
+/// ::= .cfi_startproc
+bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
+ SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIStartProc();
+ return false;
+}
+
+/// ParseDirectiveCFIEndProc
+/// ::= .cfi_endproc
+bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIEndProc();
+ return false;
+}
+
+/// ParseRegisterOrRegisterNumber - parse register name or number.
+bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+ SMLoc DirectiveLoc) {
+ unsigned RegNo;
+
+ if (getLexer().isNot(AsmToken::Integer)) {
+ if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc,
+ DirectiveLoc))
+ return true;
+ Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true);
+ } else
+ return getParser().ParseAbsoluteExpression(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfa
+/// ::= .cfi_def_cfa register, offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIDefCfa(Register, Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfaOffset
+/// ::= .cfi_def_cfa_offset offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIDefCfaOffset(Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIAdjustCfaOffset
+/// ::= .cfi_adjust_cfa_offset adjustment
+bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Adjustment = 0;
+ if (getParser().ParseAbsoluteExpression(Adjustment))
+ return true;
+
+ getStreamer().EmitCFIAdjustCfaOffset(Adjustment);
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfaRegister
+/// ::= .cfi_def_cfa_register register
+bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIDefCfaRegister(Register);
+ return false;
+}
+
+/// ParseDirectiveCFIOffset
+/// ::= .cfi_offset register, offset
+bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ int64_t Offset = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIOffset(Register, Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIRelOffset
+/// ::= .cfi_rel_offset register, offset
+bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIRelOffset(Register, Offset);
+ return false;
+}
+
+static bool isValidEncoding(int64_t Encoding) {
+ if (Encoding & ~0xff)
+ return false;
+
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return true;
+
+ const unsigned Format = Encoding & 0xf;
+ if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
+ Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
+ Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
+ Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
+ return false;
+
+ const unsigned Application = Encoding & 0x70;
+ if (Application != dwarf::DW_EH_PE_absptr &&
+ Application != dwarf::DW_EH_PE_pcrel)
+ return false;
+
+ return true;
+}
+
+/// ParseDirectiveCFIPersonalityOrLsda
+/// ::= .cfi_personality encoding, [symbol_name]
+/// ::= .cfi_lsda encoding, [symbol_name]
+bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Encoding = 0;
+ if (getParser().ParseAbsoluteExpression(Encoding))
+ return true;
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return false;
+
+ if (!isValidEncoding(Encoding))
+ return TokError("unsupported encoding.");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (IDVal == ".cfi_personality")
+ getStreamer().EmitCFIPersonality(Sym, Encoding);
+ else {
+ assert(IDVal == ".cfi_lsda");
+ getStreamer().EmitCFILsda(Sym, Encoding);
+ }
+ return false;
+}
+
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIRememberState();
+ return false;
+}
+
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_restore_state
+bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIRestoreState();
+ return false;
+}
+
+/// ParseDirectiveCFISameValue
+/// ::= .cfi_same_value register
+bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFISameValue(Register);
+
+ return false;
+}
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ getParser().MacrosEnabled = Directive == ".macros_on";
+
+ return false;
+}
+
+/// ParseDirectiveMacro
+/// ::= .macro name [parameters]
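+///
+/// For example (macro and parameter names are illustrative):
+///   .macro reserve count
+///     .fill \count, 1, 0
+///   .endm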
+bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ std::vector<StringRef> Parameters;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for(;;) {
+ StringRef Parameter;
+ if (getParser().ParseIdentifier(Parameter))
+ return TokError("expected identifier in directive");
+ Parameters.push_back(Parameter);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ break;
+ Lex();
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.macro' directive");
+
+ // Eat the end of statement.
+ Lex();
+
+ AsmToken EndToken, StartToken = getTok();
+
+ // Lex the macro definition.
+ for (;;) {
+ // Check whether we have reached the end of the file.
+ if (getLexer().is(AsmToken::Eof))
+ return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+ // Otherwise, check whether we have reached the '.endmacro'.
+ if (getLexer().is(AsmToken::Identifier) &&
+ (getTok().getIdentifier() == ".endm" ||
+ getTok().getIdentifier() == ".endmacro")) {
+ EndToken = getTok();
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + EndToken.getIdentifier() +
+ "' directive");
+ break;
+ }
+
+ // Otherwise, scan to the end of the statement.
+ getParser().EatToEndOfStatement();
+ }
+
+ if (getParser().MacroMap.lookup(Name)) {
+ return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+ }
+
+ const char *BodyStart = StartToken.getLoc().getPointer();
+ const char *BodyEnd = EndToken.getLoc().getPointer();
+ StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+ getParser().MacroMap[Name] = new Macro(Name, Body, Parameters);
+ return false;
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Directive + "' directive");
+
+ // If we are inside a macro instantiation, terminate the current
+ // instantiation.
+ if (!getParser().ActiveMacros.empty()) {
+ getParser().HandleMacroExit();
+ return false;
+ }
+
+ // Otherwise, this .endmacro is a stray entry in the file; well formed
+ // .endmacro directives are handled during the macro definition parsing.
+ return TokError("unexpected '" + Directive + "' in file, "
+ "no current macro definition");
+}
+
+bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
+ getParser().CheckForValidSection();
+
+ const MCExpr *Value;
+
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (DirName[1] == 's')
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
+
+ return false;
+}
+
+
+/// \brief Create an MCAsmParser instance.
+MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
+ MCContext &C, MCStreamer &Out,
+ const MCAsmInfo &MAI) {
+ return new AsmParser(T, SM, C, Out, MAI);
+}
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
new file mode 100644
index 000000000000..66ad384c7db2
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -0,0 +1,442 @@
+//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/COFF.h"
+using namespace llvm;
+
+namespace {
+
+class COFFAsmParser : public MCAsmParserExtension {
+ template<bool (COFFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<COFFAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind);
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+
+ // Win64 EH directives.
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
+ ".seh_proc");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>(
+ ".seh_endproc");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>(
+ ".seh_startchained");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>(
+ ".seh_endchained");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>(
+ ".seh_handler");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>(
+ ".seh_handlerdata");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>(
+ ".seh_pushreg");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>(
+ ".seh_setframe");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>(
+ ".seh_stackalloc");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>(
+ ".seh_savereg");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>(
+ ".seh_savexmm");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>(
+ ".seh_pushframe");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
+ ".seh_endprologue");
+ }
+
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ }
+
+ bool ParseDirectiveDef(StringRef, SMLoc);
+ bool ParseDirectiveScl(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveEndef(StringRef, SMLoc);
+
+ // Win64 EH directives.
+ bool ParseSEHDirectiveStartProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveStartChained(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndChained(StringRef, SMLoc);
+ bool ParseSEHDirectiveHandler(StringRef, SMLoc);
+ bool ParseSEHDirectiveHandlerData(StringRef, SMLoc);
+ bool ParseSEHDirectivePushReg(StringRef, SMLoc);
+ bool ParseSEHDirectiveSetFrame(StringRef, SMLoc);
+ bool ParseSEHDirectiveAllocStack(StringRef, SMLoc);
+ bool ParseSEHDirectiveSaveReg(StringRef, SMLoc);
+ bool ParseSEHDirectiveSaveXMM(StringRef, SMLoc);
+ bool ParseSEHDirectivePushFrame(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProlog(StringRef, SMLoc);
+
+ bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
+ bool ParseSEHRegisterNumber(unsigned &RegNo);
+public:
+ COFFAsmParser() {}
+};
+
+} // end anonymous namespace.
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getCOFFSection(
+ Section, Characteristics, Kind));
+
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
+ StringRef SymbolName;
+
+ if (getParser().ParseIdentifier(SymbolName))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+ getStreamer().BeginCOFFSymbolDef(Sym);
+
+ Lex();
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
+ int64_t SymbolStorageClass;
+ if (getParser().ParseAbsoluteExpression(SymbolStorageClass))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ int64_t Type;
+ if (getParser().ParseAbsoluteExpression(Type))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolType(Type);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EndCOFFSymbolDef();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().ParseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitWin64EHStartProc(Symbol);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndProc();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHStartChained();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndChained();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().ParseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify one or both of @unwind or @except");
+ Lex();
+ bool unwind = false, except = false;
+ if (ParseAtUnwindOrAtExcept(unwind, except))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (ParseAtUnwindOrAtExcept(unwind, except))
+ return true;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *handler = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitWin64EHHandler(handler, unwind, except);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHHandlerData();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) {
+ unsigned Reg;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHPushReg(Reg);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify a stack pointer offset");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Off))
+ return true;
+
+ if (Off & 0x0F)
+ return Error(startLoc, "offset is not a multiple of 16");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHSetFrame(Reg, Off);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) {
+ int64_t Size;
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ if (Size & 7)
+ return Error(startLoc, "size is not a multiple of 8");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHAllocStack(Size);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Off))
+ return true;
+
+ if (Off & 7)
+ return Error(startLoc, "size is not a multiple of 8");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ // FIXME: Err on %xmm* registers
+ getStreamer().EmitWin64EHSaveReg(Reg, Off);
+ return false;
+}
+
+// FIXME: This method is inherently x86-specific. It should really be in the
+// x86 backend.
+bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Off))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (Off & 0x0F)
+ return Error(startLoc, "offset is not a multiple of 16");
+
+ Lex();
+ // FIXME: Err on non-%xmm* registers
+ getStreamer().EmitWin64EHSaveXMM(Reg, Off);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) {
+ bool Code = false;
+ StringRef CodeID;
+ if (getLexer().is(AsmToken::At)) {
+ SMLoc startLoc = getLexer().getLoc();
+ Lex();
+ if (!getParser().ParseIdentifier(CodeID)) {
+ if (CodeID != "code")
+ return Error(startLoc, "expected @code");
+ Code = true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHPushFrame(Code);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndProlog();
+ return false;
+}
+
+bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
+ StringRef identifier;
+ if (getLexer().isNot(AsmToken::At))
+ return TokError("a handler attribute must begin with '@'");
+ SMLoc startLoc = getLexer().getLoc();
+ Lex();
+ if (getParser().ParseIdentifier(identifier))
+ return Error(startLoc, "expected @unwind or @except");
+ if (identifier == "unwind")
+ unwind = true;
+ else if (identifier == "except")
+ except = true;
+ else
+ return Error(startLoc, "expected @unwind or @except");
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
+ SMLoc startLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Percent)) {
+ const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+ SMLoc endLoc;
+ unsigned LLVMRegNo;
+ if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
+ return true;
+
+ // Check that this is a non-volatile register.
+ const unsigned *NVRegs = TAI.getCalleeSavedRegs();
+ unsigned i;
+ for (i = 0; NVRegs[i] != 0; ++i)
+ if (NVRegs[i] == LLVMRegNo)
+ break;
+ if (NVRegs[i] == 0)
+ return Error(startLoc, "expected non-volatile register");
+
+ int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo);
+ if (SEHRegNo < 0)
+ return Error(startLoc,"register can't be represented in SEH unwind info");
+ RegNo = SEHRegNo;
+ }
+ else {
+ int64_t n;
+ if (getParser().ParseAbsoluteExpression(n))
+ return true;
+ if (n > 15)
+ return Error(startLoc, "register number is too high");
+ RegNo = n;
+ }
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createCOFFAsmParser() {
+ return new COFFAsmParser;
+}
+
+}
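
The SEH directive handlers above validate stack sizes and offsets with bit masks ("Size & 7", "Off & 0x0F"). A minimal standalone sketch, outside this change and using a hypothetical helper name, of why masking with N-1 tests divisibility by a power of two N:

    // Standalone sketch (not part of this change); isMultipleOf is a
    // hypothetical helper, not an LLVM API.  For a power of two N, x is a
    // multiple of N exactly when its low log2(N) bits are zero, i.e.
    // (x & (N - 1)) == 0 -- the test behind "Size & 7" and "Off & 0x0F" above.
    #include <cassert>
    #include <cstdint>

    static bool isMultipleOf(uint64_t X, uint64_t PowerOfTwo) {
      assert(PowerOfTwo && (PowerOfTwo & (PowerOfTwo - 1)) == 0 &&
             "divisor must be a power of two");
      return (X & (PowerOfTwo - 1)) == 0;
    }

    int main() {
      assert(isMultipleOf(24, 8));    // 24 & 7 == 0
      assert(!isMultipleOf(20, 8));   // 20 & 7 == 4
      assert(isMultipleOf(32, 16));   // 32 & 0x0F == 0
      assert(!isMultipleOf(24, 16));  // 24 & 0x0F == 8
      return 0;
    }

The mask form keeps the error paths above cheap: no division or modulo is needed to reject a misaligned operand.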
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
new file mode 100644
index 000000000000..6f450682cbeb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -0,0 +1,668 @@
+//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace llvm;
+
+namespace {
+
+/// \brief Implementation of directive handling which is shared across all
+/// Darwin targets.
+class DarwinAsmParser : public MCAsmParserExtension {
+ template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<DarwinAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(const char *Segment, const char *Section,
+ unsigned TAA = 0, unsigned ImplicitAlign = 0,
+ unsigned StubSize = 0);
+
+public:
+ DarwinAsmParser() {}
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+ ".subsections_via_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+ ".secure_log_unique");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+ ".secure_log_reset");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
+
+ // Special section directives.
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
+
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident");
+ }
+
+ bool ParseDirectiveDesc(StringRef, SMLoc);
+ bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
+ bool ParseDirectiveLsym(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
+ bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
+ bool ParseDirectiveTBSS(StringRef, SMLoc);
+ bool ParseDirectiveZerofill(StringRef, SMLoc);
+
+ // Named Section Directive
+ bool ParseSectionDirectiveConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__const");
+ }
+ bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__static_const");
+ }
+ bool ParseSectionDirectiveCString(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS, 4);
+ }
+ bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS, 8);
+ }
+ bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS, 16);
+ }
+ bool ParseSectionDirectiveConstructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__constructor");
+ }
+ bool ParseSectionDirectiveDestructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__destructor");
+ }
+ bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init0");
+ }
+ bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init1");
+ }
+ bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__symbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ // FIXME: Different on PPC and ARM.
+ 0, 16);
+ }
+ bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__picsymbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26);
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__data");
+ }
+ bool ParseSectionDirectiveStaticData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__static_data");
+ }
+ bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveDyld(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__dyld");
+ }
+ bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveConstData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__const");
+ }
+ bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__meta_class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__protocol",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__string_object",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__message_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__symbols",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__category",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__instance_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__module_info",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__selector_strs",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR);
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ }
+ bool ParseSectionDirectiveTLV(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
+ }
+ bool ParseSectionDirectiveIdent(StringRef, SMLoc) {
+ // Darwin silently ignores the .ident directive.
+ getParser().EatToEndOfStatement();
+ return false;
+ }
+ bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
+ }
+
+};
+
+}
+
+bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
+ const char *Section,
+ unsigned TAA, unsigned Align,
+ unsigned StubSize) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ // FIXME: Arch specific.
+ bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+
+ // Set the implicit alignment, if any.
+ //
+ // FIXME: This isn't really what 'as' does; I think it just uses the implicit
+ // alignment on the section (e.g., if one manually inserts bytes into the
+ // section, then just issuing the section switch directive will not realign
+ // the section). However, this is arguably more reasonable behavior, and there
+ // is no good reason for someone to intentionally emit incorrectly sized
+ // values into the implicitly aligned sections.
+ if (Align)
+ getStreamer().EmitValueToAlignment(Align, 0, 1, 0);
+
+ return false;
+}
+
+/// ParseDirectiveDesc
+/// ::= .desc identifier , expression
+bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.desc' directive");
+ Lex();
+
+ int64_t DescValue;
+ if (getParser().ParseAbsoluteExpression(DescValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.desc' directive");
+
+ Lex();
+
+ // Set the n_desc field of this Symbol to this DescValue
+ getStreamer().EmitSymbolDesc(Sym, DescValue);
+
+ return false;
+}
+
+/// ParseDirectiveDumpOrLoad
+/// ::= ( .dump | .load ) "filename"
+bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
+ SMLoc IDLoc) {
+ bool IsDump = Directive == ".dump";
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.dump' or '.load' directive");
+
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.dump' or '.load' directive");
+
+ Lex();
+
+ // FIXME: If/when .dump and .load are implemented they will be done in the
+ // assembly parser and not have any need for an MCStreamer API.
+ if (IsDump)
+ return Warning(IDLoc, "ignoring directive .dump for now");
+ else
+ return Warning(IDLoc, "ignoring directive .load for now");
+}
+
+/// ParseDirectiveLsym
+/// ::= .lsym identifier , expression
+bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.lsym' directive");
+ Lex();
+
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.lsym' directive");
+
+ Lex();
+
+ // We don't currently support this directive.
+ //
+ // FIXME: Diagnostic location!
+ (void) Sym;
+ return TokError("directive '.lsym' is unsupported");
+}
+
+/// ParseDirectiveSection:
+/// ::= .section identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef SectionName;
+ if (getParser().ParseIdentifier(SectionName))
+ return Error(Loc, "expected identifier after '.section' directive");
+
+ // Verify there is a following comma.
+ if (!getLexer().is(AsmToken::Comma))
+ return TokError("unexpected token in '.section' directive");
+
+ std::string SectionSpec = SectionName;
+ SectionSpec += ",";
+
+ // Add all the tokens until the end of the line, ParseSectionSpecifier will
+ // handle this.
+ StringRef EOL = getLexer().LexUntilEndOfStatement();
+ SectionSpec.append(EOL.begin(), EOL.end());
+
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.section' directive");
+ Lex();
+
+
+ StringRef Segment, Section;
+ unsigned StubSize;
+ unsigned TAA;
+ bool TAAParsed;
+ std::string ErrorStr =
+ MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
+ TAA, TAAParsed, StubSize);
+
+ if (!ErrorStr.empty())
+ return Error(Loc, ErrorStr.c_str());
+
+ // FIXME: Arch specific.
+ bool isText = Segment == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+ return false;
+}
+
+/// ParseDirectiveSecureLogUnique
+/// ::= .secure_log_unique ... message ...
+bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
+ StringRef LogMessage = getParser().ParseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_unique' directive");
+
+ if (getContext().getSecureLogUsed())
+ return Error(IDLoc, ".secure_log_unique specified multiple times");
+
+ // Get the secure log path.
+ const char *SecureLogFile = getContext().getSecureLogFile();
+ if (SecureLogFile == NULL)
+ return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
+ "environment variable unset.");
+
+ // Open the secure log file if we haven't already.
+ raw_ostream *OS = getContext().getSecureLog();
+ if (OS == NULL) {
+ std::string Err;
+ OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append);
+ if (!Err.empty()) {
+ delete OS;
+ return Error(IDLoc, Twine("can't open secure log file: ") +
+ SecureLogFile + " (" + Err + ")");
+ }
+ getContext().setSecureLog(OS);
+ }
+
+ // Write the message.
+ int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
+ *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
+ << LogMessage + "\n";
+
+ getContext().setSecureLogUsed(true);
+
+ return false;
+}
+
+/// ParseDirectiveSecureLogReset
+/// ::= .secure_log_reset
+bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_reset' directive");
+
+ Lex();
+
+ getContext().setSecureLogUsed(false);
+
+ return false;
+}
+
+/// ParseDirectiveSubsectionsViaSymbols
+/// ::= .subsections_via_symbols
+bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.subsections_via_symbols' directive");
+
+ Lex();
+
+ getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return false;
+}
+
+/// ParseDirectiveTBSS
+/// ::= .tbss identifier, size, align
+bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.tbss' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than"
+ "zero");
+
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less"
+ "than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ getStreamer().EmitTBSSSymbol(getContext().getMachOSection(
+ "__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ 0, SectionKind::getThreadBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+/// ParseDirectiveZerofill
+/// ::= .zerofill segname , sectname [, identifier , size_expression [
+/// , align_expression ]]
+bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
+ StringRef Segment;
+ if (getParser().ParseIdentifier(Segment))
+ return TokError("expected segment name after '.zerofill' directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Section;
+ if (getParser().ParseIdentifier(Section))
+ return TokError("expected section name after comma in '.zerofill' "
+ "directive");
+
+ // If this is the end of the line, all that was wanted was to create the
+ // section but with no symbol.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ // Create the zerofill section but no symbol
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()));
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef IDStr;
+ if (getParser().ParseIdentifier(IDStr))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zerofill' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
+ "than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
+ "can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the zerofill Symbol with Size and Pow2Alignment
+ //
+ // FIXME: Arch specific.
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createDarwinAsmParser() {
+ return new DarwinAsmParser;
+}
+
+}
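
Both the .tbss and .zerofill handlers above accept the alignment as a power-of-two exponent and hand the streamer 1 << Pow2Alignment bytes; the FIXMEs note that overflow of that shift is not yet diagnosed. A small standalone sketch, outside this change and with a hypothetical helper name, of the conversion and the overflow case:

    // Standalone sketch (not part of this change); pow2AlignToBytes is a
    // hypothetical helper, not an LLVM API.  It converts a power-of-two
    // exponent, as taken by .tbss/.zerofill, into a byte alignment and
    // reports the shift overflow the directives above leave undiagnosed.
    #include <cassert>
    #include <cstdint>

    static uint64_t pow2AlignToBytes(int64_t Pow2Alignment) {
      if (Pow2Alignment < 0 || Pow2Alignment >= 64)
        return 0;                         // signal a bad or overflowing exponent
      return uint64_t(1) << Pow2Alignment;
    }

    int main() {
      assert(pow2AlignToBytes(0) == 1);   // exponent 0 -> 1-byte alignment
      assert(pow2AlignToBytes(3) == 8);   // exponent 3 -> 8-byte alignment
      assert(pow2AlignToBytes(4) == 16);
      assert(pow2AlignToBytes(70) == 0);  // would overflow a 64-bit shift
      return 0;
    }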
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
new file mode 100644
index 000000000000..dcf689a6f0e7
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -0,0 +1,533 @@
+//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+namespace {
+
+class ELFAsmParser : public MCAsmParserExtension {
+ template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<ELFAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind);
+ bool SeenIdent;
+
+public:
+ ELFAsmParser() : SeenIdent(false) {
+ BracketExpressionsSupported = true;
+ }
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
+ }
+
+ // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
+ // the best way for us to get access to it?
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ }
+ bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+ }
+ bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+ }
+ bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+ }
+ bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+ }
+ bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
+ return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSize(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveIdent(StringRef, SMLoc);
+ bool ParseDirectiveSymver(StringRef, SMLoc);
+ bool ParseDirectiveWeakref(StringRef, SMLoc);
+
+private:
+ bool ParseSectionName(StringRef &SectionName);
+};
+
+}
+
+bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getELFSection(
+ Section, Type, Flags, Kind));
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getStreamer().EmitELFSize(Sym, Expr);
+ return false;
+}
+
+bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
+ // A section name can contain -, so we cannot just use
+ // ParseIdentifier.
+ SMLoc FirstLoc = getLexer().getLoc();
+ unsigned Size = 0;
+
+ if (getLexer().is(AsmToken::String)) {
+ SectionName = getTok().getIdentifier();
+ Lex();
+ return false;
+ }
+
+ for (;;) {
+ StringRef Tmp;
+ unsigned CurSize;
+
+ SMLoc PrevLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Minus)) {
+ CurSize = 1;
+ Lex(); // Consume the "-".
+ } else if (getLexer().is(AsmToken::String)) {
+ CurSize = getTok().getIdentifier().size() + 2;
+ Lex();
+ } else if (getLexer().is(AsmToken::Identifier)) {
+ CurSize = getTok().getIdentifier().size();
+ Lex();
+ } else {
+ break;
+ }
+
+ Size += CurSize;
+ SectionName = StringRef(FirstLoc.getPointer(), Size);
+
+ // Make sure the following token is adjacent.
+ if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer())
+ break;
+ }
+ if (Size == 0)
+ return true;
+
+ return false;
+}
+
+static SectionKind computeSectionKind(unsigned Flags) {
+ if (Flags & ELF::SHF_EXECINSTR)
+ return SectionKind::getText();
+ if (Flags & ELF::SHF_TLS)
+ return SectionKind::getThreadData();
+ return SectionKind::getDataRel();
+}
+
+static int parseSectionFlags(StringRef flagsStr) {
+ int flags = 0;
+
+ for (unsigned i = 0; i < flagsStr.size(); i++) {
+ switch (flagsStr[i]) {
+ case 'a':
+ flags |= ELF::SHF_ALLOC;
+ break;
+ case 'x':
+ flags |= ELF::SHF_EXECINSTR;
+ break;
+ case 'w':
+ flags |= ELF::SHF_WRITE;
+ break;
+ case 'M':
+ flags |= ELF::SHF_MERGE;
+ break;
+ case 'S':
+ flags |= ELF::SHF_STRINGS;
+ break;
+ case 'T':
+ flags |= ELF::SHF_TLS;
+ break;
+ case 'c':
+ flags |= ELF::XCORE_SHF_CP_SECTION;
+ break;
+ case 'd':
+ flags |= ELF::XCORE_SHF_DP_SECTION;
+ break;
+ case 'G':
+ flags |= ELF::SHF_GROUP;
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ return flags;
+}
+
+bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
+ getStreamer().PushSection();
+
+ if (ParseDirectiveSection(s, loc)) {
+ getStreamer().PopSection();
+ return true;
+ }
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().PopSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ StringRef SectionName;
+
+ if (ParseSectionName(SectionName))
+ return TokError("expected identifier in directive");
+
+ StringRef TypeName;
+ int64_t Size = 0;
+ StringRef GroupName;
+ unsigned Flags = 0;
+
+ // Set the defaults first.
+ if (SectionName == ".fini" || SectionName == ".init" ||
+ SectionName == ".rodata")
+ Flags |= ELF::SHF_ALLOC;
+ if (SectionName == ".fini" || SectionName == ".init")
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in directive");
+
+ StringRef FlagsStr = getTok().getStringContents();
+ Lex();
+
+ int extraFlags = parseSectionFlags(FlagsStr);
+ if (extraFlags < 0)
+ return TokError("unknown flag");
+ Flags |= extraFlags;
+
+ bool Mergeable = Flags & ELF::SHF_MERGE;
+ bool Group = Flags & ELF::SHF_GROUP;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ if (Mergeable)
+ return TokError("Mergeable section must specify the type");
+ if (Group)
+ return TokError("Group section must specify the type");
+ } else {
+ Lex();
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+
+ Lex();
+ if (getParser().ParseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+
+ if (Mergeable) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected the entry size");
+ Lex();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+ if (Size <= 0)
+ return TokError("entry size must be positive");
+ }
+
+ if (Group) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected group name");
+ Lex();
+ if (getParser().ParseIdentifier(GroupName))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ StringRef Linkage;
+ if (getParser().ParseIdentifier(Linkage))
+ return true;
+ if (Linkage != "comdat")
+ return TokError("Linkage must be 'comdat'");
+ }
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ unsigned Type = ELF::SHT_PROGBITS;
+
+ if (!TypeName.empty()) {
+ if (TypeName == "init_array")
+ Type = ELF::SHT_INIT_ARRAY;
+ else if (TypeName == "fini_array")
+ Type = ELF::SHT_FINI_ARRAY;
+ else if (TypeName == "preinit_array")
+ Type = ELF::SHT_PREINIT_ARRAY;
+ else if (TypeName == "nobits")
+ Type = ELF::SHT_NOBITS;
+ else if (TypeName == "progbits")
+ Type = ELF::SHT_PROGBITS;
+ else if (TypeName == "note")
+ Type = ELF::SHT_NOTE;
+ else if (TypeName == "unwind")
+ Type = ELF::SHT_X86_64_UNWIND;
+ else
+ return TokError("unknown section type");
+ }
+
+ SectionKind Kind = computeSectionKind(Flags);
+ getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
+ Flags, Kind, Size,
+ GroupName));
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection == NULL)
+ return TokError(".previous without corresponding .section");
+ getStreamer().SwitchSection(PreviousSection);
+
+ return false;
+}
+
+/// ParseDirectiveELFType
+/// ::= .type identifier , @attribute
+bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.type' directive");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+ Lex();
+
+ StringRef Type;
+ SMLoc TypeLoc;
+
+ TypeLoc = getLexer().getLoc();
+ if (getParser().ParseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("function", MCSA_ELF_TypeFunction)
+ .Case("object", MCSA_ELF_TypeObject)
+ .Case("tls_object", MCSA_ELF_TypeTLS)
+ .Case("common", MCSA_ELF_TypeCommon)
+ .Case("notype", MCSA_ELF_TypeNoType)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Default(MCSA_Invalid);
+
+ if (Attr == MCSA_Invalid)
+ return Error(TypeLoc, "unsupported attribute in '.type' directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.type' directive");
+
+ Lex();
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ return false;
+}
+
+/// ParseDirectiveIdent
+/// ::= .ident string
+bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.ident' directive");
+
+ StringRef Data = getTok().getIdentifier();
+
+ Lex();
+
+ const MCSection *Comment =
+ getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ SectionKind::getReadOnly(),
+ 1, "");
+
+ getStreamer().PushSection();
+ getStreamer().SwitchSection(Comment);
+ if (!SeenIdent) {
+ getStreamer().EmitIntValue(0, 1);
+ SeenIdent = true;
+ }
+ getStreamer().EmitBytes(Data, 0);
+ getStreamer().EmitIntValue(0, 1);
+ getStreamer().PopSection();
+ return false;
+}
+
+/// ParseDirectiveSymver
+/// ::= .symver foo, bar2@zed
+bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef AliasName;
+ if (getParser().ParseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (AliasName.find('@') == StringRef::npos)
+ return TokError("expected a '@' in the name");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+
+ getStreamer().EmitAssignment(Alias, Value);
+ return false;
+}
+
+/// ParseDirectiveWeakref
+/// ::= .weakref foo, bar
+bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
+ // FIXME: Share code with the other alias building directives.
+
+ StringRef AliasName;
+ if (getParser().ParseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitWeakReference(Alias, Sym);
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createELFAsmParser() {
+ return new ELFAsmParser;
+}
+
+}
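
ParseSectionName above stitches identifier, string, and '-' tokens back into a single section name and stops as soon as the next token is no longer adjacent, which it detects with plain pointer arithmetic into the source buffer (PrevLoc.getPointer() + CurSize versus the next token's pointer). A standalone sketch, outside this change and using a hypothetical helper, of that adjacency test:

    // Standalone sketch (not part of this change); tokensAdjacent is a
    // hypothetical helper, not an LLVM API.  Two tokens lexed from the same
    // buffer are adjacent exactly when the end of the first is the start of
    // the second -- the condition ParseSectionName uses to keep appending.
    #include <cassert>
    #include <cstddef>

    static bool tokensAdjacent(const char *PrevStart, size_t PrevSize,
                               const char *NextStart) {
      return PrevStart + PrevSize == NextStart;
    }

    int main() {
      const char Buf[] = "foo-bar baz";
      // "foo" is Buf[0..2], "-" is Buf[3], "bar" is Buf[4..6], "baz" starts at Buf[8].
      assert(tokensAdjacent(Buf, 3, Buf + 3));      // "foo" then "-": keep going
      assert(tokensAdjacent(Buf + 3, 1, Buf + 4));  // "-" then "bar": keep going
      assert(!tokensAdjacent(Buf + 4, 3, Buf + 8)); // a space intervenes: stop
      return 0;
    }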
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
new file mode 100644
index 000000000000..dceece78ba10
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -0,0 +1,27 @@
+//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) {
+}
+
+MCAsmLexer::~MCAsmLexer() {
+}
+
+SMLoc MCAsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+SMLoc AsmToken::getLoc() const {
+ return SMLoc::getFromPointer(Str.data());
+}
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
new file mode 100644
index 000000000000..4030e41036aa
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -0,0 +1,48 @@
+//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
+}
+
+MCAsmParser::~MCAsmParser() {
+}
+
+void MCAsmParser::setTargetParser(TargetAsmParser &P) {
+ assert(!TargetParser && "Target parser is already initialized!");
+ TargetParser = &P;
+ TargetParser->Initialize(*this);
+}
+
+const AsmToken &MCAsmParser::getTok() {
+ return getLexer().getTok();
+}
+
+bool MCAsmParser::TokError(const Twine &Msg) {
+ Error(getLexer().getLoc(), Msg);
+ return true;
+}
+
+bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
+ SMLoc L;
+ return ParseExpression(Res, L);
+}
+
+void MCParsedAsmOperand::dump() const {
+ dbgs() << " " << *this;
+}
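
Every handler in these parser files follows the convention that MCAsmParser::TokError encodes above: report the diagnostic, then return true, so true means failure, false means success, and callers can bail out with early returns. A minimal standalone sketch, outside this change and with hypothetical names, of that convention:

    // Standalone sketch (not part of this change); tokError and parseDirective
    // are hypothetical stand-ins, not LLVM APIs.  They mirror the "return true
    // on error" convention used by TokError and the directive handlers above.
    #include <cstdio>
    #include <string>

    static bool tokError(const std::string &Msg) {
      std::fprintf(stderr, "error: %s\n", Msg.c_str());
      return true;                        // true == error, as with TokError
    }

    static bool parseDirective(bool HaveComma) {
      if (!HaveComma)
        return tokError("unexpected token in directive");
      return false;                       // false == success
    }

    int main() {
      return parseDirective(true) ? 1 : 0; // exits 0: the "directive" parsed
    }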
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
new file mode 100644
index 000000000000..3f25a14926b6
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -0,0 +1,22 @@
+//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+using namespace llvm;
+
+MCAsmParserExtension::MCAsmParserExtension() :
+ BracketExpressionsSupported(false) {
+}
+
+MCAsmParserExtension::~MCAsmParserExtension() {
+}
+
+void MCAsmParserExtension::Initialize(MCAsmParser &Parser) {
+ this->Parser = &Parser;
+}
diff --git a/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp
new file mode 100644
index 000000000000..512f6b044911
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp
@@ -0,0 +1,19 @@
+//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+TargetAsmParser::TargetAsmParser()
+ : AvailableFeatures(0)
+{
+}
+
+TargetAsmParser::~TargetAsmParser() {
+}
diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp
new file mode 100644
index 000000000000..6098e6b8f38b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp
@@ -0,0 +1,234 @@
+//===- lib/MC/MCPureStreamer.cpp - MC "Pure" Object Output ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
+// FIXME: Remove this.
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCPureStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+ MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+ virtual void Finish();
+
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EndCOFFSymbolDef() {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitFileDirective(StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ return false;
+ }
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCPureStreamer::InitSections() {
+ // FIXME: To what!?
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+
+}
+
+void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ Symbol->setSection(*getCurrentSection());
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // We have to create a new fragment if this is an atom defining symbol,
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+ new MCDataFragment(getCurrentSectionData());
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ report_fatal_error("not yet implemented in pure streamer");
+}
+
+void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
+void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ IF->getCode() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCPureStreamer::Finish() {
+ // FIXME: Handle DWARF tables?
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE) {
+ return new MCPureStreamer(Context, TAB, OS, CE);
+}
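
EmitInstToData above encodes the instruction into a scratch buffer and appends it to the current data fragment; because the emitter records fixup offsets relative to the start of the instruction, each offset is rebased by the fragment's current size before the bytes are appended. A standalone sketch, outside this change and with hypothetical types, of that rebasing:

    // Standalone sketch (not part of this change); Fixup, Fragment and
    // appendInst are hypothetical stand-ins for MCFixup, MCDataFragment and
    // the loop in MCPureStreamer::EmitInstToData.
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    struct Fixup { uint64_t Offset; };    // position of the value to patch

    struct Fragment {
      std::string Contents;               // raw bytes emitted so far
      std::vector<Fixup> Fixups;          // offsets relative to Contents[0]
    };

    static void appendInst(Fragment &F, const std::string &Code,
                           std::vector<Fixup> Fixups) {
      uint64_t Base = F.Contents.size();  // where this instruction will land
      for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
        Fixups[i].Offset += Base;         // instruction-relative -> fragment-relative
        F.Fixups.push_back(Fixups[i]);
      }
      F.Contents.append(Code.begin(), Code.end());
    }

    int main() {
      Fragment F;
      appendInst(F, std::string(5, '\x90'), std::vector<Fixup>(1, Fixup{1}));
      appendInst(F, std::string(3, '\x90'), std::vector<Fixup>(1, Fixup{2}));
      assert(F.Fixups[0].Offset == 1);      // first instruction starts at offset 0
      assert(F.Fixups[1].Offset == 5 + 2);  // second starts after 5 bytes
      return 0;
    }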
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
new file mode 100644
index 000000000000..a792d5631790
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -0,0 +1,22 @@
+//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MCSection
+//===----------------------------------------------------------------------===//
+
+MCSection::~MCSection() {
+}
+
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 000000000000..90091f06e9ac
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ // Standard sections don't require the '.section' directive.
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << getSectionName() << ",\"";
+ if (getKind().isText())
+ OS << 'x';
+ if (getKind().isWriteable())
+ OS << 'w';
+ else
+ OS << 'r';
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE)
+ OS << 'n';
+ OS << "\"\n";
+
+ if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
+ switch (Selection) {
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "\t.linkonce one_only\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ANY:
+ OS << "\t.linkonce discard\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "\t.linkonce same_size\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "\t.linkonce same_contents\n";
+ break;
+ // NOTE: as of binutils 2.20, there is no way to specify 'select largest'
+ // with the .linkonce directive. For now, we treat it as an invalid
+ // comdat selection value.
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST:
+ // OS << "\t.linkonce largest\n";
+ // break;
+ default:
+ assert (0 && "unsupported COFF selection type");
+ break;
+ }
+ }
+}
+
+bool MCSectionCOFF::UseCodeAlign() const {
+ return getKind().isText();
+}
+
+bool MCSectionCOFF::isVirtualSection() const {
+ return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+}
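A hedged sketch of the characteristics-to-flag-letter mapping that MCSectionCOFF::PrintSwitchToSection implements above; the bit constants below are illustrative stand-ins, not the real COFF::IMAGE_SCN_* values.

    #include <string>

    static const unsigned kIsText        = 1u << 0;  // stand-in for "is text"
    static const unsigned kIsWriteable   = 1u << 1;  // stand-in for "is writeable"
    static const unsigned kIsDiscardable = 1u << 2;  // stand-in for IMAGE_SCN_MEM_DISCARDABLE

    // 'x' for code, 'w' or 'r' for writeable vs. read-only, 'n' if discardable,
    // mirroring the string emitted between the quotes of the .section directive.
    static std::string coffSectionFlags(unsigned Bits) {
      std::string S;
      if (Bits & kIsText)        S += 'x';
      S += (Bits & kIsWriteable) ? 'w' : 'r';
      if (Bits & kIsDiscardable) S += 'n';
      return S;
    }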
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
new file mode 100644
index 000000000000..dfd77c3fe813
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -0,0 +1,150 @@
+//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCSectionELF::~MCSectionELF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether the '.section' directive
+// should be omitted before the section name.
+bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" ||
+ (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS()))
+ return true;
+
+ return false;
+}
+
+void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ StringRef name = getSectionName();
+ if (name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) {
+ OS << "\t.section\t" << name;
+ } else {
+ OS << "\t.section\t\"";
+ for (const char *b = name.begin(), *e = name.end(); b < e; ++b) {
+ if (*b == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*b != '\\') // Neither quote nor backslash
+ OS << *b;
+ else if (b + 1 == e) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << b[0] << b[1]; // Quoted character
+ ++b;
+ }
+ }
+ OS << '"';
+ }
+
+ // Handle the weird solaris syntax if desired.
+ if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
+ !(Flags & ELF::SHF_MERGE)) {
+ if (Flags & ELF::SHF_ALLOC)
+ OS << ",#alloc";
+ if (Flags & ELF::SHF_EXECINSTR)
+ OS << ",#execinstr";
+ if (Flags & ELF::SHF_WRITE)
+ OS << ",#write";
+ if (Flags & ELF::SHF_TLS)
+ OS << ",#tls";
+ OS << '\n';
+ return;
+ }
+
+ OS << ",\"";
+ if (Flags & ELF::SHF_ALLOC)
+ OS << 'a';
+ if (Flags & ELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Flags & ELF::SHF_GROUP)
+ OS << 'G';
+ if (Flags & ELF::SHF_WRITE)
+ OS << 'w';
+ if (Flags & ELF::SHF_MERGE)
+ OS << 'M';
+ if (Flags & ELF::SHF_STRINGS)
+ OS << 'S';
+ if (Flags & ELF::SHF_TLS)
+ OS << 'T';
+
+ // If there are target-specific flags, print them.
+ if (Flags & ELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Flags & ELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+
+ OS << '"';
+
+ OS << ',';
+
+ // If the comment string is '@' (e.g. on ARM), use '%' instead.
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Type == ELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Type == ELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Type == ELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Type == ELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Type == ELF::SHT_NOTE)
+ OS << "note";
+ else if (Type == ELF::SHT_PROGBITS)
+ OS << "progbits";
+
+ if (EntrySize) {
+ assert(Flags & ELF::SHF_MERGE);
+ OS << "," << EntrySize;
+ }
+
+ if (Flags & ELF::SHF_GROUP)
+ OS << "," << Group->getName() << ",comdat";
+ OS << '\n';
+}
+
+bool MCSectionELF::UseCodeAlign() const {
+ return getFlags() & ELF::SHF_EXECINSTR;
+}
+
+bool MCSectionELF::isVirtualSection() const {
+ return getType() == ELF::SHT_NOBITS;
+}
+
+unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString()) return 1;
+ if (Kind.isMergeable2ByteCString()) return 2;
+ if (Kind.isMergeable4ByteCString()) return 4;
+ if (Kind.isMergeableConst4()) return 4;
+ if (Kind.isMergeableConst8()) return 8;
+ if (Kind.isMergeableConst16()) return 16;
+ return 0;
+}
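The name-quoting rule MCSectionELF::PrintSwitchToSection applies above, restated as a standalone helper over plain std::string (no MC types): names made only of [0-9A-Za-z_.] print bare, anything else is quoted, with embedded quotes escaped and a trailing backslash doubled.

    #include <cctype>
    #include <cstddef>
    #include <string>

    static std::string quoteSectionName(const std::string &Name) {
      bool Simple = true;
      for (std::size_t I = 0; I != Name.size(); ++I) {
        char C = Name[I];
        if (!std::isalnum((unsigned char)C) && C != '_' && C != '.')
          Simple = false;
      }
      if (Simple)
        return Name;

      std::string Out = "\"";
      for (std::size_t I = 0; I != Name.size(); ++I) {
        char C = Name[I];
        if (C == '"')                        // escape an embedded quote
          Out += "\\\"";
        else if (C != '\\')                  // ordinary character
          Out += C;
        else if (I + 1 == Name.size())       // trailing backslash is doubled
          Out += "\\\\";
        else {                               // backslash pair passed through
          Out += C;
          Out += Name[++I];
        }
      }
      return Out + '"';
    }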
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
new file mode 100644
index 000000000000..e771556262a8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -0,0 +1,303 @@
+//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+/// SectionTypeDescriptors - These are strings that describe the various
+/// section types.  This table *must* be kept in the same order as, and stay
+/// synchronized with, the section type list.
+static const struct {
+ const char *AssemblerName, *EnumName;
+} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = {
+ { "regular", "S_REGULAR" }, // 0x00
+ { 0, "S_ZEROFILL" }, // 0x01
+ { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
+ { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
+ { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
+ { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05
+ { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06
+ { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07
+ { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08
+ { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
+ { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
+ { "coalesced", "S_COALESCED" }, // 0x0B
+ { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
+ { "interposing", "S_INTERPOSING" }, // 0x0D
+ { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
+ { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
+ { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
+ { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
+ { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
+ { "thread_local_variable_pointers",
+ "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14
+ { "thread_local_init_function_pointers",
+ "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15
+};
+
+
+/// SectionAttrDescriptors - This is an array of descriptors for section
+/// attributes.  Unlike the SectionTypeDescriptors, this is not directly
+/// indexed by attribute; instead, it is searched.  The last entry has an
+/// AttrFlagEnd AttrFlag value.
+static const struct {
+ unsigned AttrFlag;
+ const char *AssemblerName, *EnumName;
+} SectionAttrDescriptors[] = {
+#define ENTRY(ASMNAME, ENUM) \
+ { MCSectionMachO::ENUM, ASMNAME, #ENUM },
+ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS)
+ENTRY("no_toc", S_ATTR_NO_TOC)
+ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS)
+ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
+ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
+ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
+ENTRY("debug", S_ATTR_DEBUG)
+ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
+#undef ENTRY
+ { 0, "none", 0 }, // used if section has no attributes but has a stub size
+#define AttrFlagEnd 0xffffffff // not a legal value; multiple attribute bits set
+ { AttrFlagEnd, 0, 0 }
+};
+
+MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
+ unsigned TAA, unsigned reserved2, SectionKind K)
+ : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ assert(Segment.size() <= 16 && Section.size() <= 16 &&
+ "Segment or section string too long");
+ for (unsigned i = 0; i != 16; ++i) {
+ if (i < Segment.size())
+ SegmentName[i] = Segment[i];
+ else
+ SegmentName[i] = 0;
+
+ if (i < Section.size())
+ SectionName[i] = Section[i];
+ else
+ SectionName[i] = 0;
+ }
+}
+
+void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
+
+ // Get the section type and attributes.
+ unsigned TAA = getTypeAndAttributes();
+ if (TAA == 0) {
+ OS << '\n';
+ return;
+ }
+
+ unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
+ assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
+ "Invalid SectionType specified!");
+
+ if (SectionTypeDescriptors[SectionType].AssemblerName) {
+ OS << ',';
+ OS << SectionTypeDescriptors[SectionType].AssemblerName;
+ } else {
+ // If we have no assembler name for this section type, stop here.
+ OS << '\n';
+ return;
+ }
+
+ // If we don't have any attributes, we're done.
+ unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
+ if (SectionAttrs == 0) {
+ // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
+ // the attribute specifier.
+ if (Reserved2 != 0)
+ OS << ",none," << Reserved2;
+ OS << '\n';
+ return;
+ }
+
+ // Check each attribute to see if we have it.
+ char Separator = ',';
+ for (unsigned i = 0;
+ SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag;
+ ++i) {
+ // Check to see if we have this attribute.
+ if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
+ continue;
+
+ // Yep, clear it and print it.
+ SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
+
+ OS << Separator;
+ if (SectionAttrDescriptors[i].AssemblerName)
+ OS << SectionAttrDescriptors[i].AssemblerName;
+ else
+ OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
+ Separator = '+';
+ }
+
+ assert(SectionAttrs == 0 && "Unknown section attributes!");
+
+ // If we have a S_SYMBOL_STUBS size specified, print it.
+ if (Reserved2 != 0)
+ OS << ',' << Reserved2;
+ OS << '\n';
+}
+
+bool MCSectionMachO::UseCodeAlign() const {
+ return hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+}
+
+bool MCSectionMachO::isVirtualSection() const {
+ return (getType() == MCSectionMachO::S_ZEROFILL ||
+ getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
+/// StripSpaces - This removes leading and trailing spaces from the StringRef.
+static void StripSpaces(StringRef &Str) {
+ while (!Str.empty() && isspace(Str[0]))
+ Str = Str.substr(1);
+ while (!Str.empty() && isspace(Str.back()))
+ Str = Str.substr(0, Str.size()-1);
+}
+
+/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+/// This is a string that can appear after a .section directive in a mach-o
+/// flavored .s file. If successful, this fills in the specified Out
+/// parameters and returns an empty string. When an invalid section
+/// specifier is present, this returns a string indicating the problem.
+std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
+ StringRef &Segment, // Out.
+ StringRef &Section, // Out.
+ unsigned &TAA, // Out.
+ bool &TAAParsed, // Out.
+ unsigned &StubSize) { // Out.
+ TAAParsed = false;
+ // Find the first comma.
+ std::pair<StringRef, StringRef> Comma = Spec.split(',');
+
+ // If there is no comma, we fail.
+ if (Comma.second.empty())
+ return "mach-o section specifier requires a segment and section "
+ "separated by a comma";
+
+ // Capture segment, remove leading and trailing whitespace.
+ Segment = Comma.first;
+ StripSpaces(Segment);
+
+ // Verify that the segment is present and not too long.
+ if (Segment.empty() || Segment.size() > 16)
+ return "mach-o section specifier requires a segment whose length is "
+ "between 1 and 16 characters";
+
+ // Split the section name off from any attributes if present.
+ Comma = Comma.second.split(',');
+
+ // Capture section, remove leading and trailing whitespace.
+ Section = Comma.first;
+ StripSpaces(Section);
+
+ // Verify that the section is present and not too long.
+ if (Section.empty() || Section.size() > 16)
+ return "mach-o section specifier requires a section whose length is "
+ "between 1 and 16 characters";
+
+ // If there is no comma after the section, we're done.
+ TAA = 0;
+ StubSize = 0;
+ if (Comma.second.empty())
+ return "";
+
+ // Otherwise, we need to parse the section type and attributes.
+ Comma = Comma.second.split(',');
+
+ // Get the section type.
+ StringRef SectionType = Comma.first;
+ StripSpaces(SectionType);
+
+ // Figure out which section type it is.
+ unsigned TypeID;
+ for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
+ if (SectionTypeDescriptors[TypeID].AssemblerName &&
+ SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
+ break;
+
+ // If we didn't find the section type, reject it.
+ if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
+ return "mach-o section specifier uses an unknown section type";
+
+ // Remember the TypeID.
+ TAA = TypeID;
+ TAAParsed = true;
+
+ // If we have no comma after the section type, there are no attributes.
+ if (Comma.second.empty()) {
+ // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // Otherwise, we do have some attributes. Split off the size specifier if
+ // present.
+ Comma = Comma.second.split(',');
+ StringRef Attrs = Comma.first;
+
+ // The attribute list is a '+' separated list of attributes.
+ std::pair<StringRef, StringRef> Plus = Attrs.split('+');
+
+ while (1) {
+ StringRef Attr = Plus.first;
+ StripSpaces(Attr);
+
+ // Look up the attribute.
+ for (unsigned i = 0; ; ++i) {
+ if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
+ return "mach-o section specifier has invalid attribute";
+
+ if (SectionAttrDescriptors[i].AssemblerName &&
+ Attr == SectionAttrDescriptors[i].AssemblerName) {
+ TAA |= SectionAttrDescriptors[i].AttrFlag;
+ break;
+ }
+ }
+
+ if (Plus.second.empty()) break;
+ Plus = Plus.second.split('+');
+ }
+
+ // Okay, we've parsed the section attributes; see if we have a stub size spec.
+ if (Comma.second.empty()) {
+ // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // If we have a stub size spec, we must have a sectiontype of S_SYMBOL_STUBS.
+ if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier cannot have a stub size specified because "
+ "it does not have type 'symbol_stubs'";
+
+ // Okay, if we do, it must be a number.
+ StringRef StubSizeStr = Comma.second;
+ StripSpaces(StubSizeStr);
+
+ // Convert the stub size from a string to an integer.
+ if (StubSizeStr.getAsInteger(0, StubSize))
+ return "mach-o section specifier has a malformed stub size";
+
+ return "";
+}
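A usage sketch for the parser above, assuming ParseSectionSpecifier is the static member declared in llvm/MC/MCSectionMachO.h; the helper name parseSpec is made up for the example.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCSectionMachO.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    // e.g. parseSpec("__TEXT,__cstring,cstring_literals")
    static bool parseSpec(StringRef Spec) {
      StringRef Segment, Section;
      unsigned TAA = 0, StubSize = 0;
      bool TAAParsed = false;
      std::string Err = MCSectionMachO::ParseSectionSpecifier(
          Spec, Segment, Section, TAA, TAAParsed, StubSize);
      if (!Err.empty()) {
        errs() << "bad mach-o section specifier: " << Err << "\n";
        return false;
      }
      outs() << Segment << "," << Section << " TAA=" << TAA
             << " stubsize=" << StubSize << "\n";
      return true;
    }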
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
new file mode 100644
index 000000000000..6e96b78e315b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -0,0 +1,534 @@
+//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include <cstdlib>
+using namespace llvm;
+
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), EmitEHFrame(true),
+ EmitDebugFrame(false),
+ CurrentW64UnwindInfo(0) {
+ const MCSection *section = NULL;
+ SectionStack.push_back(std::make_pair(section, section));
+}
+
+MCStreamer::~MCStreamer() {
+ for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
+ delete W64UnwindInfos[i];
+}
+
+const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
+ const MCSymbol *A,
+ const MCSymbol *B) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *ARef =
+ MCSymbolRefExpr::Create(A, Variant, Context);
+ const MCExpr *BRef =
+ MCSymbolRefExpr::Create(B, Variant, Context);
+ const MCExpr *AddrDelta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ return AddrDelta;
+}
+
+const MCExpr *MCStreamer::ForceExpAbs(const MCExpr* Expr) {
+ if (Context.getAsmInfo().hasAggressiveSymbolFolding() ||
+ isa<MCSymbolRefExpr>(Expr))
+ return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
+raw_ostream &MCStreamer::GetCommentOS() {
+ // By default, discard comments.
+ return nulls();
+}
+
+void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
+ const MCSymbol *Label, int PointerSize) {
+ // emit the sequence to set the address
+ EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+ EmitULEB128IntValue(PointerSize + 1);
+ EmitIntValue(dwarf::DW_LNE_set_address, 1);
+ EmitSymbolValue(Label, PointerSize);
+
+ // emit the sequence for the LineDelta (from 1) and a zero address delta.
+ MCDwarfLineAddr::Emit(this, LineDelta, 0);
+}
+
+/// EmitIntValue - Special case of EmitValue that avoids the client having to
+/// pass in an MCExpr for constant integers.
+void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(Size <= 8 && "Invalid size");
+ assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
+ "Value does not fit in the given size");
+ char buf[8];
+ const bool isLittleEndian = Context.getAsmInfo().isLittleEndian();
+ for (unsigned i = 0; i != Size; ++i) {
+ unsigned index = isLittleEndian ? i : (Size - i - 1);
+ buf[i] = uint8_t(Value >> (index * 8));
+ }
+ EmitBytes(StringRef(buf, Size), AddrSpace);
+}
+
+/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
+/// client having to pass in an MCExpr for constant integers.
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+/// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
+/// client having to pass in an MCExpr for constant integers.
+void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeSLEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ const MCExpr *ABS = ForceExpAbs(Value);
+ EmitValue(ABS, Size, AddrSpace);
+}
+
+
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(Value, Size, AddrSpace);
+}
+
+void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size,
+ AddrSpace);
+}
+
+void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ const MCExpr *E = MCConstantExpr::Create(FillValue, getContext());
+ for (uint64_t i = 0, e = NumBytes; i != e; ++i)
+ EmitValue(E, 1, AddrSpace);
+}
+
+bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename) {
+ return getContext().GetDwarfFile(Filename, FileNo) == 0;
+}
+
+void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator,
+ StringRef FileName) {
+ getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa,
+ Discriminator);
+}
+
+MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
+ if (FrameInfos.empty())
+ return NULL;
+ return &FrameInfos.back();
+}
+
+void MCStreamer::EnsureValidFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open frame");
+}
+
+void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+}
+
+void MCStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+
+ StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
+ if (!Symbol->getName().startswith(Prefix))
+ LastNonPrivate = Symbol;
+}
+
+void MCStreamer::EmitCFISections(bool EH, bool Debug) {
+ assert(EH || Debug);
+ EmitEHFrame = EH;
+ EmitDebugFrame = Debug;
+}
+
+void MCStreamer::EmitCFIStartProc() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (CurFrame && !CurFrame->End)
+ report_fatal_error("Starting a frame before finishing the previous one!");
+ MCDwarfFrameInfo Frame;
+ Frame.Begin = getContext().CreateTempSymbol();
+ Frame.Function = LastNonPrivate;
+ EmitLabel(Frame.Begin);
+ FrameInfos.push_back(Frame);
+}
+
+void MCStreamer::EmitCFIEndProc() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+}
+
+void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(Register, -Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(MachineLocation::VirtualFP, -Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(MachineLocation::VirtualFP, Adjustment);
+ MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register);
+ MachineLocation Source(MachineLocation::VirtualFP);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register, Offset);
+ MachineLocation Source(Register, Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register, Offset);
+ MachineLocation Source(Register, Offset);
+ MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Personality = Sym;
+ CurFrame->PersonalityEncoding = Encoding;
+}
+
+void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Lsda = Sym;
+ CurFrame->LsdaEncoding = Encoding;
+}
+
+void MCStreamer::EmitCFIRememberState() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRestoreState() {
+ // FIXME: Error if there is no matching cfi_remember_state.
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFISameValue(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
+ W64UnwindInfos.push_back(Frame);
+ CurrentW64UnwindInfo = W64UnwindInfos.back();
+}
+
+void MCStreamer::EnsureValidW64UnwindInfo() {
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open Win64 EH frame function!");
+}
+
+void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame && !CurFrame->End)
+ report_fatal_error("Starting a function before ending the previous one!");
+ MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
+ Frame->Begin = getContext().CreateTempSymbol();
+ Frame->Function = Symbol;
+ EmitLabel(Frame->Begin);
+ setCurrentW64UnwindInfo(Frame);
+}
+
+void MCStreamer::EmitWin64EHEndProc() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Not all chained regions terminated!");
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+}
+
+void MCStreamer::EmitWin64EHStartChained() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ Frame->Begin = getContext().CreateTempSymbol();
+ Frame->Function = CurFrame->Function;
+ Frame->ChainedParent = CurFrame;
+ EmitLabel(Frame->Begin);
+ setCurrentW64UnwindInfo(Frame);
+}
+
+void MCStreamer::EmitWin64EHEndChained() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (!CurFrame->ChainedParent)
+ report_fatal_error("End of a chained region outside a chained region!");
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+ CurrentW64UnwindInfo = CurFrame->ChainedParent;
+}
+
+void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Chained unwind areas can't have handlers!");
+ CurFrame->ExceptionHandler = Sym;
+ if (!Except && !Unwind)
+ report_fatal_error("Don't know what kind of handler this is!");
+ if (Unwind)
+ CurFrame->HandlesUnwind = true;
+ if (Except)
+ CurFrame->HandlesExceptions = true;
+}
+
+void MCStreamer::EmitWin64EHHandlerData() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Chained unwind areas can't have handlers!");
+}
+
+void MCStreamer::EmitWin64EHPushReg(unsigned Register) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_PushNonVol, Label, Register);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->LastFrameInst >= 0)
+ report_fatal_error("Frame register and offset already specified!");
+ if (Offset & 0x0F)
+ report_fatal_error("Misaligned frame pointer offset!");
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+ CurFrame->LastFrameInst = CurFrame->Instructions.size();
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHAllocStack(unsigned Size) {
+ EnsureValidW64UnwindInfo();
+ if (Size & 7)
+ report_fatal_error("Misaligned stack allocation!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Label, Size);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ if (Offset & 7)
+ report_fatal_error("Misaligned saved register offset!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(
+ Offset > 512*1024-8 ? Win64EH::UOP_SaveNonVolBig : Win64EH::UOP_SaveNonVol,
+ Label, Register, Offset);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ if (Offset & 0x0F)
+ report_fatal_error("Misaligned saved vector register offset!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(
+ Offset > 512*1024-16 ? Win64EH::UOP_SaveXMM128Big : Win64EH::UOP_SaveXMM128,
+ Label, Register, Offset);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHPushFrame(bool Code) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->Instructions.size() > 0)
+ report_fatal_error("If present, PushMachFrame must be the first UOP");
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_PushMachFrame, Label, Code);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHEndProlog() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ CurFrame->PrologEnd = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->PrologEnd);
+}
+
+void MCStreamer::EmitFnStart() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitFnEnd() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitCantUnwind() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitHandlerData() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitPersonality(const MCSymbol *Personality) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitPad(int64_t Offset) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void MCStreamer::EmitRawText(StringRef String) {
+ errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
+ " something must not be fully mc'ized\n";
+ abort();
+}
+
+void MCStreamer::EmitRawText(const Twine &T) {
+ SmallString<128> Str;
+ T.toVector(Str);
+ EmitRawText(Str.str());
+}
+
+void MCStreamer::EmitFrames(bool usingCFI) {
+ if (!getNumFrameInfos())
+ return;
+
+ if (EmitEHFrame)
+ MCDwarfFrameEmitter::Emit(*this, usingCFI, true);
+
+ if (EmitDebugFrame)
+ MCDwarfFrameEmitter::Emit(*this, usingCFI, false);
+}
+
+void MCStreamer::EmitW64Tables() {
+ if (!getNumW64UnwindInfos())
+ return;
+
+ MCWin64EHUnwindEmitter::Emit(*this);
+}
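The byte-splitting loop in MCStreamer::EmitIntValue above, extracted into a standalone helper for illustration (no MC types involved).

    #include <cstdint>
    #include <vector>

    // Produce the Size bytes of Value in the requested endianness.
    static std::vector<uint8_t> splitInt(uint64_t Value, unsigned Size,
                                         bool IsLittleEndian) {
      std::vector<uint8_t> Bytes(Size);
      for (unsigned I = 0; I != Size; ++I) {
        unsigned Index = IsLittleEndian ? I : (Size - I - 1);
        Bytes[I] = uint8_t(Value >> (Index * 8));
      }
      return Bytes;
    }
    // splitInt(0x0102, 2, /*IsLittleEndian=*/true) yields {0x02, 0x01};
    // with false it yields {0x01, 0x02}.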
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
new file mode 100644
index 000000000000..86dc1083cee9
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -0,0 +1,96 @@
+//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+void
+MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+ const SubtargetFeatureKV *PF,
+ const SubtargetFeatureKV *PD,
+ const SubtargetInfoKV *PI,
+ const InstrStage *IS,
+ const unsigned *OC,
+ const unsigned *FP,
+ unsigned NF, unsigned NP) {
+ TargetTriple = TT;
+ ProcFeatures = PF;
+ ProcDesc = PD;
+ ProcItins = PI;
+ Stages = IS;
+ OperandCycles = OC;
+ ForwardingPathes = FP;
+ NumFeatures = NF;
+ NumProcs = NP;
+
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+}
+
+
+/// ReInitMCSubtargetInfo - Change the CPU (optionally supplemented with a
+/// feature string) and recompute the feature bits.
+uint64_t MCSubtargetInfo::ReInitMCSubtargetInfo(StringRef CPU, StringRef FS) {
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version does not change the implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+ FeatureBits ^= FB;
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version will also change all implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
+ SubtargetFeatures Features;
+ FeatureBits = Features.ToggleFeature(FeatureBits, FS,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+
+InstrItineraryData
+MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
+ assert(ProcItins && "Instruction itineraries information not available!");
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < NumProcs; i++) {
+ assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 &&
+ "Itineraries table is not sorted");
+ }
+#endif
+
+ // Find entry
+ SubtargetInfoKV KV;
+ KV.Key = CPU.data();
+ const SubtargetInfoKV *Found =
+ std::lower_bound(ProcItins, ProcItins+NumProcs, KV);
+ if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ return InstrItineraryData();
+ }
+
+ return InstrItineraryData(Stages, OperandCycles, ForwardingPathes,
+ (InstrItinerary *)Found->Value);
+}
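The sorted-table lookup pattern getInstrItineraryForCPU uses above, reduced to a plain struct; KV is a hypothetical stand-in for SubtargetInfoKV.

    #include <algorithm>
    #include <cstring>

    struct KV {
      const char *Key;
      const void *Value;
      bool operator<(const KV &RHS) const {
        return std::strcmp(Key, RHS.Key) < 0;
      }
    };

    // Binary-search a table that is sorted by Key; returns 0 for unknown keys.
    static const void *lookupKey(const KV *Table, unsigned N, const char *Key) {
      KV Probe = { Key, 0 };
      const KV *I = std::lower_bound(Table, Table + N, Probe);
      if (I == Table + N || std::strcmp(I->Key, Key) != 0)
        return 0;
      return I->Value;
    }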
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
new file mode 100644
index 000000000000..c2fad1674aa4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Sentinel value for the absolute pseudo section.
+const MCSection *MCSymbol::AbsolutePseudoSection =
+ reinterpret_cast<const MCSection *>(1);
+
+static bool isAcceptableChar(char C) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '.' && C != '@')
+ return false;
+ return true;
+}
+
+/// NameNeedsQuoting - Return true if the identifier \arg Str needs quotes to be
+/// syntactically correct.
+static bool NameNeedsQuoting(StringRef Str) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i]))
+ return true;
+ return false;
+}
+
+const MCSymbol &MCSymbol::AliasedSymbol() const {
+ const MCSymbol *S = this;
+ while (S->isVariable()) {
+ const MCExpr *Value = S->getVariableValue();
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return *S;
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Value);
+ S = &Ref->getSymbol();
+ }
+ return *S;
+}
+
+void MCSymbol::setVariableValue(const MCExpr *Value) {
+ assert(!IsUsed && "Cannot set a variable that has already been used.");
+ assert(Value && "Invalid variable value!");
+ assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
+ "Invalid redefinition!");
+ this->Value = Value;
+
+ // Variables should always be marked as in the same "section" as the value.
+ const MCSection *Section = Value->FindAssociatedSection();
+ if (Section) {
+ setSection(*Section);
+ } else {
+ setUndefined();
+ }
+}
+
+void MCSymbol::print(raw_ostream &OS) const {
+ // The name for this MCSymbol is required to be a valid target name. However,
+ // some targets support quoting names with funny characters. If the name
+ // contains a funny character, then print it quoted.
+ if (!NameNeedsQuoting(getName())) {
+ OS << getName();
+ return;
+ }
+
+ OS << '"' << getName() << '"';
+}
+
+void MCSymbol::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp
new file mode 100644
index 000000000000..c6ea16ce7b4d
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCValue.cpp
@@ -0,0 +1,36 @@
+//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ if (isAbsolute()) {
+ OS << getConstant();
+ return;
+ }
+
+ getSymA()->print(OS);
+
+ if (getSymB()) {
+ OS << " - ";
+ getSymB()->print(OS);
+ }
+
+ if (getConstant())
+ OS << " + " << getConstant();
+}
+
+void MCValue::dump() const {
+ print(dbgs(), 0);
+}
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
new file mode 100644
index 000000000000..e698384a49f1
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -0,0 +1,258 @@
+//===- lib/MC/MCWin64EH.cpp - MCWin64EH implementation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+// NOTE: All relocations generated here are 4-byte image-relative.
+
+static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &instArray){
+ uint8_t count = 0;
+ for (std::vector<MCWin64EHInstruction>::const_iterator I = instArray.begin(),
+ E = instArray.end(); I != E; ++I) {
+ switch (I->getOperation()) {
+ case Win64EH::UOP_PushNonVol:
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_SetFPReg:
+ case Win64EH::UOP_PushMachFrame:
+ count += 1;
+ break;
+ case Win64EH::UOP_SaveNonVol:
+ case Win64EH::UOP_SaveXMM128:
+ count += 2;
+ break;
+ case Win64EH::UOP_SaveNonVolBig:
+ case Win64EH::UOP_SaveXMM128Big:
+ count += 3;
+ break;
+ case Win64EH::UOP_AllocLarge:
+ if (I->getSize() > 512*1024-8)
+ count += 3;
+ else
+ count += 2;
+ break;
+ }
+ }
+ return count;
+}
+
+static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs,
+ MCSymbol *rhs) {
+ MCContext &context = streamer.getContext();
+ const MCExpr *diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(
+ lhs, context),
+ MCSymbolRefExpr::Create(
+ rhs, context),
+ context);
+ streamer.EmitAbsValue(diff, 1);
+
+}
+
+static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
+ MCWin64EHInstruction &inst) {
+ uint8_t b1, b2;
+ uint16_t w;
+ b2 = (inst.getOperation() & 0x0F);
+ switch (inst.getOperation()) {
+ case Win64EH::UOP_PushNonVol:
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_AllocLarge:
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ if (inst.getSize() > 512*1024-8) {
+ b2 |= 0x10;
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getSize() & 0xFFF8;
+ streamer.EmitIntValue(w, 2);
+ w = inst.getSize() >> 16;
+ } else {
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getSize() >> 3;
+ }
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_AllocSmall:
+ b2 |= (((inst.getSize()-8) >> 3) & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_SetFPReg:
+ b1 = inst.getOffset() & 0xF0;
+ streamer.EmitIntValue(b1, 1);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_SaveNonVol:
+ case Win64EH::UOP_SaveXMM128:
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getOffset() >> 3;
+ if (inst.getOperation() == Win64EH::UOP_SaveXMM128)
+ w >>= 1;
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_SaveNonVolBig:
+ case Win64EH::UOP_SaveXMM128Big:
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ if (inst.getOperation() == Win64EH::UOP_SaveXMM128Big)
+ w = inst.getOffset() & 0xFFF0;
+ else
+ w = inst.getOffset() & 0xFFF8;
+ streamer.EmitIntValue(w, 2);
+ w = inst.getOffset() >> 16;
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_PushMachFrame:
+ if (inst.isPushCodeFrame())
+ b2 |= 0x10;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ }
+}
+
+static void EmitRuntimeFunction(MCStreamer &streamer,
+ const MCWin64EHUnwindInfo *info) {
+ MCContext &context = streamer.getContext();
+
+ streamer.EmitValueToAlignment(4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Begin, context), 4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->End, context), 4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol, context), 4);
+}
+
+static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
+ // If this UNWIND_INFO already has a symbol, it's already been emitted.
+ if (info->Symbol) return;
+
+ MCContext &context = streamer.getContext();
+ streamer.EmitValueToAlignment(4);
+ // Low 3 bits are the version number (currently 1); flags occupy the upper 5.
+ uint8_t flags = 0x01;
+ info->Symbol = context.CreateTempSymbol();
+ streamer.EmitLabel(info->Symbol);
+
+ if (info->ChainedParent)
+ flags |= Win64EH::UNW_ChainInfo << 3;
+ else {
+ if (info->HandlesUnwind)
+ flags |= Win64EH::UNW_TerminateHandler << 3;
+ if (info->HandlesExceptions)
+ flags |= Win64EH::UNW_ExceptionHandler << 3;
+ }
+ streamer.EmitIntValue(flags, 1);
+
+ if (info->PrologEnd)
+ EmitAbsDifference(streamer, info->PrologEnd, info->Begin);
+ else
+ streamer.EmitIntValue(0, 1);
+
+ uint8_t numCodes = CountOfUnwindCodes(info->Instructions);
+ streamer.EmitIntValue(numCodes, 1);
+
+ uint8_t frame = 0;
+ if (info->LastFrameInst >= 0) {
+ MCWin64EHInstruction &frameInst = info->Instructions[info->LastFrameInst];
+ assert(frameInst.getOperation() == Win64EH::UOP_SetFPReg);
+ frame = (frameInst.getRegister() & 0x0F) |
+ (frameInst.getOffset() & 0xF0);
+ }
+ streamer.EmitIntValue(frame, 1);
+
+ // Emit unwind instructions (in reverse order).
+ uint8_t numInst = info->Instructions.size();
+ for (uint8_t c = 0; c < numInst; ++c) {
+ MCWin64EHInstruction inst = info->Instructions.back();
+ info->Instructions.pop_back();
+ EmitUnwindCode(streamer, info->Begin, inst);
+ }
+
+ if (flags & (Win64EH::UNW_ChainInfo << 3))
+ EmitRuntimeFunction(streamer, info->ChainedParent);
+ else if (flags &
+ ((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3))
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler, context),
+ 4);
+ else if (numCodes < 2) {
+ // The minimum size of an UNWIND_INFO struct is 8 bytes. If we're not
+ // a chained unwind info, if there is no handler, and if there are fewer
+ // than 2 slots used in the unwind code array, we have to pad to 8 bytes.
+ if (numCodes == 1)
+ streamer.EmitIntValue(0, 2);
+ else
+ streamer.EmitIntValue(0, 4);
+ }
+}
+
+StringRef MCWin64EHUnwindEmitter::GetSectionSuffix(const MCSymbol *func) {
+ if (!func || !func->isInSection()) return "";
+ const MCSection *section = &func->getSection();
+ const MCSectionCOFF *COFFSection;
+ if ((COFFSection = dyn_cast<MCSectionCOFF>(section))) {
+ StringRef name = COFFSection->getSectionName();
+ size_t dollar = name.find('$');
+ size_t dot = name.find('.', 1);
+ if (dollar == StringRef::npos && dot == StringRef::npos)
+ return "";
+ if (dot == StringRef::npos)
+ return name.substr(dollar);
+ if (dollar == StringRef::npos || dot < dollar)
+ return name.substr(dot);
+ return name.substr(dollar);
+ }
+ return "";
+}
+
+void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
+ MCWin64EHUnwindInfo *info) {
+ // Switch sections (the static function above is meant to be called from
+ // here and from Emit()).
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ const MCSection *xdataSect =
+ TAI.getWin64EHTableSection(GetSectionSuffix(info->Function));
+ streamer.SwitchSection(xdataSect);
+
+ llvm::EmitUnwindInfo(streamer, info);
+}
+
+void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ // Emit the unwind info structs first.
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
+ MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
+ const MCSection *xdataSect =
+ TAI.getWin64EHTableSection(GetSectionSuffix(info.Function));
+ streamer.SwitchSection(xdataSect);
+ llvm::EmitUnwindInfo(streamer, &info);
+ }
+ // Now emit RUNTIME_FUNCTION entries.
+ for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
+ MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
+ const MCSection *pdataSect =
+ TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
+ streamer.SwitchSection(pdataSect);
+ EmitRuntimeFunction(streamer, &info);
+ }
+}
+
+} // End of namespace llvm
+
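A small sketch of the two bit-packings EmitUnwindInfo performs above; the helper names are invented for the example, only the masks and shifts mirror the code.

    #include <cstdint>

    // First UNWIND_INFO byte: version in the low 3 bits, flags shifted into
    // the upper 5 (matching the 'flags |= ... << 3' logic above).
    static uint8_t packVersionAndFlags(unsigned Version, unsigned Flags) {
      return uint8_t((Version & 0x07) | ((Flags & 0x1F) << 3));
    }

    // Frame byte: frame register in the low nibble, 16-byte-aligned frame
    // offset in the high nibble ('frame = (reg & 0x0F) | (offset & 0xF0)').
    static uint8_t packFrameByte(unsigned Reg, unsigned Offset) {
      return uint8_t((Reg & 0x0F) | (Offset & 0xF0));
    }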
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
new file mode 100644
index 000000000000..69efe231ad6e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -0,0 +1,785 @@
+//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+#include <vector>
+using namespace llvm;
+using namespace llvm::object;
+
+bool MachObjectWriter::
+doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
+ // Undefined symbols are always extern.
+ if (SD->Symbol->isUndefined())
+ return true;
+
+ // References to weak definitions require external relocation entries; the
+ // definition may not always be the one in the same object file.
+ if (SD->getFlags() & SF_WeakDefinition)
+ return true;
+
+ // Otherwise, we can use an internal relocation.
+ return false;
+}
+
+bool MachObjectWriter::
+MachSymbolData::operator<(const MachSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+}
+
+bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+ (MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
+ const MCAsmLayout &Layout) const {
+ return getSectionAddress(Fragment->getParent()) +
+ Layout.getFragmentOffset(Fragment);
+}
+
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+ const MCAsmLayout &Layout) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Address = Target.getConstant();
+ if (Target.getSymA())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymA()->getSymbol()), Layout);
+ if (Target.getSymB())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymB()->getSymbol()), Layout);
+ return Address;
+ }
+
+ return getSectionAddress(SD->getFragment()->getParent()) +
+ Layout.getSymbolOffset(SD);
+}
+
+uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+ unsigned Next = SD->getLayoutOrder() + 1;
+ if (Next >= Layout.getSectionOrder().size())
+ return 0;
+
+ const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+ if (NextSD.getSection().isVirtualSection())
+ return 0;
+ return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+}
+
+void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+ unsigned LoadCommandsSize,
+ bool SubsectionsViaSymbols) {
+ uint32_t Flags = 0;
+
+ if (SubsectionsViaSymbols)
+ Flags |= macho::HF_SubsectionsViaSymbols;
+
+ // struct mach_header (28 bytes) or
+ // struct mach_header_64 (32 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+ Write32(TargetObjectWriter->getCPUType());
+ Write32(TargetObjectWriter->getCPUSubtype());
+
+ Write32(macho::HFT_Object);
+ Write32(NumLoadCommands);
+ Write32(LoadCommandsSize);
+ Write32(Flags);
+ if (is64Bit())
+ Write32(0); // reserved
+
+ assert(OS.tell() - Start ==
+ (is64Bit() ? macho::Header64Size : macho::Header32Size));
+}
+
+/// WriteSegmentLoadCommand - Write a segment load command.
+///
+/// \arg NumSections - The number of sections in this segment.
+/// \arg SectionDataSize - The total size of the sections.
+void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+ uint64_t VMSize,
+ uint64_t SectionDataStartOffset,
+ uint64_t SectionDataSize) {
+ // struct segment_command (56 bytes) or
+ // struct segment_command_64 (72 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ unsigned SegmentLoadCommandSize =
+ is64Bit() ? macho::SegmentLoadCommand64Size:
+ macho::SegmentLoadCommand32Size;
+ Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+ Write32(SegmentLoadCommandSize +
+ NumSections * (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+
+ WriteBytes("", 16);
+ if (is64Bit()) {
+ Write64(0); // vmaddr
+ Write64(VMSize); // vmsize
+ Write64(SectionDataStartOffset); // file offset
+ Write64(SectionDataSize); // file size
+ } else {
+ Write32(0); // vmaddr
+ Write32(VMSize); // vmsize
+ Write32(SectionDataStartOffset); // file offset
+ Write32(SectionDataSize); // file size
+ }
+ Write32(0x7); // maxprot
+ Write32(0x7); // initprot
+ Write32(NumSections);
+ Write32(0); // flags
+
+ assert(OS.tell() - Start == SegmentLoadCommandSize);
+}
+
+void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionData &SD,
+ uint64_t FileOffset,
+ uint64_t RelocationsStart,
+ unsigned NumRelocations) {
+ uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+ // The offset is unused for virtual sections.
+ if (SD.getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+ FileOffset = 0;
+ }
+
+ // struct section (68 bytes) or
+ // struct section_64 (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+ WriteBytes(Section.getSectionName(), 16);
+ WriteBytes(Section.getSegmentName(), 16);
+ if (is64Bit()) {
+ Write64(getSectionAddress(&SD)); // address
+ Write64(SectionSize); // size
+ } else {
+ Write32(getSectionAddress(&SD)); // address
+ Write32(SectionSize); // size
+ }
+ Write32(FileOffset);
+
+ unsigned Flags = Section.getTypeAndAttributes();
+ if (SD.hasInstructions())
+ Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+ assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+ Write32(Log2_32(SD.getAlignment()));
+ Write32(NumRelocations ? RelocationsStart : 0);
+ Write32(NumRelocations);
+ Write32(Flags);
+ Write32(IndirectSymBase.lookup(&SD)); // reserved1
+ Write32(Section.getStubSize()); // reserved2
+ if (is64Bit())
+ Write32(0); // reserved3
+
+ assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+}
+
+void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+ uint32_t NumSymbols,
+ uint32_t StringTableOffset,
+ uint32_t StringTableSize) {
+ // struct symtab_command (24 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Symtab);
+ Write32(macho::SymtabLoadCommandSize);
+ Write32(SymbolOffset);
+ Write32(NumSymbols);
+ Write32(StringTableOffset);
+ Write32(StringTableSize);
+
+ assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+}
+
+void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+ uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol,
+ uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol,
+ uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset,
+ uint32_t NumIndirectSymbols) {
+ // struct dysymtab_command (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Dysymtab);
+ Write32(macho::DysymtabLoadCommandSize);
+ Write32(FirstLocalSymbol);
+ Write32(NumLocalSymbols);
+ Write32(FirstExternalSymbol);
+ Write32(NumExternalSymbols);
+ Write32(FirstUndefinedSymbol);
+ Write32(NumUndefinedSymbols);
+ Write32(0); // tocoff
+ Write32(0); // ntoc
+ Write32(0); // modtaboff
+ Write32(0); // nmodtab
+ Write32(0); // extrefsymoff
+ Write32(0); // nextrefsyms
+ Write32(IndirectSymbolOffset);
+ Write32(NumIndirectSymbols);
+ Write32(0); // extreloff
+ Write32(0); // nextrel
+ Write32(0); // locreloff
+ Write32(0); // nlocrel
+
+ assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+}
+
+void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ const MCSymbol &Symbol = Data.getSymbol();
+ uint8_t Type = 0;
+ uint16_t Flags = Data.getFlags();
+ uint32_t Address = 0;
+
+ // Set the N_TYPE bits. See <mach-o/nlist.h>.
+ //
+ // FIXME: Are the prebound or indirect fields possible here?
+ if (Symbol.isUndefined())
+ Type = macho::STT_Undefined;
+ else if (Symbol.isAbsolute())
+ Type = macho::STT_Absolute;
+ else
+ Type = macho::STT_Section;
+
+ // FIXME: Set STAB bits.
+
+ if (Data.isPrivateExtern())
+ Type |= macho::STF_PrivateExtern;
+
+ // Set external bit.
+ if (Data.isExternal() || Symbol.isUndefined())
+ Type |= macho::STF_External;
+
+ // Compute the symbol address.
+ if (Symbol.isDefined()) {
+ if (Symbol.isAbsolute()) {
+ Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
+ } else {
+ Address = getSymbolAddress(&Data, Layout);
+ }
+ } else if (Data.isCommon()) {
+ // Common symbols are encoded with the size in the address
+ // field, and their alignment in the flags.
+ Address = Data.getCommonSize();
+
+ // Common alignment is packed into the 'desc' bits.
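+    // (For example, a 16-byte alignment is stored as Log2Size == 4 in bits
+    // 8..11 of the 'desc' field.)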
+ if (unsigned Align = Data.getCommonAlignment()) {
+ unsigned Log2Size = Log2_32(Align);
+ assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+ if (Log2Size > 15)
+ report_fatal_error("invalid 'common' alignment '" +
+ Twine(Align) + "'");
+ // FIXME: Keep this mask with the SymbolFlags enumeration.
+ Flags = (Flags & 0xF0FF) | (Log2Size << 8);
+ }
+ }
+
+ // struct nlist (12 bytes)
+
+ Write32(MSD.StringIndex);
+ Write8(Type);
+ Write8(MSD.SectionIndex);
+
+ // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
+ // value.
+ Write16(Flags);
+ if (is64Bit())
+ Write64(Address);
+ else
+ Write32(Address);
+}
+
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+}
+
+void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+ // This is the point where 'as' creates actual symbols for indirect symbols
+ // (in the following two passes). It would be easier for us to do this sooner
+ // when we see the attribute, but that makes getting the order in the symbol
+ // table much more complicated than it is worth.
+ //
+ // FIXME: Revisit this when the dust settles.
+
+  // Bind non-lazy symbol pointers first.
+ unsigned IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+ continue;
+
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
+
+ Asm.getOrCreateSymbolData(*it->Symbol);
+ }
+
+ // Then lazy symbol pointers and symbol stubs.
+ IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+ continue;
+
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
+
+ // Set the symbol type to undefined lazy, but only on construction.
+ //
+ // FIXME: Do not hardcode.
+ bool Created;
+ MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+ if (Created)
+ Entry.setFlags(Entry.getFlags() | 0x0001);
+ }
+}
+
+/// ComputeSymbolTable - Compute the symbol table data.
+///
+/// \param StringTable [out] - The string table data.
+/// \param LocalSymbolData [out] - The defined, non-external symbols.
+/// \param ExternalSymbolData [out] - The defined, external (and absolute)
+/// symbols.
+/// \param UndefinedSymbolData [out] - The undefined symbols.
+void MachObjectWriter::
+ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+ std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+ assert(Index <= 256 && "Too many sections!");
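+  // (Each nlist entry stores its section index in a single byte, so at most
+  // 255 sections are representable; Index ends at NumSections + 1 here.)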
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Build the symbol arrays and the string table, but only for non-local
+ // symbols.
+ //
+ // The particular order that we collect the symbols and create the string
+ // table, then sort the symbols is chosen to match 'as'. Even though it
+ // doesn't matter for correctness, this is important for letting us diff .o
+ // files.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = 0;
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // External and undefined symbols are required to be in lexicographic order.
+ std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ // The string table is padded to a multiple of 4.
+ while (StringTable.size() % 4)
+ StringTable += '\x00';
+}
+
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartAddress = 0;
+ const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+ for (int i = 0, n = Order.size(); i != n ; ++i) {
+ const MCSectionData *SD = Order[i];
+ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+ SectionAddress[SD] = StartAddress;
+ StartAddress += Layout.getSectionAddressSize(SD);
+
+    // Explicitly pad the section to match the alignment requirements of the
+    // following one. This is for 'gas' compatibility; it shouldn't strictly
+    // be necessary.
+ StartAddress += getPaddingSize(SD, Layout);
+ }
+}
+
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ computeSectionAddresses(Asm, Layout);
+
+ // Create symbol data for any indirect symbols.
+ BindIndirectSymbols(Asm);
+
+ // Compute symbol table information and bind symbol indices.
+ ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+ UndefinedSymbolData);
+}
+
+bool MachObjectWriter::
+IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (InSet)
+ return true;
+
+ // The effective address is
+ // addr(atom(A)) + offset(A)
+ // - addr(atom(B)) - offset(B)
+ // and the offsets are not relocatable, so the fixup is fully resolved when
+ // addr(atom(A)) - addr(atom(B)) == 0.
+ const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+ const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+ const MCSection &SecA = SA.getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+
+ if (IsPCRel) {
+ // The simple (Darwin, except on x86_64) way of dealing with this was to
+ // assume that any reference to a temporary symbol *must* be a temporary
+ // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+ // relocation to a temporary symbol (in the same section) is fully
+ // resolved. This also works in conjunction with absolutized .set, which
+ // requires the compiler to use .set to absolutize the differences between
+ // symbols which the compiler knows to be assembly time constants, so we
+ // don't need to worry about considering symbol differences fully resolved.
+
+ if (!Asm.getBackend().hasReliableSymbolDifference()) {
+ if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ return false;
+ return true;
+ }
+ } else {
+ if (!TargetObjectWriter->useAggressiveSymbolFolding())
+ return false;
+ }
+
+ const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+
+ A_Base = FA.getAtom();
+ if (!A_Base)
+ return false;
+
+ B_Base = FB.getAtom();
+ if (!B_Base)
+ return false;
+
+ // If the atoms are the same, they are guaranteed to have the same address.
+ if (A_Base == B_Base)
+ return true;
+
+ // Otherwise, we can't prove this is fully resolved.
+ return false;
+}
+
+void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
+ unsigned NumSections = Asm.size();
+
+ // The section data starts after the header, the segment load command (and
+ // section headers) and the symbol table.
+ unsigned NumLoadCommands = 1;
+ uint64_t LoadCommandsSize = is64Bit() ?
+ macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+ macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+
+ // Add the symbol table load command sizes, if used.
+ unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+ UndefinedSymbolData.size();
+ if (NumSymbols) {
+ NumLoadCommands += 2;
+ LoadCommandsSize += (macho::SymtabLoadCommandSize +
+ macho::DysymtabLoadCommandSize);
+ }
+
+ // Compute the total size of the section data, as well as its file size and vm
+ // size.
+ uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+ macho::Header32Size) + LoadCommandsSize;
+ uint64_t SectionDataSize = 0;
+ uint64_t SectionDataFileSize = 0;
+ uint64_t VMSize = 0;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ uint64_t Address = getSectionAddress(&SD);
+ uint64_t Size = Layout.getSectionAddressSize(&SD);
+ uint64_t FileSize = Layout.getSectionFileSize(&SD);
+ FileSize += getPaddingSize(&SD, Layout);
+
+ VMSize = std::max(VMSize, Address + Size);
+
+ if (SD.getSection().isVirtualSection())
+ continue;
+
+ SectionDataSize = std::max(SectionDataSize, Address + Size);
+ SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
+ }
+
+ // The section data is padded to 4 bytes.
+ //
+ // FIXME: Is this machine dependent?
+ unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+ SectionDataFileSize += SectionDataPadding;
+
+ // Write the prolog, starting with the header and load command...
+ WriteHeader(NumLoadCommands, LoadCommandsSize,
+ Asm.getSubsectionsViaSymbols());
+ WriteSegmentLoadCommand(NumSections, VMSize,
+ SectionDataStart, SectionDataSize);
+
+ // ... and then the section headers.
+ uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ unsigned NumRelocs = Relocs.size();
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+ WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+ RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+ }
+
+ // Write the symbol table load command, if used.
+ if (NumSymbols) {
+ unsigned FirstLocalSymbol = 0;
+ unsigned NumLocalSymbols = LocalSymbolData.size();
+ unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+ unsigned NumExternalSymbols = ExternalSymbolData.size();
+ unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+ unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+ unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+ unsigned NumSymTabSymbols =
+ NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+ uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+ uint64_t IndirectSymbolOffset = 0;
+
+ // If used, the indirect symbols are written after the section data.
+ if (NumIndirectSymbols)
+ IndirectSymbolOffset = RelocTableEnd;
+
+ // The symbol table is written after the indirect symbol data.
+ uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+ // The string table is written after symbol table.
+ uint64_t StringTableOffset =
+ SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+ macho::Nlist32Size);
+ WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+ StringTableOffset, StringTable.size());
+
+ WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+ FirstExternalSymbol, NumExternalSymbols,
+ FirstUndefinedSymbol, NumUndefinedSymbols,
+ IndirectSymbolOffset, NumIndirectSymbols);
+ }
+
+ // Write the actual section data.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ Asm.WriteSectionData(it, Layout);
+
+ uint64_t Pad = getPaddingSize(it, Layout);
+ for (unsigned int i = 0; i < Pad; ++i)
+ Write8(0);
+ }
+
+ // Write the extra padding.
+ WriteZeros(SectionDataPadding);
+
+ // Write the relocation entries.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ // Write the section relocation entries, in reverse order to match 'as'
+ // (approximately, the exact algorithm is more complicated than this).
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ Write32(Relocs[e - i - 1].Word0);
+ Write32(Relocs[e - i - 1].Word1);
+ }
+ }
+
+ // Write the symbol table data, if used.
+ if (NumSymbols) {
+ // Write the indirect symbol entries.
+ for (MCAssembler::const_indirect_symbol_iterator
+ it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+      // Indirect symbols in the non-lazy symbol pointer section have some
+ // special handling.
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+ // If this symbol is defined and internal, mark it as such.
+ if (it->Symbol->isDefined() &&
+ !Asm.getSymbolData(*it->Symbol).isExternal()) {
+ uint32_t Flags = macho::ISF_Local;
+ if (it->Symbol->isAbsolute())
+ Flags |= macho::ISF_Absolute;
+ Write32(Flags);
+ continue;
+ }
+ }
+
+ Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+ }
+
+ // FIXME: Check that offsets match computed ones.
+
+ // Write the symbol table entries.
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ WriteNlist(LocalSymbolData[i], Layout);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ WriteNlist(ExternalSymbolData[i], Layout);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ WriteNlist(UndefinedSymbolData[i], Layout);
+
+ // Write the string table.
+ OS << StringTable.str();
+ }
+}
+
+MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return new MachObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
new file mode 100644
index 000000000000..348cd4c9ab1b
--- /dev/null
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -0,0 +1,397 @@
+//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SubtargetFeature interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// hasFlag - Determine if a feature has a flag; '+' or '-'
+///
+static inline bool hasFlag(const StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch =='-';
+}
+
+/// StripFlag - Return string stripped of flag.
+///
+static inline std::string StripFlag(const StringRef Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+}
+
+/// isEnabled - Return true if enable flag; '+'.
+///
+static inline bool isEnabled(const StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+}
+
+/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
+///
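+/// For example, PrependFlag("neon", true) returns "+neon"; a string that
+/// already carries a flag is returned unchanged.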
+static inline std::string PrependFlag(const StringRef Feature,
+ bool IsEnabled) {
+ assert(!Feature.empty() && "Empty string");
+ if (hasFlag(Feature))
+ return Feature;
+ std::string Prefix = IsEnabled ? "+" : "-";
+ Prefix += Feature;
+ return Prefix;
+}
+
+/// Split - Splits a string of comma-separated items into a vector of strings.
+///
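+/// For example, "+neon,+vfp3" is split into {"+neon", "+vfp3"}.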
+static void Split(std::vector<std::string> &V, const StringRef S) {
+ if (S.empty())
+ return;
+
+ // Start at beginning of string.
+ size_t Pos = 0;
+ while (true) {
+ // Find the next comma
+ size_t Comma = S.find(',', Pos);
+ // If no comma found then the rest of the string is used
+ if (Comma == std::string::npos) {
+ // Add string to vector
+ V.push_back(S.substr(Pos));
+ break;
+ }
+ // Otherwise add substring to vector
+ V.push_back(S.substr(Pos, Comma - Pos));
+ // Advance to next item
+ Pos = Comma + 1;
+ }
+}
+
+/// Join a vector of strings into a single string, with a comma separating
+/// each element.
+///
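+/// For example, {"+neon", "+vfp3"} becomes "+neon,+vfp3".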
+static std::string Join(const std::vector<std::string> &V) {
+ // Start with empty string.
+ std::string Result;
+ // If the vector is not empty
+ if (!V.empty()) {
+ // Start with the first feature
+ Result = V[0];
+ // For each successive feature
+ for (size_t i = 1; i < V.size(); i++) {
+ // Add a comma
+ Result += ",";
+ // Add the feature
+ Result += V[i];
+ }
+ }
+ // Return the features string
+ return Result;
+}
+
+/// AddFeature - Add a feature to the list, lowercased and prefixed with a
+/// '+' or '-' flag.
+void SubtargetFeatures::AddFeature(const StringRef String,
+ bool IsEnabled) {
+ // Don't add empty features
+ if (!String.empty()) {
+ // Convert to lowercase, prepend flag and add to vector
+ Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ }
+}
+
+/// Find KV in array using binary search.
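+/// The array must be sorted by Key for the binary search to be valid.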
+template<typename T> const T *Find(const StringRef S, const T *A, size_t L) {
+ // Make the lower bound element we're looking for
+ T KV;
+ KV.Key = S.data();
+ // Determine the end of the array
+ const T *Hi = A + L;
+ // Binary search the array
+ const T *F = std::lower_bound(A, Hi, KV);
+ // If not found then return NULL
+ if (F == Hi || StringRef(F->Key) != S) return NULL;
+ // Return the found array item
+ return F;
+}
+
+/// getLongestEntryLength - Return the length of the longest entry in the table.
+///
+static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
+ size_t Size) {
+ size_t MaxLen = 0;
+ for (size_t i = 0; i < Size; i++)
+ MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ return MaxLen;
+}
+
+/// Display help for feature choices.
+///
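+/// Reached via a CPU name of "help" or a feature of "+help"; prints both
+/// tables and exits.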
+static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+
+ // Print the CPU table.
+ errs() << "Available CPUs for this target:\n\n";
+ for (size_t i = 0; i != CPUTableSize; i++)
+ errs() << " " << CPUTable[i].Key
+ << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
+ << " - " << CPUTable[i].Desc << ".\n";
+ errs() << "\n";
+
+ // Print the Feature table.
+ errs() << "Available features for this target:\n\n";
+ for (size_t i = 0; i != FeatTableSize; i++)
+ errs() << " " << FeatTable[i].Key
+ << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
+ << " - " << FeatTable[i].Desc << ".\n";
+ errs() << "\n";
+
+ errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
+ << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ std::exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeatures Implementation
+//===----------------------------------------------------------------------===//
+
+SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
+ // Break up string into separate features
+ Split(Features, Initial);
+}
+
+
+std::string SubtargetFeatures::getString() const {
+ return Join(Features);
+}
+
+/// SetImpliedBits - For each feature that is (transitively) implied by this
+/// feature, set it.
+///
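+/// For example, if A implies B and B implies C, setting A also sets B and C.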
+static
+void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FeatureEntry->Implies & FE.Value) {
+ Bits |= FE.Value;
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ClearImpliedBits - For each feature that (transitively) implies this
+/// feature, clear it.
+///
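+/// For example, if A implies B, clearing B also clears A, since A cannot
+/// remain enabled without B.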
+static
+void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FE.Implies & FeatureEntry->Value) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ToggleFeature - Toggle a feature and return the newly updated feature
+/// bits.
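+/// Toggling a set feature clears it along with every feature that implies it;
+/// toggling a clear feature sets it along with everything it implies.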
+uint64_t
+SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+
+ return Bits;
+}
+
+
+/// getFeatureBits - Get feature bits of a CPU.
+///
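+/// The CPU name (if given) supplies the base feature bits; the '+'/'-'
+/// entries in the Features list are then applied on top of them, in order.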
+uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
+ const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ if (!FeatureTableSize || !CPUTableSize)
+ return 0;
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < CPUTableSize; i++) {
+ assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
+ "CPU table is not sorted");
+ }
+ for (size_t i = 1; i < FeatureTableSize; i++) {
+ assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
+ "CPU features table is not sorted");
+ }
+#endif
+ uint64_t Bits = 0; // Resulting bits
+
+ // Check if help is needed
+ if (CPU == "help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find CPU entry if CPU name is specified.
+ if (!CPU.empty()) {
+ const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+ // Set the feature implied by this CPU feature, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ }
+ }
+
+ // Iterate through each feature
+ for (size_t i = 0, E = Features.size(); i < E; i++) {
+ const StringRef Feature = Features[i];
+
+ // Check for help
+ if (Feature == "+help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+ }
+
+ return Bits;
+}
+
+/// Get scheduling itinerary of a CPU.
+void *SubtargetFeatures::getItinerary(const StringRef CPU,
+ const SubtargetInfoKV *Table,
+ size_t TableSize) {
+ assert(Table && "missing table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < TableSize; i++) {
+ assert(strcmp(Table[i - 1].Key, Table[i].Key) < 0 && "Table is not sorted");
+ }
+#endif
+
+ // Find entry
+ const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize);
+
+ if (Entry) {
+ return Entry->Value;
+ } else {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ return NULL;
+ }
+}
+
+/// print - Print feature string.
+///
+void SubtargetFeatures::print(raw_ostream &OS) const {
+ for (size_t i = 0, e = Features.size(); i != e; ++i)
+ OS << Features[i] << " ";
+ OS << "\n";
+}
+
+/// dump - Dump feature info.
+///
+void SubtargetFeatures::dump() const {
+ print(dbgs());
+}
+
+/// getDefaultSubtargetFeatures - Return a string listing the features
+/// associated with the target triple.
+///
+/// FIXME: This is an inelegant way of specifying the features of a
+/// subtarget. It would be better if we could encode this information
+/// into the IR. See <rdar://5972456>.
+///
+void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
+ if (Triple.getVendor() == Triple::Apple) {
+ if (Triple.getArch() == Triple::ppc) {
+ // powerpc-apple-*
+ AddFeature("altivec");
+ } else if (Triple.getArch() == Triple::ppc64) {
+ // powerpc64-apple-*
+ AddFeature("64bit");
+ AddFeature("altivec");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/MC/TargetAsmBackend.cpp b/contrib/llvm/lib/MC/TargetAsmBackend.cpp
new file mode 100644
index 000000000000..192755742535
--- /dev/null
+++ b/contrib/llvm/lib/MC/TargetAsmBackend.cpp
@@ -0,0 +1,37 @@
+//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+TargetAsmBackend::TargetAsmBackend()
+ : HasReliableSymbolDifference(false)
+{
+}
+
+TargetAsmBackend::~TargetAsmBackend() {
+}
+
+const MCFixupKindInfo &
+TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ static const MCFixupKindInfo Builtins[] = {
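+    // Each entry is { Name, TargetOffset (bits), TargetSize (bits), Flags }.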
+ { "FK_Data_1", 0, 8, 0 },
+ { "FK_Data_2", 0, 16, 0 },
+ { "FK_Data_4", 0, 32, 0 },
+ { "FK_Data_8", 0, 64, 0 },
+ { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+  assert((size_t)Kind < sizeof(Builtins) / sizeof(Builtins[0]) &&
+         "Unknown fixup kind");
+ return Builtins[Kind];
+}
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..101237aabb02
--- /dev/null
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -0,0 +1,887 @@
+//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFObjectWriter"
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCSectionCOFF.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "llvm/Support/TimeValue.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <cstdio>
+#include <cstring>
+
+using namespace llvm;
+
+namespace {
+typedef llvm::SmallString<COFF::NameSize> name;
+
+enum AuxiliaryType {
+ ATFunctionDefinition,
+ ATbfAndefSymbol,
+ ATWeakExternal,
+ ATFile,
+ ATSectionDefinition
+};
+
+struct AuxSymbol {
+ AuxiliaryType AuxType;
+ COFF::Auxiliary Aux;
+};
+
+class COFFSymbol;
+class COFFSection;
+
+class COFFSymbol {
+public:
+ COFF::symbol Data;
+
+ typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+
+ name Name;
+ int Index;
+ AuxiliarySymbols Aux;
+ COFFSymbol *Other;
+ COFFSection *Section;
+ int Relocations;
+
+ MCSymbolData const *MCData;
+
+ COFFSymbol(llvm::StringRef name);
+ size_t size() const;
+ void set_name_offset(uint32_t Offset);
+
+ bool should_keep() const;
+};
+
+// This class contains staging data for a COFF relocation entry.
+struct COFFRelocation {
+ COFF::relocation Data;
+ COFFSymbol *Symb;
+
+ COFFRelocation() : Symb(NULL) {}
+ static size_t size() { return COFF::RelocationSize; }
+};
+
+typedef std::vector<COFFRelocation> relocations;
+
+class COFFSection {
+public:
+ COFF::section Header;
+
+ std::string Name;
+ int Number;
+ MCSectionData const *MCData;
+ COFFSymbol *Symbol;
+ relocations Relocations;
+
+ COFFSection(llvm::StringRef name);
+ static size_t size();
+};
+
+// This class holds the COFF string table.
+class StringTable {
+ typedef llvm::StringMap<size_t> map;
+ map Map;
+
+ void update_length();
+public:
+ std::vector<char> Data;
+
+ StringTable();
+ size_t size() const;
+ size_t insert(llvm::StringRef String);
+};
+
+class WinCOFFObjectWriter : public MCObjectWriter {
+public:
+
+ typedef std::vector<COFFSymbol*> symbols;
+ typedef std::vector<COFFSection*> sections;
+
+ typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSection const *, COFFSection *> section_map;
+
+ // Root level file contents.
+ bool Is64Bit;
+ COFF::header Header;
+ sections Sections;
+ symbols Symbols;
+ StringTable Strings;
+
+ // Maps used during object file creation.
+ section_map SectionMap;
+ symbol_map SymbolMap;
+
+ WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+ ~WinCOFFObjectWriter();
+
+ COFFSymbol *createSymbol(StringRef Name);
+ COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
+ COFFSection *createSection(StringRef Name);
+
+ template <typename object_t, typename list_t>
+ object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
+
+ void DefineSection(MCSectionData const &SectionData);
+ void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
+
+ void MakeSymbolReal(COFFSymbol &S, size_t Index);
+ void MakeSectionReal(COFFSection &S, size_t Number);
+
+ bool ExportSection(COFFSection const *S);
+ bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+
+ bool IsPhysicalSection(COFFSection *S);
+
+ // Entity writing methods.
+
+ void WriteFileHeader(const COFF::header &Header);
+ void WriteSymbol(const COFFSymbol *S);
+ void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
+ void WriteSectionHeader(const COFF::section &S);
+ void WriteRelocation(const COFF::relocation &R);
+
+ // MCObjectWriter interface implementation.
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+ void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+}
+
+static inline void write_uint32_le(void *Data, uint32_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x000000FF) >> 0;
+ Ptr[1] = (Value & 0x0000FF00) >> 8;
+ Ptr[2] = (Value & 0x00FF0000) >> 16;
+ Ptr[3] = (Value & 0xFF000000) >> 24;
+}
+
+static inline void write_uint16_le(void *Data, uint16_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x00FF) >> 0;
+ Ptr[1] = (Value & 0xFF00) >> 8;
+}
+
+static inline void write_uint8_le(void *Data, uint8_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0xFF) >> 0;
+}
+
+//------------------------------------------------------------------------------
+// Symbol class implementation
+
+COFFSymbol::COFFSymbol(llvm::StringRef name)
+ : Name(name.begin(), name.end())
+ , Other(NULL)
+ , Section(NULL)
+ , Relocations(0)
+ , MCData(NULL) {
+ memset(&Data, 0, sizeof(Data));
+}
+
+size_t COFFSymbol::size() const {
+ return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
+}
+
+// In the case that the name does not fit within 8 bytes, the offset
+// into the string table is stored in the last 4 bytes instead, leaving
+// the first 4 bytes as 0.
+void COFFSymbol::set_name_offset(uint32_t Offset) {
+ write_uint32_le(Data.Name + 0, 0);
+ write_uint32_le(Data.Name + 4, Offset);
+}
+
+/// logic to decide if the symbol should be reported in the symbol table
+bool COFFSymbol::should_keep() const {
+  // no section means it's external, keep it
+ if (Section == NULL)
+ return true;
+
+ // if it has relocations pointing at it, keep it
+ if (Relocations > 0) {
+ assert(Section->Number != -1 && "Sections with relocations must be real!");
+ return true;
+ }
+
+  // if the section it's in is being dropped, drop it
+ if (Section->Number == -1)
+ return false;
+
+ // if it is the section symbol, keep it
+ if (Section->Symbol == this)
+ return true;
+
+  // if it's temporary, drop it
+ if (MCData && MCData->getSymbol().isTemporary())
+ return false;
+
+ // otherwise, keep it
+ return true;
+}
+
+//------------------------------------------------------------------------------
+// Section class implementation
+
+COFFSection::COFFSection(llvm::StringRef name)
+ : Name(name)
+ , MCData(NULL)
+ , Symbol(NULL) {
+ memset(&Header, 0, sizeof(Header));
+}
+
+size_t COFFSection::size() {
+ return COFF::SectionSize;
+}
+
+//------------------------------------------------------------------------------
+// StringTable class implementation
+
+/// Write the length of the string table into Data.
+/// The length of the string table includes uint32 length header.
+void StringTable::update_length() {
+ write_uint32_le(&Data.front(), Data.size());
+}
+
+StringTable::StringTable() {
+ // The string table data begins with the length of the entire string table
+ // including the length header. Allocate space for this header.
+ Data.resize(4);
+}
+
+size_t StringTable::size() const {
+ return Data.size();
+}
+
+/// Add String to the table iff it is not already there.
+/// @returns the index into the string table where the string is now located.
+size_t StringTable::insert(llvm::StringRef String) {
+ map::iterator i = Map.find(String);
+
+ if (i != Map.end())
+ return i->second;
+
+ size_t Offset = Data.size();
+
+ // Insert string data into string table.
+ Data.insert(Data.end(), String.begin(), String.end());
+ Data.push_back('\0');
+
+ // Put a reference to it in the map.
+ Map[String] = Offset;
+
+ // Update the internal length field.
+ update_length();
+
+ return Offset;
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+ : MCObjectWriter(OS, true)
+ , Is64Bit(is64Bit) {
+ memset(&Header, 0, sizeof(Header));
+
+  Header.Machine = Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
+                           : COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+WinCOFFObjectWriter::~WinCOFFObjectWriter() {
+ for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
+ delete *I;
+ for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
+ delete *I;
+}
+
+COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
+ return createCOFFEntity<COFFSymbol>(Name, Symbols);
+}
+
+COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol * Symbol){
+ symbol_map::iterator i = SymbolMap.find(Symbol);
+ if (i != SymbolMap.end())
+ return i->second;
+ COFFSymbol *RetSymbol
+ = createCOFFEntity<COFFSymbol>(Symbol->getName(), Symbols);
+ SymbolMap[Symbol] = RetSymbol;
+ return RetSymbol;
+}
+
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSection>(Name, Sections);
+}
+
+/// A template used to create a new symbol or section object and add it to the
+/// given list.
+template <typename object_t, typename list_t>
+object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
+ list_t &List) {
+ object_t *Object = new object_t(Name);
+
+ List.push_back(Object);
+
+ return Object;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+ assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
+ && "Got non COFF section in the COFF backend!");
+ // FIXME: Not sure how to verify this (at least in a debug build).
+ MCSectionCOFF const &Sec =
+ static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+ COFFSection *coff_section = createSection(Sec.getSectionName());
+ COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+ coff_section->Symbol = coff_symbol;
+ coff_symbol->Section = coff_section;
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+
+ // In this case the auxiliary symbol is a Section Definition.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+ coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+ uint32_t &Characteristics = coff_section->Header.Characteristics;
+ switch (SectionData.getAlignment()) {
+ case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break;
+ case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break;
+ case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break;
+ case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break;
+ case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break;
+ case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break;
+ case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break;
+ case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break;
+ case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break;
+ case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break;
+ case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+ case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+ case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+ case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+ default:
+ llvm_unreachable("unsupported section alignment");
+ }
+
+ // Bind internal COFF section to MC section.
+ coff_section->MCData = &SectionData;
+ SectionMap[&SectionData.getSection()] = coff_section;
+}
+
+/// This function takes a symbol data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Assembler) {
+ COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol());
+
+ coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+ if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ if (SymbolData.getSymbol().isVariable()) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+ const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+ // FIXME: This assert message isn't very good.
+ assert(Value->getKind() == MCExpr::SymbolRef &&
+ "Value must be a SymbolRef!");
+
+ const MCSymbolRefExpr *SymbolRef =
+ static_cast<const MCSymbolRefExpr *>(Value);
+ coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol());
+ } else {
+ std::string WeakName = std::string(".weak.")
+ + SymbolData.getSymbol().getName().str()
+ + ".default";
+ COFFSymbol *WeakDefault = createSymbol(WeakName);
+ WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+ WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ WeakDefault->Data.Type = 0;
+ WeakDefault->Data.Value = 0;
+ coff_symbol->Other = WeakDefault;
+ }
+
+ // Setup the Weak External auxiliary symbol.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATWeakExternal;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ }
+
+ // If no storage class was specified in the streamer, define it here.
+ if (coff_symbol->Data.StorageClass == 0) {
+ bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+ coff_symbol->Data.StorageClass =
+ external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ }
+
+ if (SymbolData.Fragment != NULL)
+ coff_symbol->Section =
+ SectionMap[&SymbolData.Fragment->getParent()->getSection()];
+
+ // Bind internal COFF symbol to MC symbol.
+ coff_symbol->MCData = &SymbolData;
+ SymbolMap[&SymbolData.getSymbol()] = coff_symbol;
+}
+
+/// Making a section real involves assigning it a number and putting its
+/// name into the string table if needed.
+void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ // FIXME: Why is this number 999999? This number is never mentioned in the
+ // spec. I'm assuming this is due to the printed value needing to fit into
+ // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+ // 6)? The spec does not state if this entry should be null terminated in
+ // this case, and thus this seems to be the best way to do it. I think I
+ // just solved my own FIXME...
+ if (StringTableEntry > 999999)
+ report_fatal_error("COFF string table is greater than 999999 bytes.");
+
+ std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
+ } else
+ std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+
+ S.Number = Number;
+ S.Symbol->Data.SectionNumber = S.Number;
+ S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
+}
+
+void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ S.set_name_offset(StringTableEntry);
+ } else
+ std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+ S.Index = Index;
+}
+
+bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
+ return !S->MCData->getFragmentList().empty();
+}
+
+bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Asm) {
+ // This doesn't seem to be right. Strings referred to from the .data section
+ // need symbols so they can be linked to code in the .text section right?
+
+ // return Asm.isSymbolLinkerVisible (&SymbolData);
+
+ // For now, all non-variable symbols are exported,
+ // the linker will sort the rest out for us.
+ return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+}
+
+bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+ return (S->Header.Characteristics
+ & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+}
+
+//------------------------------------------------------------------------------
+// entity writing methods
+
+void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+ WriteLE16(Header.Machine);
+ WriteLE16(Header.NumberOfSections);
+ WriteLE32(Header.TimeDateStamp);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ WriteLE16(Header.SizeOfOptionalHeader);
+ WriteLE16(Header.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
+ WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
+ WriteLE32(S->Data.Value);
+ WriteLE16(S->Data.SectionNumber);
+ WriteLE16(S->Data.Type);
+ Write8(S->Data.StorageClass);
+ Write8(S->Data.NumberOfAuxSymbols);
+ WriteAuxiliarySymbols(S->Aux);
+}
+
+void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+ const COFFSymbol::AuxiliarySymbols &S) {
+ for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+ i != e; ++i) {
+ switch(i->AuxType) {
+ case ATFunctionDefinition:
+ WriteLE32(i->Aux.FunctionDefinition.TagIndex);
+ WriteLE32(i->Aux.FunctionDefinition.TotalSize);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+ break;
+ case ATbfAndefSymbol:
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
+ WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
+ WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+ break;
+ case ATWeakExternal:
+ WriteLE32(i->Aux.WeakExternal.TagIndex);
+ WriteLE32(i->Aux.WeakExternal.Characteristics);
+ WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+ break;
+ case ATFile:
+ WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
+ sizeof(i->Aux.File.FileName)));
+ break;
+ case ATSectionDefinition:
+ WriteLE32(i->Aux.SectionDefinition.Length);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+ WriteLE32(i->Aux.SectionDefinition.CheckSum);
+ WriteLE16(i->Aux.SectionDefinition.Number);
+ Write8(i->Aux.SectionDefinition.Selection);
+ WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+ break;
+ }
+ }
+}
+
+void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
+ WriteBytes(StringRef(S.Name, COFF::NameSize));
+
+ WriteLE32(S.VirtualSize);
+ WriteLE32(S.VirtualAddress);
+ WriteLE32(S.SizeOfRawData);
+ WriteLE32(S.PointerToRawData);
+ WriteLE32(S.PointerToRelocations);
+ WriteLE32(S.PointerToLineNumbers);
+ WriteLE16(S.NumberOfRelocations);
+ WriteLE16(S.NumberOfLineNumbers);
+ WriteLE32(S.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+ WriteLE32(R.VirtualAddress);
+ WriteLE32(R.SymbolTableIndex);
+ WriteLE16(R.Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MCObjectWriter interface implementations
+
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // "Define" each section & symbol. This creates section & symbol
+ // entries in the staging area.
+
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
+ DefineSection(*i);
+
+ for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
+ e = Asm.symbol_end(); i != e; i++) {
+ if (ExportSymbol(*i, Asm))
+ DefineSymbol(*i, Asm);
+ }
+}
+
+void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+
+ MCSectionData const *SectionData = Fragment->getParent();
+
+ // Mark this symbol as requiring an entry in the symbol table.
+ assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
+ "Section must already have been defined in ExecutePostLayoutBinding!");
+ assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
+ "Symbol must already have been defined in ExecutePostLayoutBinding!");
+
+ COFFSection *coff_section = SectionMap[&SectionData->getSection()];
+ COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
+ const MCSymbolRefExpr *SymA = Target.getSymA();
+ const MCSymbolRefExpr *SymB = Target.getSymB();
+ const bool CrossSection = SymB &&
+ &SymA->getSymbol().getSection() != &SymB->getSymbol().getSection();
+
+ if (Target.getSymB()) {
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&B_SD);
+
+    // Offset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ FixedValue = b - a;
+    // In the case where we have SymA and SymB, we just need to store the delta
+ // between the two symbols. Update FixedValue to account for the delta, and
+ // skip recording the relocation.
+ if (!CrossSection)
+ return;
+ } else {
+ FixedValue = Target.getConstant();
+ }
+
+ COFFRelocation Reloc;
+
+ Reloc.Data.SymbolTableIndex = 0;
+ Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+
+ // Turn relocations for temporary symbols into section relocations.
+ if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) {
+ Reloc.Symb = coff_symbol->Section->Symbol;
+ FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
+ + coff_symbol->MCData->getOffset();
+ } else
+ Reloc.Symb = coff_symbol;
+
+ ++Reloc.Symb->Relocations;
+
+ Reloc.Data.VirtualAddress += Fixup.getOffset();
+
+ unsigned FixupKind = Fixup.getKind();
+
+ if (CrossSection)
+ FixupKind = FK_PCRel_4;
+
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
+ : COFF::IMAGE_REL_I386_REL32;
+ // FIXME: Can anyone explain what this does other than adjust for the size
+ // of the offset?
+ FixedValue += 4;
+ break;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
+ : COFF::IMAGE_REL_I386_DIR32;
+ break;
+ case FK_Data_8:
+ if (Is64Bit)
+ Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
+ else
+ llvm_unreachable("unsupported relocation type");
+ break;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+
+ coff_section->Relocations.push_back(Reloc);
+}
+
+void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // Assign symbol and section indexes and offsets.
+ Header.NumberOfSections = 0;
+
+ for (sections::iterator i = Sections.begin(),
+ e = Sections.end(); i != e; i++) {
+ if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+ MakeSectionReal(**i, ++Header.NumberOfSections);
+ } else {
+ (*i)->Number = -1;
+ }
+ }
+
+ Header.NumberOfSymbols = 0;
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ MCSymbolData const *SymbolData = coff_symbol->MCData;
+
+ // Update section number & offset for symbols that have them.
+ if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
+ assert(coff_symbol->Section != NULL);
+
+ coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
+ coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+ + SymbolData->Offset;
+ }
+
+ if (coff_symbol->should_keep()) {
+ MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+
+ // Update auxiliary symbol info.
+ coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+ Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ } else
+ coff_symbol->Index = -1;
+ }
+
+ // Fixup weak external references.
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ if (coff_symbol->Other != NULL) {
+ assert(coff_symbol->Index != -1);
+ assert(coff_symbol->Aux.size() == 1 &&
+ "Symbol must contain one aux symbol!");
+ assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
+ "Symbol's aux symbol must be a Weak External!");
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
+ }
+ }
+
+ // Assign file offsets to COFF object file structures.
+
+ unsigned offset = 0;
+
+ offset += COFF::HeaderSize;
+ offset += COFF::SectionSize * Header.NumberOfSections;
+
+ for (MCAssembler::const_iterator i = Asm.begin(),
+ e = Asm.end();
+ i != e; i++) {
+ COFFSection *Sec = SectionMap[&i->getSection()];
+
+ if (Sec->Number == -1)
+ continue;
+
+ Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+
+ if (IsPhysicalSection(Sec)) {
+ Sec->Header.PointerToRawData = offset;
+
+ offset += Sec->Header.SizeOfRawData;
+ }
+
+ if (Sec->Relocations.size() > 0) {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ Sec->Header.PointerToRelocations = offset;
+
+ offset += COFF::RelocationSize * Sec->Relocations.size();
+
+ for (relocations::iterator cr = Sec->Relocations.begin(),
+ er = Sec->Relocations.end();
+ cr != er; ++cr) {
+ assert((*cr).Symb->Index != -1);
+ (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+ }
+ }
+
+ assert(Sec->Symbol->Aux.size() == 1
+ && "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symbol->Aux[0];
+ assert(Aux.AuxType == ATSectionDefinition &&
+ "Section's symbol's aux symbol must be a Section Definition!");
+ Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+ Aux.Aux.SectionDefinition.NumberOfRelocations =
+ Sec->Header.NumberOfRelocations;
+ Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+ Sec->Header.NumberOfLineNumbers;
+ }
+
+ Header.PointerToSymbolTable = offset;
+
+ Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+
+ // Write it all to disk...
+ WriteFileHeader(Header);
+
+ {
+ sections::iterator i, ie;
+ MCAssembler::const_iterator j, je;
+
+ for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
+ if ((*i)->Number != -1)
+ WriteSectionHeader((*i)->Header);
+
+ for (i = Sections.begin(), ie = Sections.end(),
+ j = Asm.begin(), je = Asm.end();
+ (i != ie) && (j != je); ++i, ++j) {
+
+ if ((*i)->Number == -1)
+ continue;
+
+ if ((*i)->Header.PointerToRawData != 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRawData &&
+ "Section::PointerToRawData is insane!");
+
+ Asm.WriteSectionData(j, Layout);
+ }
+
+ if ((*i)->Relocations.size() > 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+ "Section::PointerToRelocations is insane!");
+
+ for (relocations::const_iterator k = (*i)->Relocations.begin(),
+ ke = (*i)->Relocations.end();
+ k != ke; k++) {
+ WriteRelocation(k->Data);
+ }
+ } else
+ assert((*i)->Header.PointerToRelocations == 0 &&
+ "Section::PointerToRelocations is insane!");
+ }
+ }
+
+ assert(OS.tell() == Header.PointerToSymbolTable &&
+ "Header::PointerToSymbolTable is insane!");
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
+ if ((*i)->Index != -1)
+ WriteSymbol(*i);
+
+ OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter factory function
+
+namespace llvm {
+ MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
+ return new WinCOFFObjectWriter(OS, is64Bit);
+ }
+}
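
A minimal usage sketch of the factory above (illustrative only, not part of this commit): the only knob it exposes is 32- vs 64-bit COFF, so a caller that already knows its target triple could wrap it as below. makeCOFFWriter is a hypothetical helper name.

#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/raw_ostream.h"

// Matches the definition above; redeclared here only to keep the sketch
// self-contained.
namespace llvm {
  MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
}

// Hypothetical helper: pick the 32- or 64-bit COFF writer from a triple.
// The returned writer is owned by the caller (typically an MCAssembler).
static llvm::MCObjectWriter *makeCOFFWriter(llvm::raw_ostream &OS,
                                            const llvm::Triple &T) {
  return llvm::createWinCOFFObjectWriter(OS, T.getArch() == llvm::Triple::x86_64);
}
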
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
new file mode 100644
index 000000000000..6c36c1231c83
--- /dev/null
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -0,0 +1,406 @@
+//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file streamer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFStreamer"
+
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringMap.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class WinCOFFStreamer : public MCObjectStreamer {
+public:
+ MCSymbol const *CurSymbol;
+
+ WinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS);
+
+ void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External);
+
+ // MCStreamer interface
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize, unsigned MaxBytesToEmit);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit);
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitInstruction(const MCInst &Instruction);
+ virtual void EmitWin64EHHandlerData();
+ virtual void Finish();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst) {
+ llvm_unreachable("Not used by WinCOFF.");
+ }
+ virtual void EmitInstToData(const MCInst &Inst) {
+ llvm_unreachable("Not used by WinCOFF.");
+ }
+
+ void SetSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+ }
+
+ void SetSectionText() {
+ SetSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionData() {
+ SetSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionBSS() {
+ SetSection(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+
+};
+} // end anonymous namespace.
+
+WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS)
+ : MCObjectStreamer(Context, TAB, OS, &CE)
+ , CurSymbol(NULL) {
+}
+
+void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External) {
+ assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
+ std::string SectionName(".bss$linkonce");
+ SectionName.append(Symbol->getName().begin(), Symbol->getName().end());
+
+ MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ unsigned Characteristics =
+ COFF::IMAGE_SCN_LNK_COMDAT |
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
+
+ const MCSection *Section = MCStreamer::getContext().getCOFFSection(
+ SectionName, Characteristics, Selection, SectionKind::getBSS());
+
+ MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+
+ if (SectionData.getAlignment() < ByteAlignment)
+ SectionData.setAlignment(ByteAlignment);
+
+ SymbolData.setExternal(External);
+
+ Symbol->setSection(*Section);
+
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
+
+ SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData));
+}
+
+// MCStreamer interface
+
+void WinCOFFStreamer::InitSections() {
+ SetSectionText();
+ SetSectionData();
+ SetSectionBSS();
+ SetSectionText();
+}
+
+void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCObjectStreamer::EmitLabel(Symbol);
+}
+
+void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ // FIXME: This is all very ugly and depressing. What needs to happen here
+ // depends on quite a few things that are all part of relaxation, which we
+ // don't really even do.
+
+ if (Value->getKind() != MCExpr::SymbolRef) {
+ // TODO: This is exactly the same as MachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ AddValueSymbols(Value);
+ Symbol->setVariableValue(Value);
+ } else {
+ // FIXME: This is a horrible way to do this :(. This should really be
+ // handled after we are done with the MC* objects and immediately before
+ // writing out the object file when we know exactly what the symbol should
+ // look like in the coff symbol table. I'm not doing that now because the
+ // COFF object writer doesn't have a clearly defined separation between MC
+ // data structures, the object writers data structures, and the raw, POD,
+ // data structures that get written to disk.
+
+ // Copy over the aliased data.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData(
+ dyn_cast<const MCSymbolRefExpr>(Value)->getSymbol());
+
+ // FIXME: This is particularly nasty because it breaks as soon as any data
+ // members of MCSymbolData change.
+ SD.CommonAlign = RealSD.CommonAlign;
+ SD.CommonSize = RealSD.CommonSize;
+ SD.Flags = RealSD.Flags;
+ SD.Fragment = RealSD.Fragment;
+ SD.Index = RealSD.Index;
+ SD.IsExternal = RealSD.IsExternal;
+ SD.IsPrivateExtern = RealSD.IsPrivateExtern;
+ SD.Offset = RealSD.Offset;
+ SD.SymbolSize = RealSD.SymbolSize;
+ }
+}
+
+void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ assert(Symbol && "Symbol must be non-null!");
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ switch (Attribute) {
+ case MCSA_WeakReference:
+ case MCSA_Weak: {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+ SD.setExternal(true);
+ }
+ break;
+
+ case MCSA_Global:
+ getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+ break;
+
+ default:
+ llvm_unreachable("unsupported attribute");
+ break;
+ }
+}
+
+void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
+ "to BeginCOFFSymbolDef!");
+ CurSymbol = Symbol;
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
+ "the first byte!");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ StorageClass << COFF::SF_ClassShift,
+ COFF::SF_ClassMask);
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
+ "bytes");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ Type << COFF::SF_TypeShift,
+ COFF::SF_TypeMask);
+}
+
+void WinCOFFStreamer::EndCOFFSymbolDef() {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ CurSymbol = NULL;
+}
+
+void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ AddCommonSymbol(Symbol, Size, ByteAlignment, true);
+}
+
+void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ AddCommonSymbol(Symbol, Size, 1, false);
+}
+
+void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+ // Ignore for now; linkers don't care, and proper debug
+ // info will be a much larger effort.
+}
+
+void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
+ for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
+ if (Instruction.getOperand(i).isExpr())
+ AddValueSymbols(Instruction.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ MCInstFragment *Fragment =
+ new MCInstFragment(Instruction, getCurrentSectionData());
+
+ raw_svector_ostream VecOS(Fragment->getCode());
+
+ getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS,
+ Fragment->getFixups());
+}
+
+void WinCOFFStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+}
+
+void WinCOFFStreamer::Finish() {
+ EmitW64Tables();
+ MCObjectStreamer::Finish();
+}
+
+namespace llvm
+{
+ MCStreamer *createWinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS,
+ bool RelaxAll) {
+ WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS);
+ S->getAssembler().setRelaxAll(RelaxAll);
+ return S;
+ }
+}
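
A hedged sketch of how the streamer factory above might be wired up (not part of the commit): the context, asm backend, and code emitter are assumed to come from the usual target-registry plumbing, which this file does not show, so they are taken as parameters here.

// Illustrative only: forwards prerequisites the caller already owns to the
// factory defined above; RelaxAll is passed through to the MCAssembler.
// Assumes the createWinCOFFStreamer declaration above is visible.
static llvm::MCStreamer *makeCOFFStreamer(llvm::MCContext &Ctx,
                                          llvm::TargetAsmBackend &TAB,
                                          llvm::MCCodeEmitter &CE,
                                          llvm::raw_ostream &OS,
                                          bool RelaxAll) {
  return llvm::createWinCOFFStreamer(Ctx, TAB, CE, OS, RelaxAll);
}
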
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
new file mode 100644
index 000000000000..4b31c7557dd3
--- /dev/null
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -0,0 +1,96 @@
+//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Binary class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Binary.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+// Include headers for createBinary.
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/COFF.h"
+
+using namespace llvm;
+using namespace object;
+
+Binary::~Binary() {
+ delete Data;
+}
+
+Binary::Binary(unsigned int Type, MemoryBuffer *Source)
+ : TypeID(Type)
+ , Data(Source) {}
+
+StringRef Binary::getData() const {
+ return Data->getBuffer();
+}
+
+StringRef Binary::getFileName() const {
+ return Data->getBufferIdentifier();
+}
+
+error_code object::createBinary(MemoryBuffer *Source,
+ OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> scopedSource(Source);
+ if (!Source)
+ return make_error_code(errc::invalid_argument);
+ if (Source->getBufferSize() < 64)
+ return object_error::invalid_file_type;
+ sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(),
+ static_cast<unsigned>(Source->getBufferSize()));
+ error_code ec;
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createELFObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createMachOObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::COFF_FileType: {
+ OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec));
+ if (ec) return ec;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ default: // Unrecognized object file format.
+ return object_error::invalid_file_type;
+ }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(Path, File))
+ return ec;
+ return createBinary(File.take(), Result);
+}
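
A small usage sketch for the createBinary entry points above (hypothetical caller, not part of the commit): open a file by path and report the identifier the Binary keeps from its MemoryBuffer, forwarding any error_code unchanged.

#include "llvm/Object/Binary.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical caller; printBinaryName is not an LLVM API.
static llvm::error_code printBinaryName(llvm::StringRef Path,
                                        llvm::raw_ostream &OS) {
  llvm::OwningPtr<llvm::object::Binary> Bin;
  if (llvm::error_code ec = llvm::object::createBinary(Path, Bin))
    return ec;                        // unreadable or unrecognized file
  OS << Bin->getFileName() << "\n";   // buffer identifier, see getFileName()
  return llvm::object::object_error::success;
}
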
diff --git a/contrib/llvm/lib/Object/CMakeLists.txt b/contrib/llvm/lib/Object/CMakeLists.txt
new file mode 100644
index 000000000000..6a6814fd37d9
--- /dev/null
+++ b/contrib/llvm/lib/Object/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMObject
+ MachOObject.cpp
+ ObjectFile.cpp
+ COFFObjectFile.cpp
+ ELFObjectFile.cpp
+ )
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
new file mode 100644
index 000000000000..07de6bc99973
--- /dev/null
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -0,0 +1,455 @@
+//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFF.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+using support::ulittle8_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+using support::little16_t;
+}
+
+namespace {
+// Returns false and sets ec if size is greater than the buffer size.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+ if (m->getBufferSize() < size) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
+
+// Returns false if any bytes in [addr, addr + size) fall outside of m.
+bool checkAddr(const MemoryBuffer *m,
+ error_code &ec,
+ uintptr_t addr,
+ uint64_t size) {
+ if (addr + size < addr ||
+ addr + size < size ||
+ addr + size > uintptr_t(m->getBufferEnd())) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
+}
+
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+ const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
+
+# ifndef NDEBUG
+ // Verify that the symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(base());
+ if (offset < Header->PointerToSymbolTable
+ || offset >= Header->PointerToSymbolTable
+ + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+ report_fatal_error("Symbol was outside of symbol table.");
+
+ assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+ == 0 && "Symbol did not point to the beginning of a symbol");
+# endif
+
+ return addr;
+}
+
+const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const {
+ const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p);
+
+# ifndef NDEBUG
+ // Verify that the section points to a valid entry in the section table.
+ if (addr < SectionTable
+ || addr >= (SectionTable + Header->NumberOfSections))
+ report_fatal_error("Section was outside of section table.");
+
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable);
+ assert(offset % sizeof(coff_section) == 0 &&
+ "Section did not point to the beginning of a section");
+# endif
+
+ return addr;
+}
+
+error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ symb += 1 + symb->NumberOfAuxSymbols;
+ Symb.p = reinterpret_cast<uintptr_t>(symb);
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ // Check for string table entry. First 4 bytes are 0.
+ if (symb->Name.Offset.Zeroes == 0) {
+ uint32_t Offset = symb->Name.Offset.Offset;
+ if (error_code ec = getString(Offset, Result))
+ return ec;
+ return object_error::success;
+ }
+
+ if (symb->Name.ShortName[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Result = StringRef(symb->Name.ShortName);
+ else
+ // Not null terminated, use all 8 bytes.
+ Result = StringRef(symb->Name.ShortName, 8);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->VirtualAddress + symb->Value;
+ else
+ Result = symb->Value;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
+ // FIXME: Return the correct size. This requires looking at all the symbols
+ // in the same section as this symbol, and looking for either the next
+ // symbol, or the end of the section.
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->SizeOfRawData - symb->Value;
+ else
+ Result = 0;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ char ret = StringSwitch<char>(name)
+ .StartsWith(".debug", 'N')
+ .StartsWith(".sxdata", 'N')
+ .Default('?');
+
+ if (ret != '?') {
+ Result = ret;
+ return object_error::success;
+ }
+
+ uint32_t Characteristics = 0;
+ if (symb->SectionNumber > 0) {
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ Characteristics = Section->Characteristics;
+ }
+
+ switch (symb->SectionNumber) {
+ case COFF::IMAGE_SYM_UNDEFINED:
+ // Check storage classes.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+ Result = 'w';
+ return object_error::success; // Don't do ::toupper.
+ } else
+ ret = 'u';
+ break;
+ case COFF::IMAGE_SYM_ABSOLUTE:
+ ret = 'a';
+ break;
+ case COFF::IMAGE_SYM_DEBUG:
+ ret = 'n';
+ break;
+ default:
+ // Check section type.
+ if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+ ret = 't';
+ else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ
+ && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+ ret = 'r';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ ret = 'd';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ ret = 'b';
+ else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
+ ret = 'i';
+
+ // Check for section symbol.
+ else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
+ && symb->Value == 0)
+ ret = 's';
+ }
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ ret = ::toupper(ret);
+
+ Result = ret;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
+ Result = false;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
+ SectionRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ sec += 1;
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ StringRef name;
+ if (sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ name = sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ name = StringRef(sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (name[0] == '/') {
+ uint32_t Offset;
+ name.substr(1).getAsInteger(10, Offset);
+ if (error_code ec = getString(Offset, name))
+ return ec;
+ }
+
+ Result = name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->VirtualAddress;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->SizeOfRawData;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ // The only thing that we need to verify is that the contents are contained
+ // within the file bounds. We don't need to make sure it doesn't cover other
+ // data, as there's nothing that says that is not allowed.
+ uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
+ uintptr_t con_end = con_start + sec->SizeOfRawData;
+ if (con_end >= uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Result = StringRef(reinterpret_cast<const char*>(con_start),
+ sec->SizeOfRawData);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
+ : ObjectFile(Binary::isCOFF, Object, ec) {
+ // Check that we at least have enough room for a header.
+ if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
+
+ // The actual starting location of the COFF header in the file. This can be
+ // non-zero in PE/COFF files.
+ uint64_t HeaderStart = 0;
+
+ // Check if this is a PE/COFF file.
+ if (base()[0] == 0x4d && base()[1] == 0x5a) {
+ // PE/COFF, seek through MS-DOS compatibility stub and 4-byte
+ // PE signature to find 'normal' COFF header.
+ if (!checkSize(Data, ec, 0x3c + 8)) return;
+ HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+ // Check the PE header. ("PE\0\0")
+ if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ HeaderStart += 4; // Skip the PE Header.
+ }
+
+ Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
+ if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
+ return;
+
+ SectionTable =
+ reinterpret_cast<const coff_section *>( base()
+ + HeaderStart
+ + sizeof(coff_file_header)
+ + Header->SizeOfOptionalHeader);
+ if (!checkAddr(Data, ec, uintptr_t(SectionTable),
+ Header->NumberOfSections * sizeof(coff_section)))
+ return;
+
+ SymbolTable =
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
+
+ // Find string table.
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
+
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if it has anything in it.
+ if (StringTableSize < 4
+ || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+ ec = object_error::parse_failed;
+ return;
+ }
+
+ ec = object_error::success;
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(SymbolTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
+ // The symbol table ends where the string table begins.
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(StringTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(SectionTable);
+ return section_iterator(SectionRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::end_sections() const {
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
+ return section_iterator(SectionRef(ret, this));
+}
+
+uint8_t COFFObjectFile::getBytesInAddress() const {
+ return getArch() == Triple::x86_64 ? 8 : 4;
+}
+
+StringRef COFFObjectFile::getFileFormatName() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return "COFF-i386";
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return "COFF-x86-64";
+ default:
+ return "COFF-<unknown arch>";
+ }
+}
+
+unsigned COFFObjectFile::getArch() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return Triple::x86;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+error_code COFFObjectFile::getSection(int32_t index,
+ const coff_section *&Result) const {
+ // Check for special index values.
+ if (index == COFF::IMAGE_SYM_UNDEFINED ||
+ index == COFF::IMAGE_SYM_ABSOLUTE ||
+ index == COFF::IMAGE_SYM_DEBUG)
+ Result = NULL;
+ else if (index > 0 && index <= Header->NumberOfSections)
+ // We already verified the section table data, so no need to check again.
+ Result = SectionTable + (index - 1);
+ else
+ return object_error::parse_failed;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getString(uint32_t offset,
+ StringRef &Result) const {
+ if (StringTableSize <= 4)
+ // Tried to get a string from an empty string table.
+ return object_error::parse_failed;
+ if (offset >= StringTableSize)
+ return object_error::unexpected_eof;
+ Result = StringRef(StringTable + offset);
+ return object_error::success;
+}
+
+namespace llvm {
+
+ ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
+ error_code ec;
+ return new COFFObjectFile(Object, ec);
+ }
+
+} // end namespace llvm
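
A brief, hedged example for the factory at the end of this file (illustrative only): build a COFF reader over an already-loaded buffer and report the format string from the accessors implemented above. Note that the factory discards the constructor's error_code, so a caller written this way cannot observe parse failures; describeCOFF is a hypothetical name.

// Illustrative helper over the factory above. Takes ownership of Buf,
// since the Binary destructor deletes the underlying MemoryBuffer.
static llvm::StringRef describeCOFF(llvm::MemoryBuffer *Buf) {
  llvm::OwningPtr<llvm::object::ObjectFile> Obj(
      llvm::object::ObjectFile::createCOFFObjectFile(Buf));
  // e.g. "COFF-i386" or "COFF-x86-64"; these are string literals, so the
  // returned StringRef outlives Obj.
  return Obj->getFileFormatName();
}
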
diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp
new file mode 100644
index 000000000000..e2ff4dfc0384
--- /dev/null
+++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -0,0 +1,740 @@
+//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <limits>
+#include <utility>
+
+using namespace llvm;
+using namespace object;
+
+// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
+namespace {
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelperCommon {
+ typedef support::detail::packed_endian_specific_integral
+ <uint16_t, target_endianness, support::aligned> Elf_Half;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Word;
+ typedef support::detail::packed_endian_specific_integral
+ <int32_t, target_endianness, support::aligned> Elf_Sword;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Xword;
+ typedef support::detail::packed_endian_specific_integral
+ <int64_t, target_endianness, support::aligned> Elf_Sxword;
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct ELFDataTypeTypedefHelper;
+
+/// ELF 32bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, false>
+ : ELFDataTypeTypedefHelperCommon<target_endianness> {
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Off;
+};
+
+/// ELF 64bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, true>
+ : ELFDataTypeTypedefHelperCommon<target_endianness>{
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Off;
+};
+}
+
+// I really don't like doing this, but the alternative is copypasta.
+#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
+
+ // Section header.
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Word sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Word sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Word sh_addralign;// Section address alignment
+ Elf_Word sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Xword sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Xword sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Xword sh_addralign;// Section address alignment
+ Elf_Xword sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
+
+ /// @brief Get the number of entities this section contains if it has any.
+ unsigned getEntityCount() const {
+ if (sh_entsize == 0)
+ return 0;
+ return sh_size / sh_entsize;
+ }
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word st_name; // Symbol name (index into string table)
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Word st_size; // Size of the symbol
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+};
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word st_name; // Symbol name (index into string table)
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Xword st_size; // Size of the symbol
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
+ using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
+
+ // These accessors and mutators correspond to the ELF32_ST_BIND,
+ // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
+ unsigned char getBinding() const { return st_info >> 4; }
+ unsigned char getType() const { return st_info & 0x0f; }
+ void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+ void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+ void setBindingAndType(unsigned char b, unsigned char t) {
+ st_info = (b << 4) + (t & 0x0f);
+ }
+};
+}
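+// Worked example of the st_info packing above (sketch, not part of the
+// original file): with STB_GLOBAL == 1 and STT_FUNC == 2,
+// setBindingAndType(ELF::STB_GLOBAL, ELF::STT_FUNC) stores st_info == 0x12,
+// and getBinding()/getType() recover the two nibbles again.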
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+class ELFObjectFile : public ObjectFile {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+
+ typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
+ typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
+
+ struct Elf_Ehdr {
+ unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
+ Elf_Half e_type; // Type of file (see ET_*)
+ Elf_Half e_machine; // Required architecture for this file (see EM_*)
+ Elf_Word e_version; // Must be equal to 1
+ Elf_Addr e_entry; // Address to jump to in order to start program
+ Elf_Off e_phoff; // Program header table's file offset, in bytes
+ Elf_Off e_shoff; // Section header table's file offset, in bytes
+ Elf_Word e_flags; // Processor-specific flags
+ Elf_Half e_ehsize; // Size of ELF header, in bytes
+ Elf_Half e_phentsize;// Size of an entry in the program header table
+ Elf_Half e_phnum; // Number of entries in the program header table
+ Elf_Half e_shentsize;// Size of an entry in the section header table
+ Elf_Half e_shnum; // Number of entries in the section header table
+ Elf_Half e_shstrndx; // Section header table index of section name
+ // string table
+ bool checkMagic() const {
+ return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+ }
+ unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
+ unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+ };
+
+ typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t;
+
+ const Elf_Ehdr *Header;
+ const Elf_Shdr *SectionHeaderTable;
+ const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
+ const Elf_Shdr *dot_strtab_sec; // Symbol string table (.strtab).
+ SymbolTableSections_t SymbolTableSections;
+
+ void validateSymbol(DataRefImpl Symb) const;
+ const Elf_Sym *getSymbol(DataRefImpl Symb) const;
+ const Elf_Shdr *getSection(DataRefImpl index) const;
+ const Elf_Shdr *getSection(uint16_t index) const;
+ const char *getString(uint16_t section, uint32_t offset) const;
+ const char *getString(const Elf_Shdr *section, uint32_t offset) const;
+
+protected:
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const;
+
+public:
+ ELFObjectFile(MemoryBuffer *Object, error_code &ec);
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+};
+} // end namespace
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>
+ ::validateSymbol(DataRefImpl Symb) const {
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+ // FIXME: We really need to do proper error handling in the case of an invalid
+ // input file. Because we don't use exceptions, I think we'll just pass
+ // an error object around.
+ if (!( symb
+ && SymbolTableSection
+ && symb >= (const Elf_Sym*)(base()
+ + SymbolTableSection->sh_offset)
+ && symb < (const Elf_Sym*)(base()
+ + SymbolTableSection->sh_offset
+ + SymbolTableSection->sh_size)))
+ // FIXME: Proper error handling.
+ report_fatal_error("Symb must point to a valid symbol!");
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ validateSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+
+ ++Symb.d.a;
+ // Check to see if we are at the end of this symbol table.
+ if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
+ // We are at the end. If there are other symbol tables, jump to them.
+ ++Symb.d.b;
+ Symb.d.a = 1; // The 0th symbol in ELF is fake.
+ // Otherwise return the terminator.
+ if (Symb.d.b >= SymbolTableSections.size()) {
+ Symb.d.a = std::numeric_limits<uint32_t>::max();
+ Symb.d.b = std::numeric_limits<uint32_t>::max();
+ }
+ }
+
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ if (symb->st_name == 0) {
+ const Elf_Shdr *section = getSection(symb->st_shndx);
+ if (!section)
+ Result = "";
+ else
+ Result = getString(dot_shstrtab_sec, section->sh_name);
+ return object_error::success;
+ }
+
+ // Use the default string table for symbol names (.strtab).
+ Result = getString(dot_strtab_sec, symb->st_name);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section;
+ switch (symb->st_shndx) {
+ case ELF::SHN_COMMON:
+ // Undefined symbols have no address yet.
+ case ELF::SHN_UNDEF:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::SHN_ABS:
+ Result = symb->st_value;
+ return object_error::success;
+ default: Section = getSection(symb->st_shndx);
+ }
+
+ switch (symb->getType()) {
+ case ELF::STT_SECTION:
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
+ case ELF::STT_NOTYPE:
+ Result = symb->st_value;
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ if (symb->st_size == 0)
+ Result = UnknownAddressOrSize;
+ else
+ Result = symb->st_size;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section = getSection(symb->st_shndx);
+
+ char ret = '?';
+
+ if (Section) {
+ switch (Section->sh_type) {
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_DYNAMIC:
+ switch (Section->sh_flags) {
+ case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
+ ret = 't'; break;
+ case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
+ ret = 'd'; break;
+ case ELF::SHF_ALLOC:
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
+ ret = 'r'; break;
+ }
+ break;
+ case ELF::SHT_NOBITS: ret = 'b';
+ }
+ }
+
+ switch (symb->st_shndx) {
+ case ELF::SHN_UNDEF:
+ if (ret == '?')
+ ret = 'U';
+ break;
+ case ELF::SHN_ABS: ret = 'a'; break;
+ case ELF::SHN_COMMON: ret = 'c'; break;
+ }
+
+ switch (symb->getBinding()) {
+ case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
+ case ELF::STB_WEAK:
+ if (symb->st_shndx == ELF::SHN_UNDEF)
+ ret = 'w';
+ else
+ if (symb->getType() == ELF::STT_OBJECT)
+ ret = 'V';
+ else
+ ret = 'W';
+ }
+
+ if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ Result = StringSwitch<char>(name)
+ .StartsWith(".debug", 'N')
+ .StartsWith(".note", 'n')
+ .Default('?');
+ return object_error::success;
+ }
+
+ Result = ret;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ if ( symb->getType() == ELF::STT_FILE
+ || symb->getType() == ELF::STT_SECTION)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
+ const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
+ sec += Header->e_shentsize;
+ Sec.p = reinterpret_cast<intptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_addr;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_size;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ const char *start = (const char*)base() + sec->sh_offset;
+ Result = StringRef(start, sec->sh_size);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & ELF::SHF_EXECINSTR)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
+ , error_code &ec)
+ : ObjectFile(Binary::isELF, Object, ec)
+ , SectionHeaderTable(0)
+ , dot_shstrtab_sec(0)
+ , dot_strtab_sec(0) {
+ Header = reinterpret_cast<const Elf_Ehdr *>(base());
+
+ if (Header->e_shoff == 0)
+ return;
+
+ SectionHeaderTable =
+ reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
+ uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
+ if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
+ <= base() + Data->getBufferSize()))
+ // FIXME: Proper error handling.
+ report_fatal_error("Section table goes past end of file!");
+
+ // To find the symbol tables we walk the section table to find SHT_SYMTAB.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + Header->e_shnum * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_SYMTAB) {
+ SymbolTableSections.push_back(sh);
+ }
+ }
+
+ // Get string table sections.
+ dot_shstrtab_sec = getSection(Header->e_shstrndx);
+ if (dot_shstrtab_sec) {
+ // Verify that the last byte in the string table is a null.
+ if (((const char*)base() + dot_shstrtab_sec->sh_offset)
+ [dot_shstrtab_sec->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+ }
+
+ // FIXME: Merge this into the above loop.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + Header->e_shnum * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_STRTAB) {
+ StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
+ if (SectionName == ".strtab") {
+ if (dot_strtab_sec != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("Already found section named .strtab!");
+ dot_strtab_sec = sh;
+ const char *dot_strtab = (const char*)base() + sh->sh_offset;
+ if (dot_strtab[sh->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+ }
+ }
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_symbols() const {
+ DataRefImpl SymbolData;
+ memset(&SymbolData, 0, sizeof(SymbolData));
+ if (SymbolTableSections.size() == 0) {
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ } else {
+ SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
+ SymbolData.d.b = 0;
+ }
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_symbols() const {
+ DataRefImpl SymbolData;
+ memset(&SymbolData, 0, sizeof(SymbolData));
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_sections() const {
+ DataRefImpl ret;
+ memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_sections() const {
+ DataRefImpl ret;
+ memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(base()
+ + Header->e_shoff
+ + (Header->e_shentsize * Header->e_shnum));
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
+ return is64Bits ? 8 : 4;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getFileFormatName() const {
+ switch(Header->e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF32-i386";
+ case ELF::EM_X86_64:
+ return "ELF32-x86-64";
+ default:
+ return "ELF32-unknown";
+ }
+ case ELF::ELFCLASS64:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF64-i386";
+ case ELF::EM_X86_64:
+ return "ELF64-x86-64";
+ default:
+ return "ELF64-unknown";
+ }
+ default:
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid ELFCLASS!");
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return Triple::x86;
+ case ELF::EM_X86_64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
+ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
+ return reinterpret_cast<const Elf_Sym *>(
+ base()
+ + sec->sh_offset
+ + (Symb.d.a * sec->sh_entsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = getSection(Symb.d.b);
+ if (sec->sh_type != ELF::SHT_SYMTAB)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid symbol table section!");
+ return sec;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const {
+ if (index == 0 || index >= ELF::SHN_LORESERVE)
+ return 0;
+ if (!SectionHeaderTable || index >= Header->e_shnum)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid section index!");
+
+ return reinterpret_cast<const Elf_Shdr *>(
+ reinterpret_cast<const char *>(SectionHeaderTable)
+ + (index * Header->e_shentsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(uint16_t section,
+ ELF::Elf32_Word offset) const {
+ return getString(getSection(section), offset);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(const Elf_Shdr *section,
+ ELF::Elf32_Word offset) const {
+ assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
+ if (offset >= section->sh_size)
+ // FIXME: Proper error handling.
+ report_fatal_error("Symbol name offset outside of string table!");
+ return (const char *)base() + section->sh_offset + offset;
+}
+
+// EI_CLASS, EI_DATA.
+static std::pair<unsigned char, unsigned char>
+getElfArchType(MemoryBuffer *Object) {
+ if (Object->getBufferSize() < ELF::EI_NIDENT)
+ return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
+ return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
+ , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
+}
+
+namespace llvm {
+
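+  // Illustrative call path (Buf is a placeholder for a MemoryBuffer the
+  // caller already owns and has identified as ELF):
+  //
+  //   ObjectFile *O = ObjectFile::createELFObjectFile(Buf);
+  //   StringRef Fmt = O->getFileFormatName();  // e.g. "ELF64-x86-64"
+  //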
+ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+ std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ error_code ec;
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, true>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, true>(Object, ec);
+ // FIXME: Proper error handling.
+ report_fatal_error("Not an ELF object file!");
+ }
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
new file mode 100644
index 000000000000..25946257ab5a
--- /dev/null
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -0,0 +1,57 @@
+//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the Object library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class _object_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+}
+
+const char *_object_error_category::name() const {
+ return "llvm.object";
+}
+
+std::string _object_error_category::message(int ev) const {
+ switch (ev) {
+ case object_error::success: return "Success";
+ case object_error::invalid_file_type:
+ return "The file was not recognized as a valid object file";
+ case object_error::parse_failed:
+ return "Invalid data was encountered while parsing the file";
+ case object_error::unexpected_eof:
+ return "The end of the file was unexpectedly encountered";
+ default:
+ llvm_unreachable("An enumerator of object_error does not have a message "
+ "defined.");
+ }
+}
+
+error_condition _object_error_category::default_error_condition(int ev) const {
+ if (ev == object_error::success)
+ return errc::success;
+ return errc::invalid_argument;
+}
+
+const error_category &object::object_category() {
+ static _object_error_category o;
+ return o;
+}
diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp
new file mode 100644
index 000000000000..9890febfb616
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObject.cpp
@@ -0,0 +1,370 @@
+//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+/* Translation Utilities */
+
+template<typename T>
+static void SwapValue(T &Value) {
+ Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<typename T>
+static void ReadInMemoryStruct(const MachOObject &MOO,
+ StringRef Buffer, uint64_t Base,
+ InMemoryStruct<T> &Res) {
+ typedef T struct_type;
+ uint64_t Size = sizeof(struct_type);
+
+ // Check that the buffer contains the expected data.
+ if (Base + Size > Buffer.size()) {
+ Res = 0;
+ return;
+ }
+
+ // Check whether we can return a direct pointer.
+ struct_type *Ptr = (struct_type *) (Buffer.data() + Base);
+ if (!MOO.isSwappedEndian()) {
+ Res = Ptr;
+ return;
+ }
+
+ // Otherwise, copy the struct and translate the values.
+ Res = *Ptr;
+ SwapStruct(*Res);
+}
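+
+// When no byte swapping is required, the InMemoryStruct above aliases the file
+// buffer directly; otherwise the struct is copied and the matching
+// SwapStruct<T> specialization (defined later in this file) fixes the
+// endianness. A usage sketch (object and index names are illustrative):
+//
+//   InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+//   MOO.ReadSymtabLoadCommand(MOO.getLoadCommandInfo(i), SymtabLC);
+//   uint32_t NumSyms = SymtabLC->NumSymbolTableEntries;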
+
+/* *** */
+
+MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
+ bool Is64Bit_)
+ : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
+ IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
+ HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
+ // Load the common header.
+ memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
+ if (IsSwappedEndian) {
+ SwapValue(Header.Magic);
+ SwapValue(Header.CPUType);
+ SwapValue(Header.CPUSubtype);
+ SwapValue(Header.FileType);
+ SwapValue(Header.NumLoadCommands);
+ SwapValue(Header.SizeOfLoadCommands);
+ SwapValue(Header.Flags);
+ }
+
+ if (is64Bit()) {
+ memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
+ sizeof(Header64Ext));
+ if (IsSwappedEndian) {
+ SwapValue(Header64Ext.Reserved);
+ }
+ }
+
+ // Create the load command array if sane.
+ if (getHeader().NumLoadCommands < (1 << 20))
+ LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
+}
+
+MachOObject::~MachOObject() {
+ delete [] LoadCommands;
+}
+
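+// LoadFromBuffer decodes the leading magic bytes as:
+//   FE ED FA CE  big-endian, 32-bit      FE ED FA CF  big-endian, 64-bit
+//   CE FA ED FE  little-endian, 32-bit   CF FA ED FE  little-endian, 64-bit
+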
+MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
+ std::string *ErrorStr) {
+ // First, check the magic value and initialize the basic object info.
+ bool IsLittleEndian = false, Is64Bit = false;
+ StringRef Magic = Buffer->getBuffer().slice(0, 4);
+ if (Magic == "\xFE\xED\xFA\xCE") {
+ } else if (Magic == "\xCE\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ } else if (Magic == "\xFE\xED\xFA\xCF") {
+ Is64Bit = true;
+ } else if (Magic == "\xCF\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ Is64Bit = true;
+ } else {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
+ return 0;
+ }
+
+ // Ensure that at least the full header is present.
+ unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
+ if (Buffer->getBufferSize() < HeaderSize) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
+ return 0;
+ }
+
+ OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
+ Is64Bit));
+
+ // Check for bogus number of load commands.
+ if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
+ return 0;
+ }
+
+ if (ErrorStr) *ErrorStr = "";
+ return Object.take();
+}
+
+StringRef MachOObject::getData(size_t Offset, size_t Size) const {
+ return Buffer->getBuffer().substr(Offset, Size);
+}
+
+void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
+ HasStringTable = true;
+ StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
+ SLC.StringTableSize);
+}
+
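+// Load commands are decoded lazily and cached in order: command N begins where
+// command N-1 ends (the first one right after the header), so requesting a
+// high index fills in every earlier entry on the way.
+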
+const MachOObject::LoadCommandInfo &
+MachOObject::getLoadCommandInfo(unsigned Index) const {
+ assert(Index < getHeader().NumLoadCommands && "Invalid index!");
+
+ // Load the command, if necessary.
+ if (Index >= NumLoadedCommands) {
+ uint64_t Offset;
+ if (Index == 0) {
+ Offset = getHeaderSize();
+ } else {
+ const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
+ Offset = Prev.Offset + Prev.Command.Size;
+ }
+
+ LoadCommandInfo &Info = LoadCommands[Index];
+ memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
+ sizeof(macho::LoadCommand));
+ if (IsSwappedEndian) {
+ SwapValue(Info.Command.Type);
+ SwapValue(Info.Command.Size);
+ }
+ Info.Offset = Offset;
+ NumLoadedCommands = Index + 1;
+ }
+
+ return LoadCommands[Index];
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.SymbolTableOffset);
+ SwapValue(Value.NumSymbolTableEntries);
+ SwapValue(Value.StringTableOffset);
+ SwapValue(Value.StringTableSize);
+}
+void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.LocalSymbolsIndex);
+ SwapValue(Value.NumLocalSymbols);
+ SwapValue(Value.ExternalSymbolsIndex);
+ SwapValue(Value.NumExternalSymbols);
+ SwapValue(Value.UndefinedSymbolsIndex);
+ SwapValue(Value.NumUndefinedSymbols);
+ SwapValue(Value.TOCOffset);
+ SwapValue(Value.NumTOCEntries);
+ SwapValue(Value.ModuleTableOffset);
+ SwapValue(Value.NumModuleTableEntries);
+ SwapValue(Value.ReferenceSymbolTableOffset);
+ SwapValue(Value.NumReferencedSymbolTableEntries);
+ SwapValue(Value.IndirectSymbolTableOffset);
+ SwapValue(Value.NumIndirectSymbolTableEntries);
+ SwapValue(Value.ExternalRelocationTableOffset);
+ SwapValue(Value.NumExternalRelocationTableEntries);
+ SwapValue(Value.LocalRelocationTableOffset);
+ SwapValue(Value.NumLocalRelocationTableEntries);
+}
+void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
+ SwapValue(Value.Index);
+}
+void
+MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
+ unsigned Index,
+ InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
+ uint64_t Offset = (DLC.IndirectSymbolTableOffset +
+ Index * sizeof(macho::IndirectSymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+
+template<>
+void SwapStruct(macho::Section &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+}
+void MachOObject::ReadSection(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ Index * sizeof(macho::Section));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Section64 &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+ SwapValue(Value.Reserved3);
+}
+void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section64> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment64 &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ Index * sizeof(macho::Section64));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::RelocationEntry &Value) {
+ SwapValue(Value.Word0);
+ SwapValue(Value.Word1);
+}
+void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::RelocationEntry> &Res) const {
+ uint64_t Offset = (RelocationTableOffset +
+ Index * sizeof(macho::RelocationEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::SymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::Symbol64TableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+/* *** */
+// Object Dumping Facilities
+void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
+void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
+
+void MachOObject::printHeader(raw_ostream &O) const {
+ O << "('cputype', " << Header.CPUType << ")\n";
+ O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
+ O << "('filetype', " << Header.FileType << ")\n";
+ O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
+ O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
+ O << "('flag', " << Header.Flags << ")\n";
+
+ // Print extended header if 64-bit.
+ if (is64Bit())
+ O << "('reserved', " << Header64Ext.Reserved << ")\n";
+}
+
+void MachOObject::print(raw_ostream &O) const {
+ O << "Header:\n";
+ printHeader(O);
+ O << "Load Commands:\n";
+
+ O << "Buffer:\n";
+}
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
new file mode 100644
index 000000000000..26a6e136d753
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -0,0 +1,469 @@
+//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOObjectFile class, which binds the MachOObject
+// class to the generic ObjectFile wrapper.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MachO.h"
+
+#include <cctype>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+using namespace object;
+
+namespace llvm {
+
+typedef MachOObject::LoadCommandInfo LoadCommandInfo;
+
+class MachOObjectFile : public ObjectFile {
+public:
+ MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec)
+ : ObjectFile(Binary::isMachO, Object, ec),
+ MachOObj(MOO),
+ RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {}
+
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+
+protected:
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
+ bool &Result) const;
+
+private:
+ MachOObject *MachOObj;
+ mutable uint32_t RegisteredStringTable;
+
+ void moveToNextSection(DataRefImpl &DRI) const;
+ void getSymbolTableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+ void getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
+ void moveToNextSymbol(DataRefImpl &DRI) const;
+ void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
+ void getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const;
+};
+
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+ error_code ec;
+ std::string Err;
+ MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
+ if (!MachOObj)
+ return NULL;
+ return new MachOObjectFile(Buffer, MachOObj, ec);
+}
+
+/*===-- Symbols -----------------------------------------------------------===*/
+
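+// DataRefImpl encoding used by this file: d.a indexes the load command being
+// walked and d.b indexes the symbol (or, for sections, the section) within
+// that command; end iterators set d.a to NumLoadCommands.
+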
+void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Symtab) {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+ if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
+ return;
+ }
+
+ DRI.d.a++;
+ DRI.d.b = 0;
+ }
+}
+
+void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
+
+void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
+
+
+error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
+ SymbolRef &Result) const {
+ DRI.d.b++;
+ moveToNextSymbol(DRI);
+ Result = SymbolRef(DRI, this);
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Value;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Value;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
+ char &Result) const {
+ uint8_t Type, Flags;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ }
+
+ char Char;
+ switch (Type & macho::STF_TypeMask) {
+ case macho::STT_Undefined:
+ Char = 'u';
+ break;
+ case macho::STT_Absolute:
+ case macho::STT_Section:
+ Char = 's';
+ break;
+ default:
+ Char = '?';
+ break;
+ }
+
+ if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
+ Char = toupper(Char);
+ Result = Char;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Flags & macho::STF_StabsEntryMask;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Flags & macho::STF_StabsEntryMask;
+ }
+ return object_error::success;
+}
+
+ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
+ // DRI.d.a = load command index; DRI.d.b = symbol index within that command.
+ DataRefImpl DRI;
+ DRI.d.a = DRI.d.b = 0;
+ moveToNextSymbol(DRI);
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const {
+ DataRefImpl DRI;
+ DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+ DRI.d.b = 0;
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+
+/*===-- Sections ----------------------------------------------------------===*/
+
+void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment) {
+ InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
+ MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
+ if (DRI.d.b < SegmentLoadCmd->NumSections)
+ return;
+ } else if (LCI.Command.Type == macho::LCT_Segment64) {
+ InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
+ MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
+ if (DRI.d.b < Segment64LoadCmd->NumSections)
+ return;
+ }
+
+ DRI.d.a++;
+ DRI.d.b = 0;
+ }
+}
+
+error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
+ SectionRef &Result) const {
+ DRI.d.b++;
+ moveToNextSection(DRI);
+ Result = SectionRef(DRI, this);
+ return object_error::success;
+}
+
+void
+MachOObjectFile::getSection(DataRefImpl DRI,
+ InMemoryStruct<macho::Section> &Res) const {
+ InMemoryStruct<macho::SegmentLoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+ MachOObj->ReadSection(LCI, DRI.d.b, Res);
+}
+
+void
+MachOObjectFile::getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const {
+ InMemoryStruct<macho::Segment64LoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+ MachOObj->ReadSection64(LCI, DRI.d.b, Res);
+}
+
+static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment64)
+ return true;
+ assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
+ return false;
+}
+
+error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
+ StringRef &Result) const {
+ // FIXME: thread safety.
+ static char result[34];
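+  // 34 bytes = 16-byte segment name + ',' + 16-byte section name + NUL. This
+  // assumes both fixed-width Mach-O name fields are NUL-terminated, which
+  // holds for names shorter than 16 characters.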
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Segment64LoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+ InMemoryStruct<macho::Section64> Sect;
+ MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
+
+ strcpy(result, Sect->SegmentName);
+ strcat(result, ",");
+ strcat(result, Sect->Name);
+ } else {
+ InMemoryStruct<macho::SegmentLoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+ InMemoryStruct<macho::Section> Sect;
+ MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+
+ strcpy(result, Sect->SegmentName);
+ strcat(result, ",");
+ strcat(result, Sect->Name);
+ }
+ Result = StringRef(result);
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Address;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Address;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Size;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Size;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
+ bool &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = !strcmp(Sect->Name, "__text");
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = !strcmp(Sect->Name, "__text");
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+ }
+ return object_error::success;
+}
+
+ObjectFile::section_iterator MachOObjectFile::begin_sections() const {
+ DataRefImpl DRI;
+ DRI.d.a = DRI.d.b = 0;
+ moveToNextSection(DRI);
+ return section_iterator(SectionRef(DRI, this));
+}
+
+ObjectFile::section_iterator MachOObjectFile::end_sections() const {
+ DataRefImpl DRI;
+ DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+ DRI.d.b = 0;
+ return section_iterator(SectionRef(DRI, this));
+}
+
+/*===-- Miscellaneous -----------------------------------------------------===*/
+
+uint8_t MachOObjectFile::getBytesInAddress() const {
+ return MachOObj->is64Bit() ? 8 : 4;
+}
+
+StringRef MachOObjectFile::getFileFormatName() const {
+ if (!MachOObj->is64Bit()) {
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ return "Mach-O 32-bit i386";
+ case llvm::MachO::CPUTypeARM:
+ return "Mach-O arm";
+ case llvm::MachO::CPUTypePowerPC:
+ return "Mach-O 32-bit ppc";
+ default:
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+ "64-bit object file when we're not 64-bit?");
+ return "Mach-O 32-bit unknown";
+ }
+ }
+
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeX86_64:
+ return "Mach-O 64-bit x86-64";
+ case llvm::MachO::CPUTypePowerPC64:
+ return "Mach-O 64-bit ppc64";
+ default:
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) != 0 &&
+ "32-bit object file when we're 64-bit?");
+ return "Mach-O 64-bit unknown";
+ }
+}
+
+unsigned MachOObjectFile::getArch() const {
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ return Triple::x86;
+ case llvm::MachO::CPUTypeX86_64:
+ return Triple::x86_64;
+ case llvm::MachO::CPUTypeARM:
+ return Triple::arm;
+ case llvm::MachO::CPUTypePowerPC:
+ return Triple::ppc;
+ case llvm::MachO::CPUTypePowerPC64:
+ return Triple::ppc64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+} // end namespace llvm
+
diff --git a/contrib/llvm/lib/Object/Makefile b/contrib/llvm/lib/Object/Makefile
new file mode 100644
index 000000000000..79388dc97f1a
--- /dev/null
+++ b/contrib/llvm/lib/Object/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Object/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMObject
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
new file mode 100644
index 000000000000..9a373ad21bd2
--- /dev/null
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -0,0 +1,68 @@
+//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings to the file-format-independent object
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm-c/Object.h"
+
+using namespace llvm;
+using namespace object;
+
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
+ return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
+}
+
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
+ delete unwrap(ObjectFile);
+}
+
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
+ ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections();
+ return wrap(new ObjectFile::section_iterator(SI));
+}
+
+void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSectionIteratorRef SI) {
+ return (*unwrap(SI) == unwrap(ObjectFile)->end_sections()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
+}
+
+const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getContents(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
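+
+// Sketch of the C API from a client's perspective (buffer creation and error
+// handling elided; MemBuf is a placeholder LLVMMemoryBufferRef):
+//
+//   LLVMObjectFileRef OF = LLVMCreateObjectFile(MemBuf);
+//   LLVMSectionIteratorRef SI = LLVMGetSections(OF);
+//   while (!LLVMIsSectionIteratorAtEnd(OF, SI)) {
+//     const char *Name = LLVMGetSectionName(SI);
+//     uint64_t Size = LLVMGetSectionSize(SI);
+//     LLVMMoveToNextSection(SI);
+//   }
+//   LLVMDisposeSectionIterator(SI);
+//   LLVMDisposeObjectFile(OF);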
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
new file mode 100644
index 000000000000..a7798df33fe5
--- /dev/null
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -0,0 +1,61 @@
+//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a file format independent ObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+
+using namespace llvm;
+using namespace object;
+
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ : Binary(Type, source) {
+}
+
+ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
+ if (!Object || Object->getBufferSize() < 64)
+ return 0;
+ sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(),
+ static_cast<unsigned>(Object->getBufferSize()));
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ return createELFObjectFile(Object);
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ return createMachOObjectFile(Object);
+ case sys::COFF_FileType:
+ return createCOFFObjectFile(Object);
+ default:
+ llvm_unreachable("Unknown Object File Type");
+ }
+}
+
+ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(ObjectPath, File))
+ return NULL;
+ return createObjectFile(File.take());
+}
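+
+// Illustrative use of the format-independent interface ("a.out" is a
+// placeholder path; error handling omitted):
+//
+//   error_code ec;
+//   ObjectFile *O = ObjectFile::createObjectFile("a.out");
+//   for (ObjectFile::section_iterator I = O->begin_sections(),
+//                                     E = O->end_sections();
+//        I != E; I.increment(ec)) {
+//     StringRef Name;
+//     I->getName(Name);
+//   }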
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
new file mode 100644
index 000000000000..c64da6e137ea
--- /dev/null
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -0,0 +1,3616 @@
+//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision floating
+// point values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits.h>
+#include <cstring>
+
+using namespace llvm;
+
+#define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
+
+/* Assumed in hexadecimal significand parsing, and conversion to
+ hexadecimal strings. */
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
+
+namespace llvm {
+
+ /* Represents floating point arithmetic semantics. */
+ struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ exponent_t maxExponent;
+
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ exponent_t minExponent;
+
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
+
+ /* True if arithmetic is supported. */
+ unsigned int arithmeticOK;
+ };
+
+ const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
+ const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
+ const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
+ const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
+ const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
+ const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
+
+ // The PowerPC format consists of two doubles. It does not map cleanly
+ // onto the usual format above. For now only storage of constants of
+ // this type is supported, no arithmetic.
+ const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
+
+ /* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 815 / (351 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+ const unsigned int maxExponent = 16383;
+ const unsigned int maxPrecision = 113;
+ const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+ const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
+ / (351 * integerPartWidth));
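+
+ /* Worked out for a 64-bit integerPart (the usual case): the power bound is
+    16383 + 113 - 1 = 16495, so maxPowerOfFiveParts is
+    2 + (16495 * 815) / (351 * 64) = 2 + 598 = 600 parts. */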
+}
+
+/* A bunch of private, handy routines. */
+
+static inline unsigned int
+partCountForBits(unsigned int bits)
+{
+ return ((bits) + integerPartWidth - 1) / integerPartWidth;
+}
+
+/* Returns 0U-9U. Return values >= 10U are not digits. */
+static inline unsigned int
+decDigitValue(unsigned int c)
+{
+ return c - '0';
+}
+
+static unsigned int
+hexDigitValue(unsigned int c)
+{
+ unsigned int r;
+
+ r = c - '0';
+ if (r <= 9)
+ return r;
+
+ r = c - 'A';
+ if (r <= 5)
+ return r + 10;
+
+ r = c - 'a';
+ if (r <= 5)
+ return r + 10;
+
+ return -1U;
+}
+
+static inline void
+assertArithmeticOK(const llvm::fltSemantics &semantics) {
+ assert(semantics.arithmeticOK &&
+ "Compile-time arithmetic does not support these semantics");
+}
+
+/* Return the value of a decimal exponent of the form
+ [+-]ddddddd.
+
+ If the exponent overflows, returns a large exponent with the
+ appropriate sign. */
+static int
+readExponent(StringRef::iterator begin, StringRef::iterator end)
+{
+ bool isNegative;
+ unsigned int absExponent;
+ const unsigned int overlargeExponent = 24000; /* FIXME. */
+ StringRef::iterator p = begin;
+
+ assert(p != end && "Exponent has no digits");
+
+ isNegative = (*p == '-');
+ if (*p == '-' || *p == '+') {
+ p++;
+ assert(p != end && "Exponent has no digits");
+ }
+
+ absExponent = decDigitValue(*p++);
+ assert(absExponent < 10U && "Invalid character in exponent");
+
+ for (; p != end; ++p) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ assert(value < 10U && "Invalid character in exponent");
+
+ value += absExponent * 10;
+ if (absExponent >= overlargeExponent) {
+ absExponent = overlargeExponent;
+ p = end; /* outwit assert below */
+ break;
+ }
+ absExponent = value;
+ }
+
+ assert(p == end && "Invalid exponent in exponent");
+
+ if (isNegative)
+ return -(int) absExponent;
+ else
+ return (int) absExponent;
+}
+
+/* This is ugly and needs cleaning up, but I don't immediately see
+ how whilst remaining safe. */
+static int
+totalExponent(StringRef::iterator p, StringRef::iterator end,
+ int exponentAdjustment)
+{
+ int unsignedExponent;
+ bool negative, overflow;
+ int exponent = 0;
+
+ assert(p != end && "Exponent has no digits");
+
+ negative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ assert(p != end && "Exponent has no digits");
+ }
+
+ unsignedExponent = 0;
+ overflow = false;
+ for (; p != end; ++p) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ assert(value < 10U && "Invalid character in exponent");
+
+ unsignedExponent = unsignedExponent * 10 + value;
+ if (unsignedExponent > 32767)
+ overflow = true;
+ }
+
+ if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
+ overflow = true;
+
+ if (!overflow) {
+ exponent = unsignedExponent;
+ if (negative)
+ exponent = -exponent;
+ exponent += exponentAdjustment;
+ if (exponent > 32767 || exponent < -32768)
+ overflow = true;
+ }
+
+ if (overflow)
+ exponent = negative ? -32768: 32767;
+
+ return exponent;
+}
+
+static StringRef::iterator
+skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
+ StringRef::iterator *dot)
+{
+ StringRef::iterator p = begin;
+ *dot = end;
+ while (p != end && *p == '0')
+ p++;
+
+ if (p != end && *p == '.') {
+ *dot = p++;
+
+ assert(end - begin != 1 && "Significand has no digits");
+
+ while (p != end && *p == '0')
+ p++;
+ }
+
+ return p;
+}
+
+/* Given a normal decimal floating point number of the form
+
+ dddd.dddd[eE][+-]ddd
+
+ where the decimal point and exponent are optional, fill out the
+ structure D. Exponent is appropriate if the significand is
+ treated as an integer, and normalizedExponent if the significand
+ is taken to have the decimal point after a single leading
+ non-zero digit.
+
+ If the value is zero, V->firstSigDigit points to a non-digit, and
+ the return exponent is zero.
+*/
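+/* For example, interpreting "12.34e5" leaves firstSigDigit at the '1', with
+   exponent == 3 (i.e. 1234 * 10^3) and normalizedExponent == 6
+   (i.e. 1.234 * 10^6). */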
+struct decimalInfo {
+ const char *firstSigDigit;
+ const char *lastSigDigit;
+ int exponent;
+ int normalizedExponent;
+};
+
+static void
+interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
+ decimalInfo *D)
+{
+ StringRef::iterator dot = end;
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
+
+ D->firstSigDigit = p;
+ D->exponent = 0;
+ D->normalizedExponent = 0;
+
+ for (; p != end; ++p) {
+ if (*p == '.') {
+ assert(dot == end && "String contains multiple dots");
+ dot = p++;
+ if (p == end)
+ break;
+ }
+ if (decDigitValue(*p) >= 10U)
+ break;
+ }
+
+ if (p != end) {
+ assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* p points to the first non-digit in the string */
+ D->exponent = readExponent(p + 1, end);
+
+ /* Implied decimal point? */
+ if (dot == end)
+ dot = p;
+ }
+
+ /* If number is all zeroes accept any exponent. */
+ if (p != D->firstSigDigit) {
+ /* Drop insignificant trailing zeroes. */
+ if (p != begin) {
+ do
+ do
+ p--;
+ while (p != begin && *p == '0');
+ while (p != begin && *p == '.');
+ }
+
+ /* Adjust the exponents for any decimal point. */
+ D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
+ D->normalizedExponent = (D->exponent +
+ static_cast<exponent_t>((p - D->firstSigDigit)
+ - (dot > D->firstSigDigit && dot < p)));
+ }
+
+ D->lastSigDigit = p;
+}
+
+/* Return the trailing fraction of a hexadecimal number.
+ DIGITVALUE is the first hex digit of the fraction, P points to
+ the next digit. */
+static lostFraction
+trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
+ unsigned int digitValue)
+{
+ unsigned int hexDigit;
+
+ /* If the first trailing digit isn't 0 or 8 we can work out the
+ fraction immediately. */
+ if (digitValue > 8)
+ return lfMoreThanHalf;
+ else if (digitValue < 8 && digitValue > 0)
+ return lfLessThanHalf;
+
+ /* Otherwise we need to find the first non-zero digit. */
+ while (*p == '0')
+ p++;
+
+ assert(p != end && "Invalid trailing hexadecimal fraction!");
+
+ hexDigit = hexDigitValue(*p);
+
+ /* If we ran off the end it is exactly zero or one-half, otherwise
+ a little more. */
+ if (hexDigit == -1U)
+ return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
+ else
+ return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
+}
+
+/* Return the fraction lost were a bignum truncated losing the least
+ significant BITS bits. */
+static lostFraction
+lostFractionThroughTruncation(const integerPart *parts,
+ unsigned int partCount,
+ unsigned int bits)
+{
+ unsigned int lsb;
+
+ lsb = APInt::tcLSB(parts, partCount);
+
+ /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
+ if (bits <= lsb)
+ return lfExactlyZero;
+ if (bits == lsb + 1)
+ return lfExactlyHalf;
+ if (bits <= partCount * integerPartWidth &&
+ APInt::tcExtractBit(parts, bits - 1))
+ return lfMoreThanHalf;
+
+ return lfLessThanHalf;
+}
+
+/* Shift DST right BITS bits noting lost fraction. */
+static lostFraction
+shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
+{
+ lostFraction lost_fraction;
+
+ lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
+
+ APInt::tcShiftRight(dst, parts, bits);
+
+ return lost_fraction;
+}
+
+/* Combine the effect of two lost fractions. */
+static lostFraction
+combineLostFractions(lostFraction moreSignificant,
+ lostFraction lessSignificant)
+{
+ if (lessSignificant != lfExactlyZero) {
+ if (moreSignificant == lfExactlyZero)
+ moreSignificant = lfLessThanHalf;
+ else if (moreSignificant == lfExactlyHalf)
+ moreSignificant = lfMoreThanHalf;
+ }
+
+ return moreSignificant;
+}
+
+/* The error from the true value, in half-ulps, on multiplying two
+ floating point numbers, which differ from the value they
+ approximate by at most HUE1 and HUE2 half-ulps, is strictly less
+ than the returned value.
+
+ See "How to Read Floating Point Numbers Accurately" by William D
+ Clinger. */
+static unsigned int
+HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
+{
+ assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
+
+ if (HUerr1 + HUerr2 == 0)
+ return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
+ else
+ return inexactMultiply + 2 * (HUerr1 + HUerr2);
+}
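+
+/* For instance, multiplying two operands each within one half-ulp of their
+   true values, where the multiplication itself is inexact, is bounded by
+   HUerrBound(true, 1, 1) = 1 + 2 * (1 + 1) = 5 half-ulps (strictly less). */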
+
+/* The number of ulps from the boundary (zero, or half if ISNEAREST)
+ when the least significant BITS are truncated. BITS cannot be
+ zero. */
+static integerPart
+ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
+{
+ unsigned int count, partBits;
+ integerPart part, boundary;
+
+ assert(bits != 0);
+
+ bits--;
+ count = bits / integerPartWidth;
+ partBits = bits % integerPartWidth + 1;
+
+ part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+
+ if (isNearest)
+ boundary = (integerPart) 1 << (partBits - 1);
+ else
+ boundary = 0;
+
+ if (count == 0) {
+ if (part - boundary <= boundary - part)
+ return part - boundary;
+ else
+ return boundary - part;
+ }
+
+ if (part == boundary) {
+ while (--count)
+ if (parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return parts[0];
+ } else if (part == boundary - 1) {
+ while (--count)
+ if (~parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return -parts[0];
+ }
+
+ return ~(integerPart) 0; /* A lot. */
+}
+
+/* Place pow(5, power) in DST, and return the number of parts used.
+ DST must be at least one part larger than size of the answer. */
+static unsigned int
+powerOf5(integerPart *dst, unsigned int power)
+{
+ static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
+ 15625, 78125 };
+ integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
+ pow5s[0] = 78125 * 5;
+
+ unsigned int partsCount[16] = { 1 };
+ integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+ unsigned int result;
+ assert(power <= maxExponent);
+
+ p1 = dst;
+ p2 = scratch;
+
+ *p1 = firstEightPowers[power & 7];
+ power >>= 3;
+
+ result = 1;
+ pow5 = pow5s;
+
+ for (unsigned int n = 0; power; power >>= 1, n++) {
+ unsigned int pc;
+
+ pc = partsCount[n];
+
+ /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
+ if (pc == 0) {
+ pc = partsCount[n - 1];
+ APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
+ pc *= 2;
+ if (pow5[pc - 1] == 0)
+ pc--;
+ partsCount[n] = pc;
+ }
+
+ if (power & 1) {
+ integerPart *tmp;
+
+ APInt::tcFullMultiply(p2, p1, pow5, result, pc);
+ result += pc;
+ if (p2[result - 1] == 0)
+ result--;
+
+ /* Now result is in p1 with partsCount parts and p2 is scratch
+ space. */
+ tmp = p1, p1 = p2, p2 = tmp;
+ }
+
+ pow5 += pc;
+ }
+
+ if (p1 != dst)
+ APInt::tcAssign(dst, p1, result);
+
+ return result;
+}
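+
+/* In other words, powerOf5 handles the low three bits of POWER with the
+   firstEightPowers table and then runs square-and-multiply over the remaining
+   bits: pow5s starts at 5^8, each pass squares the previous block to get
+   5^(8 * 2^n), and that block is multiplied into the result whenever the
+   corresponding bit of POWER is set. */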
+
+/* Zero at the end to avoid modular arithmetic when adding one; used
+ when rounding up during hexadecimal output. */
+static const char hexDigitsLower[] = "0123456789abcdef0";
+static const char hexDigitsUpper[] = "0123456789ABCDEF0";
+static const char infinityL[] = "infinity";
+static const char infinityU[] = "INFINITY";
+static const char NaNL[] = "nan";
+static const char NaNU[] = "NAN";
+
+/* Write out an integerPart in hexadecimal, starting with the most
+ significant nibble. Write out exactly COUNT hexdigits, return
+ COUNT. */
+static unsigned int
+partAsHex (char *dst, integerPart part, unsigned int count,
+ const char *hexDigitChars)
+{
+ unsigned int result = count;
+
+ assert(count != 0 && count <= integerPartWidth / 4);
+
+ part >>= (integerPartWidth - 4 * count);
+ while (count--) {
+ dst[count] = hexDigitChars[part & 0xf];
+ part >>= 4;
+ }
+
+ return result;
+}
+
+/* Write out an unsigned decimal integer. */
+static char *
+writeUnsignedDecimal (char *dst, unsigned int n)
+{
+ char buff[40], *p;
+
+ p = buff;
+ do
+ *p++ = '0' + n % 10;
+ while (n /= 10);
+
+ do
+ *dst++ = *--p;
+ while (p != buff);
+
+ return dst;
+}
+
+/* Write out a signed decimal integer. */
+static char *
+writeSignedDecimal (char *dst, int value)
+{
+ if (value < 0) {
+ *dst++ = '-';
+ dst = writeUnsignedDecimal(dst, -(unsigned) value);
+ } else
+ dst = writeUnsignedDecimal(dst, value);
+
+ return dst;
+}
+
+/* Constructors. */
+void
+APFloat::initialize(const fltSemantics *ourSemantics)
+{
+ unsigned int count;
+
+ semantics = ourSemantics;
+ count = partCount();
+ if (count > 1)
+ significand.parts = new integerPart[count];
+}
+
+void
+APFloat::freeSignificand()
+{
+ if (partCount() > 1)
+ delete [] significand.parts;
+}
+
+void
+APFloat::assign(const APFloat &rhs)
+{
+ assert(semantics == rhs.semantics);
+
+ sign = rhs.sign;
+ category = rhs.category;
+ exponent = rhs.exponent;
+ sign2 = rhs.sign2;
+ exponent2 = rhs.exponent2;
+ if (category == fcNormal || category == fcNaN)
+ copySignificand(rhs);
+}
+
+void
+APFloat::copySignificand(const APFloat &rhs)
+{
+ assert(category == fcNormal || category == fcNaN);
+ assert(rhs.partCount() >= partCount());
+
+ APInt::tcAssign(significandParts(), rhs.significandParts(),
+ partCount());
+}
+
+/* Make this number a NaN, with an arbitrary but deterministic value
+ for the significand. If double or longer, this is a signalling NaN,
+ which may not be ideal. If float, this is QNaN(0). */
+void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
+{
+ category = fcNaN;
+ sign = Negative;
+
+ integerPart *significand = significandParts();
+ unsigned numParts = partCount();
+
+ // Set the significand bits to the fill.
+ if (!fill || fill->getNumWords() < numParts)
+ APInt::tcSet(significand, 0, numParts);
+ if (fill) {
+ APInt::tcAssign(significand, fill->getRawData(),
+ std::min(fill->getNumWords(), numParts));
+
+ // Zero out the excess bits of the significand.
+ unsigned bitsToPreserve = semantics->precision - 1;
+ unsigned part = bitsToPreserve / 64;
+ bitsToPreserve %= 64;
+ significand[part] &= ((1ULL << bitsToPreserve) - 1);
+ for (part++; part != numParts; ++part)
+ significand[part] = 0;
+ }
+
+ unsigned QNaNBit = semantics->precision - 2;
+
+ if (SNaN) {
+ // We always have to clear the QNaN bit to make it an SNaN.
+ APInt::tcClearBit(significand, QNaNBit);
+
+ // If there are no bits set in the payload, we have to set
+ // *something* to make it a NaN instead of an infinity;
+ // conventionally, this is the next bit down from the QNaN bit.
+ if (APInt::tcIsZero(significand, numParts))
+ APInt::tcSetBit(significand, QNaNBit - 1);
+ } else {
+ // We always have to set the QNaN bit to make it a QNaN.
+ APInt::tcSetBit(significand, QNaNBit);
+ }
+
+ // For x87 extended precision, we want to make a NaN, not a
+ // pseudo-NaN. Maybe we should expose the ability to make
+ // pseudo-NaNs?
+ if (semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significand, QNaNBit + 1);
+}
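+
+/* Illustrative note (not part of the original source): for IEEE single
+ precision (24-bit significand) QNaNBit is 22, the most significant bit
+ of the stored 23-bit fraction, matching the usual quiet-NaN convention;
+ a signalling NaN whose payload would otherwise be all zero gets bit 21
+ set instead so it does not read back as an infinity. */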
+
+APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
+ const APInt *fill) {
+ APFloat value(Sem, uninitialized);
+ value.makeNaN(SNaN, Negative, fill);
+ return value;
+}
+
+APFloat &
+APFloat::operator=(const APFloat &rhs)
+{
+ if (this != &rhs) {
+ if (semantics != rhs.semantics) {
+ freeSignificand();
+ initialize(rhs.semantics);
+ }
+ assign(rhs);
+ }
+
+ return *this;
+}
+
+bool
+APFloat::bitwiseIsEqual(const APFloat &rhs) const {
+ if (this == &rhs)
+ return true;
+ if (semantics != rhs.semantics ||
+ category != rhs.category ||
+ sign != rhs.sign)
+ return false;
+ if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
+ sign2 != rhs.sign2)
+ return false;
+ if (category==fcZero || category==fcInfinity)
+ return true;
+ else if (category==fcNormal && exponent!=rhs.exponent)
+ return false;
+ else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
+ exponent2!=rhs.exponent2)
+ return false;
+ else {
+ int i= partCount();
+ const integerPart* p=significandParts();
+ const integerPart* q=rhs.significandParts();
+ for (; i>0; i--, p++, q++) {
+ if (*p != *q)
+ return false;
+ }
+ return true;
+ }
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ sign = 0;
+ zeroSignificand();
+ exponent = ourSemantics.precision - 1;
+ significandParts()[0] = value;
+ normalize(rmNearestTiesToEven, lfExactlyZero);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ category = fcZero;
+ sign = false;
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ // Allocates storage if necessary but does not initialize it.
+ initialize(&ourSemantics);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics,
+ fltCategory ourCategory, bool negative)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ category = ourCategory;
+ sign = negative;
+ if (category == fcNormal)
+ category = fcZero;
+ else if (ourCategory == fcNaN)
+ makeNaN();
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ convertFromString(text, rmNearestTiesToEven);
+}
+
+APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
+ initialize(rhs.semantics);
+ assign(rhs);
+}
+
+APFloat::~APFloat()
+{
+ freeSignificand();
+}
+
+// Profile - This method 'profiles' an APFloat for use with FoldingSet.
+void APFloat::Profile(FoldingSetNodeID& ID) const {
+ ID.Add(bitcastToAPInt());
+}
+
+unsigned int
+APFloat::partCount() const
+{
+ return partCountForBits(semantics->precision + 1);
+}
+
+unsigned int
+APFloat::semanticsPrecision(const fltSemantics &semantics)
+{
+ return semantics.precision;
+}
+
+const integerPart *
+APFloat::significandParts() const
+{
+ return const_cast<APFloat *>(this)->significandParts();
+}
+
+integerPart *
+APFloat::significandParts()
+{
+ assert(category == fcNormal || category == fcNaN);
+
+ if (partCount() > 1)
+ return significand.parts;
+ else
+ return &significand.part;
+}
+
+void
+APFloat::zeroSignificand()
+{
+ category = fcNormal;
+ APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/* Increment an fcNormal floating point number's significand. */
+void
+APFloat::incrementSignificand()
+{
+ integerPart carry;
+
+ carry = APInt::tcIncrement(significandParts(), partCount());
+
+ /* Our callers should never cause us to overflow. */
+ assert(carry == 0);
+}
+
+/* Add the significand of the RHS. Returns the carry flag. */
+integerPart
+APFloat::addSignificand(const APFloat &rhs)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
+}
+
+/* Subtract the significand of the RHS with a borrow flag. Returns
+ the borrow flag. */
+integerPart
+APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
+ partCount());
+}
+
+/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
+ on to the full-precision result of the multiplication. Returns the
+ lost fraction. */
+lostFraction
+APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
+{
+ unsigned int omsb; // One, not zero, based MSB.
+ unsigned int partsCount, newPartsCount, precision;
+ integerPart *lhsSignificand;
+ integerPart scratch[4];
+ integerPart *fullSignificand;
+ lostFraction lost_fraction;
+ bool ignored;
+
+ assert(semantics == rhs.semantics);
+
+ precision = semantics->precision;
+ newPartsCount = partCountForBits(precision * 2);
+
+ if (newPartsCount > 4)
+ fullSignificand = new integerPart[newPartsCount];
+ else
+ fullSignificand = scratch;
+
+ lhsSignificand = significandParts();
+ partsCount = partCount();
+
+ APInt::tcFullMultiply(fullSignificand, lhsSignificand,
+ rhs.significandParts(), partsCount, partsCount);
+
+ lost_fraction = lfExactlyZero;
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ exponent += rhs.exponent;
+
+ if (addend) {
+ Significand savedSignificand = significand;
+ const fltSemantics *savedSemantics = semantics;
+ fltSemantics extendedSemantics;
+ opStatus status;
+ unsigned int extendedPrecision;
+
+ /* Normalize our MSB. */
+ extendedPrecision = precision + precision - 1;
+ if (omsb != extendedPrecision) {
+ APInt::tcShiftLeft(fullSignificand, newPartsCount,
+ extendedPrecision - omsb);
+ exponent -= extendedPrecision - omsb;
+ }
+
+ /* Create new semantics. */
+ extendedSemantics = *semantics;
+ extendedSemantics.precision = extendedPrecision;
+
+ if (newPartsCount == 1)
+ significand.part = fullSignificand[0];
+ else
+ significand.parts = fullSignificand;
+ semantics = &extendedSemantics;
+
+ APFloat extendedAddend(*addend);
+ status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
+ assert(status == opOK);
+ lost_fraction = addOrSubtractSignificand(extendedAddend, false);
+
+ /* Restore our state. */
+ if (newPartsCount == 1)
+ fullSignificand[0] = significand.part;
+ significand = savedSignificand;
+ semantics = savedSemantics;
+
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ }
+
+ exponent -= (precision - 1);
+
+ if (omsb > precision) {
+ unsigned int bits, significantParts;
+ lostFraction lf;
+
+ bits = omsb - precision;
+ significantParts = partCountForBits(omsb);
+ lf = shiftRight(fullSignificand, significantParts, bits);
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+ exponent += bits;
+ }
+
+ APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
+
+ if (newPartsCount > 4)
+ delete [] fullSignificand;
+
+ return lost_fraction;
+}
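+
+/* Illustrative note (not part of the original source): assuming 64-bit
+ integerParts, IEEE single (precision 24) needs one part for its 48-bit
+ full product and IEEE quad (precision 113) needs four for its 226-bit
+ product, so both fit the on-stack scratch array; the heap allocation
+ above is only reached for unusually wide semantics. */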
+
+/* Divide the significand of this value by that of the RHS, leaving the
+ quotient in place of the significand. Returns the lost fraction. */
+lostFraction
+APFloat::divideSignificand(const APFloat &rhs)
+{
+ unsigned int bit, i, partsCount;
+ const integerPart *rhsSignificand;
+ integerPart *lhsSignificand, *dividend, *divisor;
+ integerPart scratch[4];
+ lostFraction lost_fraction;
+
+ assert(semantics == rhs.semantics);
+
+ lhsSignificand = significandParts();
+ rhsSignificand = rhs.significandParts();
+ partsCount = partCount();
+
+ if (partsCount > 2)
+ dividend = new integerPart[partsCount * 2];
+ else
+ dividend = scratch;
+
+ divisor = dividend + partsCount;
+
+ /* Copy the dividend and divisor as they will be modified in-place. */
+ for (i = 0; i < partsCount; i++) {
+ dividend[i] = lhsSignificand[i];
+ divisor[i] = rhsSignificand[i];
+ lhsSignificand[i] = 0;
+ }
+
+ exponent -= rhs.exponent;
+
+ unsigned int precision = semantics->precision;
+
+ /* Normalize the divisor. */
+ bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
+ if (bit) {
+ exponent += bit;
+ APInt::tcShiftLeft(divisor, partsCount, bit);
+ }
+
+ /* Normalize the dividend. */
+ bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
+ if (bit) {
+ exponent -= bit;
+ APInt::tcShiftLeft(dividend, partsCount, bit);
+ }
+
+ /* Ensure the dividend >= divisor initially for the loop below.
+ Incidentally, this means that the division loop below is
+ guaranteed to set the integer bit to one. */
+ if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
+ exponent--;
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
+ }
+
+ /* Long division. */
+ for (bit = precision; bit; bit -= 1) {
+ if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
+ APInt::tcSubtract(dividend, divisor, 0, partsCount);
+ APInt::tcSetBit(lhsSignificand, bit - 1);
+ }
+
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ }
+
+ /* Figure out the lost fraction. */
+ int cmp = APInt::tcCompare(dividend, divisor, partsCount);
+
+ if (cmp > 0)
+ lost_fraction = lfMoreThanHalf;
+ else if (cmp == 0)
+ lost_fraction = lfExactlyHalf;
+ else if (APInt::tcIsZero(dividend, partsCount))
+ lost_fraction = lfExactlyZero;
+ else
+ lost_fraction = lfLessThanHalf;
+
+ if (partsCount > 2)
+ delete [] dividend;
+
+ return lost_fraction;
+}
+
+unsigned int
+APFloat::significandMSB() const
+{
+ return APInt::tcMSB(significandParts(), partCount());
+}
+
+unsigned int
+APFloat::significandLSB() const
+{
+ return APInt::tcLSB(significandParts(), partCount());
+}
+
+/* Note that a zero result is NOT normalized to fcZero. */
+lostFraction
+APFloat::shiftSignificandRight(unsigned int bits)
+{
+ /* Our exponent should not overflow. */
+ assert((exponent_t) (exponent + bits) >= exponent);
+
+ exponent += bits;
+
+ return shiftRight(significandParts(), partCount(), bits);
+}
+
+/* Shift the significand left BITS bits, subtract BITS from its exponent. */
+void
+APFloat::shiftSignificandLeft(unsigned int bits)
+{
+ assert(bits < semantics->precision);
+
+ if (bits) {
+ unsigned int partsCount = partCount();
+
+ APInt::tcShiftLeft(significandParts(), partsCount, bits);
+ exponent -= bits;
+
+ assert(!APInt::tcIsZero(significandParts(), partsCount));
+ }
+}
+
+APFloat::cmpResult
+APFloat::compareAbsoluteValue(const APFloat &rhs) const
+{
+ int compare;
+
+ assert(semantics == rhs.semantics);
+ assert(category == fcNormal);
+ assert(rhs.category == fcNormal);
+
+ compare = exponent - rhs.exponent;
+
+ /* If exponents are equal, do an unsigned bignum comparison of the
+ significands. */
+ if (compare == 0)
+ compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
+ partCount());
+
+ if (compare > 0)
+ return cmpGreaterThan;
+ else if (compare < 0)
+ return cmpLessThan;
+ else
+ return cmpEqual;
+}
+
+/* Handle overflow. Sign is preserved. We either become infinity or
+ the largest finite number. */
+APFloat::opStatus
+APFloat::handleOverflow(roundingMode rounding_mode)
+{
+ /* Infinity? */
+ if (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway ||
+ (rounding_mode == rmTowardPositive && !sign) ||
+ (rounding_mode == rmTowardNegative && sign)) {
+ category = fcInfinity;
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ /* Otherwise we become the largest finite number. */
+ category = fcNormal;
+ exponent = semantics->maxExponent;
+ APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
+ semantics->precision);
+
+ return opInexact;
+}
+
+/* Returns TRUE if, when truncating the current number, with BIT the
+ new LSB, with the given lost fraction and rounding mode, the result
+ would need to be rounded away from zero (i.e., by increasing the
+ significand). This routine must work for fcZero of both signs, and
+ fcNormal numbers. */
+bool
+APFloat::roundAwayFromZero(roundingMode rounding_mode,
+ lostFraction lost_fraction,
+ unsigned int bit) const
+{
+ /* NaNs and infinities should not have lost fractions. */
+ assert(category == fcNormal || category == fcZero);
+
+ /* Current callers never pass this so we don't handle it. */
+ assert(lost_fraction != lfExactlyZero);
+
+ switch (rounding_mode) {
+ default:
+ llvm_unreachable(0);
+
+ case rmNearestTiesToAway:
+ return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
+
+ case rmNearestTiesToEven:
+ if (lost_fraction == lfMoreThanHalf)
+ return true;
+
+ /* Our zeroes don't have a significand to test. */
+ if (lost_fraction == lfExactlyHalf && category != fcZero)
+ return APInt::tcExtractBit(significandParts(), bit);
+
+ return false;
+
+ case rmTowardZero:
+ return false;
+
+ case rmTowardPositive:
+ return sign == false;
+
+ case rmTowardNegative:
+ return sign == true;
+ }
+}
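+
+/* Illustrative summary (not part of the original source): under
+ rmNearestTiesToEven a lost fraction of lfMoreThanHalf always rounds
+ away from zero, lfLessThanHalf never does, and lfExactlyHalf rounds
+ away only when the bit that becomes the new LSB is already 1, which is
+ exactly what keeps the rounded result even. */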
+
+APFloat::opStatus
+APFloat::normalize(roundingMode rounding_mode,
+ lostFraction lost_fraction)
+{
+ unsigned int omsb; /* One, not zero, based MSB. */
+ int exponentChange;
+
+ if (category != fcNormal)
+ return opOK;
+
+ /* Before rounding normalize the exponent of fcNormal numbers. */
+ omsb = significandMSB() + 1;
+
+ if (omsb) {
+ /* OMSB is numbered from 1. We want to place it in the integer
+ bit numbered PRECISION if possible, with a compensating change in
+ the exponent. */
+ exponentChange = omsb - semantics->precision;
+
+ /* If the resulting exponent is too high, overflow according to
+ the rounding mode. */
+ if (exponent + exponentChange > semantics->maxExponent)
+ return handleOverflow(rounding_mode);
+
+ /* Subnormal numbers have exponent minExponent, and their MSB
+ is forced based on that. */
+ if (exponent + exponentChange < semantics->minExponent)
+ exponentChange = semantics->minExponent - exponent;
+
+ /* Shifting left is easy as we don't lose precision. */
+ if (exponentChange < 0) {
+ assert(lost_fraction == lfExactlyZero);
+
+ shiftSignificandLeft(-exponentChange);
+
+ return opOK;
+ }
+
+ if (exponentChange > 0) {
+ lostFraction lf;
+
+ /* Shift right and capture any new lost fraction. */
+ lf = shiftSignificandRight(exponentChange);
+
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+
+ /* Keep OMSB up-to-date. */
+ if (omsb > (unsigned) exponentChange)
+ omsb -= exponentChange;
+ else
+ omsb = 0;
+ }
+ }
+
+ /* Now round the number according to rounding_mode given the lost
+ fraction. */
+
+ /* As specified in IEEE 754, since we do not trap we do not report
+ underflow for exact results. */
+ if (lost_fraction == lfExactlyZero) {
+ /* Canonicalize zeroes. */
+ if (omsb == 0)
+ category = fcZero;
+
+ return opOK;
+ }
+
+ /* Increment the significand if we're rounding away from zero. */
+ if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
+ if (omsb == 0)
+ exponent = semantics->minExponent;
+
+ incrementSignificand();
+ omsb = significandMSB() + 1;
+
+ /* Did the significand increment overflow? */
+ if (omsb == (unsigned) semantics->precision + 1) {
+ /* Renormalize by incrementing the exponent and shifting our
+ significand right one. However if we already have the
+ maximum exponent we overflow to infinity. */
+ if (exponent == semantics->maxExponent) {
+ category = fcInfinity;
+
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ shiftSignificandRight(1);
+
+ return opInexact;
+ }
+ }
+
+ /* The normal case - we were and are not denormal, and any
+ significand increment above didn't overflow. */
+ if (omsb == semantics->precision)
+ return opInexact;
+
+ /* We have a non-zero denormal. */
+ assert(omsb < semantics->precision);
+
+ /* Canonicalize zeroes. */
+ if (omsb == 0)
+ category = fcZero;
+
+ /* The fcZero case is a denormal that underflowed to zero. */
+ return (opStatus) (opUnderflow | opInexact);
+}
+
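+/* Note (not part of the original source): the convolve() helper used in
+ this and the following special-case routines is defined earlier in the
+ file (not shown in this excerpt); it is assumed to pack the two
+ fltCategory values into a single integer so that every pairing of
+ category and rhs.category can be handled by one switch. */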
+APFloat::opStatus
+APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ category = fcInfinity;
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ assign(rhs);
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcZero):
+ /* Sign depends on rounding mode; handled by caller. */
+ return opOK;
+
+ case convolve(fcInfinity, fcInfinity):
+ /* Differently signed infinities can only be validly
+ subtracted. */
+ if (((sign ^ rhs.sign)!=0) != subtract) {
+ makeNaN();
+ return opInvalidOp;
+ }
+
+ return opOK;
+
+ case convolve(fcNormal, fcNormal):
+ return opDivByZero;
+ }
+}
+
+/* Add or subtract two normal numbers. */
+lostFraction
+APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
+{
+ integerPart carry;
+ lostFraction lost_fraction;
+ int bits;
+
+ /* Determine if the operation on the absolute values is effectively
+ an addition or subtraction. */
+ subtract ^= (sign ^ rhs.sign) ? true : false;
+
+ /* Are we bigger exponent-wise than the RHS? */
+ bits = exponent - rhs.exponent;
+
+ /* Subtraction is more subtle than one might naively expect. */
+ if (subtract) {
+ APFloat temp_rhs(rhs);
+ bool reverse;
+
+ if (bits == 0) {
+ reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
+ lost_fraction = lfExactlyZero;
+ } else if (bits > 0) {
+ lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
+ shiftSignificandLeft(1);
+ reverse = false;
+ } else {
+ lost_fraction = shiftSignificandRight(-bits - 1);
+ temp_rhs.shiftSignificandLeft(1);
+ reverse = true;
+ }
+
+ if (reverse) {
+ carry = temp_rhs.subtractSignificand
+ (*this, lost_fraction != lfExactlyZero);
+ copySignificand(temp_rhs);
+ sign = !sign;
+ } else {
+ carry = subtractSignificand
+ (temp_rhs, lost_fraction != lfExactlyZero);
+ }
+
+ /* Invert the lost fraction - it was on the RHS and
+ subtracted. */
+ if (lost_fraction == lfLessThanHalf)
+ lost_fraction = lfMoreThanHalf;
+ else if (lost_fraction == lfMoreThanHalf)
+ lost_fraction = lfLessThanHalf;
+
+ /* The code above is intended to ensure that no borrow is
+ necessary. */
+ assert(!carry);
+ } else {
+ if (bits > 0) {
+ APFloat temp_rhs(rhs);
+
+ lost_fraction = temp_rhs.shiftSignificandRight(bits);
+ carry = addSignificand(temp_rhs);
+ } else {
+ lost_fraction = shiftSignificandRight(-bits);
+ carry = addSignificand(rhs);
+ }
+
+ /* We have a guard bit; generating a carry cannot happen. */
+ assert(!carry);
+ }
+
+ return lost_fraction;
+}
+
+APFloat::opStatus
+APFloat::multiplySpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ category = fcInfinity;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcZero, fcZero):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcInfinity, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::divideSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ category = fcInfinity;
+ return opDivByZero;
+
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::modSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcInfinity):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+/* Change sign. */
+void
+APFloat::changeSign()
+{
+ /* Look mummy, this one's easy. */
+ sign = !sign;
+}
+
+void
+APFloat::clearSign()
+{
+ /* So is this one. */
+ sign = 0;
+}
+
+void
+APFloat::copySign(const APFloat &rhs)
+{
+ /* And this one. */
+ sign = rhs.sign;
+}
+
+/* Normalized addition or subtraction. */
+APFloat::opStatus
+APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
+ bool subtract)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+
+ fs = addOrSubtractSpecials(rhs, subtract);
+
+ /* This return code means it was not a simple case. */
+ if (fs == opDivByZero) {
+ lostFraction lost_fraction;
+
+ lost_fraction = addOrSubtractSignificand(rhs, subtract);
+ fs = normalize(rounding_mode, lost_fraction);
+
+ /* Can only be zero if we lost no fraction. */
+ assert(category != fcZero || lost_fraction == lfExactlyZero);
+ }
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if (category == fcZero) {
+ if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
+ sign = (rounding_mode == rmTowardNegative);
+ }
+
+ return fs;
+}
+
+/* Normalized addition. */
+APFloat::opStatus
+APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, false);
+}
+
+/* Normalized subtraction. */
+APFloat::opStatus
+APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, true);
+}
+
+/* Normalized multiply. */
+APFloat::opStatus
+APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ sign ^= rhs.sign;
+ fs = multiplySpecials(rhs);
+
+ if (category == fcNormal) {
+ lostFraction lost_fraction = multiplySignificand(rhs, 0);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized divide. */
+APFloat::opStatus
+APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ sign ^= rhs.sign;
+ fs = divideSpecials(rhs);
+
+ if (category == fcNormal) {
+ lostFraction lost_fraction = divideSignificand(rhs);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized remainder. This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::remainder(const APFloat &rhs)
+{
+ opStatus fs;
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ assertArithmeticOK(*semantics);
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven, &ignored);
+ if (fs==opInvalidOp)
+ return fs;
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ return fs;
+}
+
+/* Normalized llvm frem (C fmod).
+ This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+ assertArithmeticOK(*semantics);
+ fs = modSpecials(rhs);
+
+ if (category == fcNormal && rhs.category == fcNormal) {
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmTowardZero, &ignored);
+ if (fs==opInvalidOp)
+ return fs;
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ }
+ return fs;
+}
+
+/* Normalized fused-multiply-add. */
+APFloat::opStatus
+APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
+ const APFloat &addend,
+ roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+
+ /* Post-multiplication sign, before addition. */
+ sign ^= multiplicand.sign;
+
+ /* If and only if all arguments are normal do we need to do an
+ extended-precision calculation. */
+ if (category == fcNormal &&
+ multiplicand.category == fcNormal &&
+ addend.category == fcNormal) {
+ lostFraction lost_fraction;
+
+ lost_fraction = multiplySignificand(multiplicand, &addend);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if (category == fcZero && sign != addend.sign)
+ sign = (rounding_mode == rmTowardNegative);
+ } else {
+ fs = multiplySpecials(multiplicand);
+
+ /* FS can only be opOK or opInvalidOp. There is no more work
+ to do in the latter case. The IEEE-754R standard says it is
+ implementation-defined in this case whether, if ADDEND is a
+ quiet NaN, we raise invalid op; this implementation does so.
+
+ If we need to do the addition we can do so with normal
+ precision. */
+ if (fs == opOK)
+ fs = addOrSubtract(addend, rounding_mode, false);
+ }
+
+ return fs;
+}
+
+/* Comparison requires normalized numbers. */
+APFloat::cmpResult
+APFloat::compare(const APFloat &rhs) const
+{
+ cmpResult result;
+
+ assertArithmeticOK(*semantics);
+ assert(semantics == rhs.semantics);
+
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ return cmpUnordered;
+
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcNormal, fcZero):
+ if (sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ if (rhs.sign)
+ return cmpGreaterThan;
+ else
+ return cmpLessThan;
+
+ case convolve(fcInfinity, fcInfinity):
+ if (sign == rhs.sign)
+ return cmpEqual;
+ else if (sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcZero, fcZero):
+ return cmpEqual;
+
+ case convolve(fcNormal, fcNormal):
+ break;
+ }
+
+ /* Two normal numbers. Do they have the same sign? */
+ if (sign != rhs.sign) {
+ if (sign)
+ result = cmpLessThan;
+ else
+ result = cmpGreaterThan;
+ } else {
+ /* Compare absolute values; invert result if negative. */
+ result = compareAbsoluteValue(rhs);
+
+ if (sign) {
+ if (result == cmpLessThan)
+ result = cmpGreaterThan;
+ else if (result == cmpGreaterThan)
+ result = cmpLessThan;
+ }
+ }
+
+ return result;
+}
+
+/// APFloat::convert - convert a value of one floating point type to another.
+/// The return value corresponds to the IEEE754 exceptions. *losesInfo
+/// records whether the transformation lost information, i.e. whether
+/// converting the result back to the original type will fail to produce
+/// the original value (this is almost the same as return value!=opOK, but there
+/// are edge cases where this is not so).
+
+APFloat::opStatus
+APFloat::convert(const fltSemantics &toSemantics,
+ roundingMode rounding_mode, bool *losesInfo)
+{
+ lostFraction lostFraction;
+ unsigned int newPartCount, oldPartCount;
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ assertArithmeticOK(toSemantics);
+ lostFraction = lfExactlyZero;
+ newPartCount = partCountForBits(toSemantics.precision + 1);
+ oldPartCount = partCount();
+
+ /* Handle storage complications. If our new form is wider,
+ re-allocate our bit pattern into wider storage. If it is
+ narrower, we ignore the excess parts, but if narrowing to a
+ single part we need to free the old storage.
+ Be careful not to reference significandParts for zeroes
+ and infinities, since it aborts. */
+ if (newPartCount > oldPartCount) {
+ integerPart *newParts;
+ newParts = new integerPart[newPartCount];
+ APInt::tcSet(newParts, 0, newPartCount);
+ if (category==fcNormal || category==fcNaN)
+ APInt::tcAssign(newParts, significandParts(), oldPartCount);
+ freeSignificand();
+ significand.parts = newParts;
+ } else if (newPartCount < oldPartCount) {
+ /* Capture any lost fraction through truncation of parts so we get
+ correct rounding whilst normalizing. */
+ if (category==fcNormal)
+ lostFraction = lostFractionThroughTruncation
+ (significandParts(), oldPartCount, toSemantics.precision);
+ if (newPartCount == 1) {
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
+ }
+ }
+
+ if (category == fcNormal) {
+ /* Re-interpret our bit-pattern. */
+ exponent += toSemantics.precision - semantics->precision;
+ semantics = &toSemantics;
+ fs = normalize(rounding_mode, lostFraction);
+ *losesInfo = (fs != opOK);
+ } else if (category == fcNaN) {
+ int shift = toSemantics.precision - semantics->precision;
+ // Do this now so significandParts gets the right answer
+ const fltSemantics *oldSemantics = semantics;
+ semantics = &toSemantics;
+ *losesInfo = false;
+ // No normalization here, just truncate
+ if (shift>0)
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+ else if (shift < 0) {
+ unsigned ushift = -shift;
+ // Figure out if we are losing information. This happens
+ // if we are shifting out something other than 0s, or if the x87 long
+ // double input did not have its integer bit set (pseudo-NaN), or if the
+ // x87 long double input did not have its QNan bit set (because the x87
+ // hardware sets this bit when converting a lower-precision NaN to
+ // x87 long double).
+ if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
+ *losesInfo = true;
+ if (oldSemantics == &APFloat::x87DoubleExtended &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL)))
+ *losesInfo = true;
+ APInt::tcShiftRight(significandParts(), newPartCount, ushift);
+ }
+ // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
+ // does not give you back the same bits. This is dubious, and we
+ // don't currently do it. You're really supposed to get
+ // an invalid operation signal at runtime, but nobody does that.
+ fs = opOK;
+ } else {
+ semantics = &toSemantics;
+ fs = opOK;
+ *losesInfo = false;
+ }
+
+ return fs;
+}
+
+/* Convert a floating point number to an integer according to the
+ rounding mode. If the rounded integer value is out of range this
+ returns an invalid operation exception and the contents of the
+ destination parts are unspecified. If the rounded value is in
+ range but the floating point number is not the exact integer, the C
+ standard doesn't require an inexact exception to be raised. IEEE
+ 854 does require it so we do that.
+
+ Note that for conversions to integer type the C standard requires
+ round-to-zero to always be used. */
+APFloat::opStatus
+APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode,
+ bool *isExact) const
+{
+ lostFraction lost_fraction;
+ const integerPart *src;
+ unsigned int dstPartsCount, truncatedBits;
+
+ assertArithmeticOK(*semantics);
+
+ *isExact = false;
+
+ /* Handle the three special cases first. */
+ if (category == fcInfinity || category == fcNaN)
+ return opInvalidOp;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcZero) {
+ APInt::tcSet(parts, 0, dstPartsCount);
+ // Negative zero can't be represented as an int.
+ *isExact = !sign;
+ return opOK;
+ }
+
+ src = significandParts();
+
+ /* Step 1: place our absolute value, with any fraction truncated, in
+ the destination. */
+ if (exponent < 0) {
+ /* Our absolute value is less than one; truncate everything. */
+ APInt::tcSet(parts, 0, dstPartsCount);
+ /* For exponent -1 the integer bit represents .5 and is the leftmost
+ truncated bit; for smaller exponents the leftmost truncated bit is 0. */
+ truncatedBits = semantics->precision -1U - exponent;
+ } else {
+ /* We want the most significant (exponent + 1) bits; the rest are
+ truncated. */
+ unsigned int bits = exponent + 1U;
+
+ /* Hopelessly large in magnitude? */
+ if (bits > width)
+ return opInvalidOp;
+
+ if (bits < semantics->precision) {
+ /* We truncate (semantics->precision - bits) bits. */
+ truncatedBits = semantics->precision - bits;
+ APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
+ } else {
+ /* We want at least as many bits as are available. */
+ APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
+ APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
+ truncatedBits = 0;
+ }
+ }
+
+ /* Step 2: work out any lost fraction, and increment the absolute
+ value if we would round away from zero. */
+ if (truncatedBits) {
+ lost_fraction = lostFractionThroughTruncation(src, partCount(),
+ truncatedBits);
+ if (lost_fraction != lfExactlyZero &&
+ roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
+ if (APInt::tcIncrement(parts, dstPartsCount))
+ return opInvalidOp; /* Overflow. */
+ }
+ } else {
+ lost_fraction = lfExactlyZero;
+ }
+
+ /* Step 3: check if we fit in the destination. */
+ unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
+
+ if (sign) {
+ if (!isSigned) {
+ /* Negative numbers cannot be represented as unsigned. */
+ if (omsb != 0)
+ return opInvalidOp;
+ } else {
+ /* It takes omsb bits to represent the unsigned integer value.
+ We lose a bit for the sign, but care is needed as the
+ maximally negative integer is a special case. */
+ if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
+ return opInvalidOp;
+
+ /* This case can happen because of rounding. */
+ if (omsb > width)
+ return opInvalidOp;
+ }
+
+ APInt::tcNegate (parts, dstPartsCount);
+ } else {
+ if (omsb >= width + !isSigned)
+ return opInvalidOp;
+ }
+
+ if (lost_fraction == lfExactlyZero) {
+ *isExact = true;
+ return opOK;
+ } else
+ return opInexact;
+}
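+
+/* Illustrative note (not part of the original source): converting -1.5 to
+ a signed integer with rmTowardZero truncates to 1, negates to -1, and
+ returns opInexact with *isExact false; under rmNearestTiesToEven the
+ exact half rounds to the even magnitude 2, giving -2 instead. */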
+
+/* Same as convertToSignExtendedInteger, except we provide
+ deterministic values in case of an invalid operation exception,
+ namely zero for NaNs and the minimal or maximal value respectively
+ for underflow or overflow.
+ The *isExact output tells whether the result is exact, in the sense
+ that converting it back to the original floating point type produces
+ the original value. This is almost equivalent to result==opOK,
+ except for negative zeroes.
+*/
+APFloat::opStatus
+APFloat::convertToInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ opStatus fs;
+
+ fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
+ isExact);
+
+ if (fs == opInvalidOp) {
+ unsigned int bits, dstPartsCount;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcNaN)
+ bits = 0;
+ else if (sign)
+ bits = isSigned;
+ else
+ bits = width - isSigned;
+
+ APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
+ if (sign && isSigned)
+ APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
+ }
+
+ return fs;
+}
+
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+ an APSInt, whose initial bit-width and signed-ness are used to determine the
+ precision of the conversion.
+ */
+APFloat::opStatus
+APFloat::convertToInteger(APSInt &result,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ unsigned bitWidth = result.getBitWidth();
+ SmallVector<uint64_t, 4> parts(result.getNumWords());
+ opStatus status = convertToInteger(
+ parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
+ // Keeps the original signed-ness.
+ result = APInt(bitWidth, (unsigned)parts.size(), parts.data());
+ return status;
+}
+
+/* Convert an unsigned integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. The sign of the floating
+ point number is not modified. */
+APFloat::opStatus
+APFloat::convertFromUnsignedParts(const integerPart *src,
+ unsigned int srcCount,
+ roundingMode rounding_mode)
+{
+ unsigned int omsb, precision, dstCount;
+ integerPart *dst;
+ lostFraction lost_fraction;
+
+ assertArithmeticOK(*semantics);
+ category = fcNormal;
+ omsb = APInt::tcMSB(src, srcCount) + 1;
+ dst = significandParts();
+ dstCount = partCount();
+ precision = semantics->precision;
+
+ /* We want the most significant PRECISION bits of SRC. There may not
+ be that many; extract what we can. */
+ if (precision <= omsb) {
+ exponent = omsb - 1;
+ lost_fraction = lostFractionThroughTruncation(src, srcCount,
+ omsb - precision);
+ APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
+ } else {
+ exponent = precision - 1;
+ lost_fraction = lfExactlyZero;
+ APInt::tcExtract(dst, dstCount, src, omsb, 0);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::convertFromAPInt(const APInt &Val,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = Val.getNumWords();
+ APInt api = Val;
+
+ sign = false;
+ if (isSigned && api.isNegative()) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+/* Convert a two's complement integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. ISSIGNED is true if the
+ integer is signed, in which case it must be sign-extended. */
+APFloat::opStatus
+APFloat::convertFromSignExtendedInteger(const integerPart *src,
+ unsigned int srcCount,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ opStatus status;
+
+ assertArithmeticOK(*semantics);
+ if (isSigned &&
+ APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+ integerPart *copy;
+
+ /* If we're signed and negative negate a copy. */
+ sign = true;
+ copy = new integerPart[srcCount];
+ APInt::tcAssign(copy, src, srcCount);
+ APInt::tcNegate(copy, srcCount);
+ status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
+ delete [] copy;
+ } else {
+ sign = false;
+ status = convertFromUnsignedParts(src, srcCount, rounding_mode);
+ }
+
+ return status;
+}
+
+/* FIXME: should this just take a const APInt reference? */
+APFloat::opStatus
+APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+ unsigned int width, bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = partCountForBits(width);
+ APInt api = APInt(width, partCount, parts);
+
+ sign = false;
+ if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+APFloat::opStatus
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
+{
+ lostFraction lost_fraction = lfExactlyZero;
+ integerPart *significand;
+ unsigned int bitPos, partsCount;
+ StringRef::iterator dot, firstSignificantDigit;
+
+ zeroSignificand();
+ exponent = 0;
+ category = fcNormal;
+
+ significand = significandParts();
+ partsCount = partCount();
+ bitPos = partsCount * integerPartWidth;
+
+ /* Skip leading zeroes and any (hexa)decimal point. */
+ StringRef::iterator begin = s.begin();
+ StringRef::iterator end = s.end();
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+ firstSignificantDigit = p;
+
+ for (; p != end;) {
+ integerPart hex_value;
+
+ if (*p == '.') {
+ assert(dot == end && "String contains multiple dots");
+ dot = p++;
+ if (p == end) {
+ break;
+ }
+ }
+
+ hex_value = hexDigitValue(*p);
+ if (hex_value == -1U) {
+ break;
+ }
+
+ p++;
+
+ if (p == end) {
+ break;
+ } else {
+ /* Store the number whilst 4-bit nibbles remain. */
+ if (bitPos) {
+ bitPos -= 4;
+ hex_value <<= bitPos % integerPartWidth;
+ significand[bitPos / integerPartWidth] |= hex_value;
+ } else {
+ lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
+ while (p != end && hexDigitValue(*p) != -1U)
+ p++;
+ break;
+ }
+ }
+ }
+
+ /* Hex floats require an exponent but not a hexadecimal point. */
+ assert(p != end && "Hex strings require an exponent");
+ assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* Ignore the exponent if we are zero. */
+ if (p != firstSignificantDigit) {
+ int expAdjustment;
+
+ /* Implicit hexadecimal point? */
+ if (dot == end)
+ dot = p;
+
+ /* Calculate the exponent adjustment implicit in the number of
+ significant digits. */
+ expAdjustment = static_cast<int>(dot - firstSignificantDigit);
+ if (expAdjustment < 0)
+ expAdjustment++;
+ expAdjustment = expAdjustment * 4 - 1;
+
+ /* Adjust for writing the significand starting at the most
+ significant nibble. */
+ expAdjustment += semantics->precision;
+ expAdjustment -= partsCount * integerPartWidth;
+
+ /* Adjust for the given exponent. */
+ exponent = totalExponent(p + 1, end, expAdjustment);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+ unsigned sigPartCount, int exp,
+ roundingMode rounding_mode)
+{
+ unsigned int parts, pow5PartCount;
+ fltSemantics calcSemantics = { 32767, -32767, 0, true };
+ integerPart pow5Parts[maxPowerOfFiveParts];
+ bool isNearest;
+
+ isNearest = (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway);
+
+ parts = partCountForBits(semantics->precision + 11);
+
+ /* Calculate pow(5, abs(exp)). */
+ pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+ for (;; parts *= 2) {
+ opStatus sigStatus, powStatus;
+ unsigned int excessPrecision, truncatedBits;
+
+ calcSemantics.precision = parts * integerPartWidth - 1;
+ excessPrecision = calcSemantics.precision - semantics->precision;
+ truncatedBits = excessPrecision;
+
+ APFloat decSig(calcSemantics, fcZero, sign);
+ APFloat pow5(calcSemantics, fcZero, false);
+
+ sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+ rmNearestTiesToEven);
+ powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+ rmNearestTiesToEven);
+ /* Add exp, as 10^n = 5^n * 2^n. */
+ decSig.exponent += exp;
+
+ lostFraction calcLostFraction;
+ integerPart HUerr, HUdistance;
+ unsigned int powHUerr;
+
+ if (exp >= 0) {
+ /* multiplySignificand leaves the precision-th bit set to 1. */
+ calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+ powHUerr = powStatus != opOK;
+ } else {
+ calcLostFraction = decSig.divideSignificand(pow5);
+ /* Denormal numbers have less precision. */
+ if (decSig.exponent < semantics->minExponent) {
+ excessPrecision += (semantics->minExponent - decSig.exponent);
+ truncatedBits = excessPrecision;
+ if (excessPrecision > calcSemantics.precision)
+ excessPrecision = calcSemantics.precision;
+ }
+ /* Extra half-ulp lost in reciprocal of exponent. */
+ powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
+ }
+
+ /* Both multiplySignificand and divideSignificand return the
+ result with the integer bit set. */
+ assert(APInt::tcExtractBit
+ (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+ HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+ powHUerr);
+ HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+ excessPrecision, isNearest);
+
+ /* Are we guaranteed to round correctly if we truncate? */
+ if (HUdistance >= HUerr) {
+ APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+ calcSemantics.precision - excessPrecision,
+ excessPrecision);
+ /* Take the exponent of decSig. If we tcExtract-ed fewer bits
+ above we must adjust our exponent to compensate for the
+ implicit right shift. */
+ exponent = (decSig.exponent + semantics->precision
+ - (calcSemantics.precision - excessPrecision));
+ calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+ decSig.partCount(),
+ truncatedBits);
+ return normalize(rounding_mode, calcLostFraction);
+ }
+ }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
+{
+ decimalInfo D;
+ opStatus fs;
+
+ /* Scan the text. */
+ StringRef::iterator p = str.begin();
+ interpretDecimal(p, str.end(), &D);
+
+ /* Handle the quick cases. First the case of no significant digits,
+ i.e. zero, and then exponents that are obviously too large or too
+ small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
+ definitely overflows if
+
+ (exp - 1) * L >= maxExponent
+
+ and definitely underflows to zero where
+
+ (exp + 1) * L <= minExponent - precision
+
+ With integer arithmetic the tightest bounds for L are
+
+ 93/28 < L < 196/59 [ numerator <= 256 ]
+ 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
+ */
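+
+ /* Illustrative check of those bounds (not part of the original source):
+ for IEEE double, maxExponent is 1023 and precision is 53, so the
+ overflow test (exp - 1) * 42039 >= 12655 * 1023 first fires at
+ exp = 309, and indeed d.ddd...*10^309 always exceeds the largest
+ finite double, roughly 1.8e308. */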
+
+ if (decDigitValue(*D.firstSigDigit) >= 10U) {
+ category = fcZero;
+ fs = opOK;
+
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
+ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
+ fs = handleOverflow(rounding_mode);
+
+ /* If it wasn't, then it also wasn't high enough to overflow max
+ during the log-rebasing in the min-exponent check. Check that it
+ won't overflow min in either check, then perform the min-exponent
+ check. */
+ } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
+ (D.normalizedExponent + 1) * 28738 <=
+ 8651 * (semantics->minExponent - (int) semantics->precision)) {
+ /* Underflow to zero and round. */
+ zeroSignificand();
+ fs = normalize(rounding_mode, lfLessThanHalf);
+
+ /* We can finally safely perform the max-exponent check. */
+ } else if ((D.normalizedExponent - 1) * 42039
+ >= 12655 * semantics->maxExponent) {
+ /* Overflow and round. */
+ fs = handleOverflow(rounding_mode);
+ } else {
+ integerPart *decSignificand;
+ unsigned int partCount;
+
+ /* A tight upper bound on number of bits required to hold an
+ N-digit decimal integer is N * 196 / 59. Allocate enough space
+ to hold the full significand, and an extra part required by
+ tcMultiplyPart. */
+ partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+ partCount = partCountForBits(1 + 196 * partCount / 59);
+ decSignificand = new integerPart[partCount + 1];
+ partCount = 0;
+
+ /* Convert to binary efficiently - we do almost all multiplication
+ in an integerPart. When this would overflow we do a single
+ bignum multiplication, and then revert again to multiplication
+ in an integerPart. */
+ do {
+ integerPart decValue, val, multiplier;
+
+ val = 0;
+ multiplier = 1;
+
+ do {
+ if (*p == '.') {
+ p++;
+ if (p == str.end()) {
+ break;
+ }
+ }
+ decValue = decDigitValue(*p++);
+ assert(decValue < 10U && "Invalid character in significand");
+ multiplier *= 10;
+ val = val * 10 + decValue;
+ /* The maximum number that can be multiplied by ten with any
+ digit added without overflowing an integerPart. */
+ } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
+
+ /* Multiply out the current part. */
+ APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+ partCount, partCount + 1, false);
+
+ /* If we used another part (likely but not guaranteed), increase
+ the count. */
+ if (decSignificand[partCount])
+ partCount++;
+ } while (p <= D.lastSigDigit);
+
+ category = fcNormal;
+ fs = roundSignificandWithExponent(decSignificand, partCount,
+ D.exponent, rounding_mode);
+
+ delete [] decSignificand;
+ }
+
+ return fs;
+}
+
+APFloat::opStatus
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
+{
+ assertArithmeticOK(*semantics);
+ assert(!str.empty() && "Invalid string length");
+
+ /* Handle a leading minus sign. */
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ sign = *p == '-' ? 1 : 0;
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String has no digits");
+ }
+
+ if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ assert(slen - 2 && "Invalid string");
+ return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
+ rounding_mode);
+ }
+
+ return convertFromDecimalString(StringRef(p, slen), rounding_mode);
+}
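+
+/* Illustrative note (not part of the original source): both
+ APFloat(APFloat::IEEEdouble, "125.0") and the hexadecimal form
+ APFloat(APFloat::IEEEdouble, "0x1.f4p6") reach this routine through the
+ StringRef constructor above and denote the same value; a leading "0x"
+ or "0X" selects the hexadecimal parser, anything else the decimal one. */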
+
+/* Write out a hexadecimal representation of the floating point value
+ to DST, which must be of sufficient size, in the C99 form
+ [-]0xh.hhhhp[+-]d. Return the number of characters written,
+ excluding the terminating NUL.
+
+ If UPPERCASE, the output is in upper case, otherwise in lower case.
+
+ HEXDIGITS digits appear altogether, rounding the value if
+ necessary. If HEXDIGITS is 0, the minimal precision to display the
+ number precisely is used instead. If nothing would appear after
+ the decimal point it is suppressed.
+
+ The decimal exponent is always printed and has at least one digit.
+ Zero values display an exponent of zero. Infinities and NaNs
+ appear as "infinity" or "nan" respectively.
+
+ The above rules are as specified by C99. There is ambiguity about
+ what the leading hexadecimal digit should be. This implementation
+ uses whatever is necessary so that the exponent is displayed as
+ stored. This implies the exponent will fall within the IEEE format
+ range, and the leading hexadecimal digit will be 0 (for denormals),
+ 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
+ any other digits zero).
+*/
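+
+/* Illustrative usage (not part of the original source; the variable and
+ buffer size are hypothetical):
+
+ char buf[64];
+ unsigned chars = value.convertToHexString(buf, 0, false,
+ APFloat::rmNearestTiesToEven);
+
+ asks for the minimal number of digits in lower case; the rounding mode
+ only matters when a nonzero HEXDIGITS forces digits to be dropped. */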
+unsigned int
+APFloat::convertToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase, roundingMode rounding_mode) const
+{
+ char *p;
+
+ assertArithmeticOK(*semantics);
+
+ p = dst;
+ if (sign)
+ *dst++ = '-';
+
+ switch (category) {
+ case fcInfinity:
+ memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
+ dst += sizeof infinityL - 1;
+ break;
+
+ case fcNaN:
+ memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
+ dst += sizeof NaNU - 1;
+ break;
+
+ case fcZero:
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+ *dst++ = '0';
+ if (hexDigits > 1) {
+ *dst++ = '.';
+ memset (dst, '0', hexDigits - 1);
+ dst += hexDigits - 1;
+ }
+ *dst++ = upperCase ? 'P': 'p';
+ *dst++ = '0';
+ break;
+
+ case fcNormal:
+ dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
+ break;
+ }
+
+ *dst = 0;
+
+ return static_cast<unsigned int>(dst - p);
+}
+
+/* Does the hard work of outputting the correctly rounded hexadecimal
+ form of a normal floating point number with the specified number of
+ hexadecimal digits. If HEXDIGITS is zero the minimum number of
+ digits necessary to print the value precisely is output. */
+char *
+APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase,
+ roundingMode rounding_mode) const
+{
+ unsigned int count, valueBits, shift, partsCount, outputDigits;
+ const char *hexDigitChars;
+ const integerPart *significand;
+ char *p;
+ bool roundUp;
+
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+
+ roundUp = false;
+ hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
+
+ significand = significandParts();
+ partsCount = partCount();
+
+ /* +3 because the first digit only uses the single integer bit, so
+ we have 3 virtual zero most-significant-bits. */
+ valueBits = semantics->precision + 3;
+ shift = integerPartWidth - valueBits % integerPartWidth;
+
+ /* The natural number of digits required ignoring trailing
+ insignificant zeroes. */
+ outputDigits = (valueBits - significandLSB () + 3) / 4;
+
+ /* hexDigits of zero means use the required number for the
+ precision. Otherwise, see if we are truncating. If we are,
+ find out if we need to round away from zero. */
+ if (hexDigits) {
+ if (hexDigits < outputDigits) {
+ /* We are dropping non-zero bits, so need to check how to round.
+ "bits" is the number of dropped bits. */
+ unsigned int bits;
+ lostFraction fraction;
+
+ bits = valueBits - hexDigits * 4;
+ fraction = lostFractionThroughTruncation (significand, partsCount, bits);
+ roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
+ }
+ outputDigits = hexDigits;
+ }
+
+ /* Write the digits consecutively, and start writing in the location
+ of the hexadecimal point. We move the most significant digit
+ left and add the hexadecimal point later. */
+ p = ++dst;
+
+ count = (valueBits + integerPartWidth - 1) / integerPartWidth;
+
+ while (outputDigits && count) {
+ integerPart part;
+
+ /* Put the most significant integerPartWidth bits in "part". */
+ if (--count == partsCount)
+ part = 0; /* An imaginary higher zero part. */
+ else
+ part = significand[count] << shift;
+
+ if (count && shift)
+ part |= significand[count - 1] >> (integerPartWidth - shift);
+
+ /* Convert as much of "part" to hexdigits as we can. */
+ unsigned int curDigits = integerPartWidth / 4;
+
+ if (curDigits > outputDigits)
+ curDigits = outputDigits;
+ dst += partAsHex (dst, part, curDigits, hexDigitChars);
+ outputDigits -= curDigits;
+ }
+
+ if (roundUp) {
+ char *q = dst;
+
+ /* Note that hexDigitChars has a trailing '0'. */
+ do {
+ q--;
+ *q = hexDigitChars[hexDigitValue (*q) + 1];
+ } while (*q == '0');
+ assert(q >= p);
+ } else {
+ /* Add trailing zeroes. */
+ memset (dst, '0', outputDigits);
+ dst += outputDigits;
+ }
+
+ /* Move the most significant digit to before the point, and if there
+ is something after the decimal point add it. This must come
+ after rounding above. */
+ p[-1] = p[0];
+ if (dst -1 == p)
+ dst--;
+ else
+ p[0] = '.';
+
+ /* Finally output the exponent. */
+ *dst++ = upperCase ? 'P': 'p';
+
+ return writeSignedDecimal (dst, exponent);
+}
+
+// For good hashing performance it is desirable that different APFloats
+// map to different integers.
+uint32_t
+APFloat::getHashValue() const
+{
+ if (category==fcZero) return sign<<8 | semantics->precision ;
+ else if (category==fcInfinity) return sign<<9 | semantics->precision;
+ else if (category==fcNaN) return 1<<10 | semantics->precision;
+ else {
+ uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
+ const integerPart* p = significandParts();
+ for (int i=partCount(); i>0; i--, p++)
+ hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
+ return hash;
+ }
+}
+
+// Conversion from APFloat to/from host float/double. It may eventually be
+// possible to eliminate these and have everybody deal with APFloats, but that
+// will take a while. This approach will not easily extend to long double.
+// Current implementation requires integerPartWidth==64, which is correct at
+// the moment but could be made more general.
+
+// Denormals have exponent minExponent in APFloat, but minExponent-1 in
+// the actual IEEE representations. We compensate for that here.
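+// For example, the smallest IEEE double denormal, 2^-1074, is held here with
+// exponent == -1022 (minExponent) and the integer bit clear; converting it
+// computes a biased exponent of 1, sees the clear integer bit, and rewrites
+// the IEEE exponent field to 0.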
+
+APInt
+APFloat::convertF80LongDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = 0x8000000000000000ULL;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 15) |
+ (myexponent & 0x7fffLL);
+ return APInt(80, 2, words);
+}
+
+APInt
+APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent + 1023; //bias
+ myexponent2 = exponent2 + 1023;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
+ myexponent2 = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ myexponent2 = 0;
+ mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ myexponent2 = 0;
+ mysignificand = 0;
+ mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7ff;
+ mysignificand = significandParts()[0];
+ myexponent2 = exponent2;
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL);
+ words[1] = ((uint64_t)(sign2 & 1) << 63) |
+ ((myexponent2 & 0x7ff) << 52) |
+ (mysignificand2 & 0xfffffffffffffLL);
+ return APInt(128, 2, words);
+}
+
+APInt
+APFloat::convertQuadrupleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7fff) << 48) |
+ (mysignificand2 & 0xffffffffffffLL);
+
+ return APInt(128, 2, words);
+}
+
+APInt
+APFloat::convertDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+ assert(partCount()==1);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+1023; //bias
+ mysignificand = *significandParts();
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7ff;
+ mysignificand = *significandParts();
+ }
+
+ return APInt(64, ((((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL))));
+}
+
+APInt
+APFloat::convertFloatAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+ assert(partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+127; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x800000))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0xff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0xff;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
+ (mysignificand & 0x7fffff)));
+}
+
+APInt
+APFloat::convertHalfAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
+ assert(partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+15; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x400))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x1f;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x1f;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
+ (mysignificand & 0x3ff)));
+}
+
+// This function creates an APInt that is just a bit map of the floating
+// point constant as it would appear in memory. It is not a conversion,
+// and treating the result as a normal integer is unlikely to be useful.
+
+APInt
+APFloat::bitcastToAPInt() const
+{
+ if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
+ return convertHalfAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+ return convertFloatAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+ return convertDoubleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEquad)
+ return convertQuadrupleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+ return convertPPCDoubleDoubleAPFloatToAPInt();
+
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+ "unknown format!");
+ return convertF80LongDoubleAPFloatToAPInt();
+}
+
+float
+APFloat::convertToFloat() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
+ "Float semantics are not IEEEsingle");
+ APInt api = bitcastToAPInt();
+ return api.bitsToFloat();
+}
+
+double
+APFloat::convertToDouble() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
+ "Float semantics are not IEEEdouble");
+ APInt api = bitcastToAPInt();
+ return api.bitsToDouble();
+}
+
+/// Integer bit is explicit in this format. Intel hardware (387 and later)
+/// does not support these bit patterns:
+/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+/// exponent = 0, integer bit 1 ("pseudodenormal")
+/// exponent neither 0 nor all 1's, integer bit 0 ("unnormal")
+/// At the moment, the first two are treated as NaNs, the second two as Normal.
+void
+APFloat::initFromF80LongDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==80);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 & 0x7fff);
+ uint64_t mysignificand = i1;
+
+ initialize(&APFloat::x87DoubleExtended);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>15);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ }
+}
+
+void
+APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i1 >> 52) & 0x7ff;
+ uint64_t mysignificand = i1 & 0xfffffffffffffLL;
+ uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
+ uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
+
+ initialize(&APFloat::PPCDoubleDouble);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i1>>63);
+ sign2 = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless. So is the whole second word, but keep it
+ // for determinism.
+ category = fcNaN;
+ exponent2 = myexponent2;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ // Note there is no category2; the second word is treated as if it is
+ // fcNormal, although it might be something else considered by itself.
+ exponent = myexponent - 1023;
+ exponent2 = myexponent2 - 1023;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ significandParts()[0] |= 0x10000000000000LL; // integer bit
+ if (myexponent2==0)
+ exponent2 = -1022;
+ else
+ significandParts()[1] |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromQuadrupleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 >> 48) & 0x7fff;
+ uint64_t mysignificand = i1;
+ uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+
+ initialize(&APFloat::IEEEquad);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff &&
+ (mysignificand!=0 || mysignificand2 !=0)) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ else
+ significandParts()[1] |= 0x1000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==64);
+ uint64_t i = *api.getRawData();
+ uint64_t myexponent = (i >> 52) & 0x7ff;
+ uint64_t mysignificand = i & 0xfffffffffffffLL;
+
+ initialize(&APFloat::IEEEdouble);
+ assert(partCount()==1);
+
+ sign = static_cast<unsigned int>(i>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 1023;
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ *significandParts() |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromFloatAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==32);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 23) & 0xff;
+ uint32_t mysignificand = i & 0x7fffff;
+
+ initialize(&APFloat::IEEEsingle);
+ assert(partCount()==1);
+
+ sign = i >> 31;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0xff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0xff && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 127; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -126;
+ else
+ *significandParts() |= 0x800000; // integer bit
+ }
+}
+
+void
+APFloat::initFromHalfAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==16);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 10) & 0x1f;
+ uint32_t mysignificand = i & 0x3ff;
+
+ initialize(&APFloat::IEEEhalf);
+ assert(partCount()==1);
+
+ sign = i >> 15;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x1f && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x1f && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 15; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -14;
+ else
+ *significandParts() |= 0x400; // integer bit
+ }
+}
+
+/// Treat api as containing the bits of a floating point number. Currently
+/// we infer the floating point type from the size of the APInt. The
+/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
+/// when the size is anything else).
+void
+APFloat::initFromAPInt(const APInt& api, bool isIEEE)
+{
+ if (api.getBitWidth() == 16)
+ return initFromHalfAPInt(api);
+ else if (api.getBitWidth() == 32)
+ return initFromFloatAPInt(api);
+ else if (api.getBitWidth()==64)
+ return initFromDoubleAPInt(api);
+ else if (api.getBitWidth()==80)
+ return initFromF80LongDoubleAPInt(api);
+ else if (api.getBitWidth()==128)
+ return (isIEEE ?
+ initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
+ else
+ llvm_unreachable(0);
+}
+
+APFloat
+APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
+{
+ return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
+}
+
+APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 1..10
+ // significand = 1..1
+
+ Val.exponent = Sem.maxExponent; // unbiased
+
+ // 1-initialize all bits....
+ Val.zeroSignificand();
+ integerPart *significand = Val.significandParts();
+ unsigned N = partCountForBits(Sem.precision);
+ for (unsigned i = 0; i != N; ++i)
+ significand[i] = ~((integerPart) 0);
+
+ // ...and then clear any bits above the precision in the top word, keeping
+ // all Sem.precision significand bits (including the integer bit) set.
+ if (Sem.precision % integerPartWidth != 0)
+ significand[N-1] &=
+ (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
+
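+ // For IEEEsingle, for example, this should produce the largest finite
+ // single-precision value, roughly 3.40282347e+38 (bit pattern 0x7f7fffff).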
+ return Val;
+}
+
+APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 0..01
+
+ Val.exponent = Sem.minExponent; // unbiased
+ Val.zeroSignificand();
+ Val.significandParts()[0] = 1;
+ return Val;
+}
+
+APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 10..0
+
+ Val.exponent = Sem.minExponent;
+ Val.zeroSignificand();
+ Val.significandParts()[partCountForBits(Sem.precision)-1] |=
+ (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
+
+ return Val;
+}
+
+APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
+ initFromAPInt(api, isIEEE);
+}
+
+APFloat::APFloat(float f) : exponent2(0), sign2(0) {
+ initFromAPInt(APInt::floatToBits(f));
+}
+
+APFloat::APFloat(double d) : exponent2(0), sign2(0) {
+ initFromAPInt(APInt::doubleToBits(d));
+}
+
+namespace {
+ static void append(SmallVectorImpl<char> &Buffer,
+ unsigned N, const char *Str) {
+ unsigned Start = Buffer.size();
+ Buffer.set_size(Start + N);
+ memcpy(&Buffer[Start], Str, N);
+ }
+
+ template <unsigned N>
+ void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
+ append(Buffer, N, Str);
+ }
+
+ /// Removes data from the given significand until it is no more
+ /// precise than is required for the desired precision.
+ void AdjustToPrecision(APInt &significand,
+ int &exp, unsigned FormatPrecision) {
+ unsigned bits = significand.getActiveBits();
+
+ // 196/59 is a very slight overestimate of lg_2(10).
+ unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
+
+ if (bits <= bitsRequired) return;
+
+ unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
+ if (!tensRemovable) return;
+
+ exp += tensRemovable;
+
+ APInt divisor(significand.getBitWidth(), 1);
+ APInt powten(significand.getBitWidth(), 10);
+ while (true) {
+ if (tensRemovable & 1)
+ divisor *= powten;
+ tensRemovable >>= 1;
+ if (!tensRemovable) break;
+ powten *= powten;
+ }
+
+ significand = significand.udiv(divisor);
+
+ // Truncate the significand down to its active bit count, but
+ // don't try to drop below 32.
+ unsigned newPrecision = std::max(32U, significand.getActiveBits());
+ significand = significand.trunc(newPrecision);
+ }
+
+
+ void AdjustToPrecision(SmallVectorImpl<char> &buffer,
+ int &exp, unsigned FormatPrecision) {
+ unsigned N = buffer.size();
+ if (N <= FormatPrecision) return;
+
+ // The most significant figures are the last ones in the buffer.
+ unsigned FirstSignificant = N - FormatPrecision;
+
+ // Round.
+ // FIXME: this probably shouldn't use 'round half up'.
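+ // For example, with FormatPrecision == 2 and a buffer of {'6','9','9'}
+ // (the value 996, least significant digit first), the dropped '6' rounds
+ // up, the carry runs through both '9's, and we are left with the single
+ // digit '1' and exp increased by 3: 996 becomes 1.0e3 at two figures.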
+
+ // Rounding down is just a truncation, except we also want to drop
+ // trailing zeros from the new result.
+ if (buffer[FirstSignificant - 1] < '5') {
+ while (buffer[FirstSignificant] == '0')
+ FirstSignificant++;
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ return;
+ }
+
+ // Rounding up requires a decimal add-with-carry. If we continue
+ // the carry, the newly-introduced zeros will just be truncated.
+ for (unsigned I = FirstSignificant; I != N; ++I) {
+ if (buffer[I] == '9') {
+ FirstSignificant++;
+ } else {
+ buffer[I]++;
+ break;
+ }
+ }
+
+ // If we carried through, we have exactly one digit of precision.
+ if (FirstSignificant == N) {
+ exp += FirstSignificant;
+ buffer.clear();
+ buffer.push_back('1');
+ return;
+ }
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ }
+}
+
+void APFloat::toString(SmallVectorImpl<char> &Str,
+ unsigned FormatPrecision,
+ unsigned FormatMaxPadding) const {
+ switch (category) {
+ case fcInfinity:
+ if (isNegative())
+ return append(Str, "-Inf");
+ else
+ return append(Str, "+Inf");
+
+ case fcNaN: return append(Str, "NaN");
+
+ case fcZero:
+ if (isNegative())
+ Str.push_back('-');
+
+ if (!FormatMaxPadding)
+ append(Str, "0.0E+0");
+ else
+ Str.push_back('0');
+ return;
+
+ case fcNormal:
+ break;
+ }
+
+ if (isNegative())
+ Str.push_back('-');
+
+ // Decompose the number into an APInt and an exponent.
+ int exp = exponent - ((int) semantics->precision - 1);
+ APInt significand(semantics->precision,
+ partCountForBits(semantics->precision),
+ significandParts());
+
+ // Set FormatPrecision if zero. We want to do this before we
+ // truncate trailing zeros, as those are part of the precision.
+ if (!FormatPrecision) {
+ // It's an interesting question whether to use the nominal
+ // precision or the active precision here for denormals.
+
+ // FormatPrecision = ceil(significandBits / lg_2(10))
+ FormatPrecision = (semantics->precision * 59 + 195) / 196;
+ }
+
+ // Ignore trailing binary zeros.
+ int trailingZeros = significand.countTrailingZeros();
+ exp += trailingZeros;
+ significand = significand.lshr(trailingZeros);
+
+ // Change the exponent from 2^e to 10^e.
+ if (exp == 0) {
+ // Nothing to do.
+ } else if (exp > 0) {
+ // Just shift left.
+ significand = significand.zext(semantics->precision + exp);
+ significand <<= exp;
+ exp = 0;
+ } else { /* exp < 0 */
+ int texp = -exp;
+
+ // We transform this using the identity:
+ // (N)(2^-e) == (N)(5^e)(10^-e)
+ // This means we have to multiply N (the significand) by 5^e.
+ // To avoid overflow, we have to operate on numbers large
+ // enough to store N * 5^e:
+ // log2(N * 5^e) == log2(N) + e * log2(5)
+ // <= semantics->precision + e * 137 / 59
+ // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
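+ // For example, N * 2^-3 == N * 5^3 * 10^-3 == 125N * 10^-3, so for N == 1
+ // this is 0.125 == 125e-3.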
+
+ unsigned precision = semantics->precision + 137 * texp / 59;
+
+ // Multiply significand by 5^e.
+ // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
+ significand = significand.zext(precision);
+ APInt five_to_the_i(precision, 5);
+ while (true) {
+ if (texp & 1) significand *= five_to_the_i;
+
+ texp >>= 1;
+ if (!texp) break;
+ five_to_the_i *= five_to_the_i;
+ }
+ }
+
+ AdjustToPrecision(significand, exp, FormatPrecision);
+
+ llvm::SmallVector<char, 256> buffer;
+
+ // Fill the buffer.
+ unsigned precision = significand.getBitWidth();
+ APInt ten(precision, 10);
+ APInt digit(precision, 0);
+
+ bool inTrail = true;
+ while (significand != 0) {
+ // digit <- significand % 10
+ // significand <- significand / 10
+ APInt::udivrem(significand, ten, significand, digit);
+
+ unsigned d = digit.getZExtValue();
+
+ // Drop trailing zeros.
+ if (inTrail && !d) exp++;
+ else {
+ buffer.push_back((char) ('0' + d));
+ inTrail = false;
+ }
+ }
+
+ assert(!buffer.empty() && "no characters in buffer!");
+
+ // Drop down to FormatPrecision.
+ // TODO: don't do more precise calculations above than are required.
+ AdjustToPrecision(buffer, exp, FormatPrecision);
+
+ unsigned NDigits = buffer.size();
+
+ // Check whether we should use scientific notation.
+ bool FormatScientific;
+ if (!FormatMaxPadding)
+ FormatScientific = true;
+ else {
+ if (exp >= 0) {
+ // 765e3 --> 765000
+ // ^^^
+ // But we shouldn't make the number look more precise than it is.
+ FormatScientific = ((unsigned) exp > FormatMaxPadding ||
+ NDigits + (unsigned) exp > FormatPrecision);
+ } else {
+ // Power of the most significant digit.
+ int MSD = exp + (int) (NDigits - 1);
+ if (MSD >= 0) {
+ // 765e-2 == 7.65
+ FormatScientific = false;
+ } else {
+ // 765e-5 == 0.00765
+ // ^ ^^
+ FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
+ }
+ }
+ }
+
+ // Scientific formatting is pretty straightforward.
+ if (FormatScientific) {
+ exp += (NDigits - 1);
+
+ Str.push_back(buffer[NDigits-1]);
+ Str.push_back('.');
+ if (NDigits == 1)
+ Str.push_back('0');
+ else
+ for (unsigned I = 1; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ Str.push_back('E');
+
+ Str.push_back(exp >= 0 ? '+' : '-');
+ if (exp < 0) exp = -exp;
+ SmallVector<char, 6> expbuf;
+ do {
+ expbuf.push_back((char) ('0' + (exp % 10)));
+ exp /= 10;
+ } while (exp);
+ for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
+ Str.push_back(expbuf[E-1-I]);
+ return;
+ }
+
+ // Non-scientific, positive exponents.
+ if (exp >= 0) {
+ for (unsigned I = 0; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ for (unsigned I = 0; I != (unsigned) exp; ++I)
+ Str.push_back('0');
+ return;
+ }
+
+ // Non-scientific, negative exponents.
+
+ // The number of digits to the left of the decimal point.
+ int NWholeDigits = exp + (int) NDigits;
+
+ unsigned I = 0;
+ if (NWholeDigits > 0) {
+ for (; I != (unsigned) NWholeDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+ Str.push_back('.');
+ } else {
+ unsigned NZeros = 1 + (unsigned) -NWholeDigits;
+
+ Str.push_back('0');
+ Str.push_back('.');
+ for (unsigned Z = 1; Z != NZeros; ++Z)
+ Str.push_back('0');
+ }
+
+ for (; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+}
+
+bool APFloat::getExactInverse(APFloat *inv) const {
+ // We can only guarantee the existence of an exact inverse for IEEE floats.
+ if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
+ semantics != &IEEEdouble && semantics != &IEEEquad)
+ return false;
+
+ // Special floats and denormals have no exact inverse.
+ if (category != fcNormal)
+ return false;
+
+ // Check that the number is a power of two by making sure that only the
+ // integer bit is set in the significand.
+ if (significandLSB() != semantics->precision - 1)
+ return false;
+
+ // Get the inverse.
+ APFloat reciprocal(*semantics, 1ULL);
+ if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
+ return false;
+
+ // Avoid multiplication with a denormal, it is not safe on all platforms and
+ // may be slower than a normal division.
+ if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
+ return false;
+
+ assert(reciprocal.category == fcNormal &&
+ reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
+
+ if (inv)
+ *inv = reciprocal;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
new file mode 100644
index 000000000000..76265d445f45
--- /dev/null
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -0,0 +1,2947 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "apint"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cmath>
+#include <limits>
+#include <cstring>
+#include <cstdlib>
+using namespace llvm;
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ memset(result, 0, numWords * sizeof(uint64_t));
+ return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure. The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ return result;
+}
+
+/// A utility function that converts a character to a digit.
+inline static unsigned getDigit(char cdigit, uint8_t radix) {
+ unsigned r;
+
+ if (radix == 16) {
+ r = cdigit - '0';
+ if (r <= 9)
+ return r;
+
+ r = cdigit - 'A';
+ if (r <= 5)
+ return r + 10;
+
+ r = cdigit - 'a';
+ if (r <= 5)
+ return r + 10;
+ }
+
+ r = cdigit - '0';
+ if (r < radix)
+ return r;
+
+ return -1U;
+}
+
+
+void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
+ pVal = getClearedMemory(getNumWords());
+ pVal[0] = val;
+ if (isSigned && int64_t(val) < 0)
+ for (unsigned i = 1; i < getNumWords(); ++i)
+ pVal[i] = -1ULL;
+}
+
+void APInt::initSlowCase(const APInt& that) {
+ pVal = getMemory(getNumWords());
+ memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
+}
+
+
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+ : BitWidth(numBits), VAL(0) {
+ assert(BitWidth && "Bitwidth too small");
+ assert(bigVal && "Null pointer detected!");
+ if (isSingleWord())
+ VAL = bigVal[0];
+ else {
+ // Get memory, cleared to 0
+ pVal = getClearedMemory(getNumWords());
+ // Calculate the number of words to copy
+ unsigned words = std::min<unsigned>(numWords, getNumWords());
+ // Copy the words from bigVal to pVal
+ memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
+ }
+ // Make sure unused high bits are cleared
+ clearUnusedBits();
+}
+
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
+ : BitWidth(numbits), VAL(0) {
+ assert(BitWidth && "Bitwidth too small");
+ fromString(numbits, Str, radix);
+}
+
+APInt& APInt::AssignSlowCase(const APInt& RHS) {
+ // Don't do anything for X = X
+ if (this == &RHS)
+ return *this;
+
+ if (BitWidth == RHS.getBitWidth()) {
+ // assume same bit-width single-word case is already handled
+ assert(!isSingleWord());
+ memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
+ return *this;
+ }
+
+ if (isSingleWord()) {
+ // assume case where both are single words is already handled
+ assert(!RHS.isSingleWord());
+ VAL = 0;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ } else if (getNumWords() == RHS.getNumWords())
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ else if (RHS.isSingleWord()) {
+ delete [] pVal;
+ VAL = RHS.VAL;
+ } else {
+ delete [] pVal;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ }
+ BitWidth = RHS.BitWidth;
+ return clearUnusedBits();
+}
+
+APInt& APInt::operator=(uint64_t RHS) {
+ if (isSingleWord())
+ VAL = RHS;
+ else {
+ pVal[0] = RHS;
+ memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ }
+ return clearUnusedBits();
+}
+
+/// Profile - This method 'profiles' an APInt for use with FoldingSet.
+void APInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger(BitWidth);
+
+ if (isSingleWord()) {
+ ID.AddInteger(VAL);
+ return;
+ }
+
+ unsigned NumWords = getNumWords();
+ for (unsigned i = 0; i < NumWords; ++i)
+ ID.AddInteger(pVal[i]);
+}
+
+/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// "digit" integer array, x[], storing the result in dest[] (which may alias
+/// x[]). 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ dest[i] = y + x[i];
+ if (dest[i] < y)
+ y = 1; // Carry one to next digit.
+ else {
+ y = 0; // No need to carry so exit early
+ break;
+ }
+ }
+ return y;
+}
+
+/// @brief Prefix increment operator. Increments the APInt by one.
+APInt& APInt::operator++() {
+ if (isSingleWord())
+ ++VAL;
+ else
+ add_1(pVal, pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "digits" in x. The result
+/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
+/// In other words, if y > x then this function returns 1, otherwise 0.
+/// @returns the borrow out of the subtraction
+static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t X = x[i];
+ x[i] -= y;
+ if (y > X)
+ y = 1; // We have to "borrow 1" from next "digit"
+ else {
+ y = 0; // No need to borrow
+ break; // Remaining digits are unchanged so exit early
+ }
+ }
+ return bool(y);
+}
+
+/// @brief Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+ if (isSingleWord())
+ --VAL;
+ else
+ sub_1(pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// add - This function adds the integer array x to the integer array y and
+/// places the result in dest.
+/// @returns the carry out from the addition
+/// @brief General addition of 64-bit integer arrays
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool carry = false;
+ for (unsigned i = 0; i< len; ++i) {
+ uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
+ dest[i] = x[i] + y[i] + carry;
+ carry = dest[i] < limit || (carry && dest[i] == limit);
+ }
+ return carry;
+}
+
+/// Adds the RHS APInt to this APInt.
+/// @returns this, after addition of RHS.
+/// @brief Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL += RHS.VAL;
+ else {
+ add(pVal, pVal, RHS.pVal, getNumWords());
+ }
+ return clearUnusedBits();
+}
+
+/// Subtracts the integer array y from the integer array x
+/// @returns the borrow out.
+/// @brief Generalized subtraction of 64-bit integer arrays.
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool borrow = false;
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
+ borrow = y[i] > x_tmp || (borrow && x[i] == 0);
+ dest[i] = x_tmp - y[i];
+ }
+ return borrow;
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// @brief Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL -= RHS.VAL;
+ else
+ sub(pVal, pVal, RHS.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
+/// into dest.
+/// @returns the carry out of the multiplication.
+/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
+static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ // Split y into high 32-bit part (hy) and low 32-bit part (ly)
+ uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
+ uint64_t carry = 0;
+
+ // For each digit of x.
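+ // Each iteration computes carry + x[i] * y as a 128-bit quantity using the
+ // schoolbook identity x[i]*y == hx*hy*2^64 + (hx*ly + lx*hy)*2^32 + lx*ly;
+ // the low 64 bits go to dest[i] and the high 64 bits become the next carry.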
+ for (unsigned i = 0; i < len; ++i) {
+ // Split x into high and low words
+ uint64_t lx = x[i] & 0xffffffffULL;
+ uint64_t hx = x[i] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ dest[i] = carry + lx * ly;
+ // Determine if the add above introduces carry.
+ hasCarry = (dest[i] < carry) ? 1 : 0;
+ carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
+ // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+ // (2^32 - 1) + 2^32 = 2^64.
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
+ }
+ return carry;
+}
+
+/// Multiplies integer array x by integer array y and stores the result into
+/// the integer array dest. Note that dest's size must be >= xlen + ylen.
+/// @brief Generalized multiplication of integer arrays.
+static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
+ unsigned ylen) {
+ dest[xlen] = mul_1(dest, x, xlen, y[0]);
+ for (unsigned i = 1; i < ylen; ++i) {
+ uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
+ uint64_t carry = 0, lx = 0, hx = 0;
+ for (unsigned j = 0; j < xlen; ++j) {
+ lx = x[j] & 0xffffffffULL;
+ hx = x[j] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ uint64_t result = carry + lx * ly;
+ hasCarry = (result < carry) ? 1 : 0;
+ carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (result >> 32);
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ result = (carry << 32) | (result & 0xffffffffULL);
+ dest[i+j] += result;
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + (dest[i+j] < result ? 1 : 0) +
+ ((lx * hy) >> 32) + hx * hy;
+ }
+ dest[i+xlen] = carry;
+ }
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL *= RHS.VAL;
+ clearUnusedBits();
+ return *this;
+ }
+
+ // Get some bit facts about LHS and check for zero
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+ if (!lhsWords)
+ // 0 * X ===> 0
+ return *this;
+
+ // Get some bit facts about RHS and check for zero
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+ if (!rhsWords) {
+ // X * 0 ===> 0
+ clearAllBits();
+ return *this;
+ }
+
+ // Allocate space for the result
+ unsigned destWords = rhsWords + lhsWords;
+ uint64_t *dest = getMemory(destWords);
+
+ // Perform the long multiply
+ mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+
+ // Copy result back into *this
+ clearAllBits();
+ unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
+ memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+
+ // delete dest array and return
+ delete[] dest;
+ return *this;
+}
+
+APInt& APInt::operator&=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL &= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] &= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator|=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL |= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] |= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator^=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL ^= RHS.VAL;
+ this->clearUnusedBits();
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] ^= RHS.pVal[i];
+ return clearUnusedBits();
+}
+
+APInt APInt::AndSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t* val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] & RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::OrSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] | RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::XorSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] ^ RHS.pVal[i];
+
+ // 0^0==0, but clear the unused high bits anyway in case they got set.
+ return APInt(val, getBitWidth()).clearUnusedBits();
+}
+
+bool APInt::operator !() const {
+ if (isSingleWord())
+ return !VAL;
+
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ if (pVal[i])
+ return false;
+ return true;
+}
+
+APInt APInt::operator*(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL * RHS.VAL);
+ APInt Result(*this);
+ Result *= RHS;
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator+(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL + RHS.VAL);
+ APInt Result(BitWidth, 0);
+ add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator-(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL - RHS.VAL);
+ APInt Result(BitWidth, 0);
+ sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+bool APInt::operator[](unsigned bitPosition) const {
+ assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
+ return (maskBit(bitPosition) &
+ (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
+}
+
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+ // Get some facts about the number of bits used in the two operands.
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If the number of bits isn't the same, they aren't equal
+ if (n1 != n2)
+ return false;
+
+ // If the number of bits fits in a word, we only need to compare the low word.
+ if (n1 <= APINT_BITS_PER_WORD)
+ return pVal[0] == RHS.pVal[0];
+
+ // Otherwise, compare everything
+ for (int i = whichWord(n1 - 1); i >= 0; --i)
+ if (pVal[i] != RHS.pVal[i])
+ return false;
+ return true;
+}
+
+bool APInt::EqualSlowCase(uint64_t Val) const {
+ unsigned n = getActiveBits();
+ if (n <= APINT_BITS_PER_WORD)
+ return pVal[0] == Val;
+ else
+ return false;
+}
+
+bool APInt::ult(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord())
+ return VAL < RHS.VAL;
+
+ // Get active bit length of both operands
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If magnitude of LHS is less than RHS, return true.
+ if (n1 < n2)
+ return true;
+
+ // If magnitude of RHS is greater than LHS, return false.
+ if (n2 < n1)
+ return false;
+
+ // If they both fit in a word, just compare the low order word
+ if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+ return pVal[0] < RHS.pVal[0];
+
+ // Otherwise, compare all words
+ unsigned topWord = whichWord(std::max(n1,n2)-1);
+ for (int i = topWord; i >= 0; --i) {
+ if (pVal[i] > RHS.pVal[i])
+ return false;
+ if (pVal[i] < RHS.pVal[i])
+ return true;
+ }
+ return false;
+}
+
+bool APInt::slt(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord()) {
+ int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+ int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
+ return lhsSext < rhsSext;
+ }
+
+ APInt lhs(*this);
+ APInt rhs(RHS);
+ bool lhsNeg = isNegative();
+ bool rhsNeg = rhs.isNegative();
+ if (lhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ lhs.flipAllBits();
+ lhs++;
+ }
+ if (rhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ rhs.flipAllBits();
+ rhs++;
+ }
+
+ // Now we have unsigned values to compare, so pick the comparison based on
+ // whether the original values were negative.
+ if (lhsNeg)
+ if (rhsNeg)
+ return lhs.ugt(rhs);
+ else
+ return true;
+ else if (rhsNeg)
+ return false;
+ else
+ return lhs.ult(rhs);
+}
+
+void APInt::setBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL |= maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+}
+
+/// Set the bit at the position given by "bitPosition" to 0.
+/// @brief Set a given bit to 0.
+void APInt::clearBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL &= ~maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+}
+
+/// @brief Toggle every bit to its opposite value.
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// @brief Toggles a given bit to its opposite value.
+void APInt::flipBit(unsigned bitPosition) {
+ assert(bitPosition < BitWidth && "Out of the bit-width range!");
+ if ((*this)[bitPosition]) clearBit(bitPosition);
+ else setBit(bitPosition);
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ size_t slen = str.size();
+
+ // Each computation below needs to know if it's negative.
+ StringRef::iterator p = str.begin();
+ unsigned isNegative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+
+ // For radixes of power-of-two values, the bits required is accurately and
+ // easily computed
+ if (radix == 2)
+ return slen + isNegative;
+ if (radix == 8)
+ return slen * 3 + isNegative;
+ if (radix == 16)
+ return slen * 4 + isNegative;
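+ // For example, "-ff" in radix 16 has slen == 2 after the sign, so this
+ // reports 2*4 + 1 == 9 bits, enough to hold -255 in two's complement.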
+
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
+ unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
+
+ // Convert to the actual binary value.
+ APInt tmp(sufficient, StringRef(p, slen), radix);
+
+ // Compute how many bits are required. If the log is infinite, assume we need
+// just one bit.
+ unsigned log = tmp.logBase2();
+ if (log == (unsigned)-1) {
+ return isNegative + 1;
+ } else {
+ return isNegative + log + 1;
+ }
+}
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+// When targeting x86, both GCC and LLVM seem to recognize this as a
+// rotate instruction.
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define mix(a,b,c) \
+ { \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+ }
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define final(a,b,c) \
+ { \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+ }
+
+// hashword() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. k is a pointer to an array of uint32_t values; length is
+// the length of the key, in 32-bit chunks. This version only handles
+// keys that are a multiple of 32 bits in size.
+static inline uint32_t hashword(const uint64_t *k64, size_t length)
+{
+ const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch (length) { /* all the case statements fall through */
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+// hashword8() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. This computes a 32-bit hash from one 64-bit word. When
+// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
+// function into about 35 instructions when inlined.
+static inline uint32_t hashword8(const uint64_t k64)
+{
+ uint32_t a,b,c;
+ a = b = c = 0xdeadbeef + 4;
+ b += k64 >> 32;
+ a += k64 & 0xffffffff;
+ final(a,b,c);
+ return c;
+}
+#undef final
+#undef mix
+#undef rot
+
+uint64_t APInt::getHashValue() const {
+ uint64_t hash;
+ if (isSingleWord())
+ hash = hashword8(VAL);
+ else
+ hash = hashword(pVal, getNumWords()*2);
+ return hash;
+}
+
+/// HiBits - This function returns the high "numBits" bits of this APInt.
+APInt APInt::getHiBits(unsigned numBits) const {
+ return APIntOps::lshr(*this, BitWidth - numBits);
+}
+
+/// LoBits - This function returns the low "numBits" bits of this APInt.
+APInt APInt::getLoBits(unsigned numBits) const {
+ return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
+ BitWidth - numBits);
+}
+
+unsigned APInt::countLeadingZerosSlowCase() const {
+ // Treat the most significant word differently because it might have
+ // meaningless bits set beyond the precision.
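+ // For a 70-bit APInt, for instance, only the low 6 bits of the top word
+ // are significant, so MSWMask below is 0x3f.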
+ unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
+ integerPart MSWMask;
+ if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
+ else {
+ MSWMask = ~integerPart(0);
+ BitsInMSW = APINT_BITS_PER_WORD;
+ }
+
+ unsigned i = getNumWords();
+ integerPart MSW = pVal[i-1] & MSWMask;
+ if (MSW)
+ return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
+
+ unsigned Count = BitsInMSW;
+ for (--i; i > 0u; --i) {
+ if (pVal[i-1] == 0)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += CountLeadingZeros_64(pVal[i-1]);
+ break;
+ }
+ }
+ return Count;
+}
+
+static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
+ unsigned Count = 0;
+ if (skip)
+ V <<= skip;
+ while (V && (V & (1ULL << 63))) {
+ Count++;
+ V <<= 1;
+ }
+ return Count;
+}
+
+unsigned APInt::countLeadingOnes() const {
+ if (isSingleWord())
+ return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+
+ unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
+ unsigned shift;
+ if (!highWordBits) {
+ highWordBits = APINT_BITS_PER_WORD;
+ shift = 0;
+ } else {
+ shift = APINT_BITS_PER_WORD - highWordBits;
+ }
+ int i = getNumWords() - 1;
+ unsigned Count = countLeadingOnes_64(pVal[i], shift);
+ if (Count == highWordBits) {
+ for (i--; i >= 0; --i) {
+ if (pVal[i] == -1ULL)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += countLeadingOnes_64(pVal[i], 0);
+ break;
+ }
+ }
+ }
+ return Count;
+}
+
+unsigned APInt::countTrailingZeros() const {
+ if (isSingleWord())
+ return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == 0; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingZeros_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countTrailingOnesSlowCase() const {
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingOnes_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countPopulationSlowCase() const {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ Count += CountPopulation_64(pVal[i]);
+ return Count;
+}
+
+APInt APInt::byteSwap() const {
+ assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
+ if (BitWidth == 16)
+ return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
+ else if (BitWidth == 32)
+ return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
+ else if (BitWidth == 48) {
+ unsigned Tmp1 = unsigned(VAL >> 16);
+ Tmp1 = ByteSwap_32(Tmp1);
+ uint16_t Tmp2 = uint16_t(VAL);
+ Tmp2 = ByteSwap_16(Tmp2);
+ return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
+ } else if (BitWidth == 64)
+ return APInt(BitWidth, ByteSwap_64(VAL));
+ else {
+ APInt Result(BitWidth, 0);
+ char *pByte = (char*)Result.pVal;
+ for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
+ char Tmp = pByte[i];
+ pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
+ pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
+ }
+ return Result;
+ }
+}
+
+APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
+ const APInt& API2) {
+ APInt A = API1, B = API2;
+ while (!!B) {
+ APInt T = B;
+ B = APIntOps::urem(A, B);
+ A = T;
+ }
+ return A;
+}
+
+APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.D = Double;
+
+ // Get the sign bit from the highest order bit
+ bool isNeg = T.I >> 63;
+
+ // Get the 11-bit exponent and adjust for the 1023 bit bias
+ int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+
+ // If the exponent is negative, the value is < 0 so just return 0.
+ if (exp < 0)
+ return APInt(width, 0u);
+
+ // Extract the mantissa by clearing the top 12 bits (sign + exponent).
+ uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+
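+ // For example, Double == 3.0 has an exponent field of 0x400, so exp == 1
+ // and mantissa == 0x18000000000000; since exp < 52 the return below gives
+ // mantissa >> 51 == 3.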
+ // If the exponent doesn't shift all bits out of the mantissa
+ if (exp < 52)
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ APInt(width, mantissa >> (52 - exp));
+
+ // If the client didn't provide enough bits for us to shift the mantissa
+ // into, the result is undefined; just return 0.
+ if (width <= exp - 52)
+ return APInt(width, 0);
+
+ // Otherwise, we have to shift the mantissa bits up to the right location
+ APInt Tmp(width, mantissa);
+ Tmp = Tmp.shl((unsigned)exp - 52);
+ return isNeg ? -Tmp : Tmp;
+}
+
+/// RoundToDouble - This function converts this APInt to a double.
+/// The layout for double is as follows (IEEE Standard 754):
+///   ----------------------------------------
+///  |  Sign    Exponent    Fraction    Bias  |
+///  |----------------------------------------|
+///  |  1[63]   11[62-52]   52[51-00]   1023  |
+///   ----------------------------------------
+double APInt::roundToDouble(bool isSigned) const {
+
+ // Handle the simple case where the value is contained in one uint64_t.
+ // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
+ if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
+ if (isSigned) {
+ int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
+ return double(sext);
+ } else
+ return double(getWord(0));
+ }
+
+ // Determine if the value is negative.
+ bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
+
+ // Construct the absolute value if we're negative.
+ APInt Tmp(isNeg ? -(*this) : (*this));
+
+ // Figure out how many bits we're using.
+ unsigned n = Tmp.getActiveBits();
+
+ // The exponent (without bias normalization) is just the number of bits
+ // we are using. Note that the sign bit is gone since we constructed the
+ // absolute value.
+ uint64_t exp = n;
+
+ // Return infinity for exponent overflow
+ if (exp > 1023) {
+ if (!isSigned || !isNeg)
+ return std::numeric_limits<double>::infinity();
+ else
+ return -std::numeric_limits<double>::infinity();
+ }
+ exp += 1023; // Increment for 1023 bias
+
+ // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+ // extract the high 52 bits from the correct words in pVal.
+ uint64_t mantissa;
+ unsigned hiWord = whichWord(n-1);
+ if (hiWord == 0) {
+ mantissa = Tmp.pVal[0];
+ if (n > 52)
+ mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else {
+ assert(hiWord > 0 && "huh?");
+ uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+ uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+ mantissa = hibits | lobits;
+ }
+
+ // The leading bit of mantissa is implicit, so get rid of it.
+ uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.I = sign | (exp << 52) | mantissa;
+ return T.D;
+}
+
+// Truncate to new width.
+APInt APInt::trunc(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, getRawData()[0]);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
+ Result.pVal[i] = pVal[i];
+
+ // Truncate and copy any partial word.
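+ // Note that (0 - width) relies on unsigned wrap-around: the result is the
+ // number of unused bits in the top word (e.g. width == 70 gives 58).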
+ unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = pVal[i] << bits >> bits;
+
+ return Result;
+}
+
+// Sign extend to a new width.
+APInt APInt::sext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt SignExtend request");
+
+ if (width <= APINT_BITS_PER_WORD) {
+ uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
+ val = (int64_t)val >> (width - BitWidth);
+ return APInt(width, val >> (APINT_BITS_PER_WORD - width));
+ }
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ uint64_t word = 0;
+ for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
+ word = getRawData()[i];
+ Result.pVal[i] = word;
+ }
+
+ // Read and sign-extend any partial word.
+ unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ word = (int64_t)getRawData()[i] << bits >> bits;
+ else
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+
+ // Write remaining full words.
+ for (; i != width / APINT_BITS_PER_WORD; i++) {
+ Result.pVal[i] = word;
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+ }
+
+ // Write any partial word.
+ bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = word << bits >> bits;
+
+ return Result;
+}
+
+// Zero extend to a new width.
+APInt APInt::zext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, VAL);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy words.
+ unsigned i;
+ for (i = 0; i != getNumWords(); i++)
+ Result.pVal[i] = getRawData()[i];
+
+ // Zero remaining words.
+ memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
+
+ return Result;
+}
+
+APInt APInt::zextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt APInt::sextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
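+// For example, the extension routines differ only in how the new high bits are
+// filled:
+//   APInt(8, 0x80).zext(16) == APInt(16, 0x0080)   // zero-filled
+//   APInt(8, 0x80).sext(16) == APInt(16, 0xFF80)   // sign-filled (still -128)
+// zextOrTrunc and sextOrTrunc simply pick between these and trunc based on the
+// requested width.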
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(const APInt &shiftAmt) const {
+ return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(unsigned shiftAmt) const {
+ assert(shiftAmt <= BitWidth && "Invalid shift amount");
+ // Handle a degenerate case
+ if (shiftAmt == 0)
+ return *this;
+
+ // Handle single word shifts with built-in ashr
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0); // undefined
+ else {
+ unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
+ return APInt(BitWidth,
+ (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
+ }
+ }
+
+ // If all the bits were shifted out, the result is, technically, undefined.
+ // We return -1 if it was negative, 0 otherwise. We check this early to avoid
+ // issues in the algorithm below.
+ if (shiftAmt == BitWidth) {
+ if (isNegative())
+ return APInt(BitWidth, -1ULL, true);
+ else
+ return APInt(BitWidth, 0);
+ }
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // Compute some values needed by the following shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
+ unsigned breakWord = getNumWords() - 1 - offset; // last word affected
+ unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
+ if (bitsInWord == 0)
+ bitsInWord = APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ // Move the words containing significant bits
+ for (unsigned i = 0; i <= breakWord; ++i)
+ val[i] = pVal[i+offset]; // move whole word
+
+ // Adjust the top significant word for sign bit fill, if negative
+ if (isNegative())
+ if (bitsInWord < APINT_BITS_PER_WORD)
+ val[breakWord] |= ~0ULL << bitsInWord; // set high bits
+ } else {
+ // Shift the low order words
+ for (unsigned i = 0; i < breakWord; ++i) {
+ // This combines the shifted corresponding word with the low bits from
+ // the next word (shifted into this word's high bits).
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ }
+
+ // Shift the break word. In this case there are no bits from the next word
+ // to include in this word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Deal with sign extension in the break word, and possibly the word before
+ // it.
+ if (isNegative()) {
+ if (wordShift > bitsInWord) {
+ if (breakWord > 0)
+ val[breakWord-1] |=
+ ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
+ val[breakWord] |= ~0ULL;
+ } else
+ val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
+ }
+ }
+
+ // Remaining words are 0 or -1, just assign them.
+ uint64_t fillValue = (isNegative() ? -1ULL : 0);
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = fillValue;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
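+// For example, an arithmetic shift replicates the sign bit:
+//   APInt(8, 0xF0).ashr(4) == APInt(8, 0xFF)   // -16 >> 4 == -1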
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(const APInt &shiftAmt) const {
+ return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(unsigned shiftAmt) const {
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+ else
+ return APInt(BitWidth, this->VAL >> shiftAmt);
+ }
+
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids
+ // issues with shifting by the size of the integer type, which produces
+ // undefined results in the code below. This is also an optimization.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, compute the shift with a simple carry
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (int i = getNumWords()-1; i >= 0; --i) {
+ val[i] = (pVal[i] >> shiftAmt) | carry;
+ carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < getNumWords() - offset; ++i)
+ val[i] = pVal[i+offset];
+ for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
+ val[i] = 0;
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Shift the low order words
+ unsigned breakWord = getNumWords() - offset -1;
+ for (unsigned i = 0; i < breakWord; ++i)
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ // Shift the break word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Remaining words are 0
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
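+// For example, a logical shift always fills the vacated bits with zeros:
+//   APInt(8, 0xF0).lshr(4) == APInt(8, 0x0F)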
+
+/// Left-shift this APInt by shiftAmt.
+/// @brief Left-shift function.
+APInt APInt::shl(const APInt &shiftAmt) const {
+ // It's undefined behavior in C to shift by BitWidth or greater.
+ return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::shlSlowCase(unsigned shiftAmt) const {
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids a
+ // lshr by the words size in the loop below which can produce incorrect
+ // results. It also avoids the expensive computation below for a common case.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, do it the easy way
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (unsigned i = 0; i < getNumWords(); i++) {
+ val[i] = pVal[i] << shiftAmt | carry;
+ carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < offset; i++)
+ val[i] = 0;
+ for (unsigned i = offset; i < getNumWords(); i++)
+ val[i] = pVal[i-offset];
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Copy whole words from this to Result.
+ unsigned i = getNumWords() - 1;
+ for (; i > offset; --i)
+ val[i] = pVal[i-offset] << wordShift |
+ pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
+ val[offset] = pVal[0] << wordShift;
+ for (i = 0; i < offset; ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+APInt APInt::rotl(const APInt &rotateAmt) const {
+ return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotl(unsigned rotateAmt) const {
+ if (rotateAmt == 0)
+ return *this;
+ // Don't get too fancy, just use existing shift/or facilities
+ APInt hi = shl(rotateAmt);
+ APInt lo = lshr(BitWidth - rotateAmt);
+ return hi | lo;
+}
+
+APInt APInt::rotr(const APInt &rotateAmt) const {
+ return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotr(unsigned rotateAmt) const {
+ if (rotateAmt == 0)
+ return *this;
+ // Don't get too fancy, just use existing shift/or facilities
+ APInt lo = lshr(rotateAmt);
+ APInt hi = shl(BitWidth - rotateAmt);
+ return hi | lo;
+}
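+// For example, rotates wrap the shifted-out bits around to the other end:
+//   APInt(8, 0x81).rotl(1) == APInt(8, 0x03)
+//   APInt(8, 0x81).rotr(1) == APInt(8, 0xC0)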
+
+// Square Root - this method computes and returns the square root of "this".
+// Three mechanisms are used for computation. For small values (<= 5 bits),
+// a table lookup is done. This gets some performance for common cases. For
+// values using less than 52 bits, the value is converted to double and then
+// the libc sqrt function is called. The result is rounded and then converted
+// back to a uint64_t which is then used to construct the result. Finally,
+// the Babylonian method for computing square roots is used.
+APInt APInt::sqrt() const {
+
+ // Determine the magnitude of the value.
+ unsigned magnitude = getActiveBits();
+
+ // Use a fast table for some small values. This also gets rid of some
+ // rounding errors in libc sqrt for small values.
+ if (magnitude <= 5) {
+ static const uint8_t results[32] = {
+ /* 0 */ 0,
+ /* 1- 2 */ 1, 1,
+ /* 3- 6 */ 2, 2, 2, 2,
+ /* 7-12 */ 3, 3, 3, 3, 3, 3,
+ /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
+ /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ /* 31 */ 6
+ };
+ return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
+ }
+
+ // If the magnitude of the value fits in less than 52 bits (the precision of
+ // an IEEE double precision floating point value), then we can use the
+ // libc sqrt function which will probably use a hardware sqrt computation.
+ // This should be faster than the algorithm below.
+ if (magnitude < 52) {
+#if HAVE_ROUND
+ return APInt(BitWidth,
+ uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+#else
+ return APInt(BitWidth,
+ uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
+#endif
+ }
+
+ // Okay, all the short cuts are exhausted. We must compute it. The following
+ // is a classical Babylonian method for computing the square root. This code
+ // was adapted to APInt from a Wikipedia article on such computations.
+ // See http://www.wikipedia.org/ and go to the page named
+ // Calculate_an_integer_square_root.
+ unsigned nbits = BitWidth, i = 4;
+ APInt testy(BitWidth, 16);
+ APInt x_old(BitWidth, 1);
+ APInt x_new(BitWidth, 0);
+ APInt two(BitWidth, 2);
+
+ // Select a good starting value using binary logarithms.
+ for (;; i += 2, testy = testy.shl(2))
+ if (i >= nbits || this->ule(testy)) {
+ x_old = x_old.shl(i / 2);
+ break;
+ }
+
+ // Use the Babylonian method to arrive at the integer square root:
+ for (;;) {
+ x_new = (this->udiv(x_old) + x_old).udiv(two);
+ if (x_old.ule(x_new))
+ break;
+ x_old = x_new;
+ }
+
+ // Make sure we return the closest approximation
+ // NOTE: The rounding calculation below is correct. It will produce an
+ // off-by-one discrepancy with results from pari/gp. That discrepancy has been
+ // determined to be a rounding issue with pari/gp as it begins to use a
+ // floating point representation after 192 bits. There are no discrepancies
+ // between this algorithm and pari/gp for bit widths < 192 bits.
+ APInt square(x_old * x_old);
+ APInt nextSquare((x_old + 1) * (x_old +1));
+ if (this->ult(square))
+ return x_old;
+ else if (this->ule(nextSquare)) {
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
+ else
+ return x_old + 1;
+ } else
+ llvm_unreachable("Error in APInt::sqrt computation");
+ return x_old + 1;
+}
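+// For example, APInt(32, 144).sqrt() == APInt(32, 12); for a non-square such
+// as 30 the result is the closest integer approximation described above,
+// i.e. APInt(32, 30).sqrt() == APInt(32, 5).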
+
+/// Computes the multiplicative inverse of this APInt for a given modulo. The
+/// iterative extended Euclidean algorithm is used to solve for this value,
+/// however we simplify it to speed up calculating only the inverse, and take
+/// advantage of div+rem calculations. We also use some tricks to avoid copying
+/// (potentially large) APInts around.
+APInt APInt::multiplicativeInverse(const APInt& modulo) const {
+ assert(ult(modulo) && "This APInt must be smaller than the modulo");
+
+ // Using the properties listed at the following web page (accessed 06/21/08):
+ // http://www.numbertheory.org/php/euclid.html
+ // (especially the properties numbered 3, 4 and 9) it can be proved that
+ // BitWidth bits suffice for all the computations in the algorithm implemented
+ // below. More precisely, this number of bits suffice if the multiplicative
+ // inverse exists, but may not suffice for the general extended Euclidean
+ // algorithm.
+
+ APInt r[2] = { modulo, *this };
+ APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
+ APInt q(BitWidth, 0);
+
+ unsigned i;
+ for (i = 0; r[i^1] != 0; i ^= 1) {
+ // An overview of the math without the confusing bit-flipping:
+ // q = r[i-2] / r[i-1]
+ // r[i] = r[i-2] % r[i-1]
+ // t[i] = t[i-2] - t[i-1] * q
+ udivrem(r[i], r[i^1], q, r[i]);
+ t[i] -= t[i^1] * q;
+ }
+
+ // If this APInt and the modulo are not coprime, there is no multiplicative
+ // inverse, so return 0. We check this by looking at the next-to-last
+ // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
+ // algorithm.
+ if (r[i] != 1)
+ return APInt(BitWidth, 0);
+
+ // The next-to-last t is the multiplicative inverse. However, we are
+ // interested in a positive inverse. Calculate a positive one from a negative
+ // one if necessary. A simple addition of the modulo suffices because
+ // abs(t[i]) is known to be less than *this/2 (see the link above).
+ return t[i].isNegative() ? t[i] + modulo : t[i];
+}
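+// For example, with *this == 3 and modulo == 7 the result is 5, because
+// 3 * 5 == 15 == 1 (mod 7); if gcd(*this, modulo) != 1 the result is 0.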
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., chapter 10.
+APInt::ms APInt::magic() const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt ad, anc, delta, q1, r1, q2, r2, t;
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ struct ms mag;
+
+ ad = d.abs();
+ t = signedMin + (d.lshr(d.getBitWidth() - 1));
+ anc = t - 1 - t.urem(ad); // absolute value of nc
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
+ r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
+ r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = q1<<1; // update q1 = 2p/abs(nc)
+ r1 = r1<<1; // update r1 = rem(2p/abs(nc))
+ if (r1.uge(anc)) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = q2<<1; // update q2 = 2p/abs(d)
+ r2 = r2<<1; // update r2 = rem(2p/abs(d))
+ if (r2.uge(ad)) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1.ult(delta) || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d.isNegative()) mag.m = -mag.m; // resulting magic number
+ mag.s = p - d.getBitWidth(); // resulting shift
+ return mag;
+}
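+// For example, a 32-bit divisor of 10 yields mag.m == 0x66666667 and
+// mag.s == 2, the well-known constants from Hacker's Delight for signed
+// division by 10.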
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+APInt::mu APInt::magicu(unsigned LeadingZeros) const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+ nc = allOnes - (-d).urem(d);
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
+ r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
+ r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1.uge(nc - r1)) {
+ q1 = q1 + q1 + 1; // update q1
+ r1 = r1 + r1 - nc; // update r1
+ }
+ else {
+ q1 = q1+q1; // update q1
+ r1 = r1+r1; // update r1
+ }
+ if ((r2 + 1).uge(d - r2)) {
+ if (q2.uge(signedMax)) magu.a = 1;
+ q2 = q2+q2 + 1; // update q2
+ r2 = r2+r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2.uge(signedMin)) magu.a = 1;
+ q2 = q2+q2; // update q2
+ r2 = r2+r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < d.getBitWidth()*2 &&
+ (q1.ult(delta) || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - d.getBitWidth(); // resulting shift
+ return magu;
+}
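+// For example, a 32-bit unsigned divisor of 10 yields magu.m == 0xCCCCCCCD,
+// magu.s == 3 and magu.a == 0, i.e. x/10 == (x * 0xCCCCCCCD) >> (32 + 3) for
+// every 32-bit unsigned x (constants as tabulated in Hacker's Delight).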
+
+/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
+/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
+/// variables here have the same names as in the algorithm. Comments explain
+/// the algorithm and any deviation from it.
+static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
+ unsigned m, unsigned n) {
+ assert(u && "Must provide dividend");
+ assert(v && "Must provide divisor");
+ assert(q && "Must provide quotient");
+ assert(u != v && u != q && v != q && "Must use different memory");
+ assert(n>1 && "n must be > 1");
+
+ // Knuth uses the value b as the base of the number system. In our case b
+ // is 2^32, the radix of the 32-bit digits used below.
+ uint64_t b = uint64_t(1) << 32;
+
+#if 0
+ DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG(dbgs() << "KnuthDiv: original:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
+#endif
+ // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+ // u and v by d. Note that we have taken Knuth's advice here to use a power
+ // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+ // 2 allows us to shift instead of multiply and it is easy to determine the
+ // shift amount from the leading zeros. We are basically normalizing the u
+ // and v so that its high bits are shifted to the top of v's range without
+ // overflow. Note that this can require an extra word in u so that u must
+ // be of length m+n+1.
+ unsigned shift = CountLeadingZeros_32(v[n-1]);
+ unsigned v_carry = 0;
+ unsigned u_carry = 0;
+ if (shift) {
+ for (unsigned i = 0; i < m+n; ++i) {
+ unsigned u_tmp = u[i] >> (32 - shift);
+ u[i] = (u[i] << shift) | u_carry;
+ u_carry = u_tmp;
+ }
+ for (unsigned i = 0; i < n; ++i) {
+ unsigned v_tmp = v[i] >> (32 - shift);
+ v[i] = (v[i] << shift) | v_carry;
+ v_carry = v_tmp;
+ }
+ }
+ u[m+n] = u_carry;
+#if 0
+ DEBUG(dbgs() << "KnuthDiv: normal:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
+ int j = m;
+ do {
+ DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
+ // D3. [Calculate q'.].
+ // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+ // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+ // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+ // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
+ // on v[n-2] determines at high speed most of the cases in which the trial
+ // value qp is one too large, and it eliminates all cases where qp is two
+ // too large.
+ uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
+ DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
+ uint64_t qp = dividend / v[n-1];
+ uint64_t rp = dividend % v[n-1];
+ if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
+ qp--;
+ rp += v[n-1];
+ if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
+ qp--;
+ }
+ DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+
+ // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+ // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+ // consists of a simple multiplication by a one-place number, combined with
+ // a subtraction.
+ bool isNeg = false;
+ for (unsigned i = 0; i < n; ++i) {
+ uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
+ uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
+ bool borrow = subtrahend > u_tmp;
+ DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
+ << ", subtrahend == " << subtrahend
+ << ", borrow = " << borrow << '\n');
+
+ uint64_t result = u_tmp - subtrahend;
+ unsigned k = j + i;
+ u[k++] = (unsigned)(result & (b-1)); // subtract low word
+ u[k++] = (unsigned)(result >> 32); // subtract high word
+ while (borrow && k <= m+n) { // deal with borrow to the left
+ borrow = u[k] == 0;
+ u[k]--;
+ k++;
+ }
+ isNeg |= borrow;
+ DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
+ u[j+i+1] << '\n');
+ }
+ DEBUG(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ // true value plus b**(n+1), namely as the b's complement of
+ // the true value, and a "borrow" to the left should be remembered.
+ //
+ if (isNeg) {
+ bool carry = true; // true because b's complement is "complement + 1"
+ for (unsigned i = 0; i <= m+n; ++i) {
+ u[i] = ~u[i] + carry; // b's complement
+ carry = carry && u[i] == 0;
+ }
+ }
+ DEBUG(dbgs() << "KnuthDiv: after complement:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
+
+ // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+ // negative, go to step D6; otherwise go on to step D7.
+ q[j] = (unsigned)qp;
+ if (isNeg) {
+ // D6. [Add back]. The probability that this step is necessary is very
+ // small, on the order of only 2/b. Make sure that test data accounts for
+ // this possibility. Decrease q[j] by 1
+ q[j]--;
+ // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+ // A carry will occur to the left of u[j+n], and it should be ignored
+ // since it cancels with the borrow that occurred in D4.
+ bool carry = false;
+ for (unsigned i = 0; i < n; i++) {
+ unsigned limit = std::min(u[j+i],v[i]);
+ u[j+i] += v[i] + carry;
+ carry = u[j+i] < limit || (carry && u[j+i] == limit);
+ }
+ u[j+n] += carry;
+ }
+ DEBUG(dbgs() << "KnuthDiv: after correction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
+ DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+
+ // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
+ } while (--j >= 0);
+
+ DEBUG(dbgs() << "KnuthDiv: quotient:");
+ DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
+ DEBUG(dbgs() << '\n');
+
+ // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
+ // remainder may be obtained by dividing u[...] by d. If r is non-null we
+ // compute the remainder (urem uses this).
+ if (r) {
+ // The value d is expressed by the "shift" value above since we avoided
+ // multiplication by d by using a shift left. So, all we have to do is
+ // shift right here.
+ if (shift) {
+ unsigned carry = 0;
+ DEBUG(dbgs() << "KnuthDiv: remainder:");
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = (u[i] >> shift) | carry;
+ carry = u[i] << (32 - shift);
+ DEBUG(dbgs() << " " << r[i]);
+ }
+ } else {
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = u[i];
+ DEBUG(dbgs() << " " << r[i]);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#if 0
+ DEBUG(dbgs() << '\n');
+#endif
+}
+
+void APInt::divide(const APInt LHS, unsigned lhsWords,
+ const APInt &RHS, unsigned rhsWords,
+ APInt *Quotient, APInt *Remainder)
+{
+ assert(lhsWords >= rhsWords && "Fractional result");
+
+ // First, compose the values into an array of 32-bit words instead of
+ // 64-bit words. This is a necessity of both the "short division" algorithm
+ // and the Knuth "classical algorithm" which requires there to be native
+ // operations for +, -, and * on an m bit value with an m*2 bit result. We
+ // can't use 64-bit operands here because we don't have native results of
+ // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+ // work on big-endian machines.
+ uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
+ unsigned n = rhsWords * 2;
+ unsigned m = (lhsWords * 2) - n;
+
+ // Allocate space for the temporary values we need either on the stack, if
+ // it will fit, or on the heap if it won't.
+ unsigned SPACE[128];
+ unsigned *U = 0;
+ unsigned *V = 0;
+ unsigned *Q = 0;
+ unsigned *R = 0;
+ if ((Remainder?4:3)*n+2*m+1 <= 128) {
+ U = &SPACE[0];
+ V = &SPACE[m+n+1];
+ Q = &SPACE[(m+n+1) + n];
+ if (Remainder)
+ R = &SPACE[(m+n+1) + n + (m+n)];
+ } else {
+ U = new unsigned[m + n + 1];
+ V = new unsigned[n];
+ Q = new unsigned[m+n];
+ if (Remainder)
+ R = new unsigned[n];
+ }
+
+ // Initialize the dividend
+ memset(U, 0, (m+n+1)*sizeof(unsigned));
+ for (unsigned i = 0; i < lhsWords; ++i) {
+ uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
+ U[i * 2] = (unsigned)(tmp & mask);
+ U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+ U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+ // Initialize the divisor
+ memset(V, 0, (n)*sizeof(unsigned));
+ for (unsigned i = 0; i < rhsWords; ++i) {
+ uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
+ V[i * 2] = (unsigned)(tmp & mask);
+ V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+
+ // initialize the quotient and remainder
+ memset(Q, 0, (m+n) * sizeof(unsigned));
+ if (Remainder)
+ memset(R, 0, n * sizeof(unsigned));
+
+ // Now, adjust m and n for the Knuth division. n is the number of words in
+ // the divisor. m is the number of words by which the dividend exceeds the
+ // divisor (i.e. m+n is the length of the dividend). These sizes must not
+ // contain any zero words or the Knuth algorithm fails.
+ for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
+ n--;
+ m++;
+ }
+ for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
+ m--;
+
+ // If we're left with only a single word for the divisor, Knuth doesn't work
+ // so we implement the short division algorithm here. This is much simpler
+ // and faster because we are certain that we can divide a 64-bit quantity
+ // by a 32-bit quantity at hardware speed and short division is simply a
+ // series of such operations. This is just like doing short division but we
+ // are using base 2^32 instead of base 10.
+ assert(n != 0 && "Divide by zero?");
+ if (n == 1) {
+ unsigned divisor = V[0];
+ unsigned remainder = 0;
+ for (int i = m+n-1; i >= 0; i--) {
+ uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
+ if (partial_dividend == 0) {
+ Q[i] = 0;
+ remainder = 0;
+ } else if (partial_dividend < divisor) {
+ Q[i] = 0;
+ remainder = (unsigned)partial_dividend;
+ } else if (partial_dividend == divisor) {
+ Q[i] = 1;
+ remainder = 0;
+ } else {
+ Q[i] = (unsigned)(partial_dividend / divisor);
+ remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
+ }
+ }
+ if (R)
+ R[0] = remainder;
+ } else {
+ // Now we're ready to invoke the Knuth classical divide algorithm. In this
+ // case n > 1.
+ KnuthDiv(U, V, Q, R, m, n);
+ }
+
+ // If the caller wants the quotient
+ if (Quotient) {
+ // Set up the Quotient value's memory.
+ if (Quotient->BitWidth != LHS.BitWidth) {
+ if (Quotient->isSingleWord())
+ Quotient->VAL = 0;
+ else
+ delete [] Quotient->pVal;
+ Quotient->BitWidth = LHS.BitWidth;
+ if (!Quotient->isSingleWord())
+ Quotient->pVal = getClearedMemory(Quotient->getNumWords());
+ } else
+ Quotient->clearAllBits();
+
+ // The quotient is in Q. Reconstitute the quotient into Quotient's low
+ // order words.
+ if (lhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Quotient->isSingleWord())
+ Quotient->VAL = tmp;
+ else
+ Quotient->pVal[0] = tmp;
+ } else {
+ assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
+ for (unsigned i = 0; i < lhsWords; ++i)
+ Quotient->pVal[i] =
+ uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // If the caller wants the remainder
+ if (Remainder) {
+ // Set up the Remainder value's memory.
+ if (Remainder->BitWidth != RHS.BitWidth) {
+ if (Remainder->isSingleWord())
+ Remainder->VAL = 0;
+ else
+ delete [] Remainder->pVal;
+ Remainder->BitWidth = RHS.BitWidth;
+ if (!Remainder->isSingleWord())
+ Remainder->pVal = getClearedMemory(Remainder->getNumWords());
+ } else
+ Remainder->clearAllBits();
+
+ // The remainder is in R. Reconstitute the remainder into Remainder's low
+ // order words.
+ if (rhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Remainder->isSingleWord())
+ Remainder->VAL = tmp;
+ else
+ Remainder->pVal[0] = tmp;
+ } else {
+ assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
+ for (unsigned i = 0; i < rhsWords; ++i)
+ Remainder->pVal[i] =
+ uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // Clean up the memory we allocated.
+ if (U != &SPACE[0]) {
+ delete [] U;
+ delete [] V;
+ delete [] Q;
+ delete [] R;
+ }
+}
+
+APInt APInt::udiv(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+
+ // First, deal with the easy case
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Divide by zero?");
+ return APInt(BitWidth, VAL / RHS.VAL);
+ }
+
+ // Get some facts about the LHS and RHS number of bits and words
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Divided by zero???");
+ unsigned lhsBits = this->getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ } else if (*this == RHS) {
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ } else if (lhsWords == 1 && rhsWords == 1) {
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(1,0); // to hold result.
+ divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
+ return Quotient;
+}
+
+APInt APInt::urem(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Remainder by zero?");
+ return APInt(BitWidth, VAL % RHS.VAL);
+ }
+
+ // Get some facts about the LHS
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+
+ // Get some facts about the RHS
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Performing remainder operation by zero ???");
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ // 0 % Y ===> 0
+ return APInt(BitWidth, 0);
+ } else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X % Y ===> X, iff X < Y
+ return *this;
+ } else if (*this == RHS) {
+ // X % X == 0;
+ return APInt(BitWidth, 0);
+ } else if (lhsWords == 1) {
+ // All high words are zero, just use native remainder
+ return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Remainder(1,0);
+ divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
+ return Remainder;
+}
+
+void APInt::udivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ // Get some size facts about the dividend and divisor
+ unsigned lhsBits = LHS.getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = 0; // 0 / Y ===> 0
+ Remainder = 0; // 0 % Y ===> 0
+ return;
+ }
+
+ if (lhsWords < rhsWords || LHS.ult(RHS)) {
+ Remainder = LHS; // X % Y ===> X, iff X < Y
+ Quotient = 0; // X / Y ===> 0, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = 1; // X / X ===> 1
+ Remainder = 0; // X % X ===> 0;
+ return;
+ }
+
+ if (lhsWords == 1 && rhsWords == 1) {
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
+ uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
+ Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
+ Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
+}
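+// For example, a single call computes both results at once:
+//   APInt Q(128, 0), R(128, 0);
+//   APInt::udivrem(APInt(128, 100), APInt(128, 7), Q, R);  // Q == 14, R == 2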
+
+APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = Res.ult(RHS);
+ return Res;
+}
+
+APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this - RHS;
+ Overflow = isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this-RHS;
+ Overflow = Res.ugt(*this);
+ return Res;
+}
+
+APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
+ // MININT/-1 --> overflow.
+ Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ return sdiv(RHS);
+}
+
+APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
+ Overflow = ShAmt >= getBitWidth();
+ if (Overflow)
+ ShAmt = getBitWidth()-1;
+
+ if (isNonNegative()) // Don't allow sign change.
+ Overflow = ShAmt >= countLeadingZeros();
+ else
+ Overflow = ShAmt >= countLeadingOnes();
+
+ return *this << ShAmt;
+}
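+// For example, the *_ov helpers return the wrapped result and report the
+// overflow separately:
+//   bool Ov;
+//   APInt(8, 127).sadd_ov(APInt(8, 1), Ov);  // yields 0x80 (-128), Ov == true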
+
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
+ // Check our assumptions here
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ bool isNeg = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+ assert((slen <= numbits || radix != 2) && "Insufficient bit width");
+ assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
+ assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
+ "Insufficient bit width");
+
+ // Allocate memory
+ if (!isSingleWord())
+ pVal = getClearedMemory(getNumWords());
+
+ // Figure out if we can shift instead of multiply
+ unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
+
+ // Set up an APInt for the digit to add outside the loop so we don't
+ // constantly construct/destruct it.
+ APInt apdigit(getBitWidth(), 0);
+ APInt apradix(getBitWidth(), radix);
+
+ // Enter digit traversal loop
+ for (StringRef::iterator e = str.end(); p != e; ++p) {
+ unsigned digit = getDigit(*p, radix);
+ assert(digit < radix && "Invalid character in digit string");
+
+ // Shift or multiply the value by the radix
+ if (slen > 1) {
+ if (shift)
+ *this <<= shift;
+ else
+ *this *= apradix;
+ }
+
+ // Add in the digit we just interpreted
+ if (apdigit.isSingleWord())
+ apdigit.VAL = digit;
+ else
+ apdigit.pVal[0] = digit;
+ *this += apdigit;
+ }
+ // If it's negative, put it in two's complement form
+ if (isNeg) {
+ (*this)--;
+ this->flipAllBits();
+ }
+}
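+// For example, parsing "-42" with radix 10 into a 16-bit APInt leaves it
+// holding 0xFFD6, the two's complement encoding of -42. This routine is
+// normally reached through the string-accepting APInt constructor rather than
+// called directly.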
+
+void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
+ bool Signed, bool formatAsCLiteral) const {
+ assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ const char *Prefix = "";
+ if (formatAsCLiteral) {
+ switch (Radix) {
+ case 2:
+ // Binary literals are a non-standard extension added in gcc 4.3:
+ // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+ Prefix = "0b";
+ break;
+ case 8:
+ Prefix = "0";
+ break;
+ case 16:
+ Prefix = "0x";
+ break;
+ }
+ }
+
+ // First, check for a zero value and just short circuit the logic below.
+ if (*this == 0) {
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ }
+ Str.push_back('0');
+ return;
+ }
+
+ static const char Digits[] = "0123456789ABCDEF";
+
+ if (isSingleWord()) {
+ char Buffer[65];
+ char *BufPtr = Buffer+65;
+
+ uint64_t N;
+ if (!Signed) {
+ N = getZExtValue();
+ } else {
+ int64_t I = getSExtValue();
+ if (I >= 0) {
+ N = I;
+ } else {
+ Str.push_back('-');
+ N = -(uint64_t)I;
+ }
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ }
+
+ while (N) {
+ *--BufPtr = Digits[N % Radix];
+ N /= Radix;
+ }
+ Str.append(BufPtr, Buffer+65);
+ return;
+ }
+
+ APInt Tmp(*this);
+
+ if (Signed && isNegative()) {
+ // They want to print the signed version and it is a negative value
+ // Flip the bits and add one to turn it into the equivalent positive
+ // value and put a '-' in the result.
+ Tmp.flipAllBits();
+ Tmp++;
+ Str.push_back('-');
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ }
+
+ // We insert the digits backward, then reverse them to get the right order.
+ unsigned StartDig = Str.size();
+
+ // For radix 2, 8 and 16 we can just shift and mask instead of dividing,
+ // because each digit occupies a fixed number of bits (1, 3 and 4
+ // respectively). We just shift until the value is zero.
+ if (Radix != 10) {
+ // Just shift tmp right for each digit width until it becomes zero
+ unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
+ unsigned MaskAmt = Radix - 1;
+
+ while (Tmp != 0) {
+ unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
+ Str.push_back(Digits[Digit]);
+ Tmp = Tmp.lshr(ShiftAmt);
+ }
+ } else {
+ APInt divisor(4, 10);
+ while (Tmp != 0) {
+ APInt APdigit(1, 0);
+ APInt tmp2(Tmp.getBitWidth(), 0);
+ divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
+ &APdigit);
+ unsigned Digit = (unsigned)APdigit.getZExtValue();
+ assert(Digit < Radix && "divide failed");
+ Str.push_back(Digits[Digit]);
+ Tmp = tmp2;
+ }
+ }
+
+ // Reverse the digits before returning.
+ std::reverse(Str.begin()+StartDig, Str.end());
+}
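+// For example, APInt(16, 0xFFD6) prints as "65494" when Signed is false and
+// as "-42" when Signed is true; with Radix 16 and formatAsCLiteral set, the
+// unsigned form is "0xFFD6".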
+
+/// toString - This returns the APInt as a std::string. Note that this is an
+/// inefficient method. It is better to pass in a SmallVector/SmallString
+/// to the methods above.
+std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
+ SmallString<40> S;
+ toString(S, Radix, Signed, /* formatAsCLiteral = */false);
+ return S.str();
+}
+
+
+void APInt::dump() const {
+ SmallString<40> S, U;
+ this->toStringUnsigned(U);
+ this->toStringSigned(S);
+ dbgs() << "APInt(" << BitWidth << "b, "
+ << U.str() << "u " << S.str() << "s)";
+}
+
+void APInt::print(raw_ostream &OS, bool isSigned) const {
+ SmallString<40> S;
+ this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
+ OS << S.str();
+}
+
+// This implements a variety of operations on a representation of
+// arbitrary precision, two's-complement, bignum integer values.
+
+// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
+// and unrestricting assumption.
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
+
+/* Some handy functions local to this file. */
+namespace {
+
+ /* Returns the integer part with the least significant BITS set.
+ BITS cannot be zero. */
+ static inline integerPart
+ lowBitMask(unsigned int bits)
+ {
+ assert(bits != 0 && bits <= integerPartWidth);
+
+ return ~(integerPart) 0 >> (integerPartWidth - bits);
+ }
+
+ /* Returns the value of the lower half of PART. */
+ static inline integerPart
+ lowHalf(integerPart part)
+ {
+ return part & lowBitMask(integerPartWidth / 2);
+ }
+
+ /* Returns the value of the upper half of PART. */
+ static inline integerPart
+ highHalf(integerPart part)
+ {
+ return part >> (integerPartWidth / 2);
+ }
+
+ /* Returns the bit number of the most significant set bit of a part.
+ If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partMSB(integerPart value)
+ {
+ unsigned int n, msb;
+
+ if (value == 0)
+ return -1U;
+
+ n = integerPartWidth / 2;
+
+ msb = 0;
+ do {
+ if (value >> n) {
+ value >>= n;
+ msb += n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return msb;
+ }
+
+ /* Returns the bit number of the least significant set bit of a
+ part. If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partLSB(integerPart value)
+ {
+ unsigned int n, lsb;
+
+ if (value == 0)
+ return -1U;
+
+ lsb = integerPartWidth - 1;
+ n = integerPartWidth / 2;
+
+ do {
+ if (value << n) {
+ value <<= n;
+ lsb -= n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return lsb;
+ }
+}
+
+/* Sets the least significant part of a bignum to the input value, and
+ zeroes out higher parts. */
+void
+APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(parts > 0);
+
+ dst[0] = part;
+ for (i = 1; i < parts; i++)
+ dst[i] = 0;
+}
+
+/* Assign one bignum to another. */
+void
+APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] = src[i];
+}
+
+/* Returns true if a bignum is zero, false otherwise. */
+bool
+APInt::tcIsZero(const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ if (src[i])
+ return false;
+
+ return true;
+}
+
+/* Extract the given bit of a bignum; returns 0 or 1. */
+int
+APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
+{
+ return (parts[bit / integerPartWidth] &
+ ((integerPart) 1 << bit % integerPartWidth)) != 0;
+}
+
+/* Set the given bit of a bignum. */
+void
+APInt::tcSetBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
+}
+
+/* Clears the given bit of a bignum. */
+void
+APInt::tcClearBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] &=
+ ~((integerPart) 1 << (bit % integerPartWidth));
+}
+
+/* Returns the bit number of the least significant set bit of a
+ number. If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcLSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int i, lsb;
+
+ for (i = 0; i < n; i++) {
+ if (parts[i] != 0) {
+ lsb = partLSB(parts[i]);
+
+ return lsb + i * integerPartWidth;
+ }
+ }
+
+ return -1U;
+}
+
+/* Returns the bit number of the most significant set bit of a number.
+ If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcMSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int msb;
+
+ do {
+ --n;
+
+ if (parts[n] != 0) {
+ msb = partMSB(parts[n]);
+
+ return msb + n * integerPartWidth;
+ }
+ } while (n);
+
+ return -1U;
+}
+
+/* Copy the bit vector of width srcBITS from SRC, starting at bit
+ srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
+ the least significant bit of DST. All high bits above srcBITS in
+ DST are zero-filled. */
+void
+APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
+ unsigned int srcBits, unsigned int srcLSB)
+{
+ unsigned int firstSrcPart, dstParts, shift, n;
+
+ dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
+ assert(dstParts <= dstCount);
+
+ firstSrcPart = srcLSB / integerPartWidth;
+ tcAssign (dst, src + firstSrcPart, dstParts);
+
+ shift = srcLSB % integerPartWidth;
+ tcShiftRight (dst, dstParts, shift);
+
+ /* We now have (dstParts * integerPartWidth - shift) bits from SRC
+ in DST. If this is less than srcBits, append the rest, else
+ clear the high bits. */
+ n = dstParts * integerPartWidth - shift;
+ if (n < srcBits) {
+ integerPart mask = lowBitMask (srcBits - n);
+ dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
+ << n % integerPartWidth);
+ } else if (n > srcBits) {
+ if (srcBits % integerPartWidth)
+ dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
+ }
+
+ /* Clear high parts. */
+ while (dstParts < dstCount)
+ dst[dstParts++] = 0;
+}
+
+/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcAdd(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for (i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] += rhs[i] + 1;
+ c = (dst[i] <= l);
+ } else {
+ dst[i] += rhs[i];
+ c = (dst[i] < l);
+ }
+ }
+
+ return c;
+}
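+/* For example, with two-part operands the carry propagates across parts:
+     integerPart a[2] = { ~(integerPart) 0, 0 };
+     integerPart b[2] = { 1, 0 };
+     APInt::tcAdd(a, b, 0, 2);  a becomes { 0, 1 } and the returned carry is 0. */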
+
+/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for (i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] -= rhs[i] + 1;
+ c = (dst[i] >= l);
+ } else {
+ dst[i] -= rhs[i];
+ c = (dst[i] > l);
+ }
+ }
+
+ return c;
+}
+
+/* Negate a bignum in-place. */
+void
+APInt::tcNegate(integerPart *dst, unsigned int parts)
+{
+ tcComplement(dst, parts);
+ tcIncrement(dst, parts);
+}
+
+/* DST += SRC * MULTIPLIER + CARRY if add is true
+ DST = SRC * MULTIPLIER + CARRY if add is false
+
+ Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+ they must start at the same point, i.e. DST == SRC.
+
+ If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+ returned. Otherwise DST is filled with the least significant
+ DSTPARTS parts of the result, and if all of the omitted higher
+ parts were zero return zero, otherwise overflow occurred and
+ return one. */
+int
+APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
+ integerPart multiplier, integerPart carry,
+ unsigned int srcParts, unsigned int dstParts,
+ bool add)
+{
+ unsigned int i, n;
+
+ /* Otherwise our writes of DST kill our later reads of SRC. */
+ assert(dst <= src || dst >= src + srcParts);
+ assert(dstParts <= srcParts + 1);
+
+ /* N loops; minimum of dstParts and srcParts. */
+ n = dstParts < srcParts ? dstParts: srcParts;
+
+ for (i = 0; i < n; i++) {
+ integerPart low, mid, high, srcPart;
+
+ /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+
+ This cannot overflow, because
+
+ (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+
+ which is less than n^2. */
+
+ srcPart = src[i];
+
+ if (multiplier == 0 || srcPart == 0) {
+ low = carry;
+ high = 0;
+ } else {
+ low = lowHalf(srcPart) * lowHalf(multiplier);
+ high = highHalf(srcPart) * highHalf(multiplier);
+
+ mid = lowHalf(srcPart) * highHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ mid = highHalf(srcPart) * lowHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ /* Now add carry. */
+ if (low + carry < low)
+ high++;
+ low += carry;
+ }
+
+ if (add) {
+ /* And now DST[i], and store the new low part there. */
+ if (low + dst[i] < low)
+ high++;
+ dst[i] += low;
+ } else
+ dst[i] = low;
+
+ carry = high;
+ }
+
+ if (i < dstParts) {
+ /* Full multiplication, there is no overflow. */
+ assert(i + 1 == dstParts);
+ dst[i] = carry;
+ return 0;
+ } else {
+ /* We overflowed if there is carry. */
+ if (carry)
+ return 1;
+
+ /* We would overflow if any significant unwritten parts would be
+ non-zero. This is true if any remaining src parts are non-zero
+ and the multiplier is non-zero. */
+ if (multiplier)
+ for (; i < srcParts; i++)
+ if (src[i])
+ return 1;
+
+ /* We fitted in the narrow destination. */
+ return 0;
+ }
+}
+
+/* DST = LHS * RHS, where DST has the same width as the operands and
+ is filled with the least significant parts of the result. Returns
+ one if overflow occurred, otherwise zero. DST must be disjoint
+ from both operands. */
+int
+APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+ int overflow;
+
+ assert(dst != lhs && dst != rhs);
+
+ overflow = 0;
+ tcSet(dst, 0, parts);
+
+ for (i = 0; i < parts; i++)
+ overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
+ parts - i, true);
+
+ return overflow;
+}
+
+/* DST = LHS * RHS, where DST has width the sum of the widths of the
+ operands. No overflow occurs. DST must be disjoint from both
+ operands. Returns the number of parts required to hold the
+ result. */
+unsigned int
+APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int lhsParts,
+ unsigned int rhsParts)
+{
+ /* Put the narrower number on the LHS for fewer loops below. */
+ if (lhsParts > rhsParts) {
+ return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
+ } else {
+ unsigned int n;
+
+ assert(dst != lhs && dst != rhs);
+
+ tcSet(dst, 0, rhsParts);
+
+ for (n = 0; n < lhsParts; n++)
+ tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
+
+ n = lhsParts + rhsParts;
+
+ return n - (dst[n - 1] == 0);
+ }
+}
+
+/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
+ Otherwise set LHS to LHS / RHS with the fractional part discarded,
+ set REMAINDER to the remainder, return zero. i.e.
+
+ OLD_LHS = RHS * LHS + REMAINDER
+
+ SCRATCH is a bignum of the same size as the operands and result for
+ use by the routine; its contents need not be initialized and are
+ destroyed. LHS, REMAINDER and SCRATCH must be distinct.
+*/
+int
+APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
+ integerPart *remainder, integerPart *srhs,
+ unsigned int parts)
+{
+ unsigned int n, shiftCount;
+ integerPart mask;
+
+ assert(lhs != remainder && lhs != srhs && remainder != srhs);
+
+ shiftCount = tcMSB(rhs, parts) + 1;
+ if (shiftCount == 0)
+ return true;
+
+ shiftCount = parts * integerPartWidth - shiftCount;
+ n = shiftCount / integerPartWidth;
+ mask = (integerPart) 1 << (shiftCount % integerPartWidth);
+
+ tcAssign(srhs, rhs, parts);
+ tcShiftLeft(srhs, parts, shiftCount);
+ tcAssign(remainder, lhs, parts);
+ tcSet(lhs, 0, parts);
+
+ /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
+ the total. */
+ for (;;) {
+ int compare;
+
+ compare = tcCompare(remainder, srhs, parts);
+ if (compare >= 0) {
+ tcSubtract(remainder, srhs, 0, parts);
+ lhs[n] |= mask;
+ }
+
+ if (shiftCount == 0)
+ break;
+ shiftCount--;
+ tcShiftRight(srhs, parts, 1);
+ if ((mask >>= 1) == 0)
+ mask = (integerPart) 1 << (integerPartWidth - 1), n--;
+ }
+
+ return false;
+}
+
+/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero.
+ There are no restrictions on COUNT. */
+void
+APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int jump, shift;
+
+ /* Jump is the inter-part jump; shift is the intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ while (parts > jump) {
+ integerPart part;
+
+ parts--;
+
+ /* dst[i] comes from the two parts src[i - jump] and, if we have
+ an intra-part shift, src[i - jump - 1]. */
+ part = dst[parts - jump];
+ if (shift) {
+ part <<= shift;
+ if (parts >= jump + 1)
+ part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
+ }
+
+ dst[parts] = part;
+ }
+
+ while (parts > 0)
+ dst[--parts] = 0;
+ }
+}
+
+/* Shift a bignum right COUNT bits in-place. Shifted in bits are
+ zero. There are no restrictions on COUNT. */
+void
+APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int i, jump, shift;
+
+ /* Jump is the inter-part jump; shift is the intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ /* Perform the shift. This leaves the most significant COUNT bits
+ of the result at zero. */
+ for (i = 0; i < parts; i++) {
+ integerPart part;
+
+ if (i + jump >= parts) {
+ part = 0;
+ } else {
+ part = dst[i + jump];
+ if (shift) {
+ part >>= shift;
+ if (i + jump + 1 < parts)
+ part |= dst[i + jump + 1] << (integerPartWidth - shift);
+ }
+ }
+
+ dst[i] = part;
+ }
+ }
+}
+
+/* Bitwise and of two bignums. */
+void
+APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] &= rhs[i];
+}
+
+/* Bitwise inclusive or of two bignums. */
+void
+APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] |= rhs[i];
+}
+
+/* Bitwise exclusive or of two bignums. */
+void
+APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] ^= rhs[i];
+}
+
+/* Complement a bignum in-place. */
+void
+APInt::tcComplement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
+/* Comparison (unsigned) of two bignums. */
+int
+APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
+ unsigned int parts)
+{
+ while (parts) {
+ parts--;
+ if (lhs[parts] == rhs[parts])
+ continue;
+
+ if (lhs[parts] > rhs[parts])
+ return 1;
+ else
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Increment a bignum in-place, return the carry flag. */
+integerPart
+APInt::tcIncrement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ if (++dst[i] != 0)
+ break;
+
+ return i == parts;
+}
+
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+void
+APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
+ unsigned int bits)
+{
+ unsigned int i;
+
+ i = 0;
+ while (bits > integerPartWidth) {
+ dst[i++] = ~(integerPart) 0;
+ bits -= integerPartWidth;
+ }
+
+ if (bits)
+ dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
diff --git a/contrib/llvm/lib/Support/APSInt.cpp b/contrib/llvm/lib/Support/APSInt.cpp
new file mode 100644
index 000000000000..73acafa690c7
--- /dev/null
+++ b/contrib/llvm/lib/Support/APSInt.cpp
@@ -0,0 +1,23 @@
+//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the APSInt class, which is a simple class that
+// represents an arbitrary-sized integer that knows its signedness.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+
+using namespace llvm;
+
+void APSInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0));
+ APInt::Profile(ID);
+}
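+
+// Illustrative sketch (hypothetical caller): Profile() is what lets APSInt
+// values be uniqued in FoldingSet-based containers, e.g.
+//
+//   FoldingSetNodeID ID;
+//   SomeAPSInt.Profile(ID);   // folds signedness first, then the value bits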
diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp
new file mode 100644
index 000000000000..215b0f249d96
--- /dev/null
+++ b/contrib/llvm/lib/Support/Allocator.cpp
@@ -0,0 +1,188 @@
+//===--- Allocator.cpp - Simple memory allocation abstraction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BumpPtrAllocator interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Recycler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstring>
+
+namespace llvm {
+
+BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
+ SlabAllocator &allocator)
+ : SlabSize(size), SizeThreshold(threshold), Allocator(allocator),
+ CurSlab(0), BytesAllocated(0) { }
+
+BumpPtrAllocator::~BumpPtrAllocator() {
+ DeallocateSlabs(CurSlab);
+}
+
+/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should
+/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and
+/// AlignPtr(8, 4) == 8.
+char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) {
+ assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
+ "Alignment is not a power of two!");
+
+ // Do the alignment.
+ return (char*)(((uintptr_t)Ptr + Alignment - 1) &
+ ~(uintptr_t)(Alignment - 1));
+}
+
+/// StartNewSlab - Allocate a new slab and move the bump pointers over into
+/// the new slab. Modifies CurPtr and End.
+void BumpPtrAllocator::StartNewSlab() {
+ // If we have already allocated a large number of slabs, it's likely we're
+ // going to allocate more. Increase the slab size to reduce mallocs and
+ // possibly memory overhead. The factors are chosen conservatively to avoid
+ // overallocation.
+ if (BytesAllocated >= SlabSize * 128)
+ SlabSize *= 2;
+
+ MemSlab *NewSlab = Allocator.Allocate(SlabSize);
+ NewSlab->NextPtr = CurSlab;
+ CurSlab = NewSlab;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+}
+
+/// DeallocateSlabs - Deallocate all memory slabs after and including this
+/// one.
+void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) {
+ while (Slab) {
+ MemSlab *NextSlab = Slab->NextPtr;
+#ifndef NDEBUG
+ // Poison the memory so stale pointers crash sooner. Note we must
+ // preserve the Size and NextPtr fields at the beginning.
+ sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab));
+ memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab));
+#endif
+ Allocator.Deallocate(Slab);
+ Slab = NextSlab;
+ }
+}
+
+/// Reset - Deallocate all but the current slab and reset the current pointer
+/// to the beginning of it, freeing all memory allocated so far.
+void BumpPtrAllocator::Reset() {
+ if (!CurSlab)
+ return;
+ DeallocateSlabs(CurSlab->NextPtr);
+ CurSlab->NextPtr = 0;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+}
+
+/// Allocate - Allocate space at the specified alignment.
+///
+void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
+ if (!CurSlab) // Start a new slab if we haven't allocated one already.
+ StartNewSlab();
+
+ // Keep track of how many bytes we've allocated.
+ BytesAllocated += Size;
+
+ // 0-byte alignment means 1-byte alignment.
+ if (Alignment == 0) Alignment = 1;
+
+ // Allocate the aligned space, going forwards from CurPtr.
+ char *Ptr = AlignPtr(CurPtr, Alignment);
+
+ // Check if we can hold it.
+ if (Ptr + Size <= End) {
+ CurPtr = Ptr + Size;
+ return Ptr;
+ }
+
+ // If Size is really big, allocate a separate slab for it.
+ size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1;
+ if (PaddedSize > SizeThreshold) {
+ MemSlab *NewSlab = Allocator.Allocate(PaddedSize);
+
+ // Put the new slab after the current slab, since we are not allocating
+ // into it.
+ NewSlab->NextPtr = CurSlab->NextPtr;
+ CurSlab->NextPtr = NewSlab;
+
+ Ptr = AlignPtr((char*)(NewSlab + 1), Alignment);
+ assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size);
+ return Ptr;
+ }
+
+ // Otherwise, start a new slab and try again.
+ StartNewSlab();
+ Ptr = AlignPtr(CurPtr, Alignment);
+ CurPtr = Ptr + Size;
+ assert(CurPtr <= End && "Unable to allocate memory!");
+ return Ptr;
+}
+
+unsigned BumpPtrAllocator::GetNumSlabs() const {
+ unsigned NumSlabs = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ ++NumSlabs;
+ }
+ return NumSlabs;
+}
+
+size_t BumpPtrAllocator::getTotalMemory() const {
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ }
+ return TotalMemory;
+}
+
+void BumpPtrAllocator::PrintStats() const {
+ unsigned NumSlabs = 0;
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ ++NumSlabs;
+ }
+
+ errs() << "\nNumber of memory regions: " << NumSlabs << '\n'
+ << "Bytes used: " << BytesAllocated << '\n'
+ << "Bytes allocated: " << TotalMemory << '\n'
+ << "Bytes wasted: " << (TotalMemory - BytesAllocated)
+ << " (includes alignment, etc)\n";
+}
+
+MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator =
+ MallocSlabAllocator();
+
+SlabAllocator::~SlabAllocator() { }
+
+MallocSlabAllocator::~MallocSlabAllocator() { }
+
+MemSlab *MallocSlabAllocator::Allocate(size_t Size) {
+ MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0);
+ Slab->Size = Size;
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void MallocSlabAllocator::Deallocate(MemSlab *Slab) {
+ Allocator.Deallocate(Slab);
+}
+
+void PrintRecyclerStats(size_t Size,
+ size_t Align,
+ size_t FreeListSize) {
+ errs() << "Recycler element size: " << Size << '\n'
+ << "Recycler element alignment: " << Align << '\n'
+ << "Number of elements free for recycling: " << FreeListSize << '\n';
+}
+
+}
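+
+// Illustrative usage sketch (hypothetical caller and type; default slab sizes
+// come from the header):
+//
+//   BumpPtrAllocator BPA;
+//   void *Mem = BPA.Allocate(sizeof(Foo), 8);  // 8-byte-aligned raw memory
+//   Foo *F = new (Mem) Foo();                  // placement-new; never free()d
+//   BPA.Reset();                               // recycle the current slab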
diff --git a/contrib/llvm/lib/Support/Atomic.cpp b/contrib/llvm/lib/Support/Atomic.cpp
new file mode 100644
index 000000000000..8214521f9827
--- /dev/null
+++ b/contrib/llvm/lib/Support/Atomic.cpp
@@ -0,0 +1,112 @@
+//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Atomic.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#undef MemoryFence
+#endif
+
+void sys::MemoryFence() {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ return;
+#else
+# if defined(__GNUC__)
+ __sync_synchronize();
+# elif defined(_MSC_VER)
+ MemoryBarrier();
+# else
+# error No memory fence implementation for your platform!
+# endif
+#endif
+}
+
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+ sys::cas_flag new_value,
+ sys::cas_flag old_value) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ sys::cas_flag result = *ptr;
+ if (result == old_value)
+ *ptr = new_value;
+ return result;
+#elif defined(__GNUC__)
+ return __sync_val_compare_and_swap(ptr, old_value, new_value);
+#elif defined(_MSC_VER)
+ return InterlockedCompareExchange(ptr, new_value, old_value);
+#else
+# error No compare-and-swap implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ ++(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_add_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedIncrement(ptr);
+#else
+# error No atomic increment implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ --(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_sub_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedDecrement(ptr);
+#else
+# error No atomic decrement implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ *ptr += val;
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_add_and_fetch(ptr, val);
+#elif defined(_MSC_VER)
+ return InterlockedExchangeAdd(ptr, val) + val;
+#else
+# error No atomic add implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+ sys::cas_flag original, result;
+ do {
+ original = *ptr;
+ result = original * val;
+ } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+ return result;
+}
+
+sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+ sys::cas_flag original, result;
+ do {
+ original = *ptr;
+ result = original / val;
+ } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+ return result;
+}
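+
+// Illustrative sketch: the compare-and-swap loop used by AtomicMul/AtomicDiv
+// generalizes to any read-modify-write, e.g. a hypothetical atomic maximum:
+//
+//   sys::cas_flag old_val, desired;
+//   do {
+//     old_val = *ptr;
+//     desired = (val > old_val) ? val : old_val;
+//   } while (sys::CompareAndSwap(ptr, desired, old_val) != old_val);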
diff --git a/contrib/llvm/lib/Support/BranchProbability.cpp b/contrib/llvm/lib/Support/BranchProbability.cpp
new file mode 100644
index 000000000000..97342da3f078
--- /dev/null
+++ b/contrib/llvm/lib/Support/BranchProbability.cpp
@@ -0,0 +1,44 @@
+//===-------------- lib/Support/BranchProbability.cpp -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BranchProbability class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+BranchProbability::BranchProbability(uint32_t n, uint32_t d) {
+ assert(d > 0 && "Denominator cannot be 0!");
+ assert(n <= d && "Probability cannot be bigger than 1!");
+ N = n;
+ D = d;
+}
+
+raw_ostream &BranchProbability::print(raw_ostream &OS) const {
+ OS << N << " / " << D << " = " << ((double)N / D);
+ return OS;
+}
+
+void BranchProbability::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
+ Prob.print(OS);
+ return OS;
+}
+
+}
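+
+// Illustrative sketch (hypothetical caller):
+//
+//   BranchProbability TakenProb(3, 4);  // the branch is taken 3 times in 4
+//   dbgs() << TakenProb << "\n";        // prints "3 / 4 = 0.75"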
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
new file mode 100644
index 000000000000..92c60a9631f7
--- /dev/null
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -0,0 +1,1426 @@
+//===-- CommandLine.cpp - Command line parser implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements a command line argument processor that is useful when
+// creating a tool. It provides a simple, minimalistic interface that is easily
+// extensible and supports nonlocal (library) command line options.
+//
+// Note that rather than trying to figure out what this code does, you could try
+// reading the library documentation located in docs/CommandLine.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include <cerrno>
+#include <cstdlib>
+using namespace llvm;
+using namespace cl;
+
+//===----------------------------------------------------------------------===//
+// Template instantiations and anchors.
+//
+namespace llvm { namespace cl {
+TEMPLATE_INSTANTIATION(class basic_parser<bool>);
+TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
+TEMPLATE_INSTANTIATION(class basic_parser<int>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
+TEMPLATE_INSTANTIATION(class basic_parser<double>);
+TEMPLATE_INSTANTIATION(class basic_parser<float>);
+TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
+TEMPLATE_INSTANTIATION(class basic_parser<char>);
+
+TEMPLATE_INSTANTIATION(class opt<unsigned>);
+TEMPLATE_INSTANTIATION(class opt<int>);
+TEMPLATE_INSTANTIATION(class opt<std::string>);
+TEMPLATE_INSTANTIATION(class opt<char>);
+TEMPLATE_INSTANTIATION(class opt<bool>);
+} } // end namespace llvm::cl
+
+void Option::anchor() {}
+void basic_parser_impl::anchor() {}
+void parser<bool>::anchor() {}
+void parser<boolOrDefault>::anchor() {}
+void parser<int>::anchor() {}
+void parser<unsigned>::anchor() {}
+void parser<double>::anchor() {}
+void parser<float>::anchor() {}
+void parser<std::string>::anchor() {}
+void parser<char>::anchor() {}
+
+//===----------------------------------------------------------------------===//
+
+// Globals for name and overview of program. Program name is not a string to
+// avoid static ctor/dtor issues.
+static char ProgramName[80] = "<premain>";
+static const char *ProgramOverview = 0;
+
+// This collects additional help to be printed.
+static ManagedStatic<std::vector<const char*> > MoreHelp;
+
+extrahelp::extrahelp(const char *Help)
+ : morehelp(Help) {
+ MoreHelp->push_back(Help);
+}
+
+static bool OptionListChanged = false;
+
+// MarkOptionsChanged - Internal helper function.
+void cl::MarkOptionsChanged() {
+ OptionListChanged = true;
+}
+
+/// RegisteredOptionList - This is the list of the command line options that
+/// have statically constructed themselves.
+static Option *RegisteredOptionList = 0;
+
+void Option::addArgument() {
+ assert(NextRegistered == 0 && "argument multiply registered!");
+
+ NextRegistered = RegisteredOptionList;
+ RegisteredOptionList = this;
+ MarkOptionsChanged();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Basic, shared command line option processing machinery.
+//
+
+/// GetOptionInfo - Scan the list of registered options, turning them into data
+/// structures that are easier to handle.
+static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
+ SmallVectorImpl<Option*> &SinkOpts,
+ StringMap<Option*> &OptionsMap) {
+ SmallVector<const char*, 16> OptionNames;
+ Option *CAOpt = 0; // The ConsumeAfter option if it exists.
+ for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
+ // If this option wants to handle multiple option names, get the full set.
+ // This handles enum options like "-O1 -O2" etc.
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ // Handle named options.
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ // Add argument to the argument map!
+ if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
+ errs() << ProgramName << ": CommandLine Error: Argument '"
+ << OptionNames[i] << "' defined more than once!\n";
+ }
+ }
+
+ OptionNames.clear();
+
+ // Remember information about positional options.
+ if (O->getFormattingFlag() == cl::Positional)
+ PositionalOpts.push_back(O);
+ else if (O->getMiscFlags() & cl::Sink) // Remember sink options
+ SinkOpts.push_back(O);
+ else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ if (CAOpt)
+ O->error("Cannot specify more than one option with cl::ConsumeAfter!");
+ CAOpt = O;
+ }
+ }
+
+ if (CAOpt)
+ PositionalOpts.push_back(CAOpt);
+
+ // Make sure that they are in order of registration not backwards.
+ std::reverse(PositionalOpts.begin(), PositionalOpts.end());
+}
+
+
+/// LookupOption - Look up the option specified by the given argument string on
+/// the command line. If there is a value specified (after an equal sign),
+/// return that as well. This assumes that leading dashes have already been
+/// stripped.
+static Option *LookupOption(StringRef &Arg, StringRef &Value,
+ const StringMap<Option*> &OptionsMap) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ size_t EqualPos = Arg.find('=');
+
+ // If we have an equals sign, remember the value.
+ if (EqualPos == StringRef::npos) {
+ // Look up the option.
+ StringMap<Option*>::const_iterator I = OptionsMap.find(Arg);
+ return I != OptionsMap.end() ? I->second : 0;
+ }
+
+ // If the argument before the = is a valid option name, we match. If not,
+ // return Arg unmolested.
+ StringMap<Option*>::const_iterator I =
+ OptionsMap.find(Arg.substr(0, EqualPos));
+ if (I == OptionsMap.end()) return 0;
+
+ Value = Arg.substr(EqualPos+1);
+ Arg = Arg.substr(0, EqualPos);
+ return I->second;
+}
+
+/// LookupNearestOption - Look up the closest match to the option specified by
+/// the given argument string on the command line. If there is a value
+/// specified (after an equal sign), return that as well. This assumes that
+/// leading dashes have already been stripped.
+static Option *LookupNearestOption(StringRef Arg,
+ const StringMap<Option*> &OptionsMap,
+ std::string &NearestString) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ // Split on any equal sign.
+ std::pair<StringRef, StringRef> SplitArg = Arg.split('=');
+ StringRef &LHS = SplitArg.first; // LHS == Arg when no '=' is present.
+ StringRef &RHS = SplitArg.second;
+
+ // Find the closest match.
+ Option *Best = 0;
+ unsigned BestDistance = 0;
+ for (StringMap<Option*>::const_iterator it = OptionsMap.begin(),
+ ie = OptionsMap.end(); it != ie; ++it) {
+ Option *O = it->second;
+ SmallVector<const char*, 16> OptionNames;
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed;
+ StringRef Flag = PermitValue ? LHS : Arg;
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ StringRef Name = OptionNames[i];
+ unsigned Distance = StringRef(Name).edit_distance(
+ Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
+ if (!Best || Distance < BestDistance) {
+ Best = O;
+ BestDistance = Distance;
+ if (RHS.empty() || !PermitValue)
+ NearestString = OptionNames[i];
+ else
+ NearestString = std::string(OptionNames[i]) + "=" + RHS.str();
+ }
+ }
+ }
+
+ return Best;
+}
+
+/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurrence()
+/// that does special handling of cl::CommaSeparated options.
+static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
+ StringRef ArgName,
+ StringRef Value, bool MultiArg = false)
+{
+ // Check to see if this option accepts a comma separated list of values. If
+ // it does, we have to split up the value into multiple values.
+ if (Handler->getMiscFlags() & CommaSeparated) {
+ StringRef Val(Value);
+ StringRef::size_type Pos = Val.find(',');
+
+ while (Pos != StringRef::npos) {
+ // Process the portion before the comma.
+ if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg))
+ return true;
+ // Erase the portion before the comma, AND the comma.
+ Val = Val.substr(Pos+1);
+ Value = Value.substr(Pos+1); // Advance the original value as well.
+ // Check for another comma.
+ Pos = Val.find(',');
+ }
+
+ Value = Val;
+ }
+
+ if (Handler->addOccurrence(pos, ArgName, Value, MultiArg))
+ return true;
+
+ return false;
+}
+
+/// ProvideOption - For Value, this differentiates between an empty value ("")
+/// and a null value (StringRef()). The latter is accepted for arguments that
+/// don't allow a value (-foo); the former is rejected (-foo=).
+static inline bool ProvideOption(Option *Handler, StringRef ArgName,
+ StringRef Value, int argc, char **argv,
+ int &i) {
+ // Is this a multi-argument option?
+ unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
+
+ // Enforce value requirements
+ switch (Handler->getValueExpectedFlag()) {
+ case ValueRequired:
+ if (Value.data() == 0) { // No value specified?
+ if (i+1 >= argc)
+ return Handler->error("requires a value!");
+ // Steal the next argument, like for '-o filename'
+ Value = argv[++i];
+ }
+ break;
+ case ValueDisallowed:
+ if (NumAdditionalVals > 0)
+ return Handler->error("multi-valued option specified"
+ " with ValueDisallowed modifier!");
+
+ if (Value.data())
+ return Handler->error("does not allow a value! '" +
+ Twine(Value) + "' specified.");
+ break;
+ case ValueOptional:
+ break;
+
+ default:
+ errs() << ProgramName
+ << ": Bad ValueMask flag! CommandLine usage error:"
+ << Handler->getValueExpectedFlag() << "\n";
+ llvm_unreachable(0);
+ }
+
+ // If this isn't a multi-arg option, just run the handler.
+ if (NumAdditionalVals == 0)
+ return CommaSeparateAndAddOccurence(Handler, i, ArgName, Value);
+
+ // If it is, run the handle several times.
+ bool MultiArg = false;
+
+ if (Value.data()) {
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
+ return true;
+ --NumAdditionalVals;
+ MultiArg = true;
+ }
+
+ while (NumAdditionalVals > 0) {
+ if (i+1 >= argc)
+ return Handler->error("not enough values!");
+ Value = argv[++i];
+
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
+ return true;
+ MultiArg = true;
+ --NumAdditionalVals;
+ }
+ return false;
+}
+
+static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
+ int Dummy = i;
+ return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy);
+}
+
+
+// Option predicates...
+static inline bool isGrouping(const Option *O) {
+ return O->getFormattingFlag() == cl::Grouping;
+}
+static inline bool isPrefixedOrGrouping(const Option *O) {
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix;
+}
+
+// getOptionPred - Check to see if there are any options that satisfy the
+// specified predicate with names that are the prefixes in Name. This is
+// checked by progressively stripping characters off of the name, checking to
+// see if there are options that satisfy the predicate. If we find one, return
+// it; otherwise return null.
+//
+static Option *getOptionPred(StringRef Name, size_t &Length,
+ bool (*Pred)(const Option*),
+ const StringMap<Option*> &OptionsMap) {
+
+ StringMap<Option*>::const_iterator OMI = OptionsMap.find(Name);
+
+ // Loop while we haven't found an option and Name still has at least two
+ // characters in it (so that the next iteration will not be the empty
+ // string).
+ while (OMI == OptionsMap.end() && Name.size() > 1) {
+ Name = Name.substr(0, Name.size()-1); // Chop off the last character.
+ OMI = OptionsMap.find(Name);
+ }
+
+ if (OMI != OptionsMap.end() && Pred(OMI->second)) {
+ Length = Name.size();
+ return OMI->second; // Found one!
+ }
+ return 0; // No option found!
+}
+
+/// HandlePrefixedOrGroupedOption - The specified argument string (which started
+/// with at least one '-') does not fully match an available option. Check to
+/// see if this is a prefix or grouped option. If so, split the argument into
+/// an Arg/Value pair and return the Option to parse it with.
+static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
+ bool &ErrorParsing,
+ const StringMap<Option*> &OptionsMap) {
+ if (Arg.size() == 1) return 0;
+
+ // Do the lookup!
+ size_t Length = 0;
+ Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap);
+ if (PGOpt == 0) return 0;
+
+ // If the option is a prefixed option, then the value is simply the
+ // rest of the name... so fall through to later processing, by
+ // setting up the argument name flags and value fields.
+ if (PGOpt->getFormattingFlag() == cl::Prefix) {
+ Value = Arg.substr(Length);
+ Arg = Arg.substr(0, Length);
+ assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
+ return PGOpt;
+ }
+
+ // This must be a grouped option... handle them now. Grouping options can't
+ // have values.
+ assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+ do {
+ // Move current arg name out of Arg into OneArgName.
+ StringRef OneArgName = Arg.substr(0, Length);
+ Arg = Arg.substr(Length);
+
+ // Because ValueRequired is an invalid flag for grouped arguments,
+ // we don't need to pass argc/argv in.
+ assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
+ "Option can not be cl::Grouping AND cl::ValueRequired!");
+ int Dummy = 0;
+ ErrorParsing |= ProvideOption(PGOpt, OneArgName,
+ StringRef(), 0, 0, Dummy);
+
+ // Get the next grouping option.
+ PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
+ } while (PGOpt && Length != Arg.size());
+
+ // Return the last option with Arg cut down to just the last one.
+ return PGOpt;
+}
+
+
+
+static bool RequiresValue(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::Required ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+static bool EatsUnboundedNumberOfValues(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::ZeroOrMore ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+/// ParseCStringVector - Break INPUT up wherever one or more
+/// whitespace characters are found, and store the resulting tokens in
+/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated on the
+/// heap, so it is the caller's responsibility to free()
+/// them later.
+///
+static void ParseCStringVector(std::vector<char *> &OutputVector,
+ const char *Input) {
+ // Characters which will be treated as token separators:
+ StringRef Delims = " \v\f\t\r\n";
+
+ StringRef WorkStr(Input);
+ while (!WorkStr.empty()) {
+ // If the first character is a delimiter, strip them off.
+ if (Delims.find(WorkStr[0]) != StringRef::npos) {
+ size_t Pos = WorkStr.find_first_not_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+ WorkStr = WorkStr.substr(Pos);
+ continue;
+ }
+
+ // Find position of first delimiter.
+ size_t Pos = WorkStr.find_first_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+
+ // Everything from 0 to Pos is the next word to copy.
+ char *NewStr = (char*)malloc(Pos+1);
+ memcpy(NewStr, WorkStr.data(), Pos);
+ NewStr[Pos] = 0;
+ OutputVector.push_back(NewStr);
+
+ WorkStr = WorkStr.substr(Pos);
+ }
+}
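+
+// Illustrative sketch: for Input = "  -O2\t-o\nout.bc ", the loop above pushes
+// three heap-allocated tokens onto OutputVector -- "-O2", "-o" and "out.bc" --
+// which the caller must later free().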
+
+/// ParseEnvironmentOptions - An alternative entry point to the
+/// CommandLine library, which allows you to read the program's name
+/// from the caller (as PROGNAME) and its command-line arguments from
+/// an environment variable (whose name is given in ENVVAR).
+///
+void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
+ const char *Overview, bool ReadResponseFiles) {
+ // Check args.
+ assert(progName && "Program name not specified");
+ assert(envVar && "Environment variable name missing");
+
+ // Get the environment variable they want us to parse options out of.
+ const char *envValue = getenv(envVar);
+ if (!envValue)
+ return;
+
+ // Get program's "name", which we wouldn't know without the caller
+ // telling us.
+ std::vector<char*> newArgv;
+ newArgv.push_back(strdup(progName));
+
+ // Parse the value of the environment variable into a "command line"
+ // and hand it off to ParseCommandLineOptions().
+ ParseCStringVector(newArgv, envValue);
+ int newArgc = static_cast<int>(newArgv.size());
+ ParseCommandLineOptions(newArgc, &newArgv[0], Overview, ReadResponseFiles);
+
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free(*i);
+}
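+
+// Illustrative sketch (hypothetical tool and variable name): with
+// MYTOOL_OPTIONS="-stats -time-passes" set in the environment, a tool can pick
+// those flags up at startup via
+//
+//   cl::ParseEnvironmentOptions("mytool", "MYTOOL_OPTIONS", "my tool overview");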
+
+
+/// ExpandResponseFiles - Copy the contents of argv into newArgv,
+/// substituting the contents of the response files for the arguments
+/// of type @file.
+static void ExpandResponseFiles(unsigned argc, char** argv,
+ std::vector<char*>& newArgv) {
+ for (unsigned i = 1; i != argc; ++i) {
+ char *arg = argv[i];
+
+ if (arg[0] == '@') {
+ sys::PathWithStatus respFile(++arg);
+
+ // Check that the response file is not empty (mmap'ing empty
+ // files can be problematic).
+ const sys::FileStatus *FileStat = respFile.getFileStatus();
+ if (FileStat && FileStat->getSize() != 0) {
+
+ // If we could open the file, parse its contents, otherwise
+ // pass the @file option verbatim.
+
+ // TODO: we should also support recursive loading of response files,
+ // since this is how gcc behaves. (From their man page: "The file may
+ // itself contain additional @file options; any such options will be
+ // processed recursively.")
+
+ // Mmap the response file into memory.
+ OwningPtr<MemoryBuffer> respFilePtr;
+ if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) {
+ ParseCStringVector(newArgv, respFilePtr->getBufferStart());
+ continue;
+ }
+ }
+ }
+ newArgv.push_back(strdup(arg));
+ }
+}
+
+void cl::ParseCommandLineOptions(int argc, char **argv,
+ const char *Overview, bool ReadResponseFiles) {
+ // Process all registered options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> Opts;
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+
+ assert((!Opts.empty() || !PositionalOpts.empty()) &&
+ "No options specified!");
+
+ // Expand response files.
+ std::vector<char*> newArgv;
+ if (ReadResponseFiles) {
+ newArgv.push_back(strdup(argv[0]));
+ ExpandResponseFiles(argc, argv, newArgv);
+ argv = &newArgv[0];
+ argc = static_cast<int>(newArgv.size());
+ }
+
+ // Copy the program name into ProgName, making sure not to overflow it.
+ std::string ProgName = sys::path::filename(argv[0]);
+ size_t Len = std::min(ProgName.size(), size_t(79));
+ memcpy(ProgramName, ProgName.data(), Len);
+ ProgramName[Len] = '\0';
+
+ ProgramOverview = Overview;
+ bool ErrorParsing = false;
+
+ // Check out the positional arguments to collect information about them.
+ unsigned NumPositionalRequired = 0;
+
+ // Determine whether or not there is an unlimited number of positionals.
+ bool HasUnlimitedPositionals = false;
+
+ Option *ConsumeAfterOpt = 0;
+ if (!PositionalOpts.empty()) {
+ if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ assert(PositionalOpts.size() > 1 &&
+ "Cannot specify cl::ConsumeAfter without a positional argument!");
+ ConsumeAfterOpt = PositionalOpts[0];
+ }
+
+ // Calculate how many positional values are _required_.
+ bool UnboundedFound = false;
+ for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size();
+ i != e; ++i) {
+ Option *Opt = PositionalOpts[i];
+ if (RequiresValue(Opt))
+ ++NumPositionalRequired;
+ else if (ConsumeAfterOpt) {
+ // ConsumeAfter cannot be combined with "optional" positional options
+ // unless there is only one positional argument...
+ if (PositionalOpts.size() > 2)
+ ErrorParsing |=
+ Opt->error("error - this positional option will never be matched, "
+ "because it does not Require a value, and a "
+ "cl::ConsumeAfter option is active!");
+ } else if (UnboundedFound && !Opt->ArgStr[0]) {
+ // This option does not "require" a value... Make sure this option is
+ // not specified after an option that eats all extra arguments, or this
+ // one will never get any!
+ //
+ ErrorParsing |= Opt->error("error - option can never match, because "
+ "another positional argument will match an "
+ "unbounded number of values, and this option"
+ " does not require a value!");
+ }
+ UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
+ }
+ HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt;
+ }
+
+ // PositionalVals - A vector of "positional" arguments we accumulate into
+ // the process at the end.
+ //
+ SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals;
+
+ // If the program has named positional arguments, and the name has been run
+ // across, keep track of which positional argument was named. Otherwise put
+ // the positional args into the PositionalVals list...
+ Option *ActivePositionalArg = 0;
+
+ // Loop over all of the arguments... processing them.
+ bool DashDashFound = false; // Have we read '--'?
+ for (int i = 1; i < argc; ++i) {
+ Option *Handler = 0;
+ Option *NearestHandler = 0;
+ std::string NearestHandlerString;
+ StringRef Value;
+ StringRef ArgName = "";
+
+ // If the option list changed, this means that some command line
+ // option has just been registered or deregistered. This can occur in
+ // response to things like -load, etc. If this happens, rescan the options.
+ if (OptionListChanged) {
+ PositionalOpts.clear();
+ SinkOpts.clear();
+ Opts.clear();
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+ OptionListChanged = false;
+ }
+
+ // Check to see if this is a positional argument. This argument is
+ // considered to be positional if it doesn't start with '-', if it is "-"
+ // itself, or if we have seen "--" already.
+ //
+ if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) {
+ // Positional argument!
+ if (ActivePositionalArg) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ if (!PositionalOpts.empty()) {
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+
+ // All of the positional arguments have been fulfilled; give the rest to
+ // the ConsumeAfter option, if it's specified...
+ //
+ if (PositionalVals.size() >= NumPositionalRequired &&
+ ConsumeAfterOpt != 0) {
+ for (++i; i < argc; ++i)
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+ break; // Handle outside of the argument processing loop...
+ }
+
+ // Delay processing positional arguments until the end...
+ continue;
+ }
+ } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 &&
+ !DashDashFound) {
+ DashDashFound = true; // We found the mythical "--".
+ continue; // Don't try to process it as an argument itself.
+ } else if (ActivePositionalArg &&
+ (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) {
+ // If there is a positional argument eating options, check to see if this
+ // option is another positional argument. If so, treat it as an argument,
+ // otherwise feed it to the eating positional.
+ ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
+ Handler = LookupOption(ArgName, Value, Opts);
+ if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ } else { // We start with a '-', must be an argument.
+ ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
+ Handler = LookupOption(ArgName, Value, Opts);
+
+ // Check to see if this "option" is really a prefixed or grouped argument.
+ if (Handler == 0)
+ Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
+ ErrorParsing, Opts);
+
+ // Otherwise, look for the closest available option to report to the user
+ // in the upcoming error.
+ if (Handler == 0 && SinkOpts.empty())
+ NearestHandler = LookupNearestOption(ArgName, Opts,
+ NearestHandlerString);
+ }
+
+ if (Handler == 0) {
+ if (SinkOpts.empty()) {
+ errs() << ProgramName << ": Unknown command line argument '"
+ << argv[i] << "'. Try: '" << argv[0] << " -help'\n";
+
+ if (NearestHandler) {
+ // If we know a near match, report it as well.
+ errs() << ProgramName << ": Did you mean '-"
+ << NearestHandlerString << "'?\n";
+ }
+
+ ErrorParsing = true;
+ } else {
+ for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
+ E = SinkOpts.end(); I != E ; ++I)
+ (*I)->addOccurrence(i, "", argv[i]);
+ }
+ continue;
+ }
+
+ // If this is a named positional argument, just remember that it is the
+ // active one...
+ if (Handler->getFormattingFlag() == cl::Positional)
+ ActivePositionalArg = Handler;
+ else
+ ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i);
+ }
+
+ // Check and handle positional arguments now...
+ if (NumPositionalRequired > PositionalVals.size()) {
+ errs() << ProgramName
+ << ": Not enough positional command line arguments specified!\n"
+ << "Must specify at least " << NumPositionalRequired
+ << " positional arguments: See: " << argv[0] << " -help\n";
+
+ ErrorParsing = true;
+ } else if (!HasUnlimitedPositionals &&
+ PositionalVals.size() > PositionalOpts.size()) {
+ errs() << ProgramName
+ << ": Too many positional arguments specified!\n"
+ << "Can specify at most " << PositionalOpts.size()
+ << " positional arguments: See: " << argv[0] << " -help\n";
+ ErrorParsing = true;
+
+ } else if (ConsumeAfterOpt == 0) {
+ // Positional args have already been handled if ConsumeAfter is specified.
+ unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
+ for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (RequiresValue(PositionalOpts[i])) {
+ ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ --NumPositionalRequired; // We fulfilled our duty...
+ }
+
+ // If we _can_ give this option more arguments, do so now, as long as we
+ // do not give it values that others need. 'Done' controls whether the
+ // option even _WANTS_ any more.
+ //
+ bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required;
+ while (NumVals-ValNo > NumPositionalRequired && !Done) {
+ switch (PositionalOpts[i]->getNumOccurrencesFlag()) {
+ case cl::Optional:
+ Done = true; // Optional arguments want _at most_ one value
+ // FALL THROUGH
+ case cl::ZeroOrMore: // Zero or more will take all they can get...
+ case cl::OneOrMore: // One or more will take all they can get...
+ ProvidePositionalOption(PositionalOpts[i],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ break;
+ default:
+ llvm_unreachable("Internal error, unexpected NumOccurrences flag in "
+ "positional argument processing!");
+ }
+ }
+ }
+ } else {
+ assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size());
+ unsigned ValNo = 0;
+ for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j)
+ if (RequiresValue(PositionalOpts[j])) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[j],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Handle the case where there is just one positional option, and it's
+ // optional. In this case, we want to give JUST THE FIRST option to the
+ // positional option and keep the rest for the consume after. The above
+ // loop would have assigned no values to positional options in this case.
+ //
+ if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[1],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Hand all of the rest of the arguments over to the
+ // cl::ConsumeAfter command line option...
+ for (; ValNo != PositionalVals.size(); ++ValNo)
+ ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt,
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ }
+
+ // Loop over args and make sure all required args are specified!
+ for (StringMap<Option*>::iterator I = Opts.begin(),
+ E = Opts.end(); I != E; ++I) {
+ switch (I->second->getNumOccurrencesFlag()) {
+ case Required:
+ case OneOrMore:
+ if (I->second->getNumOccurrences() == 0) {
+ I->second->error("must be specified at least once!");
+ ErrorParsing = true;
+ }
+ // Fall through
+ default:
+ break;
+ }
+ }
+
+ // Now that we know if -debug is specified, we can use it.
+ // Note that if ReadResponseFiles == true, this must be done before the
+ // memory allocated for the expanded command line is free()d below.
+ DEBUG(dbgs() << "Args: ";
+ for (int i = 0; i < argc; ++i)
+ dbgs() << argv[i] << ' ';
+ dbgs() << '\n';
+ );
+
+ // Free all of the memory allocated to the map. Command line options may only
+ // be processed once!
+ Opts.clear();
+ PositionalOpts.clear();
+ MoreHelp->clear();
+
+ // Free the memory allocated by ExpandResponseFiles.
+ if (ReadResponseFiles) {
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free(*i);
+ }
+
+ // If we had an error processing our arguments, don't let the program execute
+ if (ErrorParsing) exit(1);
+}
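+
+// Illustrative sketch (hypothetical tool) of the usual way the machinery above
+// is driven: options self-register as statics, then main() calls the parser.
+//
+//   static cl::opt<std::string> InputFile(cl::Positional, cl::desc("<input>"),
+//                                         cl::Required);
+//   static cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"));
+//
+//   int main(int argc, char **argv) {
+//     cl::ParseCommandLineOptions(argc, argv, "example tool\n");
+//     if (Verbose) outs() << "input: " << InputFile << "\n";
+//     return 0;
+//   }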
+
+//===----------------------------------------------------------------------===//
+// Option Base class implementation
+//
+
+bool Option::error(const Twine &Message, StringRef ArgName) {
+ if (ArgName.data() == 0) ArgName = ArgStr;
+ if (ArgName.empty())
+ errs() << HelpStr; // Be nice for positional arguments
+ else
+ errs() << ProgramName << ": for the -" << ArgName;
+
+ errs() << " option: " << Message << "\n";
+ return true;
+}
+
+bool Option::addOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Value, bool MultiArg) {
+ if (!MultiArg)
+ NumOccurrences++; // Increment the number of times we have been seen
+
+ switch (getNumOccurrencesFlag()) {
+ case Optional:
+ if (NumOccurrences > 1)
+ return error("may only occur zero or one times!", ArgName);
+ break;
+ case Required:
+ if (NumOccurrences > 1)
+ return error("must occur exactly one time!", ArgName);
+ // Fall through
+ case OneOrMore:
+ case ZeroOrMore:
+ case ConsumeAfter: break;
+ default: return error("bad num occurrences flag value!");
+ }
+
+ return handleOccurrence(pos, ArgName, Value);
+}
+
+
+// getValueStr - Get the value description string, using "DefaultMsg" if nothing
+// has been specified yet.
+//
+static const char *getValueStr(const Option &O, const char *DefaultMsg) {
+ if (O.ValueStr[0] == 0) return DefaultMsg;
+ return O.ValueStr;
+}
+
+//===----------------------------------------------------------------------===//
+// cl::alias class implementation
+//
+
+// Return the width of the option tag for printing...
+size_t alias::getOptionWidth() const {
+ return std::strlen(ArgStr)+6;
+}
+
+// Print out the option for the alias.
+void alias::printOptionInfo(size_t GlobalWidth) const {
+ size_t L = std::strlen(ArgStr);
+ outs() << " -" << ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Parser Implementation code...
+//
+
+// basic_parser implementation
+//
+
+// Return the width of the option tag for printing...
+size_t basic_parser_impl::getOptionWidth(const Option &O) const {
+ size_t Len = std::strlen(O.ArgStr);
+ if (const char *ValName = getValueName())
+ Len += std::strlen(getValueStr(O, ValName))+3;
+
+ return Len + 6;
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void basic_parser_impl::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+
+ if (const char *ValName = getValueName())
+ outs() << "=<" << getValueStr(O, ValName) << '>';
+
+ outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n';
+}
+
+void basic_parser_impl::printOptionName(const Option &O,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+}
+
+
+// parser<bool> implementation
+//
+bool parser<bool>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, bool &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = true;
+ return false;
+ }
+
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = false;
+ return false;
+ }
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+}
+
+// parser<boolOrDefault> implementation
+//
+bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, boolOrDefault &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = BOU_TRUE;
+ return false;
+ }
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = BOU_FALSE;
+ return false;
+ }
+
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+}
+
+// parser<int> implementation
+//
+bool parser<int>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, int &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for integer argument!");
+ return false;
+}
+
+// parser<unsigned> implementation
+//
+bool parser<unsigned>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, unsigned &Value) {
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+ return false;
+}
+
+// parser<double>/parser<float> implementation
+//
+static bool parseDouble(Option &O, StringRef Arg, double &Value) {
+ SmallString<32> TmpStr(Arg.begin(), Arg.end());
+ const char *ArgStart = TmpStr.c_str();
+ char *End;
+ Value = strtod(ArgStart, &End);
+ if (*End != 0)
+ return O.error("'" + Arg + "' value invalid for floating point argument!");
+ return false;
+}
+
+bool parser<double>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, double &Val) {
+ return parseDouble(O, Arg, Val);
+}
+
+bool parser<float>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, float &Val) {
+ double dVal;
+ if (parseDouble(O, Arg, dVal))
+ return true;
+ Val = (float)dVal;
+ return false;
+}
+
+
+
+// generic_parser_base implementation
+//
+
+// findOption - Return the option number corresponding to the specified
+// argument string. If the option is not found, getNumOptions() is returned.
+//
+unsigned generic_parser_base::findOption(const char *Name) {
+ unsigned e = getNumOptions();
+
+ for (unsigned i = 0; i != e; ++i) {
+ if (strcmp(getOption(i), Name) == 0)
+ return i;
+ }
+ return e;
+}
+
+
+// Return the width of the option tag for printing...
+size_t generic_parser_base::getOptionWidth(const Option &O) const {
+ if (O.hasArgStr()) {
+ size_t Size = std::strlen(O.ArgStr)+6;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ Size = std::max(Size, std::strlen(getOption(i))+8);
+ return Size;
+ } else {
+ size_t BaseSize = 0;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ BaseSize = std::max(BaseSize, std::strlen(getOption(i))+8);
+ return BaseSize;
+ }
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void generic_parser_base::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ if (O.hasArgStr()) {
+ size_t L = std::strlen(O.ArgStr);
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n';
+
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8;
+ outs() << " =" << getOption(i);
+ outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
+ }
+ } else {
+ if (O.HelpStr[0])
+ outs() << " " << O.HelpStr << '\n';
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t L = std::strlen(getOption(i));
+ outs() << " -" << getOption(i);
+ outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n';
+ }
+ }
+}
+
+static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff
+
+// printGenericOptionDiff - Print the value of this option and its default.
+//
+// "Generic" options have each value mapped to a name.
+void generic_parser_base::
+printGenericOptionDiff(const Option &O, const GenericOptionValue &Value,
+ const GenericOptionValue &Default,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+
+ unsigned NumOpts = getNumOptions();
+ for (unsigned i = 0; i != NumOpts; ++i) {
+ if (Value.compare(getOptionValue(i)))
+ continue;
+
+ outs() << "= " << getOption(i);
+ size_t L = std::strlen(getOption(i));
+ size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0;
+ outs().indent(NumSpaces) << " (default: ";
+ for (unsigned j = 0; j != NumOpts; ++j) {
+ if (Default.compare(getOptionValue(j)))
+ continue;
+ outs() << getOption(j);
+ break;
+ }
+ outs() << ")\n";
+ return;
+ }
+ outs() << "= *unknown option value*\n";
+}
+
+// printOptionDiff - Specializations for printing basic value types.
+//
+#define PRINT_OPT_DIFF(T) \
+ void parser<T>:: \
+ printOptionDiff(const Option &O, T V, OptionValue<T> D, \
+ size_t GlobalWidth) const { \
+ printOptionName(O, GlobalWidth); \
+ std::string Str; \
+ { \
+ raw_string_ostream SS(Str); \
+ SS << V; \
+ } \
+ outs() << "= " << Str; \
+ size_t NumSpaces = MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0;\
+ outs().indent(NumSpaces) << " (default: "; \
+ if (D.hasValue()) \
+ outs() << D.getValue(); \
+ else \
+ outs() << "*no default*"; \
+ outs() << ")\n"; \
+ } \
+
+PRINT_OPT_DIFF(bool)
+PRINT_OPT_DIFF(boolOrDefault)
+PRINT_OPT_DIFF(int)
+PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(double)
+PRINT_OPT_DIFF(float)
+PRINT_OPT_DIFF(char)
+
+void parser<std::string>::
+printOptionDiff(const Option &O, StringRef V, OptionValue<std::string> D,
+ size_t GlobalWidth) const {
+ printOptionName(O, GlobalWidth);
+ outs() << "= " << V;
+ size_t NumSpaces = MaxOptWidth > V.size() ? MaxOptWidth - V.size() : 0;
+ outs().indent(NumSpaces) << " (default: ";
+ if (D.hasValue())
+ outs() << D.getValue();
+ else
+ outs() << "*no default*";
+ outs() << ")\n";
+}
+
+// Print a placeholder for options that don't yet support printOptionDiff().
+void basic_parser_impl::
+printOptionNoValue(const Option &O, size_t GlobalWidth) const {
+ printOptionName(O, GlobalWidth);
+ outs() << "= *cannot print option value*\n";
+}
+
+//===----------------------------------------------------------------------===//
+// -help and -help-hidden option implementation
+//
+
+static int OptNameCompare(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, Option*> pair_ty;
+
+ return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+}
+
+// Copy Options into a vector so we can sort them as we like.
+static void
+sortOpts(StringMap<Option*> &OptMap,
+ SmallVectorImpl< std::pair<const char *, Option*> > &Opts,
+ bool ShowHidden) {
+ SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection.
+
+ for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end();
+ I != E; ++I) {
+ // Ignore really-hidden options.
+ if (I->second->getOptionHiddenFlag() == ReallyHidden)
+ continue;
+
+ // Unless showhidden is set, ignore hidden flags.
+ if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
+ continue;
+
+ // If we've already seen this option, don't add it to the list again.
+ if (!OptionSet.insert(I->second))
+ continue;
+
+ Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
+ I->second));
+ }
+
+ // Sort the options list alphabetically.
+ qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+}
+
+namespace {
+
+class HelpPrinter {
+ size_t MaxArgLen;
+ const Option *EmptyArg;
+ const bool ShowHidden;
+
+public:
+ explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {
+ EmptyArg = 0;
+ }
+
+ void operator=(bool Value) {
+ if (Value == false) return;
+
+ // Get all the options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ sortOpts(OptMap, Opts, ShowHidden);
+
+ if (ProgramOverview)
+ outs() << "OVERVIEW: " << ProgramOverview << "\n";
+
+ outs() << "USAGE: " << ProgramName << " [options]";
+
+ // Print out the positional options.
+ Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists...
+ if (!PositionalOpts.empty() &&
+ PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter)
+ CAOpt = PositionalOpts[0];
+
+ for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (PositionalOpts[i]->ArgStr[0])
+ outs() << " --" << PositionalOpts[i]->ArgStr;
+ outs() << " " << PositionalOpts[i]->HelpStr;
+ }
+
+ // Print the consume after option info if it exists...
+ if (CAOpt) outs() << " " << CAOpt->HelpStr;
+
+ outs() << "\n\n";
+
+ // Compute the maximum argument length...
+ MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ outs() << "OPTIONS:\n";
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionInfo(MaxArgLen);
+
+ // Print any extra help the user has declared.
+ for (std::vector<const char *>::iterator I = MoreHelp->begin(),
+ E = MoreHelp->end(); I != E; ++I)
+ outs() << *I;
+ MoreHelp->clear();
+
+ // Halt the program since help information was printed
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+// Define the two HelpPrinter instances that are used to print out help, or
+// help-hidden...
+//
+static HelpPrinter NormalPrinter(false);
+static HelpPrinter HiddenPrinter(true);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+ cl::location(NormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HHOp("help-hidden", cl::desc("Display all available options"),
+ cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+static cl::opt<bool>
+PrintOptions("print-options",
+ cl::desc("Print non-default options after command line parsing"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+PrintAllOptions("print-all-options",
+ cl::desc("Print all option values after command line parsing"),
+ cl::Hidden, cl::init(false));
+
+// Print the value of each option.
+void cl::PrintOptionValues() {
+ if (!PrintOptions && !PrintAllOptions) return;
+
+ // Get all the options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ sortOpts(OptMap, Opts, /*ShowHidden*/true);
+
+ // Compute the maximum argument length...
+ size_t MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
+}
+
+static void (*OverrideVersionPrinter)() = 0;
+
+static int TargetArraySortFn(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, const Target*> pair_ty;
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
+}
+
+namespace {
+class VersionPrinter {
+public:
+ void print() {
+ raw_ostream &OS = outs();
+ OS << "Low Level Virtual Machine (http://llvm.org/):\n"
+ << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
+#ifdef LLVM_VERSION_INFO
+ OS << LLVM_VERSION_INFO;
+#endif
+ OS << "\n ";
+#ifndef __OPTIMIZE__
+ OS << "DEBUG build";
+#else
+ OS << "Optimized build";
+#endif
+#ifndef NDEBUG
+ OS << " with assertions";
+#endif
+ std::string CPU = sys::getHostCPUName();
+ if (CPU == "generic") CPU = "(unknown)";
+ OS << ".\n"
+#if defined(ENABLE_TIMESTAMPS) && ENABLE_TIMESTAMPS == 1
+ << " Built " << __DATE__ << " (" << __TIME__ << ").\n"
+#endif
+ << " Host: " << sys::getHostTriple() << '\n'
+ << " Host CPU: " << CPU << '\n'
+ << '\n'
+ << " Registered Targets:\n";
+
+ std::vector<std::pair<const char *, const Target*> > Targets;
+ size_t Width = 0;
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ Targets.push_back(std::make_pair(it->getName(), &*it));
+ Width = std::max(Width, strlen(Targets.back().first));
+ }
+ if (!Targets.empty())
+ qsort(&Targets[0], Targets.size(), sizeof(Targets[0]),
+ TargetArraySortFn);
+
+ for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+ OS << " " << Targets[i].first;
+ OS.indent(Width - strlen(Targets[i].first)) << " - "
+ << Targets[i].second->getShortDescription() << '\n';
+ }
+ if (Targets.empty())
+ OS << " (none)\n";
+ }
+ void operator=(bool OptionWasSpecified) {
+ if (!OptionWasSpecified) return;
+
+ if (OverrideVersionPrinter == 0) {
+ print();
+ exit(1);
+ }
+ (*OverrideVersionPrinter)();
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+
+// Define the --version option that prints out the LLVM version for the tool
+static VersionPrinter VersionPrinterInstance;
+
+static cl::opt<VersionPrinter, true, parser<bool> >
+VersOp("version", cl::desc("Display the version of this program"),
+ cl::location(VersionPrinterInstance), cl::ValueDisallowed);
+
+// Utility function for printing the help message.
+void cl::PrintHelpMessage() {
+ // This looks weird, but it actually prints the help message. The
+ // NormalPrinter variable is a HelpPrinter and the help gets printed when
+ // its operator= is invoked. That's because the normal usage of the help
+ // printer is to be assigned true or false, depending on whether the -help
+ // option was given or not. Since we're circumventing that, we have to make
+ // it look like -help was given, so we assign true.
+ NormalPrinter = true;
+}
+
+/// Utility function for printing version number.
+void cl::PrintVersionMessage() {
+ VersionPrinterInstance.print();
+}
+
+void cl::SetVersionPrinter(void (*func)()) {
+ OverrideVersionPrinter = func;
+}
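+
+// Usage sketch (illustrative only): how a client tool might wire these hooks
+// up. The identifiers EmitLLVM, InputFile and PrintMyToolVersion below are
+// hypothetical examples, not defined anywhere in LLVM.
+#if 0
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EmitLLVM("emit-llvm", cl::desc("Emit LLVM IR instead of native code"),
+         cl::init(false));
+
+static cl::opt<std::string>
+InputFile(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+static void PrintMyToolVersion() {
+  outs() << "mytool version 1.0\n";
+}
+
+int main(int argc, char **argv) {
+  // Replace the default -version output with our own printer.
+  cl::SetVersionPrinter(PrintMyToolVersion);
+  // -help, -help-hidden, -version and -print-options are all handled here.
+  cl::ParseCommandLineOptions(argc, argv, "mytool - example driver\n");
+
+  if (EmitLLVM) {
+    // ... emit IR for InputFile ...
+  }
+  return 0;
+}
+#endif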
diff --git a/contrib/llvm/lib/Support/ConstantRange.cpp b/contrib/llvm/lib/Support/ConstantRange.cpp
new file mode 100644
index 000000000000..81382d08dc23
--- /dev/null
+++ b/contrib/llvm/lib/Support/ConstantRange.cpp
@@ -0,0 +1,702 @@
+//===-- ConstantRange.cpp - ConstantRange implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Represent a range of possible values that an integral value may take on when
+// the program is run. This keeps track of a lower and upper bound for the
+// constant, which MAY wrap around the end of the numeric range. To do this, it
+// keeps track of a [lower, upper) bound, which specifies an interval just like
+// STL iterators. When used with boolean values, the following are important
+// ranges (other integral ranges use min/max values for special range values):
+//
+// [F, F) = {} = Empty set
+// [T, F) = {T}
+// [F, T) = {F}
+// [T, T) = {F, T} = Full set
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Instructions.h"
+using namespace llvm;
+
+/// Initialize a full (the default) or empty set for the specified type.
+///
+ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
+ if (Full)
+ Lower = Upper = APInt::getMaxValue(BitWidth);
+ else
+ Lower = Upper = APInt::getMinValue(BitWidth);
+}
+
+/// Initialize a range to hold the single specified value.
+///
+ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {}
+
+ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
+ Lower(L), Upper(U) {
+ assert(L.getBitWidth() == U.getBitWidth() &&
+ "ConstantRange with unequal bit widths");
+ assert((L != U || (L.isMaxValue() || L.isMinValue())) &&
+ "Lower == Upper, but they aren't min or max value!");
+}
+
+ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
+ const ConstantRange &CR) {
+ if (CR.isEmptySet())
+ return CR;
+
+ uint32_t W = CR.getBitWidth();
+ switch (Pred) {
+ default: assert(!"Invalid ICmp predicate to makeICmpRegion()");
+ case ICmpInst::ICMP_EQ:
+ return CR;
+ case ICmpInst::ICMP_NE:
+ if (CR.isSingleElement())
+ return ConstantRange(CR.getUpper(), CR.getLower());
+ return ConstantRange(W);
+ case ICmpInst::ICMP_ULT: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMinValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getMinValue(W), UMax);
+ }
+ case ICmpInst::ICMP_SLT: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMinSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax);
+ }
+ case ICmpInst::ICMP_ULE: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMaxValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getMinValue(W), UMax + 1);
+ }
+ case ICmpInst::ICMP_SLE: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMaxSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+ }
+ case ICmpInst::ICMP_UGT: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMaxValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(UMin + 1, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGT: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMaxSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
+ }
+ case ICmpInst::ICMP_UGE: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMinValue())
+ return ConstantRange(W);
+ return ConstantRange(UMin, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGE: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMinSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(SMin, APInt::getSignedMinValue(W));
+ }
+ }
+}
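+
+// Usage sketch (illustrative only): the region of i8 values X for which
+// "icmp ult X, 16" can evaluate to true is [0, 16). This assumes
+// makeICmpRegion is declared as a static member in ConstantRange.h.
+#if 0
+static ConstantRange MakeULT16Region() {
+  ConstantRange Sixteen(APInt(8, 16));   // the single value {16}
+  return ConstantRange::makeICmpRegion(ICmpInst::ICMP_ULT, Sixteen);
+  // Result: [0, 16) -- every unsigned i8 value strictly below 16.
+}
+#endif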
+
+/// isFullSet - Return true if this set contains all of the elements possible
+/// for this data-type.
+bool ConstantRange::isFullSet() const {
+ return Lower == Upper && Lower.isMaxValue();
+}
+
+/// isEmptySet - Return true if this set contains no members.
+///
+bool ConstantRange::isEmptySet() const {
+ return Lower == Upper && Lower.isMinValue();
+}
+
+/// isWrappedSet - Return true if this set wraps around the top of the range,
+/// for example: [100, 8)
+///
+bool ConstantRange::isWrappedSet() const {
+ return Lower.ugt(Upper);
+}
+
+/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
+/// its bitwidth, for example: i8 [120, 140).
+///
+bool ConstantRange::isSignWrappedSet() const {
+ return contains(APInt::getSignedMaxValue(getBitWidth())) &&
+ contains(APInt::getSignedMinValue(getBitWidth()));
+}
+
+/// getSetSize - Return the number of elements in this set.
+///
+APInt ConstantRange::getSetSize() const {
+ if (isEmptySet())
+ return APInt(getBitWidth(), 0);
+ if (getBitWidth() == 1) {
+ if (Lower != Upper) // One of T or F in the set...
+ return APInt(2, 1);
+ return APInt(2, 2); // Must be full set...
+ }
+
+ // Simply subtract the bounds...
+ return Upper - Lower;
+}
+
+/// getUnsignedMax - Return the largest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMax() const {
+ if (isFullSet() || isWrappedSet())
+ return APInt::getMaxValue(getBitWidth());
+ else
+ return getUpper() - 1;
+}
+
+/// getUnsignedMin - Return the smallest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMin() const {
+ if (isFullSet() || (isWrappedSet() && getUpper() != 0))
+ return APInt::getMinValue(getBitWidth());
+ else
+ return getLower();
+}
+
+/// getSignedMax - Return the largest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMax() const {
+ APInt SignedMax(APInt::getSignedMaxValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getUpper() - 1;
+ else
+ return SignedMax;
+ } else {
+ if (getLower().isNegative() == getUpper().isNegative())
+ return SignedMax;
+ else
+ return getUpper() - 1;
+ }
+}
+
+/// getSignedMin - Return the smallest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMin() const {
+ APInt SignedMin(APInt::getSignedMinValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getLower();
+ else
+ return SignedMin;
+ } else {
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getUpper() != SignedMin)
+ return SignedMin;
+ else
+ return getLower();
+ } else {
+ return getLower();
+ }
+ }
+}
+
+/// contains - Return true if the specified value is in the set.
+///
+bool ConstantRange::contains(const APInt &V) const {
+ if (Lower == Upper)
+ return isFullSet();
+
+ if (!isWrappedSet())
+ return Lower.ule(V) && V.ult(Upper);
+ else
+ return Lower.ule(V) || V.ult(Upper);
+}
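+
+// Usage sketch (illustrative only): membership in a wrapped range. On i8,
+// [250, 5) holds 250..255 together with 0..4.
+#if 0
+static void WrappedContainsExample() {
+  ConstantRange CR(APInt(8, 250), APInt(8, 5));
+  bool A = CR.contains(APInt(8, 252));   // true
+  bool B = CR.contains(APInt(8, 3));     // true
+  bool C = CR.contains(APInt(8, 100));   // false
+  (void)A; (void)B; (void)C;
+}
+#endif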
+
+/// contains - Return true if the argument is a subset of this range.
+/// Two equal sets contain each other. The empty set is contained by all
+/// other sets.
+///
+bool ConstantRange::contains(const ConstantRange &Other) const {
+ if (isFullSet() || Other.isEmptySet()) return true;
+ if (isEmptySet() || Other.isFullSet()) return false;
+
+ if (!isWrappedSet()) {
+ if (Other.isWrappedSet())
+ return false;
+
+ return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper);
+ }
+
+ if (!Other.isWrappedSet())
+ return Other.getUpper().ule(Upper) ||
+ Lower.ule(Other.getLower());
+
+ return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower());
+}
+
+/// subtract - Subtract the specified constant from the endpoints of this
+/// constant range.
+ConstantRange ConstantRange::subtract(const APInt &Val) const {
+ assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width");
+ // If the set is empty or full, don't modify the endpoints.
+ if (Lower == Upper)
+ return *this;
+ return ConstantRange(Lower - Val, Upper - Val);
+}
+
+/// intersectWith - Return the range that results from the intersection of this
+/// range with another range. The resultant range is guaranteed to include all
+/// elements contained in both input ranges, and to have the smallest possible
+/// set size that does so. Because there may be two intersections with the
+/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A).
+ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ // Handle common cases.
+ if ( isEmptySet() || CR.isFullSet()) return *this;
+ if (CR.isEmptySet() || isFullSet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet())
+ return CR.intersectWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (Lower.ult(CR.Lower)) {
+ if (Upper.ule(CR.Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ if (Upper.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ return CR;
+ } else {
+ if (Upper.ult(CR.Upper))
+ return *this;
+
+ if (Lower.ult(CR.Upper))
+ return ConstantRange(Lower, CR.Upper);
+
+ return ConstantRange(getBitWidth(), false);
+ }
+ }
+
+ if (isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Lower.ult(Upper)) {
+ if (CR.Upper.ult(Upper))
+ return CR;
+
+ if (CR.Upper.ult(Lower))
+ return ConstantRange(CR.Lower, Upper);
+
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+ } else if (CR.Lower.ult(Lower)) {
+ if (CR.Upper.ule(Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ return ConstantRange(Lower, CR.Upper);
+ }
+ return CR;
+ }
+
+ if (CR.Upper.ult(Upper)) {
+ if (CR.Lower.ult(Upper)) {
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+ }
+
+ if (CR.Lower.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
+
+ return CR;
+ } else if (CR.Upper.ult(Lower)) {
+ if (CR.Lower.ult(Lower))
+ return *this;
+
+ return ConstantRange(CR.Lower, Upper);
+ }
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+}
+
+
+/// unionWith - Return the range that results from the union of this range with
+/// another range. The resultant range is guaranteed to include the elements of
+/// both sets, but may contain more. For example, [3, 9) union [12,15) is
+/// [3, 15), which includes 9, 10, and 11, which were not included in either
+/// set before.
+///
+ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ if ( isFullSet() || CR.isEmptySet()) return *this;
+ if (CR.isFullSet() || isEmptySet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) {
+ // If the two ranges are disjoint, find the smaller gap and bridge it.
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ else
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ APInt L = Lower, U = Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+ if ((CR.Upper - 1).ugt(U - 1))
+ U = CR.Upper;
+
+ if (L == 0 && U == 0)
+ return ConstantRange(getBitWidth());
+
+ return ConstantRange(L, U);
+ }
+
+ if (!CR.isWrappedSet()) {
+ // ------U L----- and ------U L----- : this
+ // L--U L--U : CR
+ if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower))
+ return *this;
+
+ // ------U L----- : this
+ // L---------U : CR
+ if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
+
+ // ----U L---- : this
+ // L---U : CR
+ // <d1> <d2>
+ if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) {
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ else
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ // ----U L----- : this
+ // L----U : CR
+ if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ // ------U L---- : this
+ // L-----U : CR
+ if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
+ }
+
+ assert(isWrappedSet() && CR.isWrappedSet() &&
+ "ConstantRange::unionWith missed wrapped union unwrapped case");
+
+ // ------U L---- and ------U L---- : this
+ // -U L----------- and ------------U L : CR
+ if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
+
+ APInt L = Lower, U = Upper;
+ if (CR.Upper.ugt(U))
+ U = CR.Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+
+ return ConstantRange(L, U);
+}
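+
+// Usage sketch (illustrative only): unionWith may over-approximate exactly as
+// described above -- on i32, [3, 9) union [12, 15) yields [3, 15), which also
+// contains 9, 10 and 11.
+#if 0
+static void UnionOverApproximates() {
+  ConstantRange A(APInt(32, 3), APInt(32, 9));
+  ConstantRange B(APInt(32, 12), APInt(32, 15));
+  ConstantRange U = A.unionWith(B);       // [3, 15)
+  bool Extra = U.contains(APInt(32, 10)); // true, though 10 was in neither input
+  (void)Extra;
+}
+#endif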
+
+/// zeroExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// zero extended.
+ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet() || isWrappedSet())
+ // Change into [0, 1 << src bit width)
+ return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize));
+
+ return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
+}
+
+/// signExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// sign extended.
+ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet() || isSignWrappedSet()) {
+ return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
+ APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
+ }
+
+ return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize));
+}
+
+/// truncate - Return a new range in the specified integer type, which must be
+/// strictly smaller than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// truncated to the specified type.
+ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize > DstTySize && "Not a value truncation");
+ APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
+ if (isFullSet() || getSetSize().ugt(Size))
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
+
+ return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize));
+}
+
+/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is zero extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return zeroExtend(DstTySize);
+ else
+ return *this;
+}
+
+/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is sign extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return signExtend(DstTySize);
+ else
+ return *this;
+}
+
+ConstantRange
+ConstantRange::add(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() + Other.getLower();
+ APInt NewUpper = getUpper() + Other.getUpper() - 1;
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
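+
+// Usage sketch (illustrative only): add() sums the bounds, and conservatively
+// degrades to the full set once the sums can cover every value. On i8,
+// [100, 120) + [50, 60) is [150, 179), while [0, 200) + [0, 100) spans more
+// than 256 possible sums and so becomes the full set.
+#if 0
+static void AddExamples() {
+  ConstantRange A(APInt(8, 100), APInt(8, 120));
+  ConstantRange B(APInt(8, 50),  APInt(8, 60));
+  ConstantRange S = A.add(B);            // [150, 179)
+
+  ConstantRange C(APInt(8, 0), APInt(8, 200));
+  ConstantRange D(APInt(8, 0), APInt(8, 100));
+  ConstantRange F = C.add(D);            // full set
+  (void)S; (void)F;
+}
+#endif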
+
+ConstantRange
+ConstantRange::sub(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() - Other.getUpper() + 1;
+ APInt NewUpper = getUpper() - Other.getLower();
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
+
+ConstantRange
+ConstantRange::multiply(const ConstantRange &Other) const {
+ // TODO: If either operand is a single element and the multiply is known to
+ // be non-wrapping, round the result min and max value to the appropriate
+ // multiple of that element. If wrapping is possible, at least adjust the
+ // range according to the greatest power-of-two factor of the single element.
+
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt this_min = getUnsignedMin().zext(getBitWidth() * 2);
+ APInt this_max = getUnsignedMax().zext(getBitWidth() * 2);
+ APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2);
+ APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2);
+
+ ConstantRange Result_zext = ConstantRange(this_min * Other_min,
+ this_max * Other_max + 1);
+ return Result_zext.truncate(getBitWidth());
+}
+
+ConstantRange
+ConstantRange::smax(const ConstantRange &Other) const {
+ // X smax Y is: range(smax(X_smin, Y_smin),
+ // smax(X_smax, Y_smax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin());
+ APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::umax(const ConstantRange &Other) const {
+ // X umax Y is: range(umax(X_umin, Y_umin),
+ // umax(X_umax, Y_umax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::udiv(const ConstantRange &RHS) const {
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (RHS.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
+
+ APInt RHS_umin = RHS.getUnsignedMin();
+ if (RHS_umin == 0) {
+ // We want the lowest value in RHS excluding zero. Usually that would be 1
+ // except for a range in the form of [X, 1) in which case it would be X.
+ if (RHS.getUpper() == 1)
+ RHS_umin = RHS.getLower();
+ else
+ RHS_umin = APInt(getBitWidth(), 1);
+ }
+
+ APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
+
+ // Lower and Upper can collide here, e.g. if the LHS is full and the RHS is
+ // a wrapped interval containing 1; in that case the result is the full set.
+ if (Lower == Upper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(Lower, Upper);
+}
+
+ConstantRange
+ConstantRange::binaryAnd(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
+ if (umin.isAllOnesValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1);
+}
+
+ConstantRange
+ConstantRange::binaryOr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ if (umax.isMinValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(umax, APInt::getNullValue(getBitWidth()));
+}
+
+ConstantRange
+ConstantRange::shl(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt min = getUnsignedMin().shl(Other.getUnsignedMin());
+ APInt max = getUnsignedMax().shl(Other.getUnsignedMax());
+
+ // If the largest shift amount cannot shift any set bits out of the largest
+ // value, there's no overflow.
+ APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
+ if (Zeros.ugt(Other.getUnsignedMax()))
+ return ConstantRange(min, max + 1);
+
+ // FIXME: implement the other tricky cases
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+}
+
+ConstantRange
+ConstantRange::lshr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt max = getUnsignedMax().lshr(Other.getUnsignedMin());
+ APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
+ if (min == max + 1)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(min, max + 1);
+}
+
+ConstantRange ConstantRange::inverse() const {
+ if (isFullSet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ } else if (isEmptySet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ }
+ return ConstantRange(Upper, Lower);
+}
+
+/// print - Print out the bounds to a stream...
+///
+void ConstantRange::print(raw_ostream &OS) const {
+ if (isFullSet())
+ OS << "full-set";
+ else if (isEmptySet())
+ OS << "empty-set";
+ else
+ OS << "[" << Lower << "," << Upper << ")";
+}
+
+/// dump - Allow printing from a debugger easily...
+///
+void ConstantRange::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
new file mode 100644
index 000000000000..899c3890d78a
--- /dev/null
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -0,0 +1,281 @@
+//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ThreadLocal.h"
+#include <setjmp.h>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+
+struct CrashRecoveryContextImpl;
+
+static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+
+struct CrashRecoveryContextImpl {
+ CrashRecoveryContext *CRC;
+ std::string Backtrace;
+ ::jmp_buf JumpBuffer;
+ volatile unsigned Failed : 1;
+
+public:
+ CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
+ Failed(false) {
+ CurrentContext.set(this);
+ }
+ ~CrashRecoveryContextImpl() {
+ CurrentContext.erase();
+ }
+
+ void HandleCrash() {
+ // Eliminate the current context entry, to avoid re-entering in case the
+ // cleanup code crashes.
+ CurrentContext.erase();
+
+ assert(!Failed && "Crash recovery context already failed!");
+ Failed = true;
+
+ // FIXME: Stash the backtrace.
+
+ // Jump back to the RunSafely we were called under.
+ longjmp(JumpBuffer, 1);
+ }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+static sys::ThreadLocal<const CrashRecoveryContextCleanup>
+ tlIsRecoveringFromCrash;
+
+CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+ // Reclaim registered resources.
+ CrashRecoveryContextCleanup *i = head;
+ tlIsRecoveringFromCrash.set(head);
+ while (i) {
+ CrashRecoveryContextCleanup *tmp = i;
+ i = tmp->next;
+ tmp->cleanupFired = true;
+ tmp->recoverResources();
+ delete tmp;
+ }
+ tlIsRecoveringFromCrash.erase();
+
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ delete CRCI;
+}
+
+bool CrashRecoveryContext::isRecoveringFromCrash() {
+ return tlIsRecoveringFromCrash.get() != 0;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+ if (!gCrashRecoveryEnabled)
+ return 0;
+
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ if (!CRCI)
+ return 0;
+
+ return CRCI->CRC;
+}
+
+void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
+{
+ if (!cleanup)
+ return;
+ if (head)
+ head->prev = cleanup;
+ cleanup->next = head;
+ head = cleanup;
+}
+
+void
+CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
+ if (!cleanup)
+ return;
+ if (cleanup == head) {
+ head = cleanup->next;
+ if (head)
+ head->prev = 0;
+ }
+ else {
+ cleanup->prev->next = cleanup->next;
+ if (cleanup->next)
+ cleanup->next->prev = cleanup->prev;
+ }
+ delete cleanup;
+}
+
+#ifdef LLVM_ON_WIN32
+
+// FIXME: No real Win32 implementation currently.
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context we
+// simply disable crash recovery and raise the signal again.
+
+#include <signal.h>
+
+static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // We didn't find a crash recovery context -- this means either we got a
+ // signal on a thread we didn't expect it on, the application got a signal
+ // outside of a crash recovery context, or something else went horribly
+ // wrong.
+ //
+ // Disable crash recovery and raise the signal again. The assumption here is
+ // that the enclosing application will terminate soon, and we won't want to
+ // attempt crash recovery again.
+ //
+ // This call of Disable isn't thread safe, but it doesn't actually matter.
+ CrashRecoveryContext::Disable();
+ raise(Signal);
+
+ // The signal will be thrown once the signal mask is restored.
+ return;
+ }
+
+ // Unblock the signal we received.
+ sigset_t SigMask;
+ sigemptyset(&SigMask);
+ sigaddset(&SigMask, Signal);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ if (CRCI)
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+
+ // Setup the signal handler.
+ struct sigaction Handler;
+ Handler.sa_handler = CrashRecoverySignalHandler;
+ Handler.sa_flags = 0;
+ sigemptyset(&Handler.sa_mask);
+
+ for (unsigned i = 0; i != NumSignals; ++i) {
+ sigaction(Signals[i], &Handler, &PrevActions[i]);
+ }
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+
+ // Restore the previous signal handlers.
+ for (unsigned i = 0; i != NumSignals; ++i)
+ sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+ // If crash recovery is disabled, do nothing.
+ if (gCrashRecoveryEnabled) {
+ assert(!Impl && "Crash recovery context already initialized!");
+ CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+ Impl = CRCI;
+
+ if (setjmp(CRCI->JumpBuffer) != 0) {
+ return false;
+ }
+ }
+
+ Fn(UserData);
+ return true;
+}
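+
+// Usage sketch (illustrative only): a host application guarding a crashy
+// library entry point. ParseTheFile and ParseSafely are hypothetical names.
+#if 0
+static void ParseTheFile(void *Opaque) {
+  const char *FileName = static_cast<const char *>(Opaque);
+  // ... work that might crash ...
+  (void)FileName;
+}
+
+static bool ParseSafely(const char *FileName) {
+  CrashRecoveryContext::Enable();        // install the signal handlers once
+  CrashRecoveryContext CRC;
+  if (!CRC.RunSafely(ParseTheFile, const_cast<char *>(FileName))) {
+    // The callback crashed, but the process is still alive; report failure
+    // instead of dying.
+    return false;
+  }
+  return true;
+}
+#endif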
+
+void CrashRecoveryContext::HandleCrash() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ assert(CRCI && "Crash recovery context never initialized!");
+ CRCI->HandleCrash();
+}
+
+const std::string &CrashRecoveryContext::getBacktrace() const {
+ CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl;
+ assert(CRC && "Crash recovery context never initialized!");
+ assert(CRC->Failed && "No crash was detected!");
+ return CRC->Backtrace;
+}
+
+//
+
+namespace {
+struct RunSafelyOnThreadInfo {
+ void (*UserFn)(void*);
+ void *UserData;
+ CrashRecoveryContext *CRC;
+ bool Result;
+};
+}
+
+static void RunSafelyOnThread_Dispatch(void *UserData) {
+ RunSafelyOnThreadInfo *Info =
+ reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+ Info->Result = Info->CRC->RunSafely(Info->UserFn, Info->UserData);
+}
+bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+ llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+ return Info.Result;
+}
diff --git a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
new file mode 100644
index 000000000000..814566494d30
--- /dev/null
+++ b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -0,0 +1,357 @@
+//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// The algorithm we use attempts to exploit the dependency information by
+// minimizing top-down. We start by constructing an initial root set R, and
+// then iteratively:
+//
+// 1. Minimize the set R using the test predicate:
+// P'(S) = P(S union pred*(S))
+//
+// 2. Extend R to R' = R union pred(R).
+//
+// until a fixed point is reached.
+//
+// The idea is that we want to quickly prune entire portions of the graph, so we
+// try to find high-level nodes that can be eliminated with all of their
+// dependents.
+//
+// FIXME: The current algorithm doesn't actually provide a strong guarantee
+// about the minimality of the result. The problem is that after adding nodes to
+// the required set, we no longer consider them for elimination. For strictly
+// well formed predicates, this doesn't happen, but it commonly occurs in
+// practice when there are unmodelled dependencies. I believe we can resolve
+// this by allowing the required set to be minimized as well, but need more test
+// cases first.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DAGDeltaAlgorithm.h"
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+class DAGDeltaAlgorithmImpl {
+ friend class DeltaActiveSetHelper;
+
+public:
+ typedef DAGDeltaAlgorithm::change_ty change_ty;
+ typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
+ typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
+ typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+
+private:
+ typedef std::vector<change_ty>::iterator pred_iterator_ty;
+ typedef std::vector<change_ty>::iterator succ_iterator_ty;
+ typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
+ typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+
+ DAGDeltaAlgorithm &DDA;
+
+ const changeset_ty &Changes;
+ const std::vector<edge_ty> &Dependencies;
+
+ std::vector<change_ty> Roots;
+
+ /// Cache of failed test results. Successful test results are never cached
+ /// since we always reduce following a success. We maintain an independent
+ /// cache from that used by the individual delta passes because we may get
+ /// hits across multiple individual delta invocations.
+ mutable std::set<changeset_ty> FailedTestsCache;
+
+ // FIXME: Gross.
+ std::map<change_ty, std::vector<change_ty> > Predecessors;
+ std::map<change_ty, std::vector<change_ty> > Successors;
+
+ std::map<change_ty, std::set<change_ty> > PredClosure;
+ std::map<change_ty, std::set<change_ty> > SuccClosure;
+
+private:
+ pred_iterator_ty pred_begin(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].begin();
+ }
+ pred_iterator_ty pred_end(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].end();
+ }
+
+ pred_closure_iterator_ty pred_closure_begin(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].begin();
+ }
+ pred_closure_iterator_ty pred_closure_end(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].end();
+ }
+
+ succ_iterator_ty succ_begin(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].begin();
+ }
+ succ_iterator_ty succ_end(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].end();
+ }
+
+ succ_closure_iterator_ty succ_closure_begin(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].begin();
+ }
+ succ_closure_iterator_ty succ_closure_end(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].end();
+ }
+
+ void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ const changeset_ty &Required) {
+ DDA.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+ bool ExecuteOneTest(const changeset_ty &S) {
+ // Check dependencies invariant.
+ DEBUG({
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it)
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2)
+ assert(S.count(*it2) && "Attempt to run invalid changeset!");
+ });
+
+ return DDA.ExecuteOneTest(S);
+ }
+
+public:
+ DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty> &_Dependencies);
+
+ changeset_ty Run();
+
+ /// GetTestResult - Get the test result for the active set \arg Changes with
+ /// \arg Required changes from the cache, executing the test if necessary.
+ ///
+ /// \param Changes - The set of active changes being minimized, which should
+ /// have their pred closure included in the test.
+ /// \param Required - The set of changes which have previously been
+ /// established to be required.
+ /// \return - The test result.
+ bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required);
+};
+
+/// Helper object for minimizing an active set of changes.
+class DeltaActiveSetHelper : public DeltaAlgorithm {
+ DAGDeltaAlgorithmImpl &DDAI;
+
+ const changeset_ty &Required;
+
+protected:
+ /// UpdatedSearchState - Callback used when the search state changes.
+ virtual void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ DDAI.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ virtual bool ExecuteOneTest(const changeset_ty &S) {
+ return DDAI.GetTestResult(S, Required);
+ }
+
+public:
+ DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
+ const changeset_ty &_Required)
+ : DDAI(_DDAI), Required(_Required) {}
+};
+
+}
+
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty>
+ &_Dependencies)
+ : DDA(_DDA),
+ Changes(_Changes),
+ Dependencies(_Dependencies)
+{
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ Successors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ }
+ for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(),
+ ie = Dependencies.end(); it != ie; ++it) {
+ Predecessors[it->second].push_back(it->first);
+ Successors[it->first].push_back(it->second);
+ }
+
+ // Compute the roots.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ if (succ_begin(*it) == succ_end(*it))
+ Roots.push_back(*it);
+
+ // Pre-compute the closure of the successor relation.
+ std::vector<change_ty> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ change_ty Change = Worklist.back();
+ Worklist.pop_back();
+
+ std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
+ for (pred_iterator_ty it = pred_begin(Change),
+ ie = pred_end(Change); it != ie; ++it) {
+ SuccClosure[*it].insert(Change);
+ SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
+ Worklist.push_back(*it);
+ }
+ }
+
+ // Invert to form the predecessor closure map.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ PredClosure.insert(std::make_pair(*it, std::set<change_ty>()));
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
+ PredClosure[*it2].insert(*it);
+
+ // Dump useful debug info.
+ DEBUG({
+ llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
+ llvm::errs() << "Changes: [";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ if (it != Changes.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+
+ if (succ_begin(*it) != succ_end(*it)) {
+ llvm::errs() << "(";
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << "->" << *it2;
+ }
+ llvm::errs() << ")";
+ }
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Roots: [";
+ for (std::vector<change_ty>::const_iterator it = Roots.begin(),
+ ie = Roots.end(); it != ie; ++it) {
+ if (it != Roots.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Predecessor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (pred_closure_iterator_ty it2 = pred_closure_begin(*it),
+ ie2 = pred_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != pred_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "Successor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "\n\n";
+ });
+}
+
+bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes,
+ const changeset_ty &Required) {
+ changeset_ty Extended(Required);
+ Extended.insert(Changes.begin(), Changes.end());
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ Extended.insert(pred_closure_begin(*it), pred_closure_end(*it));
+
+ if (FailedTestsCache.count(Extended))
+ return false;
+
+ bool Result = ExecuteOneTest(Extended);
+ if (!Result)
+ FailedTestsCache.insert(Extended);
+
+ return Result;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithmImpl::Run() {
+ // The current set of changes we are minimizing, starting at the roots.
+ changeset_ty CurrentSet(Roots.begin(), Roots.end());
+
+ // The set of required changes.
+ changeset_ty Required;
+
+ // Iterate until the active set of changes is empty. Convergence is guaranteed
+ // assuming input was a DAG.
+ //
+ // Invariant: CurrentSet intersect Required == {}
+ // Invariant: Required == (Required union succ*(Required))
+ while (!CurrentSet.empty()) {
+ DEBUG({
+ llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, "
+ << Required.size() << " required changes\n";
+ });
+
+ // Minimize the current set of changes.
+ DeltaActiveSetHelper Helper(*this, Required);
+ changeset_ty CurrentMinSet = Helper.Run(CurrentSet);
+
+ // Update the set of required changes. Since
+ // CurrentMinSet subset CurrentSet
+ // and after the last iteration,
+ // succ(CurrentSet) subset Required
+ // then
+ // succ(CurrentMinSet) subset Required
+ // and our invariant on Required is maintained.
+ Required.insert(CurrentMinSet.begin(), CurrentMinSet.end());
+
+ // Replace the current set with the predecessors of the minimized set of
+ // active changes.
+ CurrentSet.clear();
+ for (changeset_ty::const_iterator it = CurrentMinSet.begin(),
+ ie = CurrentMinSet.end(); it != ie; ++it)
+ CurrentSet.insert(pred_begin(*it), pred_end(*it));
+
+ // FIXME: We could enforce CurrentSet intersect Required == {} here if we
+ // wanted to protect against cyclic graphs.
+ }
+
+ return Required;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies) {
+ return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run();
+}
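+
+// Usage sketch (illustrative only): a client derives from DAGDeltaAlgorithm
+// and supplies the test predicate. With the edge convention used above (an
+// edge (A, B) makes A a predecessor of B, so any test containing B also pulls
+// in A), the following reduces {0, 1, 2, 3} to {0, 3} when the failure needs
+// only change 3 plus its predecessor 0.
+#if 0
+class MyReducer : public DAGDeltaAlgorithm {
+public:
+  virtual bool ExecuteOneTest(const changeset_ty &Changes) {
+    // Stand-in predicate: "the bug still reproduces" whenever changes 0 and
+    // 3 both survive in the rebuilt test case.
+    return Changes.count(0) && Changes.count(3);
+  }
+};
+
+static DAGDeltaAlgorithm::changeset_ty ReduceExample() {
+  DAGDeltaAlgorithm::changeset_ty Changes;
+  for (unsigned i = 0; i != 4; ++i)
+    Changes.insert(i);
+
+  std::vector<DAGDeltaAlgorithm::edge_ty> Deps;
+  Deps.push_back(std::make_pair(0u, 3u)); // 0 is a predecessor of 3
+  Deps.push_back(std::make_pair(1u, 2u)); // 1 is a predecessor of 2
+
+  MyReducer R;
+  return R.Run(Changes, Deps);            // yields {0, 3}
+}
+#endif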
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
new file mode 100644
index 000000000000..9fdb12ecfdcb
--- /dev/null
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -0,0 +1,134 @@
+//===-- Debug.cpp - An easy way to add debug output to your code ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a handy way of adding debugging information to your
+// code, without it being enabled all of the time, and without having to add
+// command line options to enable it.
+//
+// In particular, just wrap your code with the DEBUG() macro, and it will be
+// enabled automatically if you specify '-debug' on the command-line.
+// Alternatively, you can also define the DEBUG_TYPE macro to "foo" to specify
+// that your debug code belongs to class "foo". Then, on the command line, you
+// can specify '-debug-only=foo' to enable JUST the debug information for the
+// foo class.
+//
+// When compiling in release mode, the -debug-* options and all code in DEBUG()
+// statements disappear, so they do not affect the runtime of the code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/Signals.h"
+
+using namespace llvm;
+
+// All Debug.h functionality is a no-op in NDEBUG mode.
+#ifndef NDEBUG
+bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option
+
+// -debug - Command line option to enable the DEBUG statements in the passes.
+// This flag may only be enabled in debug builds.
+static cl::opt<bool, true>
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
+ cl::location(DebugFlag));
+
+// -debug-buffer-size - Buffer the last N characters of debug output
+// until program termination.
+static cl::opt<unsigned>
+DebugBufferSize("debug-buffer-size",
+ cl::desc("Buffer the last N characters of debug output"
+ "until program termination. "
+ "[default 0 -- immediate print-out]"),
+ cl::Hidden,
+ cl::init(0));
+
+static std::string CurrentDebugType;
+
+namespace {
+
+struct DebugOnlyOpt {
+ void operator=(const std::string &Val) const {
+ DebugFlag |= !Val.empty();
+ CurrentDebugType = Val;
+ }
+};
+
+}
+
+static DebugOnlyOpt DebugOnlyOptLoc;
+
+static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+ cl::Hidden, cl::value_desc("debug string"),
+ cl::location(DebugOnlyOptLoc), cl::ValueRequired);
+
+// Signal handlers - dump debug output on termination.
+static void debug_user_sig_handler(void *Cookie) {
+ // This is a bit sneaky. Since this is under #ifndef NDEBUG, we
+ // know that debug mode is enabled and dbgs() really is a
+ // circular_raw_ostream. If NDEBUG is defined, then dbgs() ==
+ // errs() but this will never be invoked.
+ llvm::circular_raw_ostream *dbgout =
+ static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs());
+ dbgout->flushBufferWithBanner();
+}
+
+// isCurrentDebugType - Return true if the specified string is the debug type
+// specified on the command line, or if none was specified on the command line
+// with the -debug-only=X option.
+//
+bool llvm::isCurrentDebugType(const char *DebugType) {
+ return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+}
+
+/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified. Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void llvm::SetCurrentDebugType(const char *Type) {
+ CurrentDebugType = Type;
+}
+
+/// dbgs - Return a circular-buffered debug stream.
+raw_ostream &llvm::dbgs() {
+ // Do one-time initialization in a thread-safe way.
+ static struct dbgstream {
+ circular_raw_ostream strm;
+
+ dbgstream() :
+ strm(errs(), "*** Debug Log Output ***\n",
+ (!EnableDebugBuffering || !DebugFlag) ? 0 : DebugBufferSize) {
+ if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0)
+ // TODO: Add a handler for SIGUSR1-type signals so the user can
+ // force a debug dump.
+ sys::AddSignalHandler(&debug_user_sig_handler, 0);
+ // Otherwise we've already set the debug stream buffer size to
+ // zero, disabling buffering so it will output directly to errs().
+ }
+ } thestrm;
+
+ return thestrm.strm;
+}
+
+#else
+// Avoid "has no symbols" warning.
+namespace llvm {
+ /// dbgs - Return errs().
+ raw_ostream &dbgs() {
+ return errs();
+ }
+}
+
+#endif
+
+/// EnableDebugBuffering - Turn on signal handler installation.
+///
+bool llvm::EnableDebugBuffering = false;
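+
+// Usage sketch (illustrative only): how a client emits output gated by -debug
+// and -debug-only. "mypass" is a hypothetical debug type.
+#if 0
+#define DEBUG_TYPE "mypass"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+static void runOnSomething(unsigned Count) {
+  // Printed only in builds with assertions, and only when -debug (or
+  // -debug-only=mypass) is given on the command line.
+  DEBUG(llvm::dbgs() << "mypass: visiting " << Count << " items\n");
+}
+#endif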
diff --git a/contrib/llvm/lib/Support/DeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
new file mode 100644
index 000000000000..9e52874de832
--- /dev/null
+++ b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -0,0 +1,114 @@
+//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <iterator>
+using namespace llvm;
+
+DeltaAlgorithm::~DeltaAlgorithm() {
+}
+
+bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
+ if (FailedTestsCache.count(Changes))
+ return false;
+
+ bool Result = ExecuteOneTest(Changes);
+ if (!Result)
+ FailedTestsCache.insert(Changes);
+
+ return Result;
+}
+
+void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
+ // FIXME: Allow clients to provide heuristics for improved splitting.
+
+ // FIXME: This is really slow.
+ changeset_ty LHS, RHS;
+ unsigned idx = 0, N = S.size() / 2;
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it, ++idx)
+ ((idx < N) ? LHS : RHS).insert(*it);
+ if (!LHS.empty())
+ Res.push_back(LHS);
+ if (!RHS.empty())
+ Res.push_back(RHS);
+}
+
+DeltaAlgorithm::changeset_ty
+DeltaAlgorithm::Delta(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ // Invariant: union(Res) == Changes
+ UpdatedSearchState(Changes, Sets);
+
+ // If there is nothing left we can remove, we are done.
+ if (Sets.size() <= 1)
+ return Changes;
+
+ // Look for a passing subset.
+ changeset_ty Res;
+ if (Search(Changes, Sets, Res))
+ return Res;
+
+ // Otherwise, partition the sets if possible; if not we are done.
+ changesetlist_ty SplitSets;
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it)
+ Split(*it, SplitSets);
+ if (SplitSets.size() == Sets.size())
+ return Changes;
+
+ return Delta(Changes, SplitSets);
+}
+
+bool DeltaAlgorithm::Search(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ changeset_ty &Res) {
+ // FIXME: Parallelize.
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it) {
+ // If the test passes on this subset alone, recurse.
+ if (GetTestResult(*it)) {
+ changesetlist_ty Sets;
+ Split(*it, Sets);
+ Res = Delta(*it, Sets);
+ return true;
+ }
+
+ // Otherwise, if we have more than two sets, see if test passes on the
+ // complement.
+ if (Sets.size() > 2) {
+ // FIXME: This is really slow.
+ changeset_ty Complement;
+ std::set_difference(
+ Changes.begin(), Changes.end(), it->begin(), it->end(),
+ std::insert_iterator<changeset_ty>(Complement, Complement.begin()));
+ if (GetTestResult(Complement)) {
+ changesetlist_ty ComplementSets;
+ ComplementSets.insert(ComplementSets.end(), Sets.begin(), it);
+ ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end());
+ Res = Delta(Complement, ComplementSets);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) {
+ // Check empty set first to quickly find poor test functions.
+ if (GetTestResult(changeset_ty()))
+ return changeset_ty();
+
+ // Otherwise run the real delta algorithm.
+ changesetlist_ty Sets;
+ Split(Changes, Sets);
+
+ return Delta(Changes, Sets);
+}
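+
+// Usage sketch (illustrative only): minimize a set of changes down to the
+// ones an artificial failure predicate really needs (here 2 and 5).
+#if 0
+class PairFinder : public DeltaAlgorithm {
+protected:
+  virtual bool ExecuteOneTest(const changeset_ty &S) {
+    // The "interesting" behaviour reproduces whenever 2 and 5 both survive.
+    return S.count(2) && S.count(5);
+  }
+};
+
+static DeltaAlgorithm::changeset_ty MinimizePair() {
+  DeltaAlgorithm::changeset_ty Changes;
+  for (unsigned i = 0; i != 8; ++i)
+    Changes.insert(i);
+  PairFinder PF;
+  return PF.Run(Changes);                 // converges on {2, 5}
+}
+#endif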
diff --git a/contrib/llvm/lib/Support/Disassembler.cpp b/contrib/llvm/lib/Support/Disassembler.cpp
new file mode 100644
index 000000000000..6362aff43a9d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Disassembler.cpp
@@ -0,0 +1,75 @@
+//===- lib/Support/Disassembler.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Disassembler.h"
+
+#include <cassert>
+#include <iomanip>
+#include <string>
+#include <sstream>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler()
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+ // We have the option to enable the udis86 library.
+# if USE_UDIS86
+ return true;
+#else
+ return false;
+#endif
+#else
+ return false;
+#endif
+}
+
+std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
+ uint64_t pc) {
+ std::stringstream res;
+
+#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
+ && USE_UDIS86
+ unsigned bits;
+# if defined(__i386__)
+ bits = 32;
+# else
+ bits = 64;
+# endif
+
+ ud_t ud_obj;
+
+ ud_init(&ud_obj);
+ ud_set_input_buffer(&ud_obj, start, length);
+ ud_set_mode(&ud_obj, bits);
+ ud_set_pc(&ud_obj, pc);
+ ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+ res << std::setbase(16)
+ << std::setw(bits/4);
+
+ while (ud_disassemble(&ud_obj)) {
+ res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
+ }
+#else
+ res << "No disassembler available. See configure help for options.\n";
+#endif
+
+ return res.str();
+}
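+
+// Usage sketch (illustrative only): dumping freshly generated machine code,
+// if a disassembler was configured in. DumpJITCode is a hypothetical helper.
+#if 0
+#include "llvm/Support/raw_ostream.h"
+
+static void DumpJITCode(uint8_t *Code, size_t Size, uint64_t LoadAddr) {
+  if (!llvm::sys::hasDisassembler())
+    return;                               // built without udis86 support
+  llvm::errs() << llvm::sys::disassembleBuffer(Code, Size, LoadAddr);
+}
+#endif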
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
new file mode 100644
index 000000000000..0813321b6f79
--- /dev/null
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -0,0 +1,658 @@
+//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for generic dwarf information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+/// TagString - Return the string for the specified tag.
+///
+const char *llvm::dwarf::TagString(unsigned Tag) {
+ switch (Tag) {
+ case DW_TAG_array_type: return "DW_TAG_array_type";
+ case DW_TAG_class_type: return "DW_TAG_class_type";
+ case DW_TAG_entry_point: return "DW_TAG_entry_point";
+ case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type";
+ case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter";
+ case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration";
+ case DW_TAG_label: return "DW_TAG_label";
+ case DW_TAG_lexical_block: return "DW_TAG_lexical_block";
+ case DW_TAG_member: return "DW_TAG_member";
+ case DW_TAG_pointer_type: return "DW_TAG_pointer_type";
+ case DW_TAG_reference_type: return "DW_TAG_reference_type";
+ case DW_TAG_compile_unit: return "DW_TAG_compile_unit";
+ case DW_TAG_string_type: return "DW_TAG_string_type";
+ case DW_TAG_structure_type: return "DW_TAG_structure_type";
+ case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type";
+ case DW_TAG_typedef: return "DW_TAG_typedef";
+ case DW_TAG_union_type: return "DW_TAG_union_type";
+ case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters";
+ case DW_TAG_variant: return "DW_TAG_variant";
+ case DW_TAG_common_block: return "DW_TAG_common_block";
+ case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion";
+ case DW_TAG_inheritance: return "DW_TAG_inheritance";
+ case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine";
+ case DW_TAG_module: return "DW_TAG_module";
+ case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type";
+ case DW_TAG_set_type: return "DW_TAG_set_type";
+ case DW_TAG_subrange_type: return "DW_TAG_subrange_type";
+ case DW_TAG_with_stmt: return "DW_TAG_with_stmt";
+ case DW_TAG_access_declaration: return "DW_TAG_access_declaration";
+ case DW_TAG_base_type: return "DW_TAG_base_type";
+ case DW_TAG_catch_block: return "DW_TAG_catch_block";
+ case DW_TAG_const_type: return "DW_TAG_const_type";
+ case DW_TAG_constant: return "DW_TAG_constant";
+ case DW_TAG_enumerator: return "DW_TAG_enumerator";
+ case DW_TAG_file_type: return "DW_TAG_file_type";
+ case DW_TAG_friend: return "DW_TAG_friend";
+ case DW_TAG_namelist: return "DW_TAG_namelist";
+ case DW_TAG_namelist_item: return "DW_TAG_namelist_item";
+ case DW_TAG_packed_type: return "DW_TAG_packed_type";
+ case DW_TAG_subprogram: return "DW_TAG_subprogram";
+ case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
+ case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter";
+ case DW_TAG_thrown_type: return "DW_TAG_thrown_type";
+ case DW_TAG_try_block: return "DW_TAG_try_block";
+ case DW_TAG_variant_part: return "DW_TAG_variant_part";
+ case DW_TAG_variable: return "DW_TAG_variable";
+ case DW_TAG_volatile_type: return "DW_TAG_volatile_type";
+ case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure";
+ case DW_TAG_restrict_type: return "DW_TAG_restrict_type";
+ case DW_TAG_interface_type: return "DW_TAG_interface_type";
+ case DW_TAG_namespace: return "DW_TAG_namespace";
+ case DW_TAG_imported_module: return "DW_TAG_imported_module";
+ case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type";
+ case DW_TAG_partial_unit: return "DW_TAG_partial_unit";
+ case DW_TAG_imported_unit: return "DW_TAG_imported_unit";
+ case DW_TAG_condition: return "DW_TAG_condition";
+ case DW_TAG_shared_type: return "DW_TAG_shared_type";
+ case DW_TAG_lo_user: return "DW_TAG_lo_user";
+ case DW_TAG_hi_user: return "DW_TAG_hi_user";
+ case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
+ case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
+ case DW_TAG_return_variable: return "DW_TAG_return_variable";
+ case DW_TAG_vector_type: return "DW_TAG_vector_type";
+ }
+ return 0;
+}
+
+/// ChildrenString - Return the string for the specified children flag.
+///
+const char *llvm::dwarf::ChildrenString(unsigned Children) {
+ switch (Children) {
+ case DW_CHILDREN_no: return "DW_CHILDREN_no";
+ case DW_CHILDREN_yes: return "DW_CHILDREN_yes";
+ }
+ return 0;
+}
+
+/// AttributeString - Return the string for the specified attribute.
+///
+const char *llvm::dwarf::AttributeString(unsigned Attribute) {
+ switch (Attribute) {
+ case DW_AT_sibling: return "DW_AT_sibling";
+ case DW_AT_location: return "DW_AT_location";
+ case DW_AT_name: return "DW_AT_name";
+ case DW_AT_ordering: return "DW_AT_ordering";
+ case DW_AT_byte_size: return "DW_AT_byte_size";
+ case DW_AT_bit_offset: return "DW_AT_bit_offset";
+ case DW_AT_bit_size: return "DW_AT_bit_size";
+ case DW_AT_stmt_list: return "DW_AT_stmt_list";
+ case DW_AT_low_pc: return "DW_AT_low_pc";
+ case DW_AT_high_pc: return "DW_AT_high_pc";
+ case DW_AT_language: return "DW_AT_language";
+ case DW_AT_discr: return "DW_AT_discr";
+ case DW_AT_discr_value: return "DW_AT_discr_value";
+ case DW_AT_visibility: return "DW_AT_visibility";
+ case DW_AT_import: return "DW_AT_import";
+ case DW_AT_string_length: return "DW_AT_string_length";
+ case DW_AT_common_reference: return "DW_AT_common_reference";
+ case DW_AT_comp_dir: return "DW_AT_comp_dir";
+ case DW_AT_const_value: return "DW_AT_const_value";
+ case DW_AT_containing_type: return "DW_AT_containing_type";
+ case DW_AT_default_value: return "DW_AT_default_value";
+ case DW_AT_inline: return "DW_AT_inline";
+ case DW_AT_is_optional: return "DW_AT_is_optional";
+ case DW_AT_lower_bound: return "DW_AT_lower_bound";
+ case DW_AT_producer: return "DW_AT_producer";
+ case DW_AT_prototyped: return "DW_AT_prototyped";
+ case DW_AT_return_addr: return "DW_AT_return_addr";
+ case DW_AT_start_scope: return "DW_AT_start_scope";
+ case DW_AT_bit_stride: return "DW_AT_bit_stride";
+ case DW_AT_upper_bound: return "DW_AT_upper_bound";
+ case DW_AT_abstract_origin: return "DW_AT_abstract_origin";
+ case DW_AT_accessibility: return "DW_AT_accessibility";
+ case DW_AT_address_class: return "DW_AT_address_class";
+ case DW_AT_artificial: return "DW_AT_artificial";
+ case DW_AT_base_types: return "DW_AT_base_types";
+ case DW_AT_calling_convention: return "DW_AT_calling_convention";
+ case DW_AT_count: return "DW_AT_count";
+ case DW_AT_data_member_location: return "DW_AT_data_member_location";
+ case DW_AT_decl_column: return "DW_AT_decl_column";
+ case DW_AT_decl_file: return "DW_AT_decl_file";
+ case DW_AT_decl_line: return "DW_AT_decl_line";
+ case DW_AT_declaration: return "DW_AT_declaration";
+ case DW_AT_discr_list: return "DW_AT_discr_list";
+ case DW_AT_encoding: return "DW_AT_encoding";
+ case DW_AT_external: return "DW_AT_external";
+ case DW_AT_frame_base: return "DW_AT_frame_base";
+ case DW_AT_friend: return "DW_AT_friend";
+ case DW_AT_identifier_case: return "DW_AT_identifier_case";
+ case DW_AT_macro_info: return "DW_AT_macro_info";
+ case DW_AT_namelist_item: return "DW_AT_namelist_item";
+ case DW_AT_priority: return "DW_AT_priority";
+ case DW_AT_segment: return "DW_AT_segment";
+ case DW_AT_specification: return "DW_AT_specification";
+ case DW_AT_static_link: return "DW_AT_static_link";
+ case DW_AT_type: return "DW_AT_type";
+ case DW_AT_use_location: return "DW_AT_use_location";
+ case DW_AT_variable_parameter: return "DW_AT_variable_parameter";
+ case DW_AT_virtuality: return "DW_AT_virtuality";
+ case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location";
+ case DW_AT_allocated: return "DW_AT_allocated";
+ case DW_AT_associated: return "DW_AT_associated";
+ case DW_AT_data_location: return "DW_AT_data_location";
+ case DW_AT_byte_stride: return "DW_AT_byte_stride";
+ case DW_AT_entry_pc: return "DW_AT_entry_pc";
+ case DW_AT_use_UTF8: return "DW_AT_use_UTF8";
+ case DW_AT_extension: return "DW_AT_extension";
+ case DW_AT_ranges: return "DW_AT_ranges";
+ case DW_AT_trampoline: return "DW_AT_trampoline";
+ case DW_AT_call_column: return "DW_AT_call_column";
+ case DW_AT_call_file: return "DW_AT_call_file";
+ case DW_AT_call_line: return "DW_AT_call_line";
+ case DW_AT_description: return "DW_AT_description";
+ case DW_AT_binary_scale: return "DW_AT_binary_scale";
+ case DW_AT_decimal_scale: return "DW_AT_decimal_scale";
+ case DW_AT_small: return "DW_AT_small";
+ case DW_AT_decimal_sign: return "DW_AT_decimal_sign";
+ case DW_AT_digit_count: return "DW_AT_digit_count";
+ case DW_AT_picture_string: return "DW_AT_picture_string";
+ case DW_AT_mutable: return "DW_AT_mutable";
+ case DW_AT_threads_scaled: return "DW_AT_threads_scaled";
+ case DW_AT_explicit: return "DW_AT_explicit";
+ case DW_AT_object_pointer: return "DW_AT_object_pointer";
+ case DW_AT_endianity: return "DW_AT_endianity";
+ case DW_AT_elemental: return "DW_AT_elemental";
+ case DW_AT_pure: return "DW_AT_pure";
+ case DW_AT_recursive: return "DW_AT_recursive";
+ case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name";
+ case DW_AT_sf_names: return "DW_AT_sf_names";
+ case DW_AT_src_info: return "DW_AT_src_info";
+ case DW_AT_mac_info: return "DW_AT_mac_info";
+ case DW_AT_src_coords: return "DW_AT_src_coords";
+ case DW_AT_body_begin: return "DW_AT_body_begin";
+ case DW_AT_body_end: return "DW_AT_body_end";
+ case DW_AT_GNU_vector: return "DW_AT_GNU_vector";
+ case DW_AT_lo_user: return "DW_AT_lo_user";
+ case DW_AT_hi_user: return "DW_AT_hi_user";
+ case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
+ case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
+ case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
+ case DW_AT_APPLE_block: return "DW_AT_APPLE_block";
+ case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
+ case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
+ case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr";
+ case DW_AT_APPLE_property_name: return "DW_AT_APPLE_property_name";
+ case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter";
+ case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter";
+ case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
+ case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
+ }
+ return 0;
+}
+
+/// FormEncodingString - Return the string for the specified form encoding.
+///
+const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_FORM_addr: return "DW_FORM_addr";
+ case DW_FORM_block2: return "DW_FORM_block2";
+ case DW_FORM_block4: return "DW_FORM_block4";
+ case DW_FORM_data2: return "DW_FORM_data2";
+ case DW_FORM_data4: return "DW_FORM_data4";
+ case DW_FORM_data8: return "DW_FORM_data8";
+ case DW_FORM_string: return "DW_FORM_string";
+ case DW_FORM_block: return "DW_FORM_block";
+ case DW_FORM_block1: return "DW_FORM_block1";
+ case DW_FORM_data1: return "DW_FORM_data1";
+ case DW_FORM_flag: return "DW_FORM_flag";
+ case DW_FORM_sdata: return "DW_FORM_sdata";
+ case DW_FORM_strp: return "DW_FORM_strp";
+ case DW_FORM_udata: return "DW_FORM_udata";
+ case DW_FORM_ref_addr: return "DW_FORM_ref_addr";
+ case DW_FORM_ref1: return "DW_FORM_ref1";
+ case DW_FORM_ref2: return "DW_FORM_ref2";
+ case DW_FORM_ref4: return "DW_FORM_ref4";
+ case DW_FORM_ref8: return "DW_FORM_ref8";
+ case DW_FORM_ref_udata: return "DW_FORM_ref_udata";
+ case DW_FORM_indirect: return "DW_FORM_indirect";
+ }
+ return 0;
+}
+
+/// OperationEncodingString - Return the string for the specified operation
+/// encoding.
+const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_OP_addr: return "DW_OP_addr";
+ case DW_OP_deref: return "DW_OP_deref";
+ case DW_OP_const1u: return "DW_OP_const1u";
+ case DW_OP_const1s: return "DW_OP_const1s";
+ case DW_OP_const2u: return "DW_OP_const2u";
+ case DW_OP_const2s: return "DW_OP_const2s";
+ case DW_OP_const4u: return "DW_OP_const4u";
+ case DW_OP_const4s: return "DW_OP_const4s";
+ case DW_OP_const8u: return "DW_OP_const8u";
+ case DW_OP_const8s: return "DW_OP_const8s";
+ case DW_OP_constu: return "DW_OP_constu";
+ case DW_OP_consts: return "DW_OP_consts";
+ case DW_OP_dup: return "DW_OP_dup";
+ case DW_OP_drop: return "DW_OP_drop";
+ case DW_OP_over: return "DW_OP_over";
+ case DW_OP_pick: return "DW_OP_pick";
+ case DW_OP_swap: return "DW_OP_swap";
+ case DW_OP_rot: return "DW_OP_rot";
+ case DW_OP_xderef: return "DW_OP_xderef";
+ case DW_OP_abs: return "DW_OP_abs";
+ case DW_OP_and: return "DW_OP_and";
+ case DW_OP_div: return "DW_OP_div";
+ case DW_OP_minus: return "DW_OP_minus";
+ case DW_OP_mod: return "DW_OP_mod";
+ case DW_OP_mul: return "DW_OP_mul";
+ case DW_OP_neg: return "DW_OP_neg";
+ case DW_OP_not: return "DW_OP_not";
+ case DW_OP_or: return "DW_OP_or";
+ case DW_OP_plus: return "DW_OP_plus";
+ case DW_OP_plus_uconst: return "DW_OP_plus_uconst";
+ case DW_OP_shl: return "DW_OP_shl";
+ case DW_OP_shr: return "DW_OP_shr";
+ case DW_OP_shra: return "DW_OP_shra";
+ case DW_OP_xor: return "DW_OP_xor";
+ case DW_OP_skip: return "DW_OP_skip";
+ case DW_OP_bra: return "DW_OP_bra";
+ case DW_OP_eq: return "DW_OP_eq";
+ case DW_OP_ge: return "DW_OP_ge";
+ case DW_OP_gt: return "DW_OP_gt";
+ case DW_OP_le: return "DW_OP_le";
+ case DW_OP_lt: return "DW_OP_lt";
+ case DW_OP_ne: return "DW_OP_ne";
+ case DW_OP_lit0: return "DW_OP_lit0";
+ case DW_OP_lit1: return "DW_OP_lit1";
+ case DW_OP_lit2: return "DW_OP_lit2";
+ case DW_OP_lit3: return "DW_OP_lit3";
+ case DW_OP_lit4: return "DW_OP_lit4";
+ case DW_OP_lit5: return "DW_OP_lit5";
+ case DW_OP_lit6: return "DW_OP_lit6";
+ case DW_OP_lit7: return "DW_OP_lit7";
+ case DW_OP_lit8: return "DW_OP_lit8";
+ case DW_OP_lit9: return "DW_OP_lit9";
+ case DW_OP_lit10: return "DW_OP_lit10";
+ case DW_OP_lit11: return "DW_OP_lit11";
+ case DW_OP_lit12: return "DW_OP_lit12";
+ case DW_OP_lit13: return "DW_OP_lit13";
+ case DW_OP_lit14: return "DW_OP_lit14";
+ case DW_OP_lit15: return "DW_OP_lit15";
+ case DW_OP_lit16: return "DW_OP_lit16";
+ case DW_OP_lit17: return "DW_OP_lit17";
+ case DW_OP_lit18: return "DW_OP_lit18";
+ case DW_OP_lit19: return "DW_OP_lit19";
+ case DW_OP_lit20: return "DW_OP_lit20";
+ case DW_OP_lit21: return "DW_OP_lit21";
+ case DW_OP_lit22: return "DW_OP_lit22";
+ case DW_OP_lit23: return "DW_OP_lit23";
+ case DW_OP_lit24: return "DW_OP_lit24";
+ case DW_OP_lit25: return "DW_OP_lit25";
+ case DW_OP_lit26: return "DW_OP_lit26";
+ case DW_OP_lit27: return "DW_OP_lit27";
+ case DW_OP_lit28: return "DW_OP_lit28";
+ case DW_OP_lit29: return "DW_OP_lit29";
+ case DW_OP_lit30: return "DW_OP_lit30";
+ case DW_OP_lit31: return "DW_OP_lit31";
+ case DW_OP_reg0: return "DW_OP_reg0";
+ case DW_OP_reg1: return "DW_OP_reg1";
+ case DW_OP_reg2: return "DW_OP_reg2";
+ case DW_OP_reg3: return "DW_OP_reg3";
+ case DW_OP_reg4: return "DW_OP_reg4";
+ case DW_OP_reg5: return "DW_OP_reg5";
+ case DW_OP_reg6: return "DW_OP_reg6";
+ case DW_OP_reg7: return "DW_OP_reg7";
+ case DW_OP_reg8: return "DW_OP_reg8";
+ case DW_OP_reg9: return "DW_OP_reg9";
+ case DW_OP_reg10: return "DW_OP_reg10";
+ case DW_OP_reg11: return "DW_OP_reg11";
+ case DW_OP_reg12: return "DW_OP_reg12";
+ case DW_OP_reg13: return "DW_OP_reg13";
+ case DW_OP_reg14: return "DW_OP_reg14";
+ case DW_OP_reg15: return "DW_OP_reg15";
+ case DW_OP_reg16: return "DW_OP_reg16";
+ case DW_OP_reg17: return "DW_OP_reg17";
+ case DW_OP_reg18: return "DW_OP_reg18";
+ case DW_OP_reg19: return "DW_OP_reg19";
+ case DW_OP_reg20: return "DW_OP_reg20";
+ case DW_OP_reg21: return "DW_OP_reg21";
+ case DW_OP_reg22: return "DW_OP_reg22";
+ case DW_OP_reg23: return "DW_OP_reg23";
+ case DW_OP_reg24: return "DW_OP_reg24";
+ case DW_OP_reg25: return "DW_OP_reg25";
+ case DW_OP_reg26: return "DW_OP_reg26";
+ case DW_OP_reg27: return "DW_OP_reg27";
+ case DW_OP_reg28: return "DW_OP_reg28";
+ case DW_OP_reg29: return "DW_OP_reg29";
+ case DW_OP_reg30: return "DW_OP_reg30";
+ case DW_OP_reg31: return "DW_OP_reg31";
+ case DW_OP_breg0: return "DW_OP_breg0";
+ case DW_OP_breg1: return "DW_OP_breg1";
+ case DW_OP_breg2: return "DW_OP_breg2";
+ case DW_OP_breg3: return "DW_OP_breg3";
+ case DW_OP_breg4: return "DW_OP_breg4";
+ case DW_OP_breg5: return "DW_OP_breg5";
+ case DW_OP_breg6: return "DW_OP_breg6";
+ case DW_OP_breg7: return "DW_OP_breg7";
+ case DW_OP_breg8: return "DW_OP_breg8";
+ case DW_OP_breg9: return "DW_OP_breg9";
+ case DW_OP_breg10: return "DW_OP_breg10";
+ case DW_OP_breg11: return "DW_OP_breg11";
+ case DW_OP_breg12: return "DW_OP_breg12";
+ case DW_OP_breg13: return "DW_OP_breg13";
+ case DW_OP_breg14: return "DW_OP_breg14";
+ case DW_OP_breg15: return "DW_OP_breg15";
+ case DW_OP_breg16: return "DW_OP_breg16";
+ case DW_OP_breg17: return "DW_OP_breg17";
+ case DW_OP_breg18: return "DW_OP_breg18";
+ case DW_OP_breg19: return "DW_OP_breg19";
+ case DW_OP_breg20: return "DW_OP_breg20";
+ case DW_OP_breg21: return "DW_OP_breg21";
+ case DW_OP_breg22: return "DW_OP_breg22";
+ case DW_OP_breg23: return "DW_OP_breg23";
+ case DW_OP_breg24: return "DW_OP_breg24";
+ case DW_OP_breg25: return "DW_OP_breg25";
+ case DW_OP_breg26: return "DW_OP_breg26";
+ case DW_OP_breg27: return "DW_OP_breg27";
+ case DW_OP_breg28: return "DW_OP_breg28";
+ case DW_OP_breg29: return "DW_OP_breg29";
+ case DW_OP_breg30: return "DW_OP_breg30";
+ case DW_OP_breg31: return "DW_OP_breg31";
+ case DW_OP_regx: return "DW_OP_regx";
+ case DW_OP_fbreg: return "DW_OP_fbreg";
+ case DW_OP_bregx: return "DW_OP_bregx";
+ case DW_OP_piece: return "DW_OP_piece";
+ case DW_OP_deref_size: return "DW_OP_deref_size";
+ case DW_OP_xderef_size: return "DW_OP_xderef_size";
+ case DW_OP_nop: return "DW_OP_nop";
+ case DW_OP_push_object_address: return "DW_OP_push_object_address";
+ case DW_OP_call2: return "DW_OP_call2";
+ case DW_OP_call4: return "DW_OP_call4";
+ case DW_OP_call_ref: return "DW_OP_call_ref";
+ case DW_OP_form_tls_address: return "DW_OP_form_tls_address";
+ case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa";
+ case DW_OP_bit_piece: return "DW_OP_bit_piece";
+ case DW_OP_lo_user: return "DW_OP_lo_user";
+ case DW_OP_hi_user: return "DW_OP_hi_user";
+ }
+ return 0;
+}
+
+/// AttributeEncodingString - Return the string for the specified attribute
+/// encoding.
+const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_ATE_address: return "DW_ATE_address";
+ case DW_ATE_boolean: return "DW_ATE_boolean";
+ case DW_ATE_complex_float: return "DW_ATE_complex_float";
+ case DW_ATE_float: return "DW_ATE_float";
+ case DW_ATE_signed: return "DW_ATE_signed";
+ case DW_ATE_signed_char: return "DW_ATE_signed_char";
+ case DW_ATE_unsigned: return "DW_ATE_unsigned";
+ case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char";
+ case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float";
+ case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal";
+ case DW_ATE_numeric_string: return "DW_ATE_numeric_string";
+ case DW_ATE_edited: return "DW_ATE_edited";
+ case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed";
+ case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed";
+ case DW_ATE_decimal_float: return "DW_ATE_decimal_float";
+ case DW_ATE_lo_user: return "DW_ATE_lo_user";
+ case DW_ATE_hi_user: return "DW_ATE_hi_user";
+ }
+ return 0;
+}
+
+/// DecimalSignString - Return the string for the specified decimal sign
+/// attribute.
+const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
+ switch (Sign) {
+ case DW_DS_unsigned: return "DW_DS_unsigned";
+ case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch";
+ case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch";
+ case DW_DS_leading_separate: return "DW_DS_leading_separate";
+ case DW_DS_trailing_separate: return "DW_DS_trailing_separate";
+ }
+ return 0;
+}
+
+/// EndianityString - Return the string for the specified endianity.
+///
+const char *llvm::dwarf::EndianityString(unsigned Endian) {
+ switch (Endian) {
+ case DW_END_default: return "DW_END_default";
+ case DW_END_big: return "DW_END_big";
+ case DW_END_little: return "DW_END_little";
+ case DW_END_lo_user: return "DW_END_lo_user";
+ case DW_END_hi_user: return "DW_END_hi_user";
+ }
+ return 0;
+}
+
+/// AccessibilityString - Return the string for the specified accessibility.
+///
+const char *llvm::dwarf::AccessibilityString(unsigned Access) {
+ switch (Access) {
+ // Accessibility codes
+ case DW_ACCESS_public: return "DW_ACCESS_public";
+ case DW_ACCESS_protected: return "DW_ACCESS_protected";
+ case DW_ACCESS_private: return "DW_ACCESS_private";
+ }
+ return 0;
+}
+
+/// VisibilityString - Return the string for the specified visibility.
+///
+const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
+ switch (Visibility) {
+ case DW_VIS_local: return "DW_VIS_local";
+ case DW_VIS_exported: return "DW_VIS_exported";
+ case DW_VIS_qualified: return "DW_VIS_qualified";
+ }
+ return 0;
+}
+
+/// VirtualityString - Return the string for the specified virtuality.
+///
+const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
+ switch (Virtuality) {
+ case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none";
+ case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual";
+ case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual";
+ }
+ return 0;
+}
+
+/// LanguageString - Return the string for the specified language.
+///
+const char *llvm::dwarf::LanguageString(unsigned Language) {
+ switch (Language) {
+ case DW_LANG_C89: return "DW_LANG_C89";
+ case DW_LANG_C: return "DW_LANG_C";
+ case DW_LANG_Ada83: return "DW_LANG_Ada83";
+ case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus";
+ case DW_LANG_Cobol74: return "DW_LANG_Cobol74";
+ case DW_LANG_Cobol85: return "DW_LANG_Cobol85";
+ case DW_LANG_Fortran77: return "DW_LANG_Fortran77";
+ case DW_LANG_Fortran90: return "DW_LANG_Fortran90";
+ case DW_LANG_Pascal83: return "DW_LANG_Pascal83";
+ case DW_LANG_Modula2: return "DW_LANG_Modula2";
+ case DW_LANG_Java: return "DW_LANG_Java";
+ case DW_LANG_C99: return "DW_LANG_C99";
+ case DW_LANG_Ada95: return "DW_LANG_Ada95";
+ case DW_LANG_Fortran95: return "DW_LANG_Fortran95";
+ case DW_LANG_PLI: return "DW_LANG_PLI";
+ case DW_LANG_ObjC: return "DW_LANG_ObjC";
+ case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus";
+ case DW_LANG_UPC: return "DW_LANG_UPC";
+ case DW_LANG_D: return "DW_LANG_D";
+ case DW_LANG_lo_user: return "DW_LANG_lo_user";
+ case DW_LANG_hi_user: return "DW_LANG_hi_user";
+ }
+ return 0;
+}
+
+/// CaseString - Return the string for the specified identifier case.
+///
+const char *llvm::dwarf::CaseString(unsigned Case) {
+ switch (Case) {
+ case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
+ case DW_ID_up_case: return "DW_ID_up_case";
+ case DW_ID_down_case: return "DW_ID_down_case";
+ case DW_ID_case_insensitive: return "DW_ID_case_insensitive";
+ }
+ return 0;
+}
+
+/// ConventionString - Return the string for the specified calling convention.
+///
+const char *llvm::dwarf::ConventionString(unsigned Convention) {
+ switch (Convention) {
+ case DW_CC_normal: return "DW_CC_normal";
+ case DW_CC_program: return "DW_CC_program";
+ case DW_CC_nocall: return "DW_CC_nocall";
+ case DW_CC_lo_user: return "DW_CC_lo_user";
+ case DW_CC_hi_user: return "DW_CC_hi_user";
+ }
+ return 0;
+}
+
+/// InlineCodeString - Return the string for the specified inline code.
+///
+const char *llvm::dwarf::InlineCodeString(unsigned Code) {
+ switch (Code) {
+ case DW_INL_not_inlined: return "DW_INL_not_inlined";
+ case DW_INL_inlined: return "DW_INL_inlined";
+ case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined";
+ case DW_INL_declared_inlined: return "DW_INL_declared_inlined";
+ }
+ return 0;
+}
+
+/// ArrayOrderString - Return the string for the specified array order.
+///
+const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
+ switch (Order) {
+ case DW_ORD_row_major: return "DW_ORD_row_major";
+ case DW_ORD_col_major: return "DW_ORD_col_major";
+ }
+ return 0;
+}
+
+/// DiscriminantString - Return the string for the specified discriminant
+/// descriptor.
+const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
+ switch (Discriminant) {
+ case DW_DSC_label: return "DW_DSC_label";
+ case DW_DSC_range: return "DW_DSC_range";
+ }
+ return 0;
+}
+
+/// LNStandardString - Return the string for the specified line number standard.
+///
+const char *llvm::dwarf::LNStandardString(unsigned Standard) {
+ switch (Standard) {
+ case DW_LNS_copy: return "DW_LNS_copy";
+ case DW_LNS_advance_pc: return "DW_LNS_advance_pc";
+ case DW_LNS_advance_line: return "DW_LNS_advance_line";
+ case DW_LNS_set_file: return "DW_LNS_set_file";
+ case DW_LNS_set_column: return "DW_LNS_set_column";
+ case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt";
+ case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block";
+ case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc";
+ case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc";
+ case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end";
+ case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin";
+ case DW_LNS_set_isa: return "DW_LNS_set_isa";
+ }
+ return 0;
+}
+
+/// LNExtendedString - Return the string for the specified line number extended
+/// opcode encodings.
+const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
+ switch (Encoding) {
+ // Line Number Extended Opcode Encodings
+ case DW_LNE_end_sequence: return "DW_LNE_end_sequence";
+ case DW_LNE_set_address: return "DW_LNE_set_address";
+ case DW_LNE_define_file: return "DW_LNE_define_file";
+ case DW_LNE_lo_user: return "DW_LNE_lo_user";
+ case DW_LNE_hi_user: return "DW_LNE_hi_user";
+ }
+ return 0;
+}
+
+/// MacinfoString - Return the string for the specified macinfo type encodings.
+///
+const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
+ switch (Encoding) {
+ // Macinfo Type Encodings
+ case DW_MACINFO_define: return "DW_MACINFO_define";
+ case DW_MACINFO_undef: return "DW_MACINFO_undef";
+ case DW_MACINFO_start_file: return "DW_MACINFO_start_file";
+ case DW_MACINFO_end_file: return "DW_MACINFO_end_file";
+ case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
+ }
+ return 0;
+}
+
+/// CallFrameString - Return the string for the specified call frame instruction
+/// encodings.
+const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
+ case DW_CFA_offset: return "DW_CFA_offset";
+ case DW_CFA_restore: return "DW_CFA_restore";
+ case DW_CFA_set_loc: return "DW_CFA_set_loc";
+ case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1";
+ case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2";
+ case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4";
+ case DW_CFA_offset_extended: return "DW_CFA_offset_extended";
+ case DW_CFA_restore_extended: return "DW_CFA_restore_extended";
+ case DW_CFA_undefined: return "DW_CFA_undefined";
+ case DW_CFA_same_value: return "DW_CFA_same_value";
+ case DW_CFA_register: return "DW_CFA_register";
+ case DW_CFA_remember_state: return "DW_CFA_remember_state";
+ case DW_CFA_restore_state: return "DW_CFA_restore_state";
+ case DW_CFA_def_cfa: return "DW_CFA_def_cfa";
+ case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register";
+ case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset";
+ case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression";
+ case DW_CFA_expression: return "DW_CFA_expression";
+ case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf";
+ case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf";
+ case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf";
+ case DW_CFA_val_offset: return "DW_CFA_val_offset";
+ case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
+ case DW_CFA_val_expression: return "DW_CFA_val_expression";
+ case DW_CFA_lo_user: return "DW_CFA_lo_user";
+ case DW_CFA_hi_user: return "DW_CFA_hi_user";
+ }
+ return 0;
+}
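[Editorial note, not part of the imported sources: the lookup helpers in this file share one contract visible above -- a known value maps to its DW_* spelling, and anything else yields a null pointer. A small sketch of how a caller would guard for that; printTag() is a hypothetical function.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/Dwarf.h"
#include <cstdio>

void printTag(unsigned Tag) {
  if (const char *S = llvm::dwarf::TagString(Tag))
    std::printf("%s\n", S);                  // e.g. "DW_TAG_subprogram"
  else
    std::printf("unknown tag 0x%x\n", Tag);  // helpers return 0 when unknown
}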
diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp
new file mode 100644
index 000000000000..455c3801cc68
--- /dev/null
+++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp
@@ -0,0 +1,170 @@
+//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system DynamicLibrary concept.
+//
+// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is
+// not thread safe!
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <vector>
+
+// Collection of symbol name/value pairs to be searched prior to any libraries.
+static std::map<std::string, void*> *ExplicitSymbols = 0;
+
+namespace {
+
+struct ExplicitSymbolsDeleter {
+ ~ExplicitSymbolsDeleter() {
+ if (ExplicitSymbols)
+ delete ExplicitSymbols;
+ }
+};
+
+}
+
+static ExplicitSymbolsDeleter Dummy;
+
+void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
+ void *symbolValue) {
+ if (ExplicitSymbols == 0)
+ ExplicitSymbols = new std::map<std::string, void*>();
+ (*ExplicitSymbols)[symbolName] = symbolValue;
+}
+
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/DynamicLibrary.inc"
+
+#else
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<void *> *OpenedHandles = 0;
+
+
+static SmartMutex<true>& getMutex() {
+ static SmartMutex<true> HandlesMutex;
+ return HandlesMutex;
+}
+
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+ std::string *ErrMsg) {
+ void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
+ if (H == 0) {
+ if (ErrMsg) *ErrMsg = dlerror();
+ return true;
+ }
+#ifdef __CYGWIN__
+  // With the handle returned by dlopen(NULL, RTLD_GLOBAL), Cygwin searches
+  // symbols only in the main executable, so use RTLD_DEFAULT instead.
+ if (Filename == NULL)
+ H = RTLD_DEFAULT;
+#endif
+ SmartScopedLock<true> Lock(getMutex());
+ if (OpenedHandles == 0)
+ OpenedHandles = new std::vector<void *>();
+ OpenedHandles->push_back(H);
+ return false;
+}
+#else
+
+using namespace llvm;
+using namespace llvm::sys;
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+ std::string *ErrMsg) {
+ if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
+ return true;
+}
+#endif
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+}
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ // First check symbols added via AddSymbol().
+ if (ExplicitSymbols) {
+ std::map<std::string, void *>::iterator I =
+ ExplicitSymbols->find(symbolName);
+ std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+
+ if (I != E)
+ return I->second;
+ }
+
+#if HAVE_DLFCN_H
+ // Now search the libraries.
+ SmartScopedLock<true> Lock(getMutex());
+ if (OpenedHandles) {
+ for (std::vector<void *>::iterator I = OpenedHandles->begin(),
+ E = OpenedHandles->end(); I != E; ++I) {
+ //lt_ptr ptr = lt_dlsym(*I, symbolName);
+ void *ptr = dlsym(*I, symbolName);
+ if (ptr) {
+ return ptr;
+ }
+ }
+ }
+#endif
+
+ if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
+ return Result;
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__)
+ {
+ EXPLICIT_SYMBOL(stderr);
+ EXPLICIT_SYMBOL(stdout);
+ EXPLICIT_SYMBOL(stdin);
+ }
+#else
+ // For everything else, we want to check to make sure the symbol isn't defined
+ // as a macro before using EXPLICIT_SYMBOL.
+ {
+#ifndef stdin
+ EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+ EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+ EXPLICIT_SYMBOL(stderr);
+#endif
+ }
+#endif
+#undef EXPLICIT_SYMBOL
+
+ return 0;
+}
+
+#endif // LLVM_ON_WIN32
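[Editorial note, not part of the imported sources: a brief sketch of the API implemented above. Libraries are opened process-wide and stay open, and symbols registered with AddSymbol() are consulted before any opened handle. loadAndFind() is a hypothetical helper.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/DynamicLibrary.h"
#include <string>

void *loadAndFind(const char *Lib, const char *Sym, std::string &Err) {
  // LoadLibraryPermanently() returns true on failure and fills Err.
  if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Lib, &Err))
    return 0;
  // Explicit symbols (AddSymbol) are searched first, then every open handle.
  return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Sym);
}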
diff --git a/contrib/llvm/lib/Support/Errno.cpp b/contrib/llvm/lib/Support/Errno.cpp
new file mode 100644
index 000000000000..18c658173a7a
--- /dev/null
+++ b/contrib/llvm/lib/Support/Errno.cpp
@@ -0,0 +1,74 @@
+//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the errno wrappers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+
+#if HAVE_STRING_H
+#include <string.h>
+
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace sys {
+
+#if HAVE_ERRNO_H
+std::string StrError() {
+ return StrError(errno);
+}
+#endif // HAVE_ERRNO_H
+
+std::string StrError(int errnum) {
+ const int MaxErrStrLen = 2000;
+ char buffer[MaxErrStrLen];
+ buffer[0] = '\0';
+ char* str = buffer;
+#ifdef HAVE_STRERROR_R
+ // strerror_r is thread-safe.
+ if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ // glibc defines its own incompatible version of strerror_r
+ // which may not use the buffer supplied.
+ str = strerror_r(errnum,buffer,MaxErrStrLen-1);
+# else
+ strerror_r(errnum,buffer,MaxErrStrLen-1);
+# endif
+#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
+ if (errnum)
+ strerror_s(buffer, errnum);
+#elif defined(HAVE_STRERROR)
+  // Copy the thread-unsafe result of strerror into the buffer as quickly as
+  // possible to minimize the impact of concurrent strerror calls from other
+  // threads.
+ if (errnum)
+ strncpy(buffer,strerror(errnum),MaxErrStrLen-1);
+ buffer[MaxErrStrLen-1] = '\0';
+#else
+ // Strange that this system doesn't even have strerror
+ // but, oh well, just use a generic message
+ sprintf(buffer, "Error #%d", errnum);
+#endif
+ return str;
+}
+
+} // namespace sys
+} // namespace llvm
+
+#endif // HAVE_STRING_H
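[Editorial note, not part of the imported sources: a typical use of the wrapper above -- convert the errno left by a failed libc call into a printable message without caring which strerror variant the platform provides. openOrReport() is a hypothetical caller.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/Errno.h"
#include <cerrno>
#include <cstdio>

bool openOrReport(const char *Path) {
  std::FILE *F = std::fopen(Path, "r");
  if (!F) {
    // StrError(errno) picks strerror_r/strerror_s/strerror as available.
    std::fprintf(stderr, "%s: %s\n", Path, llvm::sys::StrError(errno).c_str());
    return false;
  }
  std::fclose(F);
  return true;
}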
diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp
new file mode 100644
index 000000000000..e6cc57db8243
--- /dev/null
+++ b/contrib/llvm/lib/Support/ErrorHandling.cpp
@@ -0,0 +1,99 @@
+//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+#include <cstdlib>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(_MSC_VER)
+# include <io.h>
+# include <fcntl.h>
+#endif
+
+using namespace llvm;
+
+static fatal_error_handler_t ErrorHandler = 0;
+static void *ErrorHandlerUserData = 0;
+
+void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
+ void *user_data) {
+ assert(!llvm_is_multithreaded() &&
+ "Cannot register error handlers after starting multithreaded mode!\n");
+ assert(!ErrorHandler && "Error handler already registered!\n");
+ ErrorHandler = handler;
+ ErrorHandlerUserData = user_data;
+}
+
+void llvm::remove_fatal_error_handler() {
+ ErrorHandler = 0;
+}
+
+void llvm::report_fatal_error(const char *Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(const std::string &Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(StringRef Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(const Twine &Reason) {
+ if (ErrorHandler) {
+ ErrorHandler(ErrorHandlerUserData, Reason.str());
+ } else {
+ // Blast the result out to stderr. We don't try hard to make sure this
+ // succeeds (e.g. handling EINTR) and we can't use errs() here because
+ // raw ostreams can call report_fatal_error.
+ SmallVector<char, 64> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "LLVM ERROR: " << Reason << "\n";
+ StringRef MessageStr = OS.str();
+ ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
+ (void)written; // If something went wrong, we deliberately just give up.
+ }
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+
+ exit(1);
+}
+
+void llvm::llvm_unreachable_internal(const char *msg, const char *file,
+ unsigned line) {
+ // This code intentionally doesn't call the ErrorHandler callback, because
+ // llvm_unreachable is intended to be used to indicate "impossible"
+ // situations, and not legitimate runtime errors.
+ if (msg)
+ dbgs() << msg << "\n";
+ dbgs() << "UNREACHABLE executed";
+ if (file)
+ dbgs() << " at " << file << ":" << line;
+ dbgs() << "!\n";
+ abort();
+}
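[Editorial note, not part of the imported sources: a sketch of how a client overrides the default stderr path above. The two-argument handler signature matches the call made inside report_fatal_error(); the handler name and exit code are arbitrary.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <cstdlib>
#include <string>

static void MyFatalHandler(void *UserData, const std::string &Reason) {
  std::fprintf(stderr, "mytool fatal error: %s\n", Reason.c_str());
  std::exit(2);  // report_fatal_error() never returns to its caller
}

int main() {
  // Must be installed before enabling multithreading (see the assert above).
  llvm::install_fatal_error_handler(MyFatalHandler, 0);
  llvm::report_fatal_error("unrecoverable condition");
}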
diff --git a/contrib/llvm/lib/Support/FileUtilities.cpp b/contrib/llvm/lib/Support/FileUtilities.cpp
new file mode 100644
index 000000000000..4c8c0c63ffc4
--- /dev/null
+++ b/contrib/llvm/lib/Support/FileUtilities.cpp
@@ -0,0 +1,281 @@
+//===- Support/FileUtilities.cpp - File System Utilities ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a family of utility functions which are useful for doing
+// various things with files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+static bool isSignedChar(char C) {
+ return (C == '+' || C == '-');
+}
+
+static bool isExponentChar(char C) {
+ switch (C) {
+ case 'D': // Strange exponential notation.
+ case 'd': // Strange exponential notation.
+ case 'e':
+ case 'E': return true;
+ default: return false;
+ }
+}
+
+static bool isNumberChar(char C) {
+ switch (C) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '.': return true;
+ default: return isSignedChar(C) || isExponentChar(C);
+ }
+}
+
+static const char *BackupNumber(const char *Pos, const char *FirstChar) {
+ // If we didn't stop in the middle of a number, don't backup.
+ if (!isNumberChar(*Pos)) return Pos;
+
+ // Otherwise, return to the start of the number.
+ bool HasPeriod = false;
+ while (Pos > FirstChar && isNumberChar(Pos[-1])) {
+ // Backup over at most one period.
+ if (Pos[-1] == '.') {
+ if (HasPeriod)
+ break;
+ HasPeriod = true;
+ }
+
+ --Pos;
+ if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
+ break;
+ }
+ return Pos;
+}
+
+/// EndOfNumber - Return the first character that is not part of the specified
+/// number. This assumes that the buffer is null terminated, so it won't fall
+/// off the end.
+static const char *EndOfNumber(const char *Pos) {
+ while (isNumberChar(*Pos))
+ ++Pos;
+ return Pos;
+}
+
+/// CompareNumbers - compare two numbers, returning true if they are different.
+static bool CompareNumbers(const char *&F1P, const char *&F2P,
+ const char *F1End, const char *F2End,
+ double AbsTolerance, double RelTolerance,
+ std::string *ErrorMsg) {
+ const char *F1NumEnd, *F2NumEnd;
+ double V1 = 0.0, V2 = 0.0;
+
+ // If one of the positions is at a space and the other isn't, chomp up 'til
+ // the end of the space.
+ while (isspace(*F1P) && F1P != F1End)
+ ++F1P;
+ while (isspace(*F2P) && F2P != F2End)
+ ++F2P;
+
+ // If we stop on numbers, compare their difference.
+ if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) {
+ // The diff failed.
+ F1NumEnd = F1P;
+ F2NumEnd = F2P;
+ } else {
+ // Note that some ugliness is built into this to permit support for numbers
+ // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This
+ // occurs in 200.sixtrack in spec2k.
+ V1 = strtod(F1P, const_cast<char**>(&F1NumEnd));
+ V2 = strtod(F2P, const_cast<char**>(&F2NumEnd));
+
+ if (*F1NumEnd == 'D' || *F1NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
+
+ V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
+ F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
+ }
+
+ if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
+
+ V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
+ F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
+ }
+ }
+
+ if (F1NumEnd == F1P || F2NumEnd == F2P) {
+ if (ErrorMsg) {
+ *ErrorMsg = "FP Comparison failed, not a numeric difference between '";
+ *ErrorMsg += F1P[0];
+ *ErrorMsg += "' and '";
+ *ErrorMsg += F2P[0];
+ *ErrorMsg += "'";
+ }
+ return true;
+ }
+
+ // Check to see if these are inside the absolute tolerance
+ if (AbsTolerance < std::abs(V1-V2)) {
+ // Nope, check the relative tolerance...
+ double Diff;
+ if (V2)
+ Diff = std::abs(V1/V2 - 1.0);
+ else if (V1)
+ Diff = std::abs(V2/V1 - 1.0);
+ else
+ Diff = 0; // Both zero.
+ if (Diff > RelTolerance) {
+ if (ErrorMsg) {
+ raw_string_ostream(*ErrorMsg)
+ << "Compared: " << V1 << " and " << V2 << '\n'
+ << "abs. diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n'
+ << "Out of tolerance: rel/abs: " << RelTolerance << '/'
+ << AbsTolerance;
+ }
+ return true;
+ }
+ }
+
+ // Otherwise, advance our read pointers to the end of the numbers.
+ F1P = F1NumEnd; F2P = F2NumEnd;
+ return false;
+}
+
+/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
+/// files match, 1 if they are different, and 2 if there is a file error. This
+/// function differs from DiffFiles in that you can specify an absolute and
+/// relative FP error that is allowed to exist. If you specify a string to fill
+/// in for the error option, it will set the string to an error message if an
+/// error occurs, allowing the caller to distinguish between a failed diff and a
+/// file system error.
+///
+int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
+ const sys::PathWithStatus &FileB,
+ double AbsTol, double RelTol,
+ std::string *Error) {
+ const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error);
+ if (!FileAStat)
+ return 2;
+ const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error);
+ if (!FileBStat)
+ return 2;
+
+ // Check for zero length files because some systems croak when you try to
+ // mmap an empty file.
+ size_t A_size = FileAStat->getSize();
+ size_t B_size = FileBStat->getSize();
+
+ // If they are both zero sized then they're the same
+ if (A_size == 0 && B_size == 0)
+ return 0;
+
+ // If only one of them is zero sized then they can't be the same
+ if ((A_size == 0 || B_size == 0)) {
+ if (Error)
+ *Error = "Files differ: one is zero-sized, the other isn't";
+ return 1;
+ }
+
+  // Now it's safe to mmap the files into memory because both files
+ // have a non-zero size.
+ error_code ec;
+ OwningPtr<MemoryBuffer> F1;
+ if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+ OwningPtr<MemoryBuffer> F2;
+ if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+
+ // Okay, now that we opened the files, scan them for the first difference.
+ const char *File1Start = F1->getBufferStart();
+ const char *File2Start = F2->getBufferStart();
+ const char *File1End = F1->getBufferEnd();
+ const char *File2End = F2->getBufferEnd();
+ const char *F1P = File1Start;
+ const char *F2P = File2Start;
+
+ // Are the buffers identical? Common case: Handle this efficiently.
+ if (A_size == B_size &&
+ std::memcmp(File1Start, File2Start, A_size) == 0)
+ return 0;
+
+  // Otherwise the contents differ; if no tolerances are set, we are done.
+ if (AbsTol == 0 && RelTol == 0) {
+ if (Error)
+ *Error = "Files differ without tolerance allowance";
+ return 1; // Files different!
+ }
+
+ bool CompareFailed = false;
+ while (1) {
+ // Scan for the end of file or next difference.
+ while (F1P < File1End && F2P < File2End && *F1P == *F2P)
+ ++F1P, ++F2P;
+
+ if (F1P >= File1End || F2P >= File2End) break;
+
+ // Okay, we must have found a difference. Backup to the start of the
+ // current number each stream is at so that we can compare from the
+ // beginning.
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) {
+ CompareFailed = true;
+ break;
+ }
+ }
+
+ // Okay, we reached the end of file. If both files are at the end, we
+ // succeeded.
+ bool F1AtEnd = F1P >= File1End;
+ bool F2AtEnd = F2P >= File2End;
+ if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) {
+ // Else, we might have run off the end due to a number: backup and retry.
+ if (F1AtEnd && isNumberChar(F1P[-1])) --F1P;
+ if (F2AtEnd && isNumberChar(F2P[-1])) --F2P;
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error))
+ CompareFailed = true;
+
+ // If we found the end, we succeeded.
+ if (F1P < File1End || F2P < File2End)
+ CompareFailed = true;
+ }
+
+ return CompareFailed;
+}
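[Editorial note, not part of the imported sources: a sketch of the entry point above from a caller's perspective. The return codes follow the doc comment: 0 = match, 1 = differ, 2 = file error. outputsMatch() and the tolerance values are hypothetical.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include <cstdio>
#include <string>

bool outputsMatch(const char *A, const char *B) {
  std::string Err;
  int Res = llvm::DiffFilesWithTolerance(llvm::sys::PathWithStatus(A),
                                         llvm::sys::PathWithStatus(B),
                                         /*AbsTol=*/1e-6, /*RelTol=*/1e-6,
                                         &Err);
  if (Res == 2)
    std::fprintf(stderr, "diff failed: %s\n", Err.c_str());
  return Res == 0;
}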
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
new file mode 100644
index 000000000000..1568342e9c9d
--- /dev/null
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -0,0 +1,426 @@
+//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a hash set that can be used to remove duplication of
+// nodes in a graph. This code was originally created by Chris Lattner for use
+// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeIDRef Implementation
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+/// used to lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeIDRef::ComputeHash() const {
+ // This is adapted from SuperFastHash by Paul Hsieh.
+ unsigned Hash = static_cast<unsigned>(Size);
+ for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
+ unsigned Data = *BP;
+ Hash += Data & 0xFFFF;
+ unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
+ Hash = (Hash << 16) ^ Tmp;
+ Hash += Hash >> 11;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ Hash ^= Hash << 3;
+ Hash += Hash >> 5;
+ Hash ^= Hash << 4;
+ Hash += Hash >> 17;
+ Hash ^= Hash << 25;
+ Hash += Hash >> 6;
+ return Hash;
+}
+
+bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
+ if (Size != RHS.Size) return false;
+ return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeID Implementation
+
+/// Add* - Add various data types to Bit data.
+///
+void FoldingSetNodeID::AddPointer(const void *Ptr) {
+ // Note: this adds pointers to the hash using sizes and endianness that
+ // depend on the host. It doesn't matter however, because hashing on
+  // pointer values is inherently unstable. Nothing should depend on the
+ // ordering of nodes in the folding set.
+ intptr_t PtrI = (intptr_t)Ptr;
+ Bits.push_back(unsigned(PtrI));
+ if (sizeof(intptr_t) > sizeof(unsigned))
+ Bits.push_back(unsigned(uint64_t(PtrI) >> 32));
+}
+void FoldingSetNodeID::AddInteger(signed I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(unsigned I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(long I) {
+ AddInteger((unsigned long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long I) {
+ if (sizeof(long) == sizeof(int))
+ AddInteger(unsigned(I));
+ else if (sizeof(long) == sizeof(long long)) {
+ AddInteger((unsigned long long)I);
+ } else {
+ llvm_unreachable("unexpected sizeof(long)");
+ }
+}
+void FoldingSetNodeID::AddInteger(long long I) {
+ AddInteger((unsigned long long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long long I) {
+ AddInteger(unsigned(I));
+ if ((uint64_t)(unsigned)I != I)
+ Bits.push_back(unsigned(I >> 32));
+}
+
+void FoldingSetNodeID::AddString(StringRef String) {
+ unsigned Size = String.size();
+ Bits.push_back(Size);
+ if (!Size) return;
+
+ unsigned Units = Size / 4;
+ unsigned Pos = 0;
+ const unsigned *Base = (const unsigned*) String.data();
+
+ // If the string is aligned do a bulk transfer.
+ if (!((intptr_t)Base & 3)) {
+ Bits.append(Base, Base + Units);
+ Pos = (Units + 1) * 4;
+ } else {
+ // Otherwise do it the hard way.
+ // To be compatible with above bulk transfer, we need to take endianness
+ // into account.
+ if (sys::isBigEndianHost()) {
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+ ((unsigned char)String[Pos - 3] << 16) |
+ ((unsigned char)String[Pos - 2] << 8) |
+ (unsigned char)String[Pos - 1];
+ Bits.push_back(V);
+ }
+ } else {
+ assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 1] << 24) |
+ ((unsigned char)String[Pos - 2] << 16) |
+ ((unsigned char)String[Pos - 3] << 8) |
+ (unsigned char)String[Pos - 4];
+ Bits.push_back(V);
+ }
+ }
+ }
+
+ // With the leftover bits.
+ unsigned V = 0;
+ // Pos will have overshot size by 4 - #bytes left over.
+ // No need to take endianness into account here - this is always executed.
+ switch (Pos - Size) {
+ case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
+ case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
+ case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
+ default: return; // Nothing left.
+ }
+
+ Bits.push_back(V);
+}
+
+// AddNodeID - Adds the Bit data of another ID to *this.
+void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
+ Bits.append(ID.Bits.begin(), ID.Bits.end());
+}
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeID::ComputeHash() const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
+ return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
+}
+
+/// Intern - Copy this node's data to a memory region allocated from the
+/// given allocator and return a FoldingSetNodeIDRef describing the
+/// interned data.
+FoldingSetNodeIDRef
+FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
+ unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
+ std::uninitialized_copy(Bits.begin(), Bits.end(), New);
+ return FoldingSetNodeIDRef(New, Bits.size());
+}
+
+//===----------------------------------------------------------------------===//
+/// Helper functions for FoldingSetImpl.
+
+/// GetNextPtr - In order to save space, each bucket is a
+/// singly-linked-list. In order to make deletion more efficient, we make
+/// the list circular, so we can delete a node without computing its hash.
+/// The problem with this is that the starts of the hash buckets are not
+/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
+/// use GetBucketPtr when this happens.
+static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
+ // The low bit is set if this is the pointer back to the bucket.
+ if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
+ return 0;
+
+ return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
+}
+
+
+/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
+/// testing.
+static void **GetBucketPtr(void *NextInBucketPtr) {
+ intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
+ assert((Ptr & 1) && "Not a bucket pointer");
+ return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
+}
+
+/// GetBucketFor - Hash the specified node ID and return the hash bucket for
+/// the specified ID.
+static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
+ // NumBuckets is always a power of 2.
+ unsigned BucketNum = Hash & (NumBuckets-1);
+ return Buckets + BucketNum;
+}
+
+/// AllocateBuckets - Allocate initialized bucket memory.
+static void **AllocateBuckets(unsigned NumBuckets) {
+ void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+ return Buckets;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetImpl Implementation
+
+FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
+ assert(5 < Log2InitSize && Log2InitSize < 32 &&
+ "Initial hash table size out of range");
+ NumBuckets = 1 << Log2InitSize;
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
+}
+FoldingSetImpl::~FoldingSetImpl() {
+ free(Buckets);
+}
+void FoldingSetImpl::clear() {
+ // Set all but the last bucket to null pointers.
+ memset(Buckets, 0, NumBuckets*sizeof(void*));
+
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+
+ // Reset the node count to zero.
+ NumNodes = 0;
+}
+
+/// GrowHashTable - Double the size of the hash table and rehash everything.
+///
+void FoldingSetImpl::GrowHashTable() {
+ void **OldBuckets = Buckets;
+ unsigned OldNumBuckets = NumBuckets;
+ NumBuckets <<= 1;
+
+ // Clear out new buckets.
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
+
+ // Walk the old buckets, rehashing nodes into their new place.
+ FoldingSetNodeID TempID;
+ for (unsigned i = 0; i != OldNumBuckets; ++i) {
+ void *Probe = OldBuckets[i];
+ if (!Probe) continue;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ // Figure out the next link, remove NodeInBucket from the old link.
+ Probe = NodeInBucket->getNextInBucket();
+ NodeInBucket->SetNextInBucket(0);
+
+ // Insert the node into the new bucket, after recomputing the hash.
+ InsertNode(NodeInBucket,
+ GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
+ Buckets, NumBuckets));
+ TempID.clear();
+ }
+ }
+
+ free(OldBuckets);
+}
+
+/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
+/// return it. If not, return the insertion token that will make insertion
+/// faster.
+FoldingSetImpl::Node
+*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+
+ void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
+ void *Probe = *Bucket;
+
+ InsertPos = 0;
+
+ FoldingSetNodeID TempID;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ if (NodeEquals(NodeInBucket, ID, TempID))
+ return NodeInBucket;
+ TempID.clear();
+
+ Probe = NodeInBucket->getNextInBucket();
+ }
+
+ // Didn't find the node, return null with the bucket as the InsertPos.
+ InsertPos = Bucket;
+ return 0;
+}
+
+/// InsertNode - Insert the specified node into the folding set, knowing that it
+/// is not already in the map. InsertPos must be obtained from
+/// FindNodeOrInsertPos.
+void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
+ assert(N->getNextInBucket() == 0);
+ // Do we need to grow the hashtable?
+ if (NumNodes+1 > NumBuckets*2) {
+ GrowHashTable();
+ FoldingSetNodeID TempID;
+ InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
+ }
+
+ ++NumNodes;
+
+ /// The insert position is actually a bucket pointer.
+ void **Bucket = static_cast<void**>(InsertPos);
+
+ void *Next = *Bucket;
+
+ // If this is the first insertion into this bucket, its next pointer will be
+ // null. Pretend as if it pointed to itself, setting the low bit to indicate
+ // that it is a pointer to the bucket.
+ if (Next == 0)
+ Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
+
+ // Set the node's next pointer, and make the bucket point to the node.
+ N->SetNextInBucket(Next);
+ *Bucket = N;
+}
+
+/// RemoveNode - Remove a node from the folding set, returning true if one was
+/// removed or false if the node was not in the folding set.
+bool FoldingSetImpl::RemoveNode(Node *N) {
+ // Because each bucket is a circular list, we don't need to compute N's hash
+ // to remove it.
+ void *Ptr = N->getNextInBucket();
+ if (Ptr == 0) return false; // Not in folding set.
+
+ --NumNodes;
+ N->SetNextInBucket(0);
+
+ // Remember what N originally pointed to, either a bucket or another node.
+ void *NodeNextPtr = Ptr;
+
+ // Chase around the list until we find the node (or bucket) which points to N.
+ while (true) {
+ if (Node *NodeInBucket = GetNextPtr(Ptr)) {
+ // Advance pointer.
+ Ptr = NodeInBucket->getNextInBucket();
+
+ // We found a node that points to N, change it to point to N's next node,
+ // removing N from the list.
+ if (Ptr == N) {
+ NodeInBucket->SetNextInBucket(NodeNextPtr);
+ return true;
+ }
+ } else {
+ void **Bucket = GetBucketPtr(Ptr);
+ Ptr = *Bucket;
+
+ // If we found that the bucket points to N, update the bucket to point to
+ // whatever is next.
+ if (Ptr == N) {
+ *Bucket = NodeNextPtr;
+ return true;
+ }
+ }
+ }
+}
+
+/// GetOrInsertNode - If there is an existing simple Node exactly
+/// equal to the specified node, return it. Otherwise, insert 'N' and return
+/// it instead.
+FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
+ FoldingSetNodeID ID;
+ GetNodeProfile(N, ID);
+ void *IP;
+ if (Node *E = FindNodeOrInsertPos(ID, IP))
+ return E;
+ InsertNode(N, IP);
+ return N;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetIteratorImpl Implementation
+
+FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
+ // Skip to the first non-null non-self-cycle bucket.
+ while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
+ ++Bucket;
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+}
+
+void FoldingSetIteratorImpl::advance() {
+ // If there is another link within this bucket, go to it.
+ void *Probe = NodePtr->getNextInBucket();
+
+ if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
+ NodePtr = NextNodeInBucket;
+ else {
+ // Otherwise, this is the last link in this bucket.
+ void **Bucket = GetBucketPtr(Probe);
+
+ // Skip to the next non-null non-self-cycle bucket.
+ do {
+ ++Bucket;
+ } while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetBucketIteratorImpl Implementation
+
+FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
+ Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
+}
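
The routines above are the non-templated core behind the FoldingSet<T> wrapper declared in llvm/ADT/FoldingSet.h. As a minimal sketch of the usual find-or-insert pattern (assuming that header's interface; MyNode and getOrCreate are hypothetical and not part of this diff):

#include "llvm/ADT/FoldingSet.h"

// Hypothetical client node: profiles a single integer.
struct MyNode : llvm::FoldingSetNode {
  int Value;
  explicit MyNode(int V) : Value(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

static MyNode *getOrCreate(llvm::FoldingSet<MyNode> &Set, int V) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(V);                 // Must mirror MyNode::Profile().
  void *InsertPos = 0;
  if (MyNode *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
    return Existing;                // Already uniqued.
  MyNode *N = new MyNode(V);        // Caller owns the allocation.
  Set.InsertNode(N, InsertPos);     // Reuses the probe done above.
  return N;
}

The void* token filled in by FindNodeOrInsertPos is simply the bucket pointer computed above, which is why InsertNode can skip a second hash probe.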
diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp
new file mode 100644
index 000000000000..231ae48759e2
--- /dev/null
+++ b/contrib/llvm/lib/Support/FormattedStream.cpp
@@ -0,0 +1,101 @@
+//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of formatted_raw_ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+/// CountColumns - Examine the given char sequence and figure out which
+/// column we end up in after output.
+///
+static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) {
+ // Keep track of the current column by scanning the string for
+ // special characters
+
+ for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {
+ ++Column;
+ if (*Ptr == '\n' || *Ptr == '\r')
+ Column = 0;
+ else if (*Ptr == '\t')
+ // Assumes tab stop = 8 characters.
+ Column += (8 - (Column & 0x7)) & 0x7;
+ }
+
+ return Column;
+}
+
+/// ComputeColumn - Examine the current output and figure out which
+/// column we end up in after output.
+void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
+ // If our previous scan pointer is inside the buffer, assume we already
+ // scanned those bytes. This depends on raw_ostream to not change our buffer
+ // in unexpected ways.
+ if (Ptr <= Scanned && Scanned <= Ptr + Size) {
+ // Scan all characters added since our last scan to determine the new
+ // column.
+ ColumnScanned = CountColumns(ColumnScanned, Scanned,
+ Size - (Scanned - Ptr));
+ } else
+ ColumnScanned = CountColumns(ColumnScanned, Ptr, Size);
+
+ // Update the scanning pointer.
+ Scanned = Ptr + Size;
+}
+
+/// PadToColumn - Align the output to some column number.
+///
+/// \param NewCol - The column to move to.
+///
+formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(getBufferStart(), GetNumBytesInBuffer());
+
+ // Output spaces until we reach the desired column.
+ indent(std::max(int(NewCol - ColumnScanned), 1));
+ return *this;
+}
+
+void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(Ptr, Size);
+
+ // Write the data to the underlying stream (which is unbuffered, so
+ // the data will be immediately written out).
+ TheStream->write(Ptr, Size);
+
+ // Reset the scanning pointer.
+ Scanned = 0;
+}
+
+/// fouts() - This returns a reference to a formatted_raw_ostream for
+/// standard output. Use it like: fouts() << "foo" << "bar";
+formatted_raw_ostream &llvm::fouts() {
+ static formatted_raw_ostream S(outs());
+ return S;
+}
+
+/// ferrs() - This returns a reference to a formatted_raw_ostream for
+/// standard error. Use it like: ferrs() << "foo" << "bar";
+formatted_raw_ostream &llvm::ferrs() {
+ static formatted_raw_ostream S(errs());
+ return S;
+}
+
+/// fdbgs() - This returns a reference to a formatted_raw_ostream for
+/// the debug stream. Use it like: fdbgs() << "foo" << "bar";
+formatted_raw_ostream &llvm::fdbgs() {
+ static formatted_raw_ostream S(dbgs());
+ return S;
+}
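
As a small usage sketch of the column tracking implemented above (assuming only the formatted_raw_ostream interface from llvm/Support/FormattedStream.h; printTwoColumns is hypothetical):

#include "llvm/Support/FormattedStream.h"

static void printTwoColumns() {
  llvm::formatted_raw_ostream &OS = llvm::fouts();
  OS << "label:";
  OS.PadToColumn(16);   // Pads with spaces up to column 16 (at least one).
  OS << "value\n";      // '\n' resets the tracked column to zero.
  OS << "x:";
  OS.PadToColumn(16);
  OS << "42\n";
}

Note the std::max(..., 1) in PadToColumn above: a column that has already been passed still receives a single separating space.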
diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp
new file mode 100644
index 000000000000..0dba28a2530c
--- /dev/null
+++ b/contrib/llvm/lib/Support/GraphWriter.cpp
@@ -0,0 +1,200 @@
+//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements misc. GraphWriter support routines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+std::string llvm::DOT::EscapeString(const std::string &Label) {
+ std::string Str(Label);
+ for (unsigned i = 0; i != Str.length(); ++i)
+ switch (Str[i]) {
+ case '\n':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i;
+ Str[i] = 'n';
+ break;
+ case '\t':
+ Str.insert(Str.begin()+i, ' '); // Convert to two spaces
+ ++i;
+ Str[i] = ' ';
+ break;
+ case '\\':
+ if (i+1 != Str.length())
+ switch (Str[i+1]) {
+ case 'l': continue; // don't disturb \l
+ case '|': case '{': case '}':
+ Str.erase(Str.begin()+i); continue;
+ default: break;
+ }
+ case '{': case '}':
+ case '<': case '>':
+ case '|': case '"':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i; // don't infinite loop
+ break;
+ }
+ return Str;
+}
+
+
+
+void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
+ GraphProgram::Name program) {
+ std::string ErrMsg;
+#if HAVE_GRAPHVIZ
+ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
+
+ std::vector<const char*> args;
+ args.push_back(Graphviz.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ errs() << "Running 'Graphviz' program... ";
+ if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+
+#elif HAVE_XDOT_PY
+ std::vector<const char*> args;
+ args.push_back(LLVM_PATH_XDOT_PY);
+ args.push_back(Filename.c_str());
+
+ switch (program) {
+ case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break;
+ case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break;
+ case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
+ case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
+ case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+ default: errs() << "Unknown graph layout name; using default.\n";
+ }
+
+ args.push_back(0);
+
+ errs() << "Running 'xdot.py' program... ";
+ if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
+ &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+
+#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
+ HAVE_TWOPI || HAVE_CIRCO))
+ sys::Path PSFilename = Filename;
+ PSFilename.appendSuffix("ps");
+
+ sys::Path prog;
+
+ // Set default grapher
+#if HAVE_CIRCO
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+#if HAVE_TWOPI
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if HAVE_NEATO
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if HAVE_FDP
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if HAVE_DOT
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+
+ // Find which program the user wants
+#if HAVE_DOT
+ if (program == GraphProgram::DOT)
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+#if (HAVE_FDP)
+ if (program == GraphProgram::FDP)
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if (HAVE_NEATO)
+ if (program == GraphProgram::NEATO)
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if (HAVE_TWOPI)
+ if (program == GraphProgram::TWOPI)
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if (HAVE_CIRCO)
+ if (program == GraphProgram::CIRCO)
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+
+ std::vector<const char*> args;
+ args.push_back(prog.c_str());
+ args.push_back("-Tps");
+ args.push_back("-Nfontname=Courier");
+ args.push_back("-Gsize=7.5,10");
+ args.push_back(Filename.c_str());
+ args.push_back("-o");
+ args.push_back(PSFilename.c_str());
+ args.push_back(0);
+
+ errs() << "Running '" << prog.str() << "' program... ";
+
+ if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ errs() << " done. \n";
+
+ sys::Path gv(LLVM_PATH_GV);
+ args.clear();
+ args.push_back(gv.c_str());
+ args.push_back(PSFilename.c_str());
+ args.push_back("--spartan");
+ args.push_back(0);
+
+ ErrMsg.clear();
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
+ errs() << "Error: " << ErrMsg << "\n";
+ Filename.eraseFromDisk();
+ PSFilename.eraseFromDisk();
+ }
+ else {
+ sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph files: " << Filename.str() << " "
+ << PSFilename.str() << "\n";
+ }
+#elif HAVE_DOTTY
+ sys::Path dotty(LLVM_PATH_DOTTY);
+
+ std::vector<const char*> args;
+ args.push_back(dotty.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ errs() << "Running 'dotty' program... ";
+ if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ } else {
+// Dotty spawns another app and doesn't wait until it returns
+#if defined (__MINGW32__) || defined (_WINDOWS)
+ return;
+#endif
+ Filename.eraseFromDisk();
+ }
+#endif
+}
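
The escaping rules in DOT::EscapeString are easiest to see on a concrete label. A sketch, assuming only the declarations from llvm/Support/GraphWriter.h and llvm/Support/raw_ostream.h (showEscaping is hypothetical):

#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static void showEscaping() {
  // Per the loop above: '{' '}' '<' '>' '|' '"' gain a backslash, '\n'
  // becomes the two characters "\n", '\t' becomes two spaces, and an
  // existing "\l" is left untouched.
  std::string Label = "entry{1}\tcond|\"x\"\n";
  llvm::errs() << llvm::DOT::EscapeString(Label) << "\n";
}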
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
new file mode 100644
index 000000000000..c525a1228129
--- /dev/null
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -0,0 +1,315 @@
+//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Host concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Host.h"
+#include "llvm/Config/config.h"
+#include <string.h>
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Host.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Host.inc"
+#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//
+// Implementations of the CPU detection routines
+//
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+
+/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+std::string sys::getHostCPUName() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectX86FamilyModel(EAX, Family, Model);
+
+ bool HasSSE3 = (ECX & 0x1);
+ GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ switch (Model) {
+ case 0: // Intel486 DX processors
+ case 1: // Intel486 DX processors
+ case 2: // Intel486 SX processors
+ case 3: // Intel487 processors, IntelDX2 OverDrive processors,
+ // IntelDX2 processors
+ case 4: // Intel486 SL processor
+ case 5: // IntelSX2 processors
+ case 7: // Write-Back Enhanced IntelDX2 processors
+ case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
+ default: return "i486";
+ }
+ case 5:
+ switch (Model) {
+ case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
+ // Pentium processors (60, 66)
+ case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
+ // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+ // 150, 166, 200)
+ case 3: // Pentium OverDrive processors for Intel486 processor-based
+ // systems
+ return "pentium";
+
+ case 4: // Pentium OverDrive processor with MMX technology for Pentium
+ // processor (75, 90, 100, 120, 133), Pentium processor with
+ // MMX technology (166, 200)
+ return "pentium-mmx";
+
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: // Pentium Pro processor
+ return "pentiumpro";
+
+ case 3: // Intel Pentium II OverDrive processor, Pentium II processor,
+ // model 03
+ case 5: // Pentium II processor, model 05, Pentium II Xeon processor,
+ // model 05, and Intel Celeron processor, model 05
+ case 6: // Celeron processor, model 06
+ return "pentium2";
+
+ case 7: // Pentium III processor, model 07, and Pentium III Xeon
+ // processor, model 07
+ case 8: // Pentium III processor, model 08, Pentium III Xeon processor,
+ // model 08, and Celeron processor, model 08
+ case 10: // Pentium III Xeon processor, model 0Ah
+ case 11: // Pentium III processor, model 0Bh
+ return "pentium3";
+
+ case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+ case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+ // 0Dh. All processors are manufactured using the 90 nm process.
+ return "pentium-m";
+
+ case 14: // Intel Core Duo processor, Intel Core Solo processor, model
+ // 0Eh. All processors are manufactured using the 65 nm process.
+ return "yonah";
+
+ case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+ // mobile processor, Intel Core 2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 22: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ return "core2";
+
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
+ return "i686"; // FIXME: ???
+
+ case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+ // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+ return "penryn";
+
+ case 26: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
+ // As found in a Summer 2010 model iMac.
+ case 37: // Intel Core i7, laptop version.
+ return "corei7";
+
+ // SandyBridge:
+ case 42: // Intel Core i7 processor. All processors are manufactured
+ // using the 32 nm process.
+ case 44: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
+ case 45:
+ return "corei7-avx";
+
+ case 28: // Intel Atom processor. All processors are manufactured using
+ // the 45 nm process
+ return "atom";
+
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
+ // model 00h and manufactured using the 0.18 micron process.
+ case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+ // processor MP, and Intel Celeron processor. All processors are
+ // model 01h and manufactured using the 0.18 micron process.
+ case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
+ // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+ // processor, and Mobile Intel Celeron processor. All processors
+ // are model 02h and manufactured using the 0.13 micron process.
+ return (Em64T) ? "x86-64" : "pentium4";
+
+ case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+ // processor. All processors are model 03h and manufactured using
+ // the 90 nm process.
+ case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+ // Pentium D processor, Intel Xeon processor, Intel Xeon
+ // processor MP, Intel Celeron D processor. All processors are
+ // model 04h and manufactured using the 90 nm process.
+ case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
+ // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+ // MP, Intel Celeron D processor. All processors are model 06h
+ // and manufactured using the 65 nm process.
+ return (Em64T) ? "nocona" : "prescott";
+
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3)
+ return "k8-sse3";
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ case 16:
+ return "amdfam10";
+ default:
+ return "generic";
+ }
+ }
+ return "generic";
+}
+#else
+std::string sys::getHostCPUName() {
+ return "generic";
+}
+#endif
+
+bool sys::getHostCPUFeatures(StringMap<bool> &Features){
+ return false;
+}
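
A usage sketch for the detection code above, assuming the declaration of sys::getHostCPUName() in llvm/Support/Host.h (reportHostCPU is hypothetical):

#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

static void reportHostCPU() {
  // Falls back to "generic" when CPUID is unavailable or the vendor,
  // family, or model is not recognized by the tables above.
  llvm::errs() << "host cpu: " << llvm::sys::getHostCPUName() << "\n";
}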
diff --git a/contrib/llvm/lib/Support/IncludeFile.cpp b/contrib/llvm/lib/Support/IncludeFile.cpp
new file mode 100644
index 000000000000..5da88261ce53
--- /dev/null
+++ b/contrib/llvm/lib/Support/IncludeFile.cpp
@@ -0,0 +1,20 @@
+//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IncludeFile constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/IncludeFile.h"
+
+using namespace llvm;
+
+// This constructor is used to ensure linking of other modules. See the
+// llvm/Support/IncludeFile.h header for details.
+IncludeFile::IncludeFile(const void*) {}
diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp
new file mode 100644
index 000000000000..11344956e4c9
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntEqClasses.cpp
@@ -0,0 +1,70 @@
+//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntEqClasses.h"
+
+using namespace llvm;
+
+void IntEqClasses::grow(unsigned N) {
+ assert(NumClasses == 0 && "grow() called after compress().");
+ EC.reserve(N);
+ while (EC.size() < N)
+ EC.push_back(EC.size());
+}
+
+void IntEqClasses::join(unsigned a, unsigned b) {
+ assert(NumClasses == 0 && "join() called after compress().");
+ unsigned eca = EC[a];
+ unsigned ecb = EC[b];
+ // Update pointers while searching for the leaders, compressing the paths
+ // incrementally. The larger leader will eventually be updated, joining the
+ // classes.
+ while (eca != ecb)
+ if (eca < ecb)
+ EC[b] = eca, b = ecb, ecb = EC[b];
+ else
+ EC[a] = ecb, a = eca, eca = EC[a];
+}
+
+unsigned IntEqClasses::findLeader(unsigned a) const {
+ assert(NumClasses == 0 && "findLeader() called after compress().");
+ while (a != EC[a])
+ a = EC[a];
+ return a;
+}
+
+void IntEqClasses::compress() {
+ if (NumClasses)
+ return;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]];
+}
+
+void IntEqClasses::uncompress() {
+ if (!NumClasses)
+ return;
+ SmallVector<unsigned, 8> Leader;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ if (EC[i] < Leader.size())
+ EC[i] = Leader[EC[i]];
+ else
+ Leader.push_back(EC[i] = i);
+ NumClasses = 0;
+}
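
A short sketch of the intended grow/join/compress life cycle, assuming the IntEqClasses interface from llvm/ADT/IntEqClasses.h (demoIntEqClasses is hypothetical):

#include "llvm/ADT/IntEqClasses.h"
#include <cassert>

static void demoIntEqClasses() {
  llvm::IntEqClasses EC;
  EC.grow(5);        // Classes: {0} {1} {2} {3} {4}
  EC.join(0, 3);     // {0,3} {1} {2} {4}
  EC.join(1, 2);     // {0,3} {1,2} {4}
  EC.compress();     // Renumbers the classes 0..2 and freezes the structure.
  assert(EC[0] == EC[3] && EC[1] == EC[2] && EC[0] != EC[4]);
  assert(EC.getNumClasses() == 3);
}

After compress() only lookups are allowed; uncompress() above rebuilds the leader links so that further join() calls become legal again.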
diff --git a/contrib/llvm/lib/Support/IntervalMap.cpp b/contrib/llvm/lib/Support/IntervalMap.cpp
new file mode 100644
index 000000000000..4dfcc404ca42
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntervalMap.cpp
@@ -0,0 +1,161 @@
+//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the few non-templated functions in IntervalMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntervalMap.h"
+
+namespace llvm {
+namespace IntervalMapImpl {
+
+void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) {
+ assert(!path.empty() && "Can't replace missing root");
+ path.front() = Entry(Root, Size, Offsets.first);
+ path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second));
+}
+
+NodeRef Path::getLeftSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go left.
+ unsigned l = Level - 1;
+ while (l && path[l].offset == 0)
+ --l;
+
+ // We can't go left.
+ if (path[l].offset == 0)
+ return NodeRef();
+
+ // NR is the subtree containing our left sibling.
+ NodeRef NR = path[l].subtree(path[l].offset - 1);
+
+ // Keep right all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(NR.size() - 1);
+ return NR;
+}
+
+void Path::moveLeft(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go left.
+ unsigned l = 0;
+ if (valid()) {
+ l = Level - 1;
+ while (path[l].offset == 0) {
+ assert(l != 0 && "Cannot move beyond begin()");
+ --l;
+ }
+ } else if (height() < Level)
+ // end() may have created a height=0 path.
+ path.resize(Level + 1, Entry(0, 0, 0));
+
+ // NR is the subtree containing our left sibling.
+ --path[l].offset;
+ NodeRef NR = subtree(l);
+
+ // Get the rightmost node in the subtree.
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, NR.size() - 1);
+ NR = NR.subtree(NR.size() - 1);
+ }
+ path[l] = Entry(NR, NR.size() - 1);
+}
+
+NodeRef Path::getRightSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // We can't go right.
+ if (atLastEntry(l))
+ return NodeRef();
+
+ // NR is the subtree containing our right sibling.
+ NodeRef NR = path[l].subtree(path[l].offset + 1);
+
+ // Keep left all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(0);
+ return NR;
+}
+
+void Path::moveRight(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // NR is the subtree containing our right sibling. If we hit end(), we have
+ // offset(0) == node(0).size().
+ if (++path[l].offset == path[l].size)
+ return;
+ NodeRef NR = subtree(l);
+
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, 0);
+ NR = NR.subtree(0);
+ }
+ path[l] = Entry(NR, 0);
+}
+
+
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+ const unsigned *CurSize, unsigned NewSize[],
+ unsigned Position, bool Grow) {
+ assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements");
+ assert(Position <= Elements && "Invalid position");
+ if (!Nodes)
+ return IdxPair();
+
+ // Trivial algorithm: left-leaning even distribution.
+ const unsigned PerNode = (Elements + Grow) / Nodes;
+ const unsigned Extra = (Elements + Grow) % Nodes;
+ IdxPair PosPair = IdxPair(Nodes, 0);
+ unsigned Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ Sum += NewSize[n] = PerNode + (n < Extra);
+ if (PosPair.first == Nodes && Sum > Position)
+ PosPair = IdxPair(n, Position - (Sum - NewSize[n]));
+ }
+ assert(Sum == Elements + Grow && "Bad distribution sum");
+
+ // Subtract the Grow element that was added.
+ if (Grow) {
+ assert(PosPair.first < Nodes && "Bad algebra");
+ assert(NewSize[PosPair.first] && "Too few elements to need Grow");
+ --NewSize[PosPair.first];
+ }
+
+#ifndef NDEBUG
+ Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ assert(NewSize[n] <= Capacity && "Overallocated node");
+ Sum += NewSize[n];
+ }
+ assert(Sum == Elements && "Bad distribution sum");
+#endif
+
+ return PosPair;
+}
+
+} // namespace IntervalMapImpl
+} // namespace llvm
+
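A worked call of the left-leaning distribution above, assuming IntervalMapImpl::distribute() and IdxPair as declared in llvm/ADT/IntervalMap.h; the concrete sizes are illustrative only:

#include "llvm/ADT/IntervalMap.h"
#include <cassert>

static void demoDistribute() {
  // Three nodes of capacity 8 currently hold 10 elements; we want room for
  // one more (Grow) at position 4.
  const unsigned CurSize[3] = {8, 2, 0}; // Not consulted by this strategy.
  unsigned NewSize[3];
  llvm::IntervalMapImpl::IdxPair P =
      llvm::IntervalMapImpl::distribute(3, 10, 8, CurSize, NewSize,
                                        4, /*Grow=*/true);
  // 11 slots split left-leaning as {4,4,3}; the extra slot is then taken
  // back from the node that receives position 4, giving {4,3,3}.
  assert(NewSize[0] == 4 && NewSize[1] == 3 && NewSize[2] == 3);
  assert(P.first == 1 && P.second == 0); // New element lands in node 1.
  (void)P;
}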
diff --git a/contrib/llvm/lib/Support/IsInf.cpp b/contrib/llvm/lib/Support/IsInf.cpp
new file mode 100644
index 000000000000..d6da0c99e8d8
--- /dev/null
+++ b/contrib/llvm/lib/Support/IsInf.cpp
@@ -0,0 +1,49 @@
+//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isinf()
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISINF_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISINF_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISINF_IN_CMATH
+# include <cmath>
+using std::isinf;
+#elif HAVE_FINITE_IN_IEEEFP_H
+// A handy workaround I found at http://www.unixguide.net/sun/faq ...
+// apparently this has been a problem with Solaris for years.
+# include <ieeefp.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isinf(X) (!_finite(X))
+#elif defined(_AIX) && defined(__GNUC__)
+// GCC's fixincludes seems to be removing the isinf() declaration from the
+// system header /usr/include/math.h
+# include <math.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(__hpux)
+// HP-UX is "special"
+#include <math.h>
+static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); }
+#else
+# error "Don't know how to get isinf()"
+#endif
+
+namespace llvm {
+
+int IsInf(float f) { return isinf(f); }
+int IsInf(double d) { return isinf(d); }
+
+} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/IsNAN.cpp b/contrib/llvm/lib/Support/IsNAN.cpp
new file mode 100644
index 000000000000..bdfdfbf3155d
--- /dev/null
+++ b/contrib/llvm/lib/Support/IsNAN.cpp
@@ -0,0 +1,33 @@
+//===-- IsNAN.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isnan().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISNAN_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISNAN_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISNAN_IN_CMATH
+# include <cmath>
+using std::isnan;
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isnan _isnan
+#else
+# error "Don't know how to get isnan()"
+#endif
+
+namespace llvm {
+ int IsNAN(float f) { return isnan(f); }
+ int IsNAN(double d) { return isnan(d); }
+} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/ManagedStatic.cpp b/contrib/llvm/lib/Support/ManagedStatic.cpp
new file mode 100644
index 000000000000..c767c15e71c9
--- /dev/null
+++ b/contrib/llvm/lib/Support/ManagedStatic.cpp
@@ -0,0 +1,75 @@
+//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ManagedStatic class and llvm_shutdown().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Atomic.h"
+#include <cassert>
+using namespace llvm;
+
+static const ManagedStaticBase *StaticList = 0;
+
+void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
+ void (*Deleter)(void*)) const {
+ if (llvm_is_multithreaded()) {
+ llvm_acquire_global_lock();
+
+ if (Ptr == 0) {
+ void* tmp = Creator ? Creator() : 0;
+
+ sys::MemoryFence();
+ Ptr = tmp;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+
+ llvm_release_global_lock();
+ } else {
+ assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
+ "Partially initialized ManagedStatic!?");
+ Ptr = Creator ? Creator() : 0;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+}
+
+void ManagedStaticBase::destroy() const {
+ assert(DeleterFn && "ManagedStatic not initialized correctly!");
+ assert(StaticList == this &&
+ "Not destroyed in reverse order of construction?");
+ // Unlink from list.
+ StaticList = Next;
+ Next = 0;
+
+ // Destroy memory.
+ DeleterFn(Ptr);
+
+ // Cleanup.
+ Ptr = 0;
+ DeleterFn = 0;
+}
+
+/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
+void llvm::llvm_shutdown() {
+ while (StaticList)
+ StaticList->destroy();
+
+ if (llvm_is_multithreaded()) llvm_stop_multithreaded();
+}
+
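A sketch of the lazy-construction pattern that the registration code above services, assuming the ManagedStatic<T> template from llvm/Support/ManagedStatic.h; Registry and the helper functions are hypothetical:

#include "llvm/Support/ManagedStatic.h"
#include <string>
#include <vector>

struct Registry { std::vector<std::string> Names; };

// Nothing is constructed until the first dereference.
static llvm::ManagedStatic<Registry> TheRegistry;

void addName(const std::string &S) {
  TheRegistry->Names.push_back(S); // First use calls RegisterManagedStatic.
}

void shutdownExample() {
  llvm::llvm_shutdown();           // Destroys managed statics in reverse order.
}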
diff --git a/contrib/llvm/lib/Support/Memory.cpp b/contrib/llvm/lib/Support/Memory.cpp
new file mode 100644
index 000000000000..ac7af0ae8bdf
--- /dev/null
+++ b/contrib/llvm/lib/Support/Memory.cpp
@@ -0,0 +1,74 @@
+//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for allocating memory and dealing
+// with memory-mapped files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Memory.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Memory.inc"
+#endif
+
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted, it must invalidate the instruction cache on some
+/// platforms.
+void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
+ size_t Len) {
+
+// icache invalidation for PPC and ARM.
+#if defined(__APPLE__)
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+ sys_icache_invalidate(Addr, Len);
+# endif
+
+#else
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+ const size_t LineSize = 32;
+
+ const intptr_t Mask = ~(LineSize - 1);
+ const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+ const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("dcbf 0, %0" : : "r"(Line));
+ asm volatile("sync");
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("icbi 0, %0" : : "r"(Line));
+ asm volatile("isync");
+# elif defined(__arm__) && defined(__GNUC__) && !defined(__FreeBSD__)
+ // FIXME: Can we safely always call this for __GNUC__ everywhere?
+ char *Start = (char*) Addr;
+ char *End = Start + Len;
+ __clear_cache(Start, End);
+# endif
+
+#endif // end apple
+
+ ValgrindDiscardTranslations(Addr, Len);
+}
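
The PowerPC branch above rounds the invalidated range out to 32-byte cache lines before issuing dcbf/icbi. A standalone sketch of that rounding arithmetic, with illustrative addresses:

#include <stdint.h>
#include <cassert>

static void demoLineRounding() {
  const intptr_t LineSize = 32;
  const intptr_t Mask = ~(LineSize - 1);
  intptr_t Addr = 0x1004, Len = 8;
  intptr_t StartLine = Addr & Mask;                      // 0x1000
  intptr_t EndLine = (Addr + Len + LineSize - 1) & Mask; // 0x1020
  assert(StartLine == 0x1000 && EndLine == 0x1020);
  // The loops above would touch each 32-byte line in [StartLine, EndLine),
  // i.e. exactly one line for this 8-byte range.
}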
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
new file mode 100644
index 000000000000..d264be9aced1
--- /dev/null
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -0,0 +1,376 @@
+//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MemoryBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <new>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#include <sys/uio.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+namespace { const llvm::error_code success; }
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer implementation itself.
+//===----------------------------------------------------------------------===//
+
+MemoryBuffer::~MemoryBuffer() { }
+
+/// init - Initialize this MemoryBuffer as a reference to externally allocated
+/// memory, memory that we know is already null terminated.
+void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
+ bool RequiresNullTerminator) {
+ assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
+ "Buffer is not null terminated!");
+ BufferStart = BufStart;
+ BufferEnd = BufEnd;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBufferMem implementation.
+//===----------------------------------------------------------------------===//
+
+/// CopyStringRef - Copies contents of a StringRef into a block of memory and
+/// null-terminates it.
+static void CopyStringRef(char *Memory, StringRef Data) {
+ memcpy(Memory, Data.data(), Data.size());
+ Memory[Data.size()] = 0; // Null terminate string.
+}
+
+/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
+template <typename T>
+static T *GetNamedBuffer(StringRef Buffer, StringRef Name,
+ bool RequiresNullTerminator) {
+ char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
+ CopyStringRef(Mem + sizeof(T), Name);
+ return new (Mem) T(Buffer, RequiresNullTerminator);
+}
+
+namespace {
+/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
+class MemoryBufferMem : public MemoryBuffer {
+public:
+ MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
+ init(InputData.begin(), InputData.end(), RequiresNullTerminator);
+ }
+
+ virtual const char *getBufferIdentifier() const {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char*>(this + 1);
+ }
+
+ virtual BufferKind getBufferKind() const {
+ return MemoryBuffer_Malloc;
+ }
+};
+}
+
+/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note
+/// that InputData must be null terminated if RequiresNullTerminator is true!
+MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
+ StringRef BufferName,
+ bool RequiresNullTerminator) {
+ return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
+ RequiresNullTerminator);
+}
+
+/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
+/// copying the contents and taking ownership of it. This has no requirements
+/// on EndPtr[0].
+MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
+ StringRef BufferName) {
+ MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
+ if (!Buf) return 0;
+ memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
+ InputData.size());
+ return Buf;
+}
+
+/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
+/// that is not initialized. Note that the caller should initialize the
+/// memory allocated by this method. The memory is owned by the MemoryBuffer
+/// object.
+MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
+ StringRef BufferName) {
+ // Allocate space for the MemoryBuffer, the data and the name. It is important
+ // that MemoryBuffer and data are aligned so PointerIntPair works with them.
+ size_t AlignedStringLen =
+ RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
+ sizeof(void*)); // TODO: Is sizeof(void*) enough?
+ size_t RealLen = AlignedStringLen + Size + 1;
+ char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
+ if (!Mem) return 0;
+
+ // The name is stored after the class itself.
+ CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
+
+ // The buffer begins after the name and must be aligned.
+ char *Buf = Mem + AlignedStringLen;
+ Buf[Size] = 0; // Null terminate buffer.
+
+ return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
+}
+
+/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
+/// is completely initialized to zeros. Note that the caller need not
+/// initialize the memory; this method zero-fills it. The memory is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
+ MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
+ if (!SB) return 0;
+ memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
+ return SB;
+}
+
+
+/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
+/// if the Filename is "-". If an error occurs, this returns null and fills
+/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
+/// returns an empty buffer.
+error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ if (Filename == "-")
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
+}
+
+error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ if (strcmp(Filename, "-") == 0)
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getFile implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MemoryBufferMMapFile - This represents a file that was mapped in with the
+/// sys::Path::MapInFilePages method. When destroyed, it calls the
+/// sys::Path::UnMapFilePages method.
+class MemoryBufferMMapFile : public MemoryBufferMem {
+public:
+ MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
+ : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
+
+ ~MemoryBufferMMapFile() {
+ static int PageSize = sys::Process::GetPageSize();
+
+ uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
+ size_t Size = getBufferSize();
+ uintptr_t RealStart = Start & ~(PageSize - 1);
+ size_t RealSize = Size + (Start - RealStart);
+
+ sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
+ RealSize);
+ }
+
+ virtual BufferKind getBufferKind() const {
+ return MemoryBuffer_MMap;
+ }
+};
+}
+
+error_code MemoryBuffer::getFile(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize,
+ bool RequiresNullTerminator) {
+ // Ensure the path is null terminated.
+ SmallString<256> PathBuf(Filename.begin(), Filename.end());
+ return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize,
+ RequiresNullTerminator);
+}
+
+error_code MemoryBuffer::getFile(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize,
+ bool RequiresNullTerminator) {
+ int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ int FD = ::open(Filename, OpenFlags);
+ if (FD == -1)
+ return error_code(errno, posix_category());
+
+ error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize,
+ 0, RequiresNullTerminator);
+ close(FD);
+ return ret;
+}
+
+static bool shouldUseMmap(int FD,
+ size_t FileSize,
+ size_t MapSize,
+ off_t Offset,
+ bool RequiresNullTerminator,
+ int PageSize) {
+ // We don't use mmap for small files because this can severely fragment our
+ // address space.
+ if (MapSize < 4096*4)
+ return false;
+
+ if (!RequiresNullTerminator)
+ return true;
+
+
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ // FIXME: this chunk of code is duplicated, but it avoids a fstat when
+ // RequiresNullTerminator = false and MapSize != -1.
+ if (FileSize == size_t(-1)) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
+ }
+ FileSize = FileInfo.st_size;
+ }
+
+ // If we need a null terminator and the end of the map is inside the file,
+ // we cannot use mmap.
+ size_t End = Offset + MapSize;
+ assert(End <= FileSize);
+ if (End != FileSize)
+ return false;
+
+ // Don't try to map files that are exactly a multiple of the system page size
+ // if we need a null terminator.
+ if ((FileSize & (PageSize -1)) == 0)
+ return false;
+
+ return true;
+}
+
+error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ size_t FileSize, size_t MapSize,
+ off_t Offset,
+ bool RequiresNullTerminator) {
+ static int PageSize = sys::Process::GetPageSize();
+
+ // Default is to map the full file.
+ if (MapSize == size_t(-1)) {
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ if (FileSize == size_t(-1)) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
+ }
+ FileSize = FileInfo.st_size;
+ }
+ MapSize = FileSize;
+ }
+
+ if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
+ PageSize)) {
+ off_t RealMapOffset = Offset & ~(PageSize - 1);
+ off_t Delta = Offset - RealMapOffset;
+ size_t RealMapSize = MapSize + Delta;
+
+ if (const char *Pages = sys::Path::MapInFilePages(FD,
+ RealMapSize,
+ RealMapOffset)) {
+ result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
+ StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+ return success;
+ }
+ }
+
+ MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
+ if (!Buf) {
+ // Failed to create a buffer. The only way it can fail is if
+ // new(std::nothrow) returns 0.
+ return make_error_code(errc::not_enough_memory);
+ }
+
+ OwningPtr<MemoryBuffer> SB(Buf);
+ char *BufPtr = const_cast<char*>(SB->getBufferStart());
+
+ size_t BytesLeft = MapSize;
+ if (lseek(FD, Offset, SEEK_SET) == -1)
+ return error_code(errno, posix_category());
+
+ while (BytesLeft) {
+ ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+ if (NumRead == -1) {
+ if (errno == EINTR)
+ continue;
+ // Error while reading.
+ return error_code(errno, posix_category());
+ } else if (NumRead == 0) {
+ // We hit EOF early, truncate and terminate buffer.
+ Buf->BufferEnd = BufPtr;
+ *BufPtr = 0;
+ result.swap(SB);
+ return success;
+ }
+ BytesLeft -= NumRead;
+ BufPtr += NumRead;
+ }
+
+ result.swap(SB);
+ return success;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getSTDIN implementation.
+//===----------------------------------------------------------------------===//
+
+error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
+ // Read in all of the data from stdin, we cannot mmap stdin.
+ //
+ // FIXME: That isn't necessarily true, we should try to mmap stdin and
+ // fallback if it fails.
+ sys::Program::ChangeStdinToBinary();
+
+ const ssize_t ChunkSize = 4096*4;
+ SmallString<ChunkSize> Buffer;
+ ssize_t ReadBytes;
+ // Read into Buffer until we hit EOF.
+ do {
+ Buffer.reserve(Buffer.size() + ChunkSize);
+ ReadBytes = read(0, Buffer.end(), ChunkSize);
+ if (ReadBytes == -1) {
+ if (errno == EINTR) continue;
+ return error_code(errno, posix_category());
+ }
+ Buffer.set_size(Buffer.size() + ReadBytes);
+ } while (ReadBytes != 0);
+
+ result.reset(getMemBufferCopy(Buffer, "<stdin>"));
+ return success;
+}
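
A caller-side sketch for the getFile/getFileOrSTDIN entry points above, assuming error_code and OwningPtr behave as declared in the headers this file already includes; dumpFileSize is a hypothetical helper:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"

static bool dumpFileSize(const char *Path) {
  llvm::OwningPtr<llvm::MemoryBuffer> Buf;
  if (llvm::error_code EC = llvm::MemoryBuffer::getFileOrSTDIN(Path, Buf)) {
    llvm::errs() << Path << ": " << EC.message() << "\n";
    return false;
  }
  llvm::errs() << Path << ": " << Buf->getBufferSize() << " bytes\n";
  return true;
}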
diff --git a/contrib/llvm/lib/Support/MemoryObject.cpp b/contrib/llvm/lib/Support/MemoryObject.cpp
new file mode 100644
index 000000000000..91e3ecd23a2e
--- /dev/null
+++ b/contrib/llvm/lib/Support/MemoryObject.cpp
@@ -0,0 +1,34 @@
+//===- MemoryObject.cpp - Abstract memory interface -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryObject.h"
+using namespace llvm;
+
+MemoryObject::~MemoryObject() {
+}
+
+int MemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ uint64_t current = address;
+ uint64_t limit = getBase() + getExtent();
+
+ while (current - address < size && current < limit) {
+ if (readByte(current, &buf[(current - address)]))
+ return -1;
+
+ current++;
+ }
+
+ if (copied)
+ *copied = current - address;
+
+ return 0;
+}
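
readBytes() above is written against the pure virtual readByte()/getBase()/getExtent() interface. A minimal concrete subclass, assuming the MemoryObject declaration in llvm/Support/MemoryObject.h; ArrayMemoryObject is hypothetical:

#include "llvm/Support/MemoryObject.h"

class ArrayMemoryObject : public llvm::MemoryObject {
  const uint8_t *Data;
  uint64_t Size;
public:
  ArrayMemoryObject(const uint8_t *D, uint64_t S) : Data(D), Size(S) {}
  virtual uint64_t getBase() const { return 0; }
  virtual uint64_t getExtent() const { return Size; }
  virtual int readByte(uint64_t Addr, uint8_t *Byte) const {
    if (Addr >= Size)
      return -1;            // Out of range: readBytes() above stops and fails.
    *Byte = Data[Addr];
    return 0;
  }
};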
diff --git a/contrib/llvm/lib/Support/Mutex.cpp b/contrib/llvm/lib/Support/Mutex.cpp
new file mode 100644
index 000000000000..b408973bbad1
--- /dev/null
+++ b/contrib/llvm/lib/Support/Mutex.cpp
@@ -0,0 +1,157 @@
+//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+MutexImpl::MutexImpl( bool recursive) { }
+MutexImpl::~MutexImpl() { }
+bool MutexImpl::acquire() { return true; }
+bool MutexImpl::release() { return true; }
+bool MutexImpl::tryacquire() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_mutex_init does have an address, then mutex support is enabled.
+// Note: all LLVM tools will link against -lpthread if it's available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_mutex_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a Mutex using pthread calls
+MutexImpl::MutexImpl( bool recursive)
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_mutex data structures
+ pthread_mutex_t* mutex =
+ static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+ pthread_mutexattr_t attr;
+
+ // Initialize the mutex attributes
+ int errorcode = pthread_mutexattr_init(&attr);
+ assert(errorcode == 0);
+
+ // Initialize the mutex as a recursive mutex, if requested, or normal
+ // otherwise.
+ int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+ errorcode = pthread_mutexattr_settype(&attr, kind);
+ assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+ // Make it a process local mutex
+ errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+ assert(errorcode == 0);
+#endif
+
+ // Initialize the mutex
+ errorcode = pthread_mutex_init(mutex, &attr);
+ assert(errorcode == 0);
+
+ // Destroy the attributes
+ errorcode = pthread_mutexattr_destroy(&attr);
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = mutex;
+ }
+}
+
+// Destruct a Mutex
+MutexImpl::~MutexImpl()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+ pthread_mutex_destroy(mutex);
+ free(mutex);
+ }
+}
+
+bool
+MutexImpl::acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_lock(mutex);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+MutexImpl::release()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_unlock(mutex);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_trylock(mutex);
+ return errorcode == 0;
+ } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/Mutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/Mutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#endif
+#endif
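
A usage sketch of the mutex built above, assuming the sys::Mutex wrapper declared in llvm/Support/Mutex.h forwards to MutexImpl::acquire()/release() as the rest of this header family does:

#include "llvm/Support/Mutex.h"

static llvm::sys::Mutex CounterLock; // Recursive by default.
static unsigned Counter;

unsigned bumpCounter() {
  CounterLock.acquire();             // pthread_mutex_lock under the hood.
  unsigned V = ++Counter;
  CounterLock.release();             // pthread_mutex_unlock.
  return V;
}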
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
new file mode 100644
index 000000000000..8fbaf2d42bf9
--- /dev/null
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -0,0 +1,303 @@
+//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Path concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Path.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
+#include <cassert>
+#include <cstring>
+#include <ostream>
+using namespace llvm;
+using namespace sys;
+namespace {
+using support::ulittle32_t;
+}
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+bool Path::operator==(const Path &that) const {
+ return path == that.path;
+}
+
+bool Path::operator<(const Path& that) const {
+ return path < that.path;
+}
+
+Path
+Path::GetLLVMConfigDir() {
+ Path result;
+#ifdef LLVM_ETCDIR
+ if (result.set(LLVM_ETCDIR))
+ return result;
+#endif
+ return GetLLVMDefaultConfigDir();
+}
+
+LLVMFileType
+sys::IdentifyFileType(const char *magic, unsigned length) {
+ assert(magic && "Invalid magic number string");
+ assert(length >=4 && "Invalid magic number length");
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return Bitcode_FileType;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return Bitcode_FileType;
+ break;
+ case '!':
+ if (length >= 8)
+ if (memcmp(magic,"!<arch>\n",8) == 0)
+ return Archive_FileType;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (length >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return ELF_Relocatable_FileType;
+ case 2: return ELF_Executable_FileType;
+ case 3: return ELF_SharedObject_FileType;
+ case 4: return ELF_Core_FileType;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (length >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (length >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (length >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return Mach_O_Object_FileType;
+ case 2: return Mach_O_Executable_FileType;
+ case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
+ case 4: return Mach_O_Core_FileType;
+ case 5: return Mach_O_PreloadExecutable_FileType;
+ case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ case 7: return Mach_O_DynamicLinker_FileType;
+ case 8: return Mach_O_Bundle_FileType;
+ case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
+ case 10: break; // FIXME: MH_DSYM companion file with only debug.
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MIPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return COFF_FileType;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return COFF_FileType;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off = *reinterpret_cast<const ulittle32_t *>(magic + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < length && memcmp(magic + off, "PE\0\0",4) == 0)
+ return COFF_FileType;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return COFF_FileType;
+ break;
+
+ default:
+ break;
+ }
+ return Unknown_FileType;
+}
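
For illustration, a tiny self-check of the bitcode branch above, assuming the LLVMFileType enumerators are visible through llvm/Support/Path.h as the declaration of IdentifyFileType implies.

    #include "llvm/Support/Path.h"
    #include <cassert>
    using namespace llvm;
    using namespace llvm::sys;

    void checkBitcodeMagic() {
      // Raw bitcode signature: 'B', 'C', 0xC0, 0xDE.
      const char Magic[4] = { 'B', 'C', (char)0xC0, (char)0xDE };
      assert(IdentifyFileType(Magic, 4) == Bitcode_FileType);
    }
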
+
+bool
+Path::isArchive() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == Archive_FileType;
+}
+
+bool
+Path::isDynamicLibrary() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ switch (type) {
+ default: return false;
+ case Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case ELF_SharedObject_FileType:
+ case COFF_FileType: return true;
+ }
+}
+
+bool
+Path::isObjectFile() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ return false;
+ return true;
+}
+
+Path
+Path::FindLibrary(std::string& name) {
+ std::vector<sys::Path> LibPaths;
+ GetSystemLibraryPaths(LibPaths);
+ for (unsigned i = 0; i < LibPaths.size(); ++i) {
+ sys::Path FullPath(LibPaths[i]);
+ FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
+ if (FullPath.isDynamicLibrary())
+ return FullPath;
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+ }
+ return sys::Path();
+}
+
+StringRef Path::GetDLLSuffix() {
+ return &(LTDL_SHLIB_EXT[1]);
+}
+
+void
+Path::appendSuffix(StringRef suffix) {
+ if (!suffix.empty()) {
+ path.append(".");
+ path.append(suffix);
+ }
+}
+
+bool
+Path::isBitcodeFile() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == Bitcode_FileType;
+}
+
+bool Path::hasMagicNumber(StringRef Magic) const {
+ std::string actualMagic;
+ if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
+ return Magic == actualMagic;
+ return false;
+}
+
+static void getPathList(const char*path, std::vector<Path>& Paths) {
+ const char* at = path;
+ const char* delim = strchr(at, PathSeparator);
+ Path tmpPath;
+ while (delim != 0) {
+ std::string tmp(at, size_t(delim-at));
+ if (tmpPath.set(tmp))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ at = delim + 1;
+ delim = strchr(at, PathSeparator);
+ }
+
+ if (*at != 0)
+ if (tmpPath.set(std::string(at)))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+}
+
+static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
+ assert(Sep[0] != '\0' && Sep[1] == '\0' &&
+ "Sep must be a 1-character string literal.");
+ if (path.empty())
+ return ".";
+
+ // If the path is all slashes, return a single slash.
+ // Otherwise, remove all trailing slashes.
+
+ signed pos = static_cast<signed>(path.size()) - 1;
+
+ while (pos >= 0 && path[pos] == Sep[0])
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep[0] ? Sep : ".";
+
+ // Any slashes left?
+ signed i = 0;
+
+ while (i < pos && path[i] != Sep[0])
+ ++i;
+
+ if (i == pos) // No slashes? Return "."
+ return ".";
+
+ // There is at least one slash left. Remove all trailing non-slashes.
+ while (pos >= 0 && path[pos] != Sep[0])
+ --pos;
+
+ // Remove any trailing slashes.
+ while (pos >= 0 && path[pos] == Sep[0])
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep[0] ? Sep : ".";
+
+ return path.substr(0, pos+1);
+}
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Path.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/Path.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp
new file mode 100644
index 000000000000..896c94c071bc
--- /dev/null
+++ b/contrib/llvm/lib/Support/PathV2.cpp
@@ -0,0 +1,774 @@
+//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+
+namespace {
+ using llvm::StringRef;
+ using llvm::sys::path::is_separator;
+
+#ifdef LLVM_ON_WIN32
+ const StringRef separators = "\\/";
+ const char prefered_separator = '\\';
+#else
+ const StringRef separators = "/";
+ const char prefered_separator = '/';
+#endif
+
+ const llvm::error_code success;
+
+ StringRef find_first_component(StringRef path) {
+ // Look for this first component in the following order.
+ // * empty (in this case we return an empty string)
+ // * either C: or {//,\\}net.
+ // * {/,\}
+ // * {.,..}
+ // * {file,directory}name
+
+ if (path.empty())
+ return path;
+
+#ifdef LLVM_ON_WIN32
+ // C:
+ if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':')
+ return path.substr(0, 2);
+#endif
+
+ // //net
+ if ((path.size() > 2) &&
+ is_separator(path[0]) &&
+ path[0] == path[1] &&
+ !is_separator(path[2])) {
+ // Find the next directory separator.
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ // {/,\}
+ if (is_separator(path[0]))
+ return path.substr(0, 1);
+
+ if (path.startswith(".."))
+ return path.substr(0, 2);
+
+ if (path[0] == '.')
+ return path.substr(0, 1);
+
+ // * {file,directory}name
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ size_t filename_pos(StringRef str) {
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return 0;
+
+ if (str.size() > 0 && is_separator(str[str.size() - 1]))
+ return str.size() - 1;
+
+ size_t pos = str.find_last_of(separators, str.size() - 1);
+
+#ifdef LLVM_ON_WIN32
+ if (pos == StringRef::npos)
+ pos = str.find_last_of(':', str.size() - 2);
+#endif
+
+ if (pos == StringRef::npos ||
+ (pos == 1 && is_separator(str[0])))
+ return 0;
+
+ return pos + 1;
+ }
+
+ size_t root_dir_start(StringRef str) {
+ // case "c:/"
+#ifdef LLVM_ON_WIN32
+ if (str.size() > 2 &&
+ str[1] == ':' &&
+ is_separator(str[2]))
+ return 2;
+#endif
+
+ // case "//"
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return StringRef::npos;
+
+ // case "//net"
+ if (str.size() > 3 &&
+ is_separator(str[0]) &&
+ str[0] == str[1] &&
+ !is_separator(str[2])) {
+ return str.find_first_of(separators, 2);
+ }
+
+ // case "/"
+ if (str.size() > 0 && is_separator(str[0]))
+ return 0;
+
+ return StringRef::npos;
+ }
+
+ size_t parent_path_end(StringRef path) {
+ size_t end_pos = filename_pos(path);
+
+ bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]);
+
+ // Skip separators except for root dir.
+ size_t root_dir_pos = root_dir_start(path.substr(0, end_pos));
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(path[end_pos - 1]))
+ --end_pos;
+
+ if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
+ return StringRef::npos;
+
+ return end_pos;
+ }
+} // end unnamed namespace
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+const_iterator begin(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Component = find_first_component(path);
+ i.Position = 0;
+ return i;
+}
+
+const_iterator end(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Position = path.size();
+ return i;
+}
+
+const_iterator &const_iterator::operator++() {
+ assert(Position < Path.size() && "Tried to increment past end!");
+
+ // Increment Position to past the current component
+ Position += Component.size();
+
+ // Check for end.
+ if (Position == Path.size()) {
+ Component = StringRef();
+ return *this;
+ }
+
+ // Both POSIX and Windows treat paths that begin with exactly two separators
+ // specially.
+ bool was_net = Component.size() > 2 &&
+ is_separator(Component[0]) &&
+ Component[1] == Component[0] &&
+ !is_separator(Component[2]);
+
+ // Handle separators.
+ if (is_separator(Path[Position])) {
+ // Root dir.
+ if (was_net
+#ifdef LLVM_ON_WIN32
+ // c:/
+ || Component.endswith(":")
+#endif
+ ) {
+ Component = Path.substr(Position, 1);
+ return *this;
+ }
+
+ // Skip extra separators.
+ while (Position != Path.size() &&
+ is_separator(Path[Position])) {
+ ++Position;
+ }
+
+ // Treat trailing '/' as a '.'.
+ if (Position == Path.size()) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+ }
+
+ // Find next component.
+ size_t end_pos = Path.find_first_of(separators, Position);
+ Component = Path.slice(Position, end_pos);
+
+ return *this;
+}
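
A short sketch of the iteration behaviour implemented above: redundant separators collapse, a leading '/' is its own component, and a trailing '/' comes back as '.'. The function name is illustrative only.

    #include "llvm/Support/PathV2.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void dumpComponents(StringRef p) {
      // For p == "/tmp//x/" this prints: "/", "tmp", "x", ".".
      for (sys::path::const_iterator I = sys::path::begin(p),
                                     E = sys::path::end(p);
           I != E; ++I)
        errs() << *I << "\n";
    }
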
+
+const_iterator &const_iterator::operator--() {
+ // If we're at the end and the previous char was a '/', return '.'.
+ if (Position == Path.size() &&
+ Path.size() > 1 &&
+ is_separator(Path[Position - 1])
+#ifdef LLVM_ON_WIN32
+ && Path[Position - 2] != ':'
+#endif
+ ) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+
+ // Skip separators unless it's the root directory.
+ size_t root_dir_pos = root_dir_start(Path);
+ size_t end_pos = Position;
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(Path[end_pos - 1]))
+ --end_pos;
+
+ // Find next separator.
+ size_t start_pos = filename_pos(Path.substr(0, end_pos));
+ Component = Path.slice(start_pos, end_pos);
+ Position = start_pos;
+ return *this;
+}
+
+bool const_iterator::operator==(const const_iterator &RHS) const {
+ return Path.begin() == RHS.Path.begin() &&
+ Position == RHS.Position;
+}
+
+bool const_iterator::operator!=(const const_iterator &RHS) const {
+ return !(*this == RHS);
+}
+
+ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
+ return Position - RHS.Position;
+}
+
+const StringRef root_path(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ if ((++pos != e) && is_separator((*pos)[0])) {
+ // {C:/,//net/}, so get the first two components.
+ return path.substr(0, b->size() + pos->size());
+ } else {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // POSIX style root directory.
+ if (is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ return StringRef();
+}
+
+const StringRef root_name(StringRef path) {
+ const_iterator b = begin(path),
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // No path or no name.
+ return StringRef();
+}
+
+const StringRef root_directory(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if ((has_net || has_drive) &&
+ // {C:,//net}, skip to the next component.
+ (++pos != e) && is_separator((*pos)[0])) {
+ return *pos;
+ }
+
+ // POSIX style root directory.
+ if (!has_net && is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ // No path or no root.
+ return StringRef();
+}
+
+const StringRef relative_path(StringRef path) {
+ StringRef root = root_path(path);
+ return path.substr(root.size());
+}
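
A worked sketch of the root queries above, POSIX separators assumed; the values in the comments follow directly from the code.

    #include "llvm/Support/PathV2.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;
    using namespace llvm::sys;

    void splitRoots() {
      // "/net/foo":  root_path "/",      root_name "",      root_directory "/"
      // "//net/foo": root_path "//net/", root_name "//net", root_directory "/"
      StringRef rp = path::root_path("//net/foo");       // "//net/"
      StringRef rn = path::root_name("//net/foo");       // "//net"
      StringRef rd = path::root_directory("//net/foo");  // "/"
      (void)rp; (void)rn; (void)rd;
    }
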
+
+void append(SmallVectorImpl<char> &path, const Twine &a,
+ const Twine &b,
+ const Twine &c,
+ const Twine &d) {
+ SmallString<32> a_storage;
+ SmallString<32> b_storage;
+ SmallString<32> c_storage;
+ SmallString<32> d_storage;
+
+ SmallVector<StringRef, 4> components;
+ if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage));
+ if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage));
+ if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
+ if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
+
+ for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
+ e = components.end();
+ i != e; ++i) {
+ bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
+ bool component_has_sep = !i->empty() && is_separator((*i)[0]);
+ bool is_root_name = has_root_name(*i);
+
+ if (path_has_sep) {
+ // Strip separators from beginning of component.
+ size_t loc = i->find_first_not_of(separators);
+ StringRef c = i->substr(loc);
+
+ // Append it.
+ path.append(c.begin(), c.end());
+ continue;
+ }
+
+ if (!component_has_sep && !(path.empty() || is_root_name)) {
+ // Add a separator.
+ path.push_back(prefered_separator);
+ }
+
+ path.append(i->begin(), i->end());
+ }
+}
+
+void append(SmallVectorImpl<char> &path,
+ const_iterator begin, const_iterator end) {
+ for (; begin != end; ++begin)
+ path::append(path, *begin);
+}
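
A minimal sketch of the variadic append above; a separator is inserted only where one is not already present. The path contents are illustrative.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/PathV2.h"
    using namespace llvm;

    void buildPath() {
      SmallString<128> P;
      // Yields "/tmp/cache/index.bin" with POSIX separators.
      sys::path::append(P, "/tmp", "cache", "index.bin");
    }
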
+
+const StringRef parent_path(StringRef path) {
+ size_t end_pos = parent_path_end(path);
+ if (end_pos == StringRef::npos)
+ return StringRef();
+ else
+ return path.substr(0, end_pos);
+}
+
+void remove_filename(SmallVectorImpl<char> &path) {
+ size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()));
+ if (end_pos != StringRef::npos)
+ path.set_size(end_pos);
+}
+
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
+ StringRef p(path.begin(), path.size());
+ SmallString<32> ext_storage;
+ StringRef ext = extension.toStringRef(ext_storage);
+
+ // Erase existing extension.
+ size_t pos = p.find_last_of('.');
+ if (pos != StringRef::npos && pos >= filename_pos(p))
+ path.set_size(pos);
+
+ // Append '.' if needed.
+ if (ext.size() > 0 && ext[0] != '.')
+ path.push_back('.');
+
+ // Append extension.
+ path.append(ext.begin(), ext.end());
+}
+
+void native(const Twine &path, SmallVectorImpl<char> &result) {
+ // Clear result.
+ result.clear();
+#ifdef LLVM_ON_WIN32
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+ result.reserve(p.size());
+ for (StringRef::const_iterator i = p.begin(),
+ e = p.end();
+ i != e;
+ ++i) {
+ if (*i == '/')
+ result.push_back('\\');
+ else
+ result.push_back(*i);
+ }
+#else
+ path.toVector(result);
+#endif
+}
+
+const StringRef filename(StringRef path) {
+ return *(--end(path));
+}
+
+const StringRef stem(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return fname;
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return fname;
+ else
+ return fname.substr(0, pos);
+}
+
+const StringRef extension(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return StringRef();
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return StringRef();
+ else
+ return fname.substr(pos);
+}
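
Worked results for the three accessors above, using a name with two dots; only the last dot in the filename separates stem from extension.

    #include "llvm/Support/PathV2.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;
    using namespace llvm::sys;

    void nameParts() {
      StringRef P = "/tmp/archive.tar.gz";
      StringRef F = path::filename(P);    // "archive.tar.gz"
      StringRef S = path::stem(P);        // "archive.tar"
      StringRef E = path::extension(P);   // ".gz"
      (void)F; (void)S; (void)E;
    }
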
+
+bool is_separator(char value) {
+ switch(value) {
+#ifdef LLVM_ON_WIN32
+ case '\\': // fall through
+#endif
+ case '/': return true;
+ default: return false;
+ }
+}
+
+bool has_root_name(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_name(p).empty();
+}
+
+bool has_root_directory(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_directory(p).empty();
+}
+
+bool has_root_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_path(p).empty();
+}
+
+bool has_relative_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !relative_path(p).empty();
+}
+
+bool has_filename(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !filename(p).empty();
+}
+
+bool has_parent_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !parent_path(p).empty();
+}
+
+bool has_stem(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !stem(p).empty();
+}
+
+bool has_extension(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !extension(p).empty();
+}
+
+bool is_absolute(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ bool rootDir = has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = has_root_name(p);
+#else
+ rootName = true;
+#endif
+
+ return rootDir && rootName;
+}
+
+bool is_relative(const Twine &path) {
+ return !is_absolute(path);
+}
+
+} // end namespace path
+
+namespace fs {
+
+error_code make_absolute(SmallVectorImpl<char> &path) {
+ StringRef p(path.data(), path.size());
+
+ bool rootName = path::has_root_name(p),
+ rootDirectory = path::has_root_directory(p);
+
+ // Already absolute.
+ if (rootName && rootDirectory)
+ return success;
+
+ // All of the following conditions will need the current directory.
+ SmallString<128> current_dir;
+ if (error_code ec = current_path(current_dir)) return ec;
+
+ // Relative path. Prepend the current directory.
+ if (!rootName && !rootDirectory) {
+ // Append path to the current directory.
+ path::append(current_dir, p);
+ // Set path to the result.
+ path.swap(current_dir);
+ return success;
+ }
+
+ if (!rootName && rootDirectory) {
+ StringRef cdrn = path::root_name(current_dir);
+ SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+ path::append(curDirRootName, p);
+ // Set path to the result.
+ path.swap(curDirRootName);
+ return success;
+ }
+
+ if (rootName && !rootDirectory) {
+ StringRef pRootName = path::root_name(p);
+ StringRef bRootDirectory = path::root_directory(current_dir);
+ StringRef bRelativePath = path::relative_path(current_dir);
+ StringRef pRelativePath = path::relative_path(p);
+
+ SmallString<128> res;
+ path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+ path.swap(res);
+ return success;
+ }
+
+ llvm_unreachable("All rootName and rootDirectory combinations should have "
+ "occurred above!");
+}
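
A small usage sketch of make_absolute; the declaration is assumed to live in llvm/Support/FileSystem.h, as the include list at the top of this file suggests, and the path literal is illustrative.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;

    bool absolutize(SmallString<128> &P) {
      // "obj/foo.o" becomes "<current directory>/obj/foo.o"; a path that is
      // already absolute is left untouched.
      if (error_code ec = sys::fs::make_absolute(P))
        return false;
      return true;
    }
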
+
+error_code create_directories(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ StringRef parent = path::parent_path(p);
+ bool parent_exists;
+
+ if (error_code ec = fs::exists(parent, parent_exists)) return ec;
+
+ if (!parent_exists)
+ return create_directories(parent, existed);
+
+ return create_directory(p, existed);
+}
+
+bool exists(file_status status) {
+ return status_known(status) && status.type() != file_type::file_not_found;
+}
+
+bool status_known(file_status s) {
+ return s.type() != file_type::status_error;
+}
+
+bool is_directory(file_status status) {
+ return status.type() == file_type::directory_file;
+}
+
+error_code is_directory(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_directory(st);
+ return success;
+}
+
+bool is_regular_file(file_status status) {
+ return status.type() == file_type::regular_file;
+}
+
+error_code is_regular_file(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_regular_file(st);
+ return success;
+}
+
+bool is_symlink(file_status status) {
+ return status.type() == file_type::symlink_file;
+}
+
+error_code is_symlink(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_symlink(st);
+ return success;
+}
+
+bool is_other(file_status status) {
+ return exists(status) &&
+ !is_regular_file(status) &&
+ !is_directory(status) &&
+ !is_symlink(status);
+}
+
+void directory_entry::replace_filename(const Twine &filename, file_status st,
+ file_status symlink_st) {
+ SmallString<128> path(Path.begin(), Path.end());
+ path::remove_filename(path);
+ path::append(path, filename);
+ Path = path.str();
+ Status = st;
+ SymlinkStatus = symlink_st;
+}
+
+error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
+ SmallString<32> MagicStorage;
+ StringRef Magic = magic.toStringRef(MagicStorage);
+ SmallString<32> Buffer;
+
+ if (error_code ec = get_magic(path, Magic.size(), Buffer)) {
+ if (ec == errc::value_too_large) {
+ // Magic.size() > file_size(Path).
+ result = false;
+ return success;
+ }
+ return ec;
+ }
+
+ result = Magic == Buffer;
+ return success;
+}
+
+error_code identify_magic(const Twine &path, LLVMFileType &result) {
+ SmallString<32> Magic;
+ error_code ec = get_magic(path, Magic.capacity(), Magic);
+ if (ec && ec != errc::value_too_large)
+ return ec;
+
+ result = IdentifyFileType(Magic.data(), Magic.size());
+ return success;
+}
+
+namespace {
+error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
+ if (ft == file_type::directory_file) {
+ // This code would be a lot better with exceptions ;/.
+ error_code ec;
+ for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ if (ec) return ec;
+ file_status st;
+ if (error_code ec = i->status(st)) return ec;
+ if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec;
+ }
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count; // Include the directory itself in the items removed.
+ } else {
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count;
+ }
+
+ return success;
+}
+} // end unnamed namespace
+
+error_code remove_all(const Twine &path, uint32_t &num_removed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ file_status fs;
+ if (error_code ec = status(path, fs))
+ return ec;
+ num_removed = 0;
+ return remove_all_r(p, fs.type(), num_removed);
+}
+
+error_code directory_entry::status(file_status &result) const {
+ return fs::status(Path, result);
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/PluginLoader.cpp b/contrib/llvm/lib/Support/PluginLoader.cpp
new file mode 100644
index 000000000000..2924cfa38897
--- /dev/null
+++ b/contrib/llvm/lib/Support/PluginLoader.cpp
@@ -0,0 +1,47 @@
+//===-- PluginLoader.cpp - Implement -load command line option ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -load <plugin> command line option handler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DONT_GET_PLUGIN_LOADER_OPTION
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+using namespace llvm;
+
+static ManagedStatic<std::vector<std::string> > Plugins;
+static ManagedStatic<sys::SmartMutex<true> > PluginsLock;
+
+void PluginLoader::operator=(const std::string &Filename) {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ std::string Error;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
+ errs() << "Error opening '" << Filename << "': " << Error
+ << "\n -load request ignored.\n";
+ } else {
+ Plugins->push_back(Filename);
+ }
+}
+
+unsigned PluginLoader::getNumPlugins() {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ return Plugins.isConstructed() ? Plugins->size() : 0;
+}
+
+std::string &PluginLoader::getPlugin(unsigned num) {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ assert(Plugins.isConstructed() && num < Plugins->size() &&
+ "Asking for an out of bounds plugin");
+ return (*Plugins)[num];
+}
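
A hedged sketch of how this handler is normally driven: PluginLoader.h, whose option declaration is suppressed above via DONT_GET_PLUGIN_LOADER_OPTION, is expected to declare a -load command line option whose parser assigns each filename to a PluginLoader, invoking operator= above. A tool can then enumerate what was loaded:

    #include "llvm/Support/PluginLoader.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void listLoadedPlugins() {
      for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i)
        errs() << "loaded plugin: " << PluginLoader::getPlugin(i) << "\n";
    }
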
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
new file mode 100644
index 000000000000..082b7012eb23
--- /dev/null
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -0,0 +1,133 @@
+//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pretty stack trace facility: PrettyStackTraceEntry
+// objects register themselves on construction and their descriptions are
+// printed, oldest first, if a fatal signal is delivered while the program runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadLocal.h"
+#include "llvm/ADT/SmallString.h"
+
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+#include <CrashReporterClient.h>
+#endif
+
+using namespace llvm;
+
+namespace llvm {
+ bool DisablePrettyStackTrace = false;
+}
+
+// FIXME: This should be thread local when llvm supports threads.
+static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
+
+static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
+ unsigned NextID = 0;
+ if (Entry->getNextEntry())
+ NextID = PrintStack(Entry->getNextEntry(), OS);
+ OS << NextID << ".\t";
+ Entry->print(OS);
+
+ return NextID+1;
+}
+
+/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+static void PrintCurStackTrace(raw_ostream &OS) {
+ // Don't print an empty trace.
+ if (PrettyStackTraceHead.get() == 0) return;
+
+ // If there are pretty stack frames registered, walk and emit them.
+ OS << "Stack dump:\n";
+
+ PrintStack(PrettyStackTraceHead.get(), OS);
+ OS.flush();
+}
+
+// Integrate with crash reporter libraries.
+#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
+// If any clients of llvm try to link to libCrashReporterClient.a themselves,
+// only one crash info struct will be used.
+extern "C" {
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+ __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+ = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
+}
+#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
+static const char *__crashreporter_info__ = 0;
+asm(".desc ___crashreporter_info__, 0x10");
+#endif
+
+
+/// CrashHandler - This callback is run if a fatal signal is delivered to the
+/// process; it prints the pretty stack trace.
+static void CrashHandler(void *) {
+#ifndef __APPLE__
+ // On non-apple systems, just emit the crash stack trace to stderr.
+ PrintCurStackTrace(errs());
+#else
+ // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
+ // put it into __crashreporter_info__.
+ SmallString<2048> TmpStr;
+ {
+ raw_svector_ostream Stream(TmpStr);
+ PrintCurStackTrace(Stream);
+ }
+
+ if (!TmpStr.empty()) {
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+ // Cast to void to avoid warning.
+ (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#elif HAVE_CRASHREPORTER_INFO
+ __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
+#endif
+ errs() << TmpStr.str();
+ }
+
+#endif
+}
+
+static bool RegisterCrashPrinter() {
+ if (!DisablePrettyStackTrace)
+ sys::AddSignalHandler(CrashHandler, 0);
+ return false;
+}
+
+PrettyStackTraceEntry::PrettyStackTraceEntry() {
+ // The first time this is called, we register the crash printer.
+ static bool HandlerRegistered = RegisterCrashPrinter();
+ (void)HandlerRegistered;
+
+ // Link ourselves.
+ NextEntry = PrettyStackTraceHead.get();
+ PrettyStackTraceHead.set(this);
+}
+
+PrettyStackTraceEntry::~PrettyStackTraceEntry() {
+ assert(PrettyStackTraceHead.get() == this &&
+ "Pretty stack trace entry destruction is out of order");
+ PrettyStackTraceHead.set(getNextEntry());
+}
+
+void PrettyStackTraceString::print(raw_ostream &OS) const {
+ OS << Str << "\n";
+}
+
+void PrettyStackTraceProgram::print(raw_ostream &OS) const {
+ OS << "Program arguments: ";
+ // Print the argument list.
+ for (unsigned i = 0, e = ArgC; i != e; ++i)
+ OS << ArgV[i] << ' ';
+ OS << '\n';
+}
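
A sketch of a typical client, assuming the constructors declared in llvm/Support/PrettyStackTrace.h take the program arguments and a plain C string respectively; entries are printed oldest first if the process later crashes.

    #include "llvm/Support/PrettyStackTrace.h"

    int main(int argc, char **argv) {
      // Printed as "Program arguments: ..." in the stack dump on a fatal signal.
      llvm::PrettyStackTraceProgram X(argc, argv);

      // Nested context; appears after the program-arguments entry in the dump.
      llvm::PrettyStackTraceString Phase("running the example phase");
      return 0;
    }
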
diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
new file mode 100644
index 000000000000..88ca7c3f220f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -0,0 +1,33 @@
+//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Process concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Process.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Process.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Process.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Program.cpp b/contrib/llvm/lib/Support/Program.cpp
new file mode 100644
index 000000000000..01860b082d62
--- /dev/null
+++ b/contrib/llvm/lib/Support/Program.cpp
@@ -0,0 +1,56 @@
+//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Program concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+int
+Program::ExecuteAndWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned secondsToWait,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+ return prg.Wait(path, secondsToWait, ErrMsg);
+ else
+ return -1;
+}
+
+void
+Program::ExecuteNoWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
+}
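
A sketch of the blocking entry point above, assuming the usual static declaration in Program.h and a sys::Path constructible from a string; the binary path is hypothetical and the argument vector must be null terminated.

    #include "llvm/Support/Program.h"
    #include "llvm/Support/Path.h"
    #include <string>
    using namespace llvm;

    int runEcho() {
      sys::Path Echo("/bin/echo");                  // hypothetical binary path
      const char *Args[] = { "echo", "hello", 0 };  // argv[0] plus terminator
      std::string Err;
      return sys::Program::ExecuteAndWait(Echo, Args, /*envp*/ 0, /*redirects*/ 0,
                                          /*secondsToWait*/ 0, /*memoryLimit*/ 0,
                                          &Err);
    }
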
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Program.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Program.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/RWMutex.cpp b/contrib/llvm/lib/Support/RWMutex.cpp
new file mode 100644
index 000000000000..fc02f9cf7c11
--- /dev/null
+++ b/contrib/llvm/lib/Support/RWMutex.cpp
@@ -0,0 +1,157 @@
+//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_rwlock_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_rwlock_init does have an address, then rwlock support is enabled.
+// Note: all LLVM tools will link against -lpthread if it's available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_rwlock_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+ // Initialize the rwlock
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = rwlock;
+ }
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
+ }
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/RWMutex.cpp
+#endif
+#endif
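
The class implemented above can be exercised directly; real clients normally go through the sys::RWMutex typedef and the scoped reader/writer guards in RWMutex.h, but this sketch sticks to the four methods defined here and assumes RWMutexImpl is constructible as declared in that header.

    #include "llvm/Support/RWMutex.h"
    using namespace llvm;

    static sys::RWMutexImpl TableLock;
    static int Table[16];

    int readSlot(unsigned i) {
      TableLock.reader_acquire();      // shared: many readers may hold this
      int v = Table[i & 15];
      TableLock.reader_release();
      return v;
    }

    void writeSlot(unsigned i, int v) {
      TableLock.writer_acquire();      // exclusive
      Table[i & 15] = v;
      TableLock.writer_release();
    }
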
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
new file mode 100644
index 000000000000..d293da07d684
--- /dev/null
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -0,0 +1,168 @@
+//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "regex_impl.h"
+#include <string>
+using namespace llvm;
+
+Regex::Regex(StringRef regex, unsigned Flags) {
+ unsigned flags = 0;
+ preg = new llvm_regex();
+ preg->re_endp = regex.end();
+ if (Flags & IgnoreCase)
+ flags |= REG_ICASE;
+ if (Flags & Newline)
+ flags |= REG_NEWLINE;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+}
+
+Regex::~Regex() {
+ llvm_regfree(preg);
+ delete preg;
+}
+
+bool Regex::isValid(std::string &Error) {
+ if (!error)
+ return true;
+
+ size_t len = llvm_regerror(error, preg, NULL, 0);
+
+ Error.resize(len);
+ llvm_regerror(error, preg, &Error[0], len);
+ return false;
+}
+
+/// getNumMatches - In a valid regex, return the number of parenthesized
+/// matches it contains.
+unsigned Regex::getNumMatches() const {
+ return preg->re_nsub;
+}
+
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
+ unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+
+ // pmatch needs to have at least one element.
+ SmallVector<llvm_regmatch_t, 8> pm;
+ pm.resize(nmatch > 0 ? nmatch : 1);
+ pm[0].rm_so = 0;
+ pm[0].rm_eo = String.size();
+
+ int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
+
+ if (rc == REG_NOMATCH)
+ return false;
+ if (rc != 0) {
+ // regexec can fail due to invalid pattern or running out of memory.
+ error = rc;
+ return false;
+ }
+
+ // There was a match.
+
+ if (Matches) { // match position requested
+ Matches->clear();
+
+ for (unsigned i = 0; i != nmatch; ++i) {
+ if (pm[i].rm_so == -1) {
+ // this group didn't match
+ Matches->push_back(StringRef());
+ continue;
+ }
+ assert(pm[i].rm_eo >= pm[i].rm_so);
+ Matches->push_back(StringRef(String.data()+pm[i].rm_so,
+ pm[i].rm_eo-pm[i].rm_so));
+ }
+ }
+
+ return true;
+}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for termination and a trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
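
A short sketch of match() and sub() as implemented above; group 0 is the whole match, and \1..\9 in the replacement string refer to the parenthesized groups.

    #include "llvm/Support/Regex.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include <string>
    using namespace llvm;

    void demoRegex() {
      Regex R("([a-z]+)-([0-9]+)");

      SmallVector<StringRef, 4> Groups;
      if (R.match("rev-42", &Groups)) {
        // Groups[0] == "rev-42", Groups[1] == "rev", Groups[2] == "42"
      }

      std::string Err;
      std::string Out = R.sub("\\2:\\1", "rev-42", &Err);  // Out == "42:rev"
      (void)Out;
    }
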
diff --git a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
new file mode 100644
index 000000000000..d63830185c32
--- /dev/null
+++ b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -0,0 +1,73 @@
+//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file pulls the addresses of certain symbols out of the linker. It must
+// include as few header files as possible because it declares the symbols as
+// void*, which would conflict with the actual symbol type if any header
+// declared it.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string.h>
+
+// Must declare the symbols in the global namespace.
+static void *DoSearch(const char* symbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+ extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
+
+ // If this is darwin, it has some funky issues, try to solve them here. Some
+ // important symbols are marked 'private external' which doesn't allow
+ // SearchForAddressOfSymbol to find them. As such, we special case them here,
+ // there is only a small handful of them.
+
+#ifdef __APPLE__
+ {
+ EXPLICIT_SYMBOL(__ashldi3);
+ EXPLICIT_SYMBOL(__ashrdi3);
+ EXPLICIT_SYMBOL(__cmpdi2);
+ EXPLICIT_SYMBOL(__divdi3);
+ EXPLICIT_SYMBOL(__fixdfdi);
+ EXPLICIT_SYMBOL(__fixsfdi);
+ EXPLICIT_SYMBOL(__fixunsdfdi);
+ EXPLICIT_SYMBOL(__fixunssfdi);
+ EXPLICIT_SYMBOL(__floatdidf);
+ EXPLICIT_SYMBOL(__floatdisf);
+ EXPLICIT_SYMBOL(__lshrdi3);
+ EXPLICIT_SYMBOL(__moddi3);
+ EXPLICIT_SYMBOL(__udivdi3);
+ EXPLICIT_SYMBOL(__umoddi3);
+
+ // __eprintf is sometimes used for assert() handling on x86.
+ //
+ // FIXME: Currently disabled when using Clang, as we don't always have our
+ // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+ EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
+ }
+#endif
+
+#ifdef __CYGWIN__
+ {
+ EXPLICIT_SYMBOL(_alloca);
+ EXPLICIT_SYMBOL(__main);
+ }
+#endif
+
+#undef EXPLICIT_SYMBOL
+ return 0;
+}
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
+ return DoSearch(symbolName);
+}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
new file mode 100644
index 000000000000..a11789372d93
--- /dev/null
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -0,0 +1,34 @@
+//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signals.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Signals.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Signals.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/SmallPtrSet.cpp b/contrib/llvm/lib/Support/SmallPtrSet.cpp
new file mode 100644
index 000000000000..997ce0b74cd2
--- /dev/null
+++ b/contrib/llvm/lib/Support/SmallPtrSet.cpp
@@ -0,0 +1,229 @@
+//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
+// overview of the algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+void SmallPtrSetImpl::shrink_and_clear() {
+ assert(!isSmall() && "Can't shrink a small set!");
+ free(CurArray);
+
+ // Reduce the number of buckets.
+ CurArraySize = NumElements > 16 ? 1 << (Log2_32_Ceil(NumElements) + 1) : 32;
+ NumElements = NumTombstones = 0;
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ memset(CurArray, -1, CurArraySize*sizeof(void*));
+
+ // The end pointer, always valid, is set to a valid element to help the
+ // iterator.
+ CurArray[CurArraySize] = 0;
+}
+
+bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is already in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr)
+ return false;
+
+ // Nope, there isn't. If we stay small, just 'pushback' now.
+ if (NumElements < CurArraySize-1) {
+ SmallArray[NumElements++] = Ptr;
+ return true;
+ }
+ // Otherwise, hit the big set case, which will call grow.
+ }
+
+ if (NumElements*4 >= CurArraySize*3) {
+ // If more than 3/4 of the array is full, grow.
+ Grow(CurArraySize < 64 ? 128 : CurArraySize*2);
+ } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) {
+ // If fewer than 1/8 of the buckets are empty (the rest being elements or
+ // tombstones), rehash at the same size to flush out the tombstones.
+ Grow(CurArraySize);
+ }
+
+ // Okay, we know we have space. Find a hash bucket.
+ const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
+ if (*Bucket == Ptr) return false; // Already inserted, good.
+
+ // Otherwise, insert it!
+ if (*Bucket == getTombstoneMarker())
+ --NumTombstones;
+ *Bucket = Ptr;
+ ++NumElements; // Track density.
+ return true;
+}
+
+bool SmallPtrSetImpl::erase_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr) {
+ // If it is in the set, replace this element.
+ *APtr = E[-1];
+ E[-1] = getEmptyMarker();
+ --NumElements;
+ return true;
+ }
+
+ return false;
+ }
+
+ // It is not in the small representation; find its bucket in the hash table.
+ void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
+ if (*Bucket != Ptr) return false; // Not in the set?
+
+ // Set this as a tombstone.
+ *Bucket = getTombstoneMarker();
+ --NumElements;
+ ++NumTombstones;
+ return true;
+}
+
+const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const {
+ unsigned Bucket = Hash(Ptr);
+ unsigned ArraySize = CurArraySize;
+ unsigned ProbeAmt = 1;
+ const void *const *Array = CurArray;
+ const void *const *Tombstone = 0;
+ while (1) {
+ // Found Ptr's bucket?
+ if (Array[Bucket] == Ptr)
+ return Array+Bucket;
+
+ // If we found an empty bucket, the pointer doesn't exist in the set.
+ // Return a tombstone if we've seen one so far, or the empty bucket if
+ // not.
+ if (Array[Bucket] == getEmptyMarker())
+ return Tombstone ? Tombstone : Array+Bucket;
+
+ // If this is a tombstone, remember it. If Ptr ends up not in the set, we
+ // prefer to return it rather than something that would require more probing.
+ if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
+ Tombstone = Array+Bucket; // Remember the first tombstone found.
+
+ // It's a hash collision or a tombstone. Reprobe.
+ Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
+ }
+}
+
+/// Grow - Allocate a larger backing store for the buckets and move it over.
+///
+void SmallPtrSetImpl::Grow(unsigned NewSize) {
+ // Allocate twice as many buckets, but at least 128.
+ unsigned OldSize = CurArraySize;
+
+ const void **OldBuckets = CurArray;
+ bool WasSmall = isSmall();
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ CurArraySize = NewSize;
+ memset(CurArray, -1, NewSize*sizeof(void*));
+
+ // The end pointer, always valid, is set to a valid element to help the
+ // iterator.
+ CurArray[NewSize] = 0;
+
+ // Copy over all the elements.
+ if (WasSmall) {
+ // Small sets store their elements in order.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+NumElements;
+ BucketPtr != E; ++BucketPtr) {
+ const void *Elt = *BucketPtr;
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+ } else {
+ // Copy over all valid entries.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+OldSize;
+ BucketPtr != E; ++BucketPtr) {
+ // Copy over the element if it is valid.
+ const void *Elt = *BucketPtr;
+ if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+
+ free(OldBuckets);
+ NumTombstones = 0;
+ }
+}
+
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
+ const SmallPtrSetImpl& that) {
+ SmallArray = SmallStorage;
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (that.isSmall()) {
+ CurArray = SmallArray;
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else {
+ CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = that.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+
+ NumElements = that.NumElements;
+ NumTombstones = that.NumTombstones;
+}
+
+/// CopyFrom - implement operator= from a smallptrset that has the same pointer
+/// type, but may have a different small size.
+void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
+ if (isSmall() && RHS.isSmall())
+ assert(CurArraySize == RHS.CurArraySize &&
+ "Cannot assign sets with different small sizes");
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (RHS.isSmall()) {
+ if (!isSmall())
+ free(CurArray);
+ CurArray = SmallArray;
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else if (CurArraySize != RHS.CurArraySize) {
+ if (isSmall())
+ CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+ else
+ CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = RHS.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+
+ NumElements = RHS.NumElements;
+ NumTombstones = RHS.NumTombstones;
+}
+
+SmallPtrSetImpl::~SmallPtrSetImpl() {
+ if (!isSmall())
+ free(CurArray);
+}
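
Typical use of the container built on this implementation: small sets of pointers stay in the inline array and spill into the probed hash table above only once they fill up. Names are illustrative.

    #include "llvm/ADT/SmallPtrSet.h"

    void rememberSeen(int *A, int *B) {
      llvm::SmallPtrSet<int *, 8> Seen;
      Seen.insert(A);        // newly inserted
      Seen.insert(A);        // duplicate: the set is unchanged
      if (Seen.count(B))     // membership test
        Seen.erase(B);
    }
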
diff --git a/contrib/llvm/lib/Support/SmallVector.cpp b/contrib/llvm/lib/Support/SmallVector.cpp
new file mode 100644
index 000000000000..a89f14957635
--- /dev/null
+++ b/contrib/llvm/lib/Support/SmallVector.cpp
@@ -0,0 +1,40 @@
+//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+/// grow_pod - This is an implementation of the grow() method which only works
+/// on POD-like datatypes and is out of line to reduce code duplication.
+void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) {
+ size_t CurSizeBytes = size_in_bytes();
+ size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
+ if (NewCapacityInBytes < MinSizeInBytes)
+ NewCapacityInBytes = MinSizeInBytes;
+
+ void *NewElts;
+ if (this->isSmall()) {
+ NewElts = malloc(NewCapacityInBytes);
+
+ // Copy the elements over. No need to run dtors on PODs.
+ memcpy(NewElts, this->BeginX, CurSizeBytes);
+ } else {
+ // If this wasn't grown from the inline copy, grow the allocated space.
+ NewElts = realloc(this->BeginX, NewCapacityInBytes);
+ }
+
+ this->EndX = (char*)NewElts+CurSizeBytes;
+ this->BeginX = NewElts;
+ this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
+}
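+
+// Illustrative usage (a sketch, not part of this file's interface): for POD
+// element types such as int, SmallVector spills from its inline buffer to a
+// single heap allocation via grow_pod once the inline capacity is exceeded:
+//   llvm::SmallVector<int, 4> V;
+//   for (int i = 0; i != 64; ++i)
+//     V.push_back(i);   // moves to the heap when the inline space runs out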
+
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
new file mode 100644
index 000000000000..de042a9f53c8
--- /dev/null
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -0,0 +1,232 @@
+//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceMgr class. This class is used as a simple
+// substrate for diagnostics, #include handling, and other low level things for
+// simple parsers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+namespace {
+ struct LineNoCacheTy {
+ int LastQueryBufferID;
+ const char *LastQuery;
+ unsigned LineNoOfQuery;
+ };
+}
+
+static LineNoCacheTy *getCache(void *Ptr) {
+ return (LineNoCacheTy*)Ptr;
+}
+
+
+SourceMgr::~SourceMgr() {
+ // Delete the line # cache if allocated.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ delete Cache;
+
+ while (!Buffers.empty()) {
+ delete Buffers.back().Buffer;
+ Buffers.pop_back();
+ }
+}
+
+/// AddIncludeFile - Search for a file with the specified name in the current
+/// directory or in one of the IncludeDirs. If no file is found, this returns
+/// ~0, otherwise it returns the buffer ID of the stacked file.
+unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
+ SMLoc IncludeLoc,
+ std::string &IncludedFile) {
+ OwningPtr<MemoryBuffer> NewBuf;
+ IncludedFile = Filename;
+ MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+
+ // If the file didn't exist directly, see if it's in an include path.
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+ IncludedFile = IncludeDirectories[i] + "/" + Filename;
+ MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+ }
+
+ if (NewBuf == 0) return ~0U;
+
+ return AddNewSourceBuffer(NewBuf.take(), IncludeLoc);
+}
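+
+// Illustrative usage (variable names are examples only): with
+// IncludeDirectories == {"include"}, a request for "foo.inc" is tried as
+// "foo.inc" and then as "include/foo.inc"; IncludedFile reports the path
+// that was actually opened.
+//   std::string Resolved;
+//   unsigned BufID = SrcMgr.AddIncludeFile("foo.inc", Loc, Resolved);
+//   if (BufID == ~0U) { /* not found in "." or any include directory */ }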
+
+
+/// FindBufferContainingLoc - Return the ID of the buffer containing the
+/// specified location, returning -1 if not found.
+int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
+ for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
+ if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
+ // Use <= here so that a pointer to the null at the end of the buffer
+ // is included as part of the buffer.
+ Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
+ return i;
+ return -1;
+}
+
+/// FindLineNumber - Find the line number for the specified location in the
+/// specified file. This is not a fast method.
+unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const {
+ if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID != -1 && "Invalid Location!");
+
+ MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer;
+
+ // Count the number of \n's between the start of the file and the specified
+ // location.
+ unsigned LineNo = 1;
+
+ const char *Ptr = Buff->getBufferStart();
+
+ // If we have a line number cache, and if the query is to a later point in the
+ // same file, start searching from the last query location. This optimizes
+ // for the case when multiple diagnostics come out of one file in order.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ if (Cache->LastQueryBufferID == BufferID &&
+ Cache->LastQuery <= Loc.getPointer()) {
+ Ptr = Cache->LastQuery;
+ LineNo = Cache->LineNoOfQuery;
+ }
+
+ // Scan for the location being queried, keeping track of the number of lines
+ // we see.
+ for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
+ if (*Ptr == '\n') ++LineNo;
+
+
+ // Allocate the line number cache if it doesn't exist.
+ if (LineNoCache == 0)
+ LineNoCache = new LineNoCacheTy();
+
+ // Update the line # cache.
+ LineNoCacheTy &Cache = *getCache(LineNoCache);
+ Cache.LastQueryBufferID = BufferID;
+ Cache.LastQuery = Ptr;
+ Cache.LineNoOfQuery = LineNo;
+ return LineNo;
+}
+
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
+ if (IncludeLoc == SMLoc()) return; // Top of stack.
+
+ int CurBuf = FindBufferContainingLoc(IncludeLoc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+ OS << "Included from "
+ << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
+}
+
+
+/// GetMessage - Return an SMDiagnostic at the specified location with the
+/// specified string.
+///
+/// @param Type - If non-null, the kind of message (e.g., "error") which is
+/// prefixed to the message.
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
+ const char *Type, bool ShowLine) const {
+
+ // First thing to do: find the current buffer containing the specified
+ // location.
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
+
+ // Scan backward to find the start of the line.
+ const char *LineStart = Loc.getPointer();
+ while (LineStart != CurMB->getBufferStart() &&
+ LineStart[-1] != '\n' && LineStart[-1] != '\r')
+ --LineStart;
+
+ std::string LineStr;
+ if (ShowLine) {
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ while (LineEnd != CurMB->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ LineStr = std::string(LineStart, LineEnd);
+ }
+
+ std::string PrintedMsg;
+ raw_string_ostream OS(PrintedMsg);
+ if (Type)
+ OS << Type << ": ";
+ OS << Msg;
+
+ return SMDiagnostic(*this, Loc,
+ CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
+ Loc.getPointer()-LineStart, OS.str(),
+ LineStr, ShowLine);
+}
+
+void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
+ const char *Type, bool ShowLine) const {
+ // Report the message with the diagnostic handler if present.
+ if (DiagHandler) {
+ DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
+ return;
+ }
+
+ raw_ostream &OS = errs();
+
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+ GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
+}
+
+//===----------------------------------------------------------------------===//
+// SMDiagnostic Implementation
+//===----------------------------------------------------------------------===//
+
+void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
+ if (ProgName && ProgName[0])
+ S << ProgName << ": ";
+
+ if (!Filename.empty()) {
+ if (Filename == "-")
+ S << "<stdin>";
+ else
+ S << Filename;
+
+ if (LineNo != -1) {
+ S << ':' << LineNo;
+ if (ColumnNo != -1)
+ S << ':' << (ColumnNo+1);
+ }
+ S << ": ";
+ }
+
+ S << Message << '\n';
+
+ if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
+ S << LineContents << '\n';
+
+ // Print out spaces/tabs before the caret.
+ for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
+ S << (LineContents[i] == '\t' ? '\t' : ' ');
+ S << "^\n";
+ }
+}
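+
+// Illustrative output: for a diagnostic of Type "error" at line 3, column 5
+// of foo.td with ShowLine enabled, Print("prog", S) emits something like:
+//   prog: foo.td:3:5: error: expected identifier
+//   def Foo;
+//       ^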
+
+
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
new file mode 100644
index 000000000000..1e733d92e610
--- /dev/null
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -0,0 +1,152 @@
+//===-- Statistic.cpp - Easy way to expose stats information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the 'Statistic' class, which is designed to be an easy
+// way to expose various success metrics from passes. These statistics are
+// printed at the end of a run, when the -stats command line option is enabled
+// on the command line.
+//
+// This is useful for reporting information like the number of instructions
+// simplified, optimized or removed by various transformations, like this:
+//
+// static Statistic NumInstEliminated("GCSE", "Number of instructions killed");
+//
+// Later, in the code: ++NumInstEliminated;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cstring>
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+/// -stats - Command line option to cause transformations to emit stats about
+/// what they did.
+///
+static cl::opt<bool>
+Enabled("stats", cl::desc("Enable statistics output from program"));
+
+
+namespace {
+/// StatisticInfo - This class is used in a ManagedStatic so that it is created
+/// on demand (when the first statistic is bumped) and destroyed only when
+/// llvm_shutdown is called. We print statistics from the destructor.
+class StatisticInfo {
+ std::vector<const Statistic*> Stats;
+ friend void llvm::PrintStatistics();
+ friend void llvm::PrintStatistics(raw_ostream &OS);
+public:
+ ~StatisticInfo();
+
+ void addStatistic(const Statistic *S) {
+ Stats.push_back(S);
+ }
+};
+}
+
+static ManagedStatic<StatisticInfo> StatInfo;
+static ManagedStatic<sys::SmartMutex<true> > StatLock;
+
+/// RegisterStatistic - The first time a statistic is bumped, this method is
+/// called.
+void Statistic::RegisterStatistic() {
+ // If stats are enabled, inform StatInfo that this statistic should be
+ // printed.
+ sys::SmartScopedLock<true> Writer(*StatLock);
+ if (!Initialized) {
+ if (Enabled)
+ StatInfo->addStatistic(this);
+
+ sys::MemoryFence();
+ // Remember we have been registered.
+ Initialized = true;
+ }
+}
+
+namespace {
+
+struct NameCompare {
+ bool operator()(const Statistic *LHS, const Statistic *RHS) const {
+ int Cmp = std::strcmp(LHS->getName(), RHS->getName());
+ if (Cmp != 0) return Cmp < 0;
+
+ // Secondary key is the description.
+ return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+ }
+};
+
+}
+
+// Print information when destroyed, iff command line option is specified.
+StatisticInfo::~StatisticInfo() {
+ llvm::PrintStatistics();
+}
+
+void llvm::EnableStatistics() {
+ Enabled.setValue(true);
+}
+
+bool llvm::AreStatisticsEnabled() {
+ return Enabled;
+}
+
+void llvm::PrintStatistics(raw_ostream &OS) {
+ StatisticInfo &Stats = *StatInfo;
+
+ // Figure out how long the biggest Value and Name fields are.
+ unsigned MaxNameLen = 0, MaxValLen = 0;
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+ MaxValLen = std::max(MaxValLen,
+ (unsigned)utostr(Stats.Stats[i]->getValue()).size());
+ MaxNameLen = std::max(MaxNameLen,
+ (unsigned)std::strlen(Stats.Stats[i]->getName()));
+ }
+
+ // Sort the fields by name.
+ std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
+
+ // Print out the statistics header...
+ OS << "===" << std::string(73, '-') << "===\n"
+ << " ... Statistics Collected ...\n"
+ << "===" << std::string(73, '-') << "===\n\n";
+
+ // Print all of the statistics.
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+ std::string CountStr = utostr(Stats.Stats[i]->getValue());
+ OS << std::string(MaxValLen-CountStr.size(), ' ')
+ << CountStr << " " << Stats.Stats[i]->getName()
+ << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
+ << " - " << Stats.Stats[i]->getDesc() << "\n";
+ }
+
+  OS << '\n';
+  OS.flush();   // Flush the output stream.
+
+}
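+
+// Illustrative sample of the resulting -stats report (numbers and the
+// statistic itself are invented): each line is a right-aligned count, the
+// statistic name, and its description, e.g.
+//   42 gcse - Number of instructions killed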
+
+void llvm::PrintStatistics() {
+ StatisticInfo &Stats = *StatInfo;
+
+ // Statistics not enabled?
+ if (Stats.Stats.empty()) return;
+
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ PrintStatistics(OutStream);
+ delete &OutStream; // Close the file.
+}
diff --git a/contrib/llvm/lib/Support/StringExtras.cpp b/contrib/llvm/lib/Support/StringExtras.cpp
new file mode 100644
index 000000000000..eb2fa084218a
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringExtras.cpp
@@ -0,0 +1,81 @@
+//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's2' in string 's1', ignoring case. Returns
+/// the offset of s2 in s1 or npos if s2 cannot be found.
+StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
+ size_t N = s2.size(), M = s1.size();
+ if (N > M)
+ return StringRef::npos;
+ for (size_t i = 0, e = M - N + 1; i != e; ++i)
+ if (s1.substr(i, N).equals_lower(s2))
+ return i;
+ return StringRef::npos;
+}
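+
+// e.g. (illustrative) StrInStrNoCase("Hello World", "WORLD") == 6, and the
+// result is StringRef::npos when the needle does not occur at all.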
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string. If
+/// there are no tokens in the source string, an empty string is returned.
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
+ StringRef Delimiters) {
+ // Figure out where the token starts.
+ StringRef::size_type Start = Source.find_first_not_of(Delimiters);
+
+ // Find the next occurrence of the delimiter.
+ StringRef::size_type End = Source.find_first_of(Delimiters, Start);
+
+ return std::make_pair(Source.slice(Start, End), Source.substr(End));
+}
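+
+// e.g. (illustrative) getToken("  foo bar", " ") yields the pair
+// ("foo", " bar"); calling it again on the tail yields ("bar", "").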
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
+void llvm::SplitString(StringRef Source,
+ SmallVectorImpl<StringRef> &OutFragments,
+ StringRef Delimiters) {
+ StringRef S2, S;
+ tie(S2, S) = getToken(Source, Delimiters);
+ while (!S2.empty()) {
+ OutFragments.push_back(S2);
+ tie(S2, S) = getToken(S, Delimiters);
+ }
+}
+
+void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+ // rest.data() is used to distinguish cases like "a," that splits into
+ // "a" + "" and "a" that splits into "a" + 0.
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
+
+ if (p.first.size() != 0 || KeepEmpty)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
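+
+// e.g. (illustrative) StringRef("a,b,,c").split(V, ",", -1, true) appends
+// "a", "b", "", "c" to V; with KeepEmpty == false the empty piece is dropped.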
diff --git a/contrib/llvm/lib/Support/StringMap.cpp b/contrib/llvm/lib/Support/StringMap.cpp
new file mode 100644
index 000000000000..a1ac512fa244
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringMap.cpp
@@ -0,0 +1,230 @@
+//===--- StringMap.cpp - String Hash table map implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cassert>
+using namespace llvm;
+
+StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
+ ItemSize = itemSize;
+
+ // If a size is specified, initialize the table with that many buckets.
+ if (InitSize) {
+ init(InitSize);
+ return;
+ }
+
+ // Otherwise, initialize it with zero buckets to avoid the allocation.
+ TheTable = 0;
+ NumBuckets = 0;
+ NumItems = 0;
+ NumTombstones = 0;
+}
+
+void StringMapImpl::init(unsigned InitSize) {
+ assert((InitSize & (InitSize-1)) == 0 &&
+ "Init Size must be a power of 2 or zero!");
+ NumBuckets = InitSize ? InitSize : 16;
+ NumItems = 0;
+ NumTombstones = 0;
+
+ TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
+
+ // Allocate one extra bucket, set it to look filled so the iterators stop at
+ // end.
+ TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+}
+
+
+/// LookupBucketFor - Look up the bucket that the specified string should end
+/// up in. If it already exists as a key in the map, the Item pointer for the
+/// specified bucket will be non-null. Otherwise, it will be null. In either
+/// case, the FullHashValue field of the bucket will be set to the hash value
+/// of the string.
+unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) { // Hash table unallocated so far?
+ init(16);
+ HTSize = NumBuckets;
+ }
+ unsigned FullHashValue = HashString(Name);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+
+ unsigned ProbeAmt = 1;
+ int FirstTombstone = -1;
+ while (1) {
+ ItemBucket &Bucket = TheTable[BucketNo];
+ StringMapEntryBase *BucketItem = Bucket.Item;
+ // If we found an empty bucket, this key isn't in the table yet, return it.
+ if (BucketItem == 0) {
+ // If we found a tombstone, we want to reuse the tombstone instead of an
+ // empty bucket. This reduces probing.
+ if (FirstTombstone != -1) {
+ TheTable[FirstTombstone].FullHashValue = FullHashValue;
+ return FirstTombstone;
+ }
+
+ Bucket.FullHashValue = FullHashValue;
+ return BucketNo;
+ }
+
+ if (BucketItem == getTombstoneVal()) {
+ // Skip over tombstones. However, remember the first one we see.
+ if (FirstTombstone == -1) FirstTombstone = BucketNo;
+ } else if (Bucket.FullHashValue == FullHashValue) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+ // Do the comparison like this because Name isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing, it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+
+/// FindKey - Look up the bucket that contains the specified key. If it exists
+/// in the map, return the bucket number of the key. Otherwise return -1.
+/// This does not modify the map.
+int StringMapImpl::FindKey(StringRef Key) const {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) return -1; // Really empty table?
+ unsigned FullHashValue = HashString(Key);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+
+ unsigned ProbeAmt = 1;
+ while (1) {
+ ItemBucket &Bucket = TheTable[BucketNo];
+ StringMapEntryBase *BucketItem = Bucket.Item;
+ // If we found an empty bucket, this key isn't in the table yet, return.
+ if (BucketItem == 0)
+ return -1;
+
+ if (BucketItem == getTombstoneVal()) {
+ // Ignore tombstones.
+ } else if (Bucket.FullHashValue == FullHashValue) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+      // Do the comparison like this because Key isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing, it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
+/// delete it. This aborts if the value isn't in the table.
+void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
+ const char *VStr = (char*)V + ItemSize;
+ StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
+ (void)V2;
+ assert(V == V2 && "Didn't find key?");
+}
+
+/// RemoveKey - Remove the StringMapEntry for the specified key from the
+/// table, returning it. If the key is not in the table, this returns null.
+StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
+ int Bucket = FindKey(Key);
+ if (Bucket == -1) return 0;
+
+ StringMapEntryBase *Result = TheTable[Bucket].Item;
+ TheTable[Bucket].Item = getTombstoneVal();
+ --NumItems;
+ ++NumTombstones;
+ assert(NumItems + NumTombstones <= NumBuckets);
+
+ return Result;
+}
+
+
+
+/// RehashTable - Grow the table, redistributing values into the buckets with
+/// the appropriate mod-of-hashtable-size.
+void StringMapImpl::RehashTable() {
+ unsigned NewSize;
+
+ // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow/rehash the table.
+ if (NumItems*4 > NumBuckets*3) {
+ NewSize = NumBuckets*2;
+ } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) {
+ NewSize = NumBuckets;
+ } else {
+ return;
+ }
+
+ // Allocate one extra bucket which will always be non-empty. This allows the
+ // iterators to stop at end.
+ ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
+ NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
+
+ // Rehash all the items into their new buckets. Luckily :) we already have
+ // the hash values available, so we don't have to rehash any strings.
+ for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
+ if (IB->Item && IB->Item != getTombstoneVal()) {
+ // Fast case, bucket available.
+ unsigned FullHash = IB->FullHashValue;
+ unsigned NewBucket = FullHash & (NewSize-1);
+ if (NewTableArray[NewBucket].Item == 0) {
+ NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
+ NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
+ continue;
+ }
+
+ // Otherwise probe for a spot.
+ unsigned ProbeSize = 1;
+ do {
+ NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
+ } while (NewTableArray[NewBucket].Item);
+
+ // Finally found a slot. Fill it in.
+ NewTableArray[NewBucket].Item = IB->Item;
+ NewTableArray[NewBucket].FullHashValue = FullHash;
+ }
+ }
+
+ free(TheTable);
+
+ TheTable = NewTableArray;
+ NumBuckets = NewSize;
+ NumTombstones = 0;
+}
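+
+// For example (illustrative): a 16-bucket table grows to 32 buckets once it
+// holds more than 12 live items; a table merely clogged with tombstones is
+// rehashed at its current size, which resets NumTombstones to zero.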
diff --git a/contrib/llvm/lib/Support/StringPool.cpp b/contrib/llvm/lib/Support/StringPool.cpp
new file mode 100644
index 000000000000..ff607cf8c4ad
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringPool.cpp
@@ -0,0 +1,35 @@
+//===-- StringPool.cpp - Interned string pool -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringPool class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringPool.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+StringPool::StringPool() {}
+
+StringPool::~StringPool() {
+ assert(InternTable.empty() && "PooledStringPtr leaked!");
+}
+
+PooledStringPtr StringPool::intern(StringRef Key) {
+ table_t::iterator I = InternTable.find(Key);
+ if (I != InternTable.end())
+ return PooledStringPtr(&*I);
+
+ entry_t *S = entry_t::Create(Key.begin(), Key.end());
+ S->getValue().Pool = this;
+ InternTable.insert(S);
+
+ return PooledStringPtr(S);
+}
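+
+// Illustrative usage (the pool and key are examples only): interning the
+// same key twice hands back handles to one shared entry:
+//   PooledStringPtr A = Pool.intern("foo");
+//   PooledStringPtr B = Pool.intern("foo");   // A and B share one entry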
diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp
new file mode 100644
index 000000000000..8c3fc094cd11
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringRef.cpp
@@ -0,0 +1,415 @@
+//===-- StringRef.cpp - Lightweight String References ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/OwningPtr.h"
+#include <bitset>
+
+using namespace llvm;
+
+// MSVC emits references to this into the translation units which reference it.
+#ifndef _MSC_VER
+const size_t StringRef::npos;
+#endif
+
+static char ascii_tolower(char x) {
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+ return x;
+}
+
+static bool ascii_isdigit(char x) {
+ return x >= '0' && x <= '9';
+}
+
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ unsigned char LHC = ascii_tolower(Data[I]);
+ unsigned char RHC = ascii_tolower(RHS.Data[I]);
+ if (LHC != RHC)
+ return LHC < RHC ? -1 : 1;
+ }
+
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
+
+/// compare_numeric - Compare strings, handle embedded numbers.
+int StringRef::compare_numeric(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ if (Data[I] == RHS.Data[I])
+ continue;
+ if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
+ // The longer sequence of numbers is larger. This doesn't really handle
+ // prefixed zeros well.
+ for (size_t J = I+1; J != E+1; ++J) {
+ bool ld = J < Length && ascii_isdigit(Data[J]);
+ bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+ if (ld != rd)
+ return rd ? -1 : 1;
+ if (!rd)
+ break;
+ }
+ }
+ return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+ }
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
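+
+// e.g. (illustrative) StringRef("a7").compare_numeric("a12") < 0 because
+// 7 < 12, whereas plain compare() orders "a12" before "a7" ('1' < '7').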
+
+// Compute the edit distance between the two given strings.
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+ bool AllowReplacements,
+ unsigned MaxEditDistance) {
+ // The algorithm implemented below is the "classic"
+ // dynamic-programming algorithm for computing the Levenshtein
+ // distance, which is described here:
+ //
+ // http://en.wikipedia.org/wiki/Levenshtein_distance
+ //
+ // Although the algorithm is typically described using an m x n
+  // array, only two rows are used at a time, so this implementation
+ // just keeps two separate vectors for those two rows.
+ size_type m = size();
+ size_type n = Other.size();
+
+ const unsigned SmallBufferSize = 64;
+ unsigned SmallBuffer[SmallBufferSize];
+ llvm::OwningArrayPtr<unsigned> Allocated;
+ unsigned *previous = SmallBuffer;
+ if (2*(n + 1) > SmallBufferSize) {
+ previous = new unsigned [2*(n+1)];
+ Allocated.reset(previous);
+ }
+ unsigned *current = previous + (n + 1);
+
+ for (unsigned i = 0; i <= n; ++i)
+ previous[i] = i;
+
+ for (size_type y = 1; y <= m; ++y) {
+ current[0] = y;
+ unsigned BestThisRow = current[0];
+
+ for (size_type x = 1; x <= n; ++x) {
+ if (AllowReplacements) {
+ current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
+ min(current[x-1], previous[x])+1);
+ }
+ else {
+ if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
+ else current[x] = min(current[x-1], previous[x]) + 1;
+ }
+ BestThisRow = min(BestThisRow, current[x]);
+ }
+
+ if (MaxEditDistance && BestThisRow > MaxEditDistance)
+ return MaxEditDistance + 1;
+
+ unsigned *tmp = current;
+ current = previous;
+ previous = tmp;
+ }
+
+ unsigned Result = previous[n];
+ return Result;
+}
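+
+// e.g. (illustrative) StringRef("kitten").edit_distance("sitting") == 3 when
+// replacements are allowed: two substitutions plus one insertion.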
+
+//===----------------------------------------------------------------------===//
+// String Searching
+//===----------------------------------------------------------------------===//
+
+
+/// find - Search for the first string \arg Str in the string.
+///
+/// \return - The index of the first occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::find(StringRef Str, size_t From) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+}
+
+/// rfind - Search for the last string \arg Str in the string.
+///
+/// \return - The index of the last occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::rfind(StringRef Str) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t i = Length - N + 1, e = 0; i != e;) {
+ --i;
+ if (substr(i, N).equals(Str))
+ return i;
+ }
+ return npos;
+}
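+
+// e.g. (illustrative) StringRef("abcabc").find("bc") == 1, while
+// StringRef("abcabc").rfind("bc") == 4.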
+
+/// find_first_of - Find the first character in the string that is in \arg
+/// Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_first_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// \arg C or npos if not found.
+StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (Data[i] != C)
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// in the string \arg Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (!CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_last_of - Find the last character in the string that is in \arg Chars,
+/// or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_last_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
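+
+// e.g. (illustrative) for StringRef("Hello"), find_first_of("aeiou") == 1
+// (the 'e') and find_last_of("aeiou") == 4 (the 'o').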
+
+//===----------------------------------------------------------------------===//
+// Helpful Algorithms
+//===----------------------------------------------------------------------===//
+
+/// count - Return the number of non-overlapped occurrences of \arg Str in
+/// the string.
+size_t StringRef::count(StringRef Str) const {
+ size_t Count = 0;
+ size_t N = Str.size();
+ if (N > Length)
+ return 0;
+ for (size_t i = 0, e = Length - N + 1; i != e; ++i)
+ if (substr(i, N).equals(Str))
+ ++Count;
+ return Count;
+}
+
+static unsigned GetAutoSenseRadix(StringRef &Str) {
+ if (Str.startswith("0x")) {
+ Str = Str.substr(2);
+ return 16;
+ } else if (Str.startswith("0b")) {
+ Str = Str.substr(2);
+ return 2;
+ } else if (Str.startswith("0")) {
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+
+/// GetAsUnsignedInteger - Workhorse method that converts an integer character
+/// sequence of radix up to 36 to an unsigned long long value.
+static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Parse all the bytes of the string given this radix. Watch for overflow.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ unsigned long long PrevResult = Result;
+ Result = Result*Radix+CharVal;
+
+ // Check for overflow.
+ if (Result < PrevResult)
+ return true;
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
+ return GetAsUnsignedInteger(*this, Radix, Result);
+}
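+
+// e.g. (illustrative) with unsigned long long V,
+// StringRef("0x1f").getAsInteger(0, V) autosenses radix 16, sets V to 31 and
+// returns false (success); StringRef("12ab").getAsInteger(10, V) returns
+// true (failure) at the first non-decimal character.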
+
+
+bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+ unsigned long long ULLVal;
+
+ // Handle positive strings first.
+ if (empty() || front() != '-') {
+ if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+ // Check for value so large it overflows a signed value.
+ (long long)ULLVal < 0)
+ return true;
+ Result = ULLVal;
+ return false;
+ }
+
+ // Get the positive part of the value.
+ if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+ // Reject values so large they'd overflow as negative signed, but allow
+ // "-0". This negates the unsigned so that the negative isn't undefined
+ // on signed overflow.
+ (long long)-ULLVal > 0)
+ return true;
+
+ Result = -ULLVal;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
+ long long Val;
+ if (getAsInteger(Radix, Val) ||
+ (int)Val != Val)
+ return true;
+ Result = Val;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
+ unsigned long long Val;
+ if (getAsInteger(Radix, Val) ||
+ (unsigned)Val != Val)
+ return true;
+ Result = Val;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+ StringRef Str = *this;
+
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ assert(Radix > 1 && Radix <= 36);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Skip leading zeroes. This can be a significant improvement if
+ // it means we don't need > 64 bits.
+ while (!Str.empty() && Str.front() == '0')
+ Str = Str.substr(1);
+
+ // If it was nothing but zeroes....
+ if (Str.empty()) {
+ Result = APInt(64, 0);
+ return false;
+ }
+
+ // (Over-)estimate the required number of bits.
+ unsigned Log2Radix = 0;
+ while ((1U << Log2Radix) < Radix) Log2Radix++;
+ bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
+
+ unsigned BitWidth = Log2Radix * Str.size();
+ if (BitWidth < Result.getBitWidth())
+ BitWidth = Result.getBitWidth(); // don't shrink the result
+ else
+ Result = Result.zext(BitWidth);
+
+ APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
+ if (!IsPowerOf2Radix) {
+ // These must have the same bit-width as Result.
+ RadixAP = APInt(BitWidth, Radix);
+ CharAP = APInt(BitWidth, 0);
+ }
+
+ // Parse all the bytes of the string given this radix.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ if (IsPowerOf2Radix) {
+ Result <<= Log2Radix;
+ Result |= CharVal;
+ } else {
+ Result *= RadixAP;
+ CharAP = CharVal;
+ Result += CharAP;
+ }
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Support/SystemUtils.cpp b/contrib/llvm/lib/Support/SystemUtils.cpp
new file mode 100644
index 000000000000..54b5e97bfe18
--- /dev/null
+++ b/contrib/llvm/lib/Support/SystemUtils.cpp
@@ -0,0 +1,55 @@
+//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used to do a variety of low-level, often
+// system-specific, tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check,
+ bool print_warning) {
+ if (stream_to_check.is_displayed()) {
+ if (print_warning) {
+ errs() << "WARNING: You're attempting to print out a bitcode file.\n"
+ "This is inadvisable as it may cause display problems. If\n"
+ "you REALLY want to taste LLVM bitcode first-hand, you\n"
+ "can force output with the `-f' option.\n\n";
+ }
+ return true;
+ }
+ return false;
+}
+
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just manipulates the path and doesn't check for executability.
+/// @brief Find a named executable.
+sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName,
+ const char *Argv0, void *MainAddr) {
+ // Check the directory that the calling program is in. We can do
+ // this if ProgramPath contains at least one / character, indicating that it
+ // is a relative path to the executable itself.
+ sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr);
+ Result.eraseComponent();
+
+ if (!Result.isEmpty()) {
+ Result.appendComponent(ExeName);
+ Result.appendSuffix(sys::Path::GetEXESuffix());
+ }
+
+ return Result;
+}
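+
+// e.g. (illustrative) if the running tool is /usr/local/bin/clang, then
+// PrependMainExecutablePath("opt", Argv0, MainAddr) yields
+// /usr/local/bin/opt plus the platform executable suffix, if any.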
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
new file mode 100644
index 000000000000..293a5d7a0168
--- /dev/null
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -0,0 +1,92 @@
+//===--- TargetRegistry.cpp - Target registration -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include <cassert>
+using namespace llvm;
+
+// Clients are responsible for avoiding race conditions in registration.
+static Target *FirstTarget = 0;
+
+TargetRegistry::iterator TargetRegistry::begin() {
+ return iterator(FirstTarget);
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &TT,
+ std::string &Error) {
+ // Provide special warning when no targets are initialized.
+ if (begin() == end()) {
+ Error = "Unable to find target for this triple (no targets are registered)";
+ return 0;
+ }
+ const Target *Best = 0, *EquallyBest = 0;
+ unsigned BestQuality = 0;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (unsigned Qual = it->TripleMatchQualityFn(TT)) {
+ if (!Best || Qual > BestQuality) {
+ Best = &*it;
+ EquallyBest = 0;
+ BestQuality = Qual;
+ } else if (Qual == BestQuality)
+ EquallyBest = &*it;
+ }
+ }
+
+ if (!Best) {
+ Error = "No available targets are compatible with this triple, "
+ "see -version for the available targets.";
+ return 0;
+ }
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ if (EquallyBest) {
+ Error = std::string("Cannot choose between targets \"") +
+ Best->Name + "\" and \"" + EquallyBest->Name + "\"";
+ return 0;
+ }
+
+ return Best;
+}
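+
+// Illustrative usage (assumes some targets have already been registered):
+//   std::string Err;
+//   const Target *T =
+//     TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu", Err);
+//   if (!T) errs() << Err << "\n";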
+
+void TargetRegistry::RegisterTarget(Target &T,
+ const char *Name,
+ const char *ShortDesc,
+ Target::TripleMatchQualityFnTy TQualityFn,
+ bool HasJIT) {
+ assert(Name && ShortDesc && TQualityFn &&
+ "Missing required target information!");
+
+ // Check if this target has already been initialized, we allow this as a
+ // convenience to some clients.
+ if (T.Name)
+ return;
+
+ // Add to the list of targets.
+ T.Next = FirstTarget;
+ FirstTarget = &T;
+
+ T.Name = Name;
+ T.ShortDesc = ShortDesc;
+ T.TripleMatchQualityFn = TQualityFn;
+ T.HasJIT = HasJIT;
+}
+
+const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
+ const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error);
+
+ if (TheTarget && !TheTarget->hasJIT()) {
+ Error = "No JIT compatible target available for this host";
+ return 0;
+ }
+
+ return TheTarget;
+}
+
diff --git a/contrib/llvm/lib/Support/ThreadLocal.cpp b/contrib/llvm/lib/Support/ThreadLocal.cpp
new file mode 100644
index 000000000000..6b43048da155
--- /dev/null
+++ b/contrib/llvm/lib/Support/ThreadLocal.cpp
@@ -0,0 +1,84 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { data = 0; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data(0) {
+ pthread_key_t* key = new pthread_key_t;
+ int errorcode = pthread_key_create(key, NULL);
+ assert(errorcode == 0);
+ (void) errorcode;
+ data = (void*)key;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+ pthread_key_t* key = static_cast<pthread_key_t*>(data);
+ int errorcode = pthread_key_delete(*key);
+ assert(errorcode == 0);
+ (void) errorcode;
+ delete key;
+}
+
+void ThreadLocalImpl::setInstance(const void* d) {
+ pthread_key_t* key = static_cast<pthread_key_t*>(data);
+ int errorcode = pthread_setspecific(*key, d);
+ assert(errorcode == 0);
+ (void) errorcode;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+ pthread_key_t* key = static_cast<pthread_key_t*>(data);
+ return pthread_getspecific(*key);
+}
+
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/ThreadLocal.cpp
+#endif
+#endif
diff --git a/contrib/llvm/lib/Support/Threading.cpp b/contrib/llvm/lib/Support/Threading.cpp
new file mode 100644
index 000000000000..b62b1a94b234
--- /dev/null
+++ b/contrib/llvm/lib/Support/Threading.cpp
@@ -0,0 +1,116 @@
+//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+
+using namespace llvm;
+
+static bool multithreaded_mode = false;
+
+static sys::Mutex* global_lock = 0;
+
+bool llvm::llvm_start_multithreaded() {
+#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1
+ assert(!multithreaded_mode && "Already multithreaded!");
+ multithreaded_mode = true;
+ global_lock = new sys::Mutex(true);
+
+ // We fence here to ensure that all initialization is complete BEFORE we
+ // return from llvm_start_multithreaded().
+ sys::MemoryFence();
+ return true;
+#else
+ return false;
+#endif
+}
+
+void llvm::llvm_stop_multithreaded() {
+#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1
+ assert(multithreaded_mode && "Not currently multithreaded!");
+
+  // We fence here to ensure that all threaded operations are complete BEFORE we
+ // return from llvm_stop_multithreaded().
+ sys::MemoryFence();
+
+ multithreaded_mode = false;
+ delete global_lock;
+#endif
+}
+
+bool llvm::llvm_is_multithreaded() {
+ return multithreaded_mode;
+}
+
+void llvm::llvm_acquire_global_lock() {
+ if (multithreaded_mode) global_lock->acquire();
+}
+
+void llvm::llvm_release_global_lock() {
+ if (multithreaded_mode) global_lock->release();
+}
+
+#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1 && defined(HAVE_PTHREAD_H)
+#include <pthread.h>
+
+struct ThreadInfo {
+ void (*UserFn)(void *);
+ void *UserData;
+};
+static void *ExecuteOnThread_Dispatch(void *Arg) {
+ ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
+ TI->UserFn(TI->UserData);
+ return 0;
+}
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ ThreadInfo Info = { Fn, UserData };
+ pthread_attr_t Attr;
+ pthread_t Thread;
+
+ // Construct the attributes object.
+ if (::pthread_attr_init(&Attr) != 0)
+ return;
+
+ // Set the requested stack size, if given.
+ if (RequestedStackSize != 0) {
+ if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
+ goto error;
+ }
+
+ // Construct and execute the thread.
+ if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
+ goto error;
+
+ // Wait for the thread and clean up.
+ ::pthread_join(Thread, 0);
+
+ error:
+ ::pthread_attr_destroy(&Attr);
+}
+
+#else
+
+// No non-pthread implementation, currently.
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ (void) RequestedStackSize;
+ Fn(UserData);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Support/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
new file mode 100644
index 000000000000..1a0f7bc36394
--- /dev/null
+++ b/contrib/llvm/lib/Support/TimeValue.cpp
@@ -0,0 +1,57 @@
+//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
+const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
+const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
+
+void
+TimeValue::normalize( void ) {
+ if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ } while ( nanos_ >= NANOSECONDS_PER_SECOND );
+ } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } while (nanos_ <= -NANOSECONDS_PER_SECOND);
+ }
+
+ if (seconds_ >= 1 && nanos_ < 0) {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } else if (seconds_ < 0 && nanos_ > 0) {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ }
+}
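+
+// e.g. (illustrative) normalizing (1 s, -200000000 ns) gives
+// (0 s, 800000000 ns), so seconds_ and nanos_ always end up with the same
+// sign (or zero).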
+
+}
+
+/// Include the platform specific portion of TimeValue class
+#ifdef LLVM_ON_UNIX
+#include "Unix/TimeValue.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/TimeValue.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
new file mode 100644
index 000000000000..a9ed5eecfa7e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -0,0 +1,393 @@
+//===-- Timer.cpp - Interval Timing Support -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interval Timing implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Process.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
+// of constructor/destructor ordering being unspecified by C++. Basically the
+// problem is that a Statistic object gets destroyed, which ends up calling
+// 'GetLibSupportInfoOutputFile()' (below), which calls this function.
+// LibSupportInfoOutputFilename used to be a global variable, but sometimes it
+// would get destroyed before the Statistic, causing havoc to ensue. We "fix"
+// this by creating the string the first time it is needed and never destroying
+// it.
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+ return *LibSupportInfoOutputFilename;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
+namespace {
+ static cl::opt<bool>
+ TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+ "tracking (this may be slow)"),
+ cl::Hidden);
+
+ static cl::opt<std::string, true>
+ InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+ cl::desc("File to append -stats and -timer output to"),
+ cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+raw_ostream *llvm::CreateInfoOutputFile() {
+ const std::string &OutputFilename = getLibSupportInfoOutputFilename();
+ if (OutputFilename.empty())
+ return new raw_fd_ostream(2, false); // stderr.
+ if (OutputFilename == "-")
+ return new raw_fd_ostream(1, false); // stdout.
+
+ // Append mode is used because the info output file is opened and closed
+ // each time -stats or -time-passes wants to print output to it. To
+ // compensate for this, the test-suite Makefiles have code to delete the
+ // info output file before running commands which write to it.
+ std::string Error;
+ raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
+ Error, raw_fd_ostream::F_Append);
+ if (Error.empty())
+ return Result;
+
+ errs() << "Error opening info-output-file '"
+         << OutputFilename << "' for appending!\n";
+ delete Result;
+ return new raw_fd_ostream(2, false); // stderr.
+}
+
+
+static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *getDefaultTimerGroup() {
+ TimerGroup *tmp = DefaultTimerGroup;
+ sys::MemoryFence();
+ if (tmp) return tmp;
+
+ llvm_acquire_global_lock();
+ tmp = DefaultTimerGroup;
+ if (!tmp) {
+ tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
+ sys::MemoryFence();
+ DefaultTimerGroup = tmp;
+ }
+ llvm_release_global_lock();
+
+ return tmp;
+}
+
+//===----------------------------------------------------------------------===//
+// Timer Implementation
+//===----------------------------------------------------------------------===//
+
+void Timer::init(StringRef N) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = getDefaultTimerGroup();
+ TG->addTimer(*this);
+}
+
+void Timer::init(StringRef N, TimerGroup &tg) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = &tg;
+ TG->addTimer(*this);
+}
+
+Timer::~Timer() {
+ if (!TG) return; // Never initialized, or already cleared.
+ TG->removeTimer(*this);
+}
+
+static inline size_t getMemUsage() {
+ if (!TrackSpace) return 0;
+ return sys::Process::GetMallocUsage();
+}
+
+TimeRecord TimeRecord::getCurrentTime(bool Start) {
+ TimeRecord Result;
+ sys::TimeValue now(0,0), user(0,0), sys(0,0);
+
+ if (Start) {
+ Result.MemUsed = getMemUsage();
+ sys::Process::GetTimeUsage(now, user, sys);
+ } else {
+ sys::Process::GetTimeUsage(now, user, sys);
+ Result.MemUsed = getMemUsage();
+ }
+
+ Result.WallTime = now.seconds() + now.microseconds() / 1000000.0;
+ Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+ Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
+ return Result;
+}
+
+static ManagedStatic<std::vector<Timer*> > ActiveTimers;
+
+void Timer::startTimer() {
+ Started = true;
+ ActiveTimers->push_back(this);
+ Time -= TimeRecord::getCurrentTime(true);
+}
+
+void Timer::stopTimer() {
+ Time += TimeRecord::getCurrentTime(false);
+
+ if (ActiveTimers->back() == this) {
+ ActiveTimers->pop_back();
+ } else {
+ std::vector<Timer*>::iterator I =
+ std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
+ assert(I != ActiveTimers->end() && "stop but no startTimer?");
+ ActiveTimers->erase(I);
+ }
+}
+
+static void printVal(double Val, double Total, raw_ostream &OS) {
+ if (Total < 1e-7) // Avoid dividing by zero.
+ OS << " ----- ";
+ else {
+ OS << " " << format("%7.4f", Val) << " (";
+ OS << format("%5.1f", Val*100/Total) << "%)";
+ }
+}
+
+void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
+ if (Total.getUserTime())
+ printVal(getUserTime(), Total.getUserTime(), OS);
+ if (Total.getSystemTime())
+ printVal(getSystemTime(), Total.getSystemTime(), OS);
+ if (Total.getProcessTime())
+ printVal(getProcessTime(), Total.getProcessTime(), OS);
+ printVal(getWallTime(), Total.getWallTime(), OS);
+
+ OS << " ";
+
+ if (Total.getMemUsed())
+ OS << format("%9lld", (long long)getMemUsed()) << " ";
+}
+
+
+//===----------------------------------------------------------------------===//
+// NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef StringMap<Timer> Name2TimerMap;
+
+class Name2PairMap {
+ StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
+public:
+ ~Name2PairMap() {
+ for (StringMap<std::pair<TimerGroup*, Name2TimerMap> >::iterator
+ I = Map.begin(), E = Map.end(); I != E; ++I)
+ delete I->second.first;
+ }
+
+ Timer &get(StringRef Name, StringRef GroupName) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
+
+ if (!GroupEntry.first)
+ GroupEntry.first = new TimerGroup(GroupName);
+
+ Timer &T = GroupEntry.second[Name];
+ if (!T.isInitialized())
+ T.init(Name, *GroupEntry.first);
+ return T;
+ }
+};
+
+}
+
+static ManagedStatic<Name2TimerMap> NamedTimers;
+static ManagedStatic<Name2PairMap> NamedGroupedTimers;
+
+static Timer &getNamedRegionTimer(StringRef Name) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ Timer &T = (*NamedTimers)[Name];
+ if (!T.isInitialized())
+ T.init(Name);
+ return T;
+}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {}
+
+//===----------------------------------------------------------------------===//
+// TimerGroup Implementation
+//===----------------------------------------------------------------------===//
+
+/// TimerGroupList - This is the global list of TimerGroups, maintained by the
+/// TimerGroup ctor/dtor and is protected by the TimerLock lock.
+static TimerGroup *TimerGroupList = 0;
+
+TimerGroup::TimerGroup(StringRef name)
+ : Name(name.begin(), name.end()), FirstTimer(0) {
+
+ // Add the group to TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ if (TimerGroupList)
+ TimerGroupList->Prev = &Next;
+ Next = TimerGroupList;
+ Prev = &TimerGroupList;
+ TimerGroupList = this;
+}
+
+TimerGroup::~TimerGroup() {
+ // If the timer group is destroyed before the timers it owns are destroyed,
+ // accumulate and print the timing data.
+ while (FirstTimer != 0)
+ removeTimer(*FirstTimer);
+
+ // Remove the group from the TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ *Prev = Next;
+ if (Next)
+ Next->Prev = Prev;
+}
+
+
+void TimerGroup::removeTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // If the timer was started, move its data to TimersToPrint.
+ if (T.Started)
+ TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+
+ T.TG = 0;
+
+ // Unlink the timer from our list.
+ *T.Prev = T.Next;
+ if (T.Next)
+ T.Next->Prev = T.Prev;
+
+ // Print the report when all timers in this group are destroyed if some of
+ // them were started.
+ if (FirstTimer != 0 || TimersToPrint.empty())
+ return;
+
+ raw_ostream *OutStream = CreateInfoOutputFile();
+ PrintQueuedTimers(*OutStream);
+ delete OutStream; // Close the file.
+}
+
+void TimerGroup::addTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // Add the timer to our list.
+ if (FirstTimer)
+ FirstTimer->Prev = &T.Next;
+ T.Next = FirstTimer;
+ T.Prev = &FirstTimer;
+ FirstTimer = &T;
+}
+
+void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
+ // Sort the timers in descending order by amount of time taken.
+ std::sort(TimersToPrint.begin(), TimersToPrint.end());
+
+ TimeRecord Total;
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+ Total += TimersToPrint[i].first;
+
+ // Print out timing header.
+ OS << "===" << std::string(73, '-') << "===\n";
+ // Figure out how many spaces to indent TimerGroup name.
+ unsigned Padding = (80-Name.length())/2;
+ if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
+ OS.indent(Padding) << Name << '\n';
+ OS << "===" << std::string(73, '-') << "===\n";
+
+ // If this is not a collection of ungrouped timers, print the total time.
+ // Ungrouped timers don't really make sense to add up. We still print the
+ // TOTAL line to make the percentages make sense.
+ if (this != DefaultTimerGroup) {
+ OS << " Total Execution Time: ";
+ OS << format("%5.4f", Total.getProcessTime()) << " seconds (";
+ OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
+ }
+ OS << '\n';
+
+ if (Total.getUserTime())
+ OS << " ---User Time---";
+ if (Total.getSystemTime())
+ OS << " --System Time--";
+ if (Total.getProcessTime())
+ OS << " --User+System--";
+ OS << " ---Wall Time---";
+ if (Total.getMemUsed())
+ OS << " ---Mem---";
+ OS << " --- Name ---\n";
+
+ // Loop through all of the timing data, printing it out.
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) {
+ const std::pair<TimeRecord, std::string> &Entry = TimersToPrint[e-i-1];
+ Entry.first.print(Total, OS);
+ OS << Entry.second << '\n';
+ }
+
+ Total.print(Total, OS);
+ OS << "Total\n\n";
+ OS.flush();
+
+ TimersToPrint.clear();
+}
+
+/// print - Print any started timers in this group and zero them.
+void TimerGroup::print(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // See if any of our timers were started, if so add them to TimersToPrint and
+ // reset them.
+ for (Timer *T = FirstTimer; T; T = T->Next) {
+ if (!T->Started) continue;
+ TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+
+ // Clear out the time.
+ T->Started = 0;
+ T->Time = TimeRecord();
+ }
+
+ // If any timers were started, print the group.
+ if (!TimersToPrint.empty())
+ PrintQueuedTimers(OS);
+}
+
+/// printAll - This static method prints all timers and clears them all out.
+void TimerGroup::printAll(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ TG->print(OS);
+}
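
The Timer, NamedRegionTimer and TimerGroup pieces above are all driven by scoped objects behind the shared TimerLock. A minimal usage sketch, assuming the "llvm/Support/Timer.h" header that declares these classes (its NamedRegionTimer constructors default Enabled to true):

  #include "llvm/Support/Timer.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void expensiveWork() {
    volatile unsigned Sink = 0;               // stand-in for real work
    for (unsigned i = 0; i != 1000000; ++i)
      Sink += i;
  }

  int main() {
    {
      // Named, grouped region: the Timer is created lazily, registered in the
      // "frontend" TimerGroup, started here and stopped at scope exit.
      NamedRegionTimer T("parse", "frontend", true);
      expensiveWork();
    }

    // Explicit group: start/stop by hand, then print and reset the group.
    TimerGroup TG("demo timers");
    Timer T1("step 1", TG);
    T1.startTimer();
    expensiveWork();
    T1.stopTimer();
    TG.print(errs());
    return 0;
  }
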
diff --git a/contrib/llvm/lib/Support/ToolOutputFile.cpp b/contrib/llvm/lib/Support/ToolOutputFile.cpp
new file mode 100644
index 000000000000..e7ca927ea537
--- /dev/null
+++ b/contrib/llvm/lib/Support/ToolOutputFile.cpp
@@ -0,0 +1,43 @@
+//===--- ToolOutputFile.cpp - Implement the tool_output_file class --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+using namespace llvm;
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+ : Filename(filename), Keep(false) {
+ // Arrange for the file to be deleted if the process is killed.
+ if (Filename != "-")
+ sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+ // Delete the file if the client hasn't told us not to.
+ if (!Keep && Filename != "-")
+ sys::Path(Filename).eraseFromDisk();
+
+ // Ok, the file is successfully written and closed, or deleted. There's no
+ // further need to clean it up on signals.
+ if (Filename != "-")
+ sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Installer(filename),
+ OS(filename, ErrorInfo, Flags) {
+ // If open fails, no cleanup is needed.
+ if (!ErrorInfo.empty())
+ Installer.Keep = true;
+}
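
tool_output_file above pairs a raw_fd_ostream with a CleanupInstaller so the output file disappears on error or on a fatal signal unless the tool explicitly commits it. A hedged sketch of the intended calling pattern (flags left at 0; the os() and keep() accessors are assumed from the ToolOutputFile.h header included above):

  #include "llvm/Support/ToolOutputFile.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    std::string ErrorInfo;
    tool_output_file Out("output.ll", ErrorInfo, 0 /* no raw_fd_ostream flags */);
    if (!ErrorInfo.empty()) {
      errs() << ErrorInfo << '\n';
      return 1;              // nothing to clean up; Keep was set in the ctor
    }

    Out.os() << "; generated output\n";

    // Commit the file only after all output was written successfully;
    // otherwise the CleanupInstaller destructor deletes it.
    Out.keep();
    return 0;
  }
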
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
new file mode 100644
index 000000000000..7e094ee78f36
--- /dev/null
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -0,0 +1,640 @@
+//===--- Triple.cpp - Target triple helper class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//
+
+const char *Triple::getArchTypeName(ArchType Kind) {
+ switch (Kind) {
+ case InvalidArch: return "<invalid>";
+ case UnknownArch: return "unknown";
+
+ case alpha: return "alpha";
+ case arm: return "arm";
+ case bfin: return "bfin";
+ case cellspu: return "cellspu";
+ case mips: return "mips";
+ case mipsel: return "mipsel";
+ case msp430: return "msp430";
+ case ppc64: return "powerpc64";
+ case ppc: return "powerpc";
+ case sparc: return "sparc";
+ case sparcv9: return "sparcv9";
+ case systemz: return "s390x";
+ case tce: return "tce";
+ case thumb: return "thumb";
+ case x86: return "i386";
+ case x86_64: return "x86_64";
+ case xcore: return "xcore";
+ case mblaze: return "mblaze";
+ case ptx32: return "ptx32";
+ case ptx64: return "ptx64";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getArchTypePrefix(ArchType Kind) {
+ switch (Kind) {
+ default:
+ return 0;
+
+ case alpha: return "alpha";
+
+ case arm:
+ case thumb: return "arm";
+
+ case bfin: return "bfin";
+
+ case cellspu: return "spu";
+
+ case ppc64:
+ case ppc: return "ppc";
+
+ case mblaze: return "mblaze";
+
+ case sparcv9:
+ case sparc: return "sparc";
+
+ case x86:
+ case x86_64: return "x86";
+
+ case xcore: return "xcore";
+
+ case ptx32: return "ptx";
+ case ptx64: return "ptx";
+ }
+}
+
+const char *Triple::getVendorTypeName(VendorType Kind) {
+ switch (Kind) {
+ case UnknownVendor: return "unknown";
+
+ case Apple: return "apple";
+ case PC: return "pc";
+ case SCEI: return "scei";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getOSTypeName(OSType Kind) {
+ switch (Kind) {
+ case UnknownOS: return "unknown";
+
+ case AuroraUX: return "auroraux";
+ case Cygwin: return "cygwin";
+ case Darwin: return "darwin";
+ case DragonFly: return "dragonfly";
+ case FreeBSD: return "freebsd";
+ case IOS: return "ios";
+ case Linux: return "linux";
+ case Lv2: return "lv2";
+ case MacOSX: return "macosx";
+ case MinGW32: return "mingw32";
+ case NetBSD: return "netbsd";
+ case OpenBSD: return "openbsd";
+ case Psp: return "psp";
+ case Solaris: return "solaris";
+ case Win32: return "win32";
+ case Haiku: return "haiku";
+ case Minix: return "minix";
+ case RTEMS: return "rtems";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
+ switch (Kind) {
+ case UnknownEnvironment: return "unknown";
+ case GNU: return "gnu";
+ case GNUEABI: return "gnueabi";
+ case EABI: return "eabi";
+ case MachO: return "macho";
+ }
+
+ return "<invalid>";
+}
+
+Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
+ if (Name == "alpha")
+ return alpha;
+ if (Name == "arm")
+ return arm;
+ if (Name == "bfin")
+ return bfin;
+ if (Name == "cellspu")
+ return cellspu;
+ if (Name == "mips")
+ return mips;
+ if (Name == "mipsel")
+ return mipsel;
+ if (Name == "msp430")
+ return msp430;
+ if (Name == "ppc64")
+ return ppc64;
+ if (Name == "ppc")
+ return ppc;
+ if (Name == "mblaze")
+ return mblaze;
+ if (Name == "sparc")
+ return sparc;
+ if (Name == "sparcv9")
+ return sparcv9;
+ if (Name == "systemz")
+ return systemz;
+ if (Name == "tce")
+ return tce;
+ if (Name == "thumb")
+ return thumb;
+ if (Name == "x86")
+ return x86;
+ if (Name == "x86-64")
+ return x86_64;
+ if (Name == "xcore")
+ return xcore;
+ if (Name == "ptx32")
+ return ptx32;
+ if (Name == "ptx64")
+ return ptx64;
+
+ return UnknownArch;
+}
+
+Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
+ // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
+ // archs which Darwin doesn't use.
+
+ // The matching this routine does is fairly pointless, since it is neither the
+ // complete architecture list, nor a reasonable subset. The problem is that
+ // historically the driver driver accepts this and also ties its -march=
+ // handling to the architecture name, so we need to be careful before removing
+ // support for it.
+
+ // This code must be kept in sync with Clang's Darwin specific argument
+ // translation.
+
+ if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" ||
+ Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" ||
+ Str == "ppc7450" || Str == "ppc970")
+ return Triple::ppc;
+
+ if (Str == "ppc64")
+ return Triple::ppc64;
+
+ if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" ||
+ Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" ||
+ Str == "pentIIm5" || Str == "pentium4")
+ return Triple::x86;
+
+ if (Str == "x86_64")
+ return Triple::x86_64;
+
+ // This is derived from the driver driver.
+ if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" ||
+ Str == "armv6" || Str == "armv7")
+ return Triple::arm;
+
+ if (Str == "ptx32")
+ return Triple::ptx32;
+ if (Str == "ptx64")
+ return Triple::ptx64;
+
+ return Triple::UnknownArch;
+}
+
+// Returns architecture name that is understood by the target assembler.
+const char *Triple::getArchNameForAssembler() {
+ if (!isOSDarwin() && getVendor() != Triple::Apple)
+ return NULL;
+
+ StringRef Str = getArchName();
+ if (Str == "i386")
+ return "i386";
+ if (Str == "x86_64")
+ return "x86_64";
+ if (Str == "powerpc")
+ return "ppc";
+ if (Str == "powerpc64")
+ return "ppc64";
+ if (Str == "mblaze" || Str == "microblaze")
+ return "mblaze";
+ if (Str == "arm")
+ return "arm";
+ if (Str == "armv4t" || Str == "thumbv4t")
+ return "armv4t";
+ if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5"
+ || Str == "thumbv5e")
+ return "armv5";
+ if (Str == "armv6" || Str == "thumbv6")
+ return "armv6";
+ if (Str == "armv7" || Str == "thumbv7")
+ return "armv7";
+ if (Str == "ptx32")
+ return "ptx32";
+ if (Str == "ptx64")
+ return "ptx64";
+ return NULL;
+}
+
+//
+
+Triple::ArchType Triple::ParseArch(StringRef ArchName) {
+ if (ArchName.size() == 4 && ArchName[0] == 'i' &&
+ ArchName[2] == '8' && ArchName[3] == '6' &&
+ ArchName[1] - '3' < 6) // i[3-9]86
+ return x86;
+ else if (ArchName == "amd64" || ArchName == "x86_64")
+ return x86_64;
+ else if (ArchName == "bfin")
+ return bfin;
+ else if (ArchName == "powerpc")
+ return ppc;
+ else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
+ return ppc64;
+ else if (ArchName == "mblaze")
+ return mblaze;
+ else if (ArchName == "arm" ||
+ ArchName.startswith("armv") ||
+ ArchName == "xscale")
+ return arm;
+ else if (ArchName == "thumb" ||
+ ArchName.startswith("thumbv"))
+ return thumb;
+ else if (ArchName.startswith("alpha"))
+ return alpha;
+ else if (ArchName == "spu" || ArchName == "cellspu")
+ return cellspu;
+ else if (ArchName == "msp430")
+ return msp430;
+ else if (ArchName == "mips" || ArchName == "mipseb" ||
+ ArchName == "mipsallegrex")
+ return mips;
+ else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
+ ArchName == "psp")
+ return mipsel;
+ else if (ArchName == "sparc")
+ return sparc;
+ else if (ArchName == "sparcv9")
+ return sparcv9;
+ else if (ArchName == "s390x")
+ return systemz;
+ else if (ArchName == "tce")
+ return tce;
+ else if (ArchName == "xcore")
+ return xcore;
+ else if (ArchName == "ptx32")
+ return ptx32;
+ else if (ArchName == "ptx64")
+ return ptx64;
+ else
+ return UnknownArch;
+}
+
+Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
+ if (VendorName == "apple")
+ return Apple;
+ else if (VendorName == "pc")
+ return PC;
+ else if (VendorName == "scei")
+ return SCEI;
+ else
+ return UnknownVendor;
+}
+
+Triple::OSType Triple::ParseOS(StringRef OSName) {
+ if (OSName.startswith("auroraux"))
+ return AuroraUX;
+ else if (OSName.startswith("cygwin"))
+ return Cygwin;
+ else if (OSName.startswith("darwin"))
+ return Darwin;
+ else if (OSName.startswith("dragonfly"))
+ return DragonFly;
+ else if (OSName.startswith("freebsd"))
+ return FreeBSD;
+ else if (OSName.startswith("ios"))
+ return IOS;
+ else if (OSName.startswith("linux"))
+ return Linux;
+ else if (OSName.startswith("lv2"))
+ return Lv2;
+ else if (OSName.startswith("macosx"))
+ return MacOSX;
+ else if (OSName.startswith("mingw32"))
+ return MinGW32;
+ else if (OSName.startswith("netbsd"))
+ return NetBSD;
+ else if (OSName.startswith("openbsd"))
+ return OpenBSD;
+ else if (OSName.startswith("psp"))
+ return Psp;
+ else if (OSName.startswith("solaris"))
+ return Solaris;
+ else if (OSName.startswith("win32"))
+ return Win32;
+ else if (OSName.startswith("haiku"))
+ return Haiku;
+ else if (OSName.startswith("minix"))
+ return Minix;
+ else if (OSName.startswith("rtems"))
+ return RTEMS;
+ else
+ return UnknownOS;
+}
+
+Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) {
+ if (EnvironmentName.startswith("eabi"))
+ return EABI;
+ else if (EnvironmentName.startswith("gnueabi"))
+ return GNUEABI;
+ else if (EnvironmentName.startswith("gnu"))
+ return GNU;
+ else if (EnvironmentName.startswith("macho"))
+ return MachO;
+ else
+ return UnknownEnvironment;
+}
+
+void Triple::Parse() const {
+ assert(!isInitialized() && "Invalid parse call.");
+
+ Arch = ParseArch(getArchName());
+ Vendor = ParseVendor(getVendorName());
+ OS = ParseOS(getOSName());
+ Environment = ParseEnvironment(getEnvironmentName());
+
+ assert(isInitialized() && "Failed to initialize!");
+}
+
+std::string Triple::normalize(StringRef Str) {
+ // Parse into components.
+ SmallVector<StringRef, 4> Components;
+ for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) {
+ Last = Str.find('-', First);
+ Components.push_back(Str.slice(First, Last));
+ }
+
+ // If the first component corresponds to a known architecture, preferentially
+ // use it for the architecture. If the second component corresponds to a
+ // known vendor, preferentially use it for the vendor, etc. This avoids silly
+ // component movement when a component parses as (eg) both a valid arch and a
+ // valid os.
+ ArchType Arch = UnknownArch;
+ if (Components.size() > 0)
+ Arch = ParseArch(Components[0]);
+ VendorType Vendor = UnknownVendor;
+ if (Components.size() > 1)
+ Vendor = ParseVendor(Components[1]);
+ OSType OS = UnknownOS;
+ if (Components.size() > 2)
+ OS = ParseOS(Components[2]);
+ EnvironmentType Environment = UnknownEnvironment;
+ if (Components.size() > 3)
+ Environment = ParseEnvironment(Components[3]);
+
+ // Note which components are already in their final position. These will not
+ // be moved.
+ bool Found[4];
+ Found[0] = Arch != UnknownArch;
+ Found[1] = Vendor != UnknownVendor;
+ Found[2] = OS != UnknownOS;
+ Found[3] = Environment != UnknownEnvironment;
+
+ // If they are not there already, permute the components into their canonical
+ // positions by seeing if they parse as a valid architecture, and if so moving
+ // the component to the architecture position etc.
+ for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
+ if (Found[Pos])
+ continue; // Already in the canonical position.
+
+ for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
+ // Do not reparse any components that already matched.
+ if (Idx < array_lengthof(Found) && Found[Idx])
+ continue;
+
+ // Does this component parse as valid for the target position?
+ bool Valid = false;
+ StringRef Comp = Components[Idx];
+ switch (Pos) {
+ default:
+ assert(false && "unexpected component type!");
+ case 0:
+ Arch = ParseArch(Comp);
+ Valid = Arch != UnknownArch;
+ break;
+ case 1:
+ Vendor = ParseVendor(Comp);
+ Valid = Vendor != UnknownVendor;
+ break;
+ case 2:
+ OS = ParseOS(Comp);
+ Valid = OS != UnknownOS;
+ break;
+ case 3:
+ Environment = ParseEnvironment(Comp);
+ Valid = Environment != UnknownEnvironment;
+ break;
+ }
+ if (!Valid)
+ continue; // Nope, try the next component.
+
+ // Move the component to the target position, pushing any non-fixed
+ // components that are in the way to the right. This tends to give
+ // good results in the common cases of a forgotten vendor component
+ // or a wrongly positioned environment.
+ if (Pos < Idx) {
+ // Insert left, pushing the existing components to the right. For
+ // example, a-b-i386 -> i386-a-b when moving i386 to the front.
+ StringRef CurrentComponent(""); // The empty component.
+ // Replace the component we are moving with an empty component.
+ std::swap(CurrentComponent, Components[Idx]);
+ // Insert the component being moved at Pos, displacing any existing
+ // components to the right.
+ for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
+ // Skip over any fixed components.
+ while (i < array_lengthof(Found) && Found[i]) ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ }
+ } else if (Pos > Idx) {
+ // Push right by inserting empty components until the component at Idx
+ // reaches the target position Pos. For example, pc-a -> -pc-a when
+ // moving pc to the second position.
+ do {
+ // Insert one empty component at Idx.
+ StringRef CurrentComponent(""); // The empty component.
+ for (unsigned i = Idx; i < Components.size();) {
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ // If it was placed on top of an empty component then we are done.
+ if (CurrentComponent.empty())
+ break;
+ // Advance to the next component, skipping any fixed components.
+ while (++i < array_lengthof(Found) && Found[i])
+ ;
+ }
+ // The last component was pushed off the end - append it.
+ if (!CurrentComponent.empty())
+ Components.push_back(CurrentComponent);
+
+ // Advance Idx to the component's new position.
+ while (++Idx < array_lengthof(Found) && Found[Idx]) {}
+ } while (Idx < Pos); // Add more until the final position is reached.
+ }
+ assert(Pos < Components.size() && Components[Pos] == Comp &&
+ "Component moved wrong!");
+ Found[Pos] = true;
+ break;
+ }
+ }
+
+ // Special case logic goes here. At this point Arch, Vendor and OS have the
+ // correct values for the computed components.
+
+ // Stick the corrected components back together to form the normalized string.
+ std::string Normalized;
+ for (unsigned i = 0, e = Components.size(); i != e; ++i) {
+ if (i) Normalized += '-';
+ Normalized += Components[i];
+ }
+ return Normalized;
+}
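
The permutation logic above only reorders recognized components and pads with empty strings; it never invents values. A hedged sketch of what normalize() yields for the examples mentioned in the comments:

  #include "llvm/ADT/Triple.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    // A recognized component is moved into its canonical slot, displacing the
    // unknown ones to the right: "a-b-i386" -> "i386-a-b".
    assert(Triple::normalize("a-b-i386") == "i386-a-b");

    // Slots to the left of a recognized component are filled with empty
    // strings: "pc-a" -> "-pc-a", since "pc" is a vendor and lands second.
    assert(Triple::normalize("pc-a") == "-pc-a");

    // Already-canonical triples pass through unchanged.
    assert(Triple::normalize("x86_64-pc-linux-gnu") == "x86_64-pc-linux-gnu");
    return 0;
  }
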
+
+StringRef Triple::getArchName() const {
+ return StringRef(Data).split('-').first; // Isolate first component
+}
+
+StringRef Triple::getVendorName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').first; // Isolate second component
+}
+
+StringRef Triple::getOSName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').first; // Isolate third component
+}
+
+StringRef Triple::getEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').second; // Strip third component
+}
+
+StringRef Triple::getOSAndEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').second; // Strip second component
+}
+
+static unsigned EatNumber(StringRef &Str) {
+ assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
+ unsigned Result = 0;
+
+ do {
+ // Consume the leading digit.
+ Result = Result*10 + (Str[0] - '0');
+
+ // Eat the digit.
+ Str = Str.substr(1);
+ } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9');
+
+ return Result;
+}
+
+void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ StringRef OSName = getOSName();
+
+ // Assume that the OS portion of the triple starts with the canonical name.
+ StringRef OSTypeName = getOSTypeName(getOS());
+ if (OSName.startswith(OSTypeName))
+ OSName = OSName.substr(OSTypeName.size());
+
+ // Any unset version defaults to 0.
+ Major = Minor = Micro = 0;
+
+ // Parse up to three components.
+ unsigned *Components[3] = { &Major, &Minor, &Micro };
+ for (unsigned i = 0; i != 3; ++i) {
+ if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ break;
+
+ // Consume the leading number.
+ *Components[i] = EatNumber(OSName);
+
+ // Consume the separator, if present.
+ if (OSName.startswith("."))
+ OSName = OSName.substr(1);
+ }
+}
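
getOSVersion() above strips the canonical OS name and then consumes up to three dot-separated numbers, leaving the rest at zero. A hedged sketch (the Triple constructor taking the full triple string is assumed from the header):

  #include "llvm/ADT/Triple.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    Triple T("x86_64-apple-darwin10.4.0");
    unsigned Major, Minor, Micro;
    T.getOSVersion(Major, Minor, Micro);
    assert(Major == 10 && Minor == 4 && Micro == 0);

    Triple U("i386-unknown-freebsd9");   // only the major version is present
    U.getOSVersion(Major, Minor, Micro);
    assert(Major == 9 && Minor == 0 && Micro == 0);
    return 0;
  }
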
+
+void Triple::setTriple(const Twine &Str) {
+ Data = Str.str();
+ Arch = InvalidArch;
+}
+
+void Triple::setArch(ArchType Kind) {
+ setArchName(getArchTypeName(Kind));
+}
+
+void Triple::setVendor(VendorType Kind) {
+ setVendorName(getVendorTypeName(Kind));
+}
+
+void Triple::setOS(OSType Kind) {
+ setOSName(getOSTypeName(Kind));
+}
+
+void Triple::setEnvironment(EnvironmentType Kind) {
+ setEnvironmentName(getEnvironmentTypeName(Kind));
+}
+
+void Triple::setArchName(StringRef Str) {
+ // Work around a miscompilation bug for Twines in gcc 4.0.3.
+ SmallString<64> Triple;
+ Triple += Str;
+ Triple += "-";
+ Triple += getVendorName();
+ Triple += "-";
+ Triple += getOSAndEnvironmentName();
+ setTriple(Triple.str());
+}
+
+void Triple::setVendorName(StringRef Str) {
+ setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setOSName(StringRef Str) {
+ if (hasEnvironment())
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
+ "-" + getEnvironmentName());
+ else
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+void Triple::setEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+ "-" + Str);
+}
+
+void Triple::setOSAndEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp
new file mode 100644
index 000000000000..d62123cc985e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Twine.cpp
@@ -0,0 +1,165 @@
+//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+std::string Twine::str() const {
+ // If we're storing only a std::string, just return it.
+ if (LHSKind == StdStringKind && RHSKind == EmptyKind)
+ return *static_cast<const std::string*>(LHS);
+
+ // Otherwise, flatten and copy the contents first.
+ SmallString<256> Vec;
+ return toStringRef(Vec).str();
+}
+
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+ raw_svector_ostream OS(Out);
+ print(OS);
+}
+
+StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
+ if (isSingleStringRef())
+ return getSingleStringRef();
+ toVector(Out);
+ return StringRef(Out.data(), Out.size());
+}
+
+StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
+ if (isUnary()) {
+ switch (getLHSKind()) {
+ case CStringKind:
+ // Already null terminated, yay!
+ return StringRef(static_cast<const char*>(LHS));
+ case StdStringKind: {
+ const std::string *str = static_cast<const std::string*>(LHS);
+ return StringRef(str->c_str(), str->size());
+ }
+ default:
+ break;
+ }
+ }
+ toVector(Out);
+ Out.push_back(0);
+ Out.pop_back();
+ return StringRef(Out.data(), Out.size());
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind: break;
+ case Twine::EmptyKind: break;
+ case Twine::TwineKind:
+ static_cast<const Twine*>(Ptr)->print(OS);
+ break;
+ case Twine::CStringKind:
+ OS << static_cast<const char*>(Ptr);
+ break;
+ case Twine::StdStringKind:
+ OS << *static_cast<const std::string*>(Ptr);
+ break;
+ case Twine::StringRefKind:
+ OS << *static_cast<const StringRef*>(Ptr);
+ break;
+ case Twine::DecUIKind:
+ OS << (unsigned)(uintptr_t)Ptr;
+ break;
+ case Twine::DecIKind:
+ OS << (int)(intptr_t)Ptr;
+ break;
+ case Twine::DecULKind:
+ OS << *static_cast<const unsigned long*>(Ptr);
+ break;
+ case Twine::DecLKind:
+ OS << *static_cast<const long*>(Ptr);
+ break;
+ case Twine::DecULLKind:
+ OS << *static_cast<const unsigned long long*>(Ptr);
+ break;
+ case Twine::DecLLKind:
+ OS << *static_cast<const long long*>(Ptr);
+ break;
+ case Twine::UHexKind:
+ OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+ break;
+ }
+}
+
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind:
+ OS << "null"; break;
+ case Twine::EmptyKind:
+ OS << "empty"; break;
+ case Twine::TwineKind:
+ OS << "rope:";
+ static_cast<const Twine*>(Ptr)->printRepr(OS);
+ break;
+ case Twine::CStringKind:
+ OS << "cstring:\""
+ << static_cast<const char*>(Ptr) << "\"";
+ break;
+ case Twine::StdStringKind:
+ OS << "std::string:\""
+ << *static_cast<const std::string*>(Ptr) << "\"";
+ break;
+ case Twine::StringRefKind:
+ OS << "stringref:\""
+ << *static_cast<const StringRef*>(Ptr) << "\"";
+ break;
+ case Twine::DecUIKind:
+ OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\"";
+ break;
+ case Twine::DecIKind:
+ OS << "decI:\"" << (int)(intptr_t)Ptr << "\"";
+ break;
+ case Twine::DecULKind:
+ OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+ break;
+ case Twine::DecLKind:
+ OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+ break;
+ case Twine::DecULLKind:
+ OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+ break;
+ case Twine::DecLLKind:
+ OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+ break;
+ case Twine::UHexKind:
+ OS << "uhex:\"";
+ OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+ OS << "\"";
+ break;
+ }
+}
+
+void Twine::print(raw_ostream &OS) const {
+ printOneChild(OS, LHS, getLHSKind());
+ printOneChild(OS, RHS, getRHSKind());
+}
+
+void Twine::printRepr(raw_ostream &OS) const {
+ OS << "(Twine ";
+ printOneChildRepr(OS, LHS, getLHSKind());
+ OS << " ";
+ printOneChildRepr(OS, RHS, getRHSKind());
+ OS << ")";
+}
+
+void Twine::dump() const {
+ print(llvm::dbgs());
+}
+
+void Twine::dumpRepr() const {
+ printRepr(llvm::dbgs());
+}
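
Twine is meant to be used only as a short-lived temporary that is flattened exactly once by the callee. A hedged sketch of the usual pattern, built on the toStringRef() shown above:

  #include "llvm/ADT/SmallString.h"
  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Takes the concatenation by const reference; nothing is copied until the
  // callee decides to flatten the rope.
  static void report(const Twine &Msg) {
    SmallString<128> Storage;
    // toStringRef() only copies into Storage when the Twine is not already a
    // single contiguous string.
    errs() << Msg.toStringRef(Storage) << '\n';
  }

  int main() {
    int Line = 42;
    report("error at line " + Twine(Line) + ": something went wrong");
    return 0;
  }
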
diff --git a/contrib/llvm/lib/Support/Unix/Host.inc b/contrib/llvm/lib/Support/Unix/Host.inc
new file mode 100644
index 000000000000..eacb08b3fb00
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Host.inc
@@ -0,0 +1,69 @@
+//===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+
+static std::string getOSVersion() {
+ struct utsname info;
+
+ if (uname(&info))
+ return "";
+
+ return info.release;
+}
+
+std::string sys::getHostTriple() {
+#ifdef __FreeBSD__
+ return LLVM_HOSTTRIPLE;
+#else
+ // FIXME: Derive directly instead of relying on the autoconf generated
+ // variable.
+
+ StringRef HostTripleString(LLVM_HOSTTRIPLE);
+ std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
+
+ // Normalize the arch, since the host triple may not actually match the host.
+ std::string Arch = ArchSplit.first;
+
+ std::string Triple(Arch);
+ Triple += '-';
+ Triple += ArchSplit.second;
+
+ // Force i<N>86 to i386.
+ if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+ Triple[2] == '8' && Triple[3] == '6')
+ Triple[1] = '3';
+
+ // On darwin, we want to update the version to match that of the
+ // host.
+ std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+ if (DarwinDashIdx != std::string::npos) {
+ Triple.resize(DarwinDashIdx + strlen("-darwin"));
+ Triple += getOSVersion();
+ }
+
+ return Triple;
+#endif
+}
diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
new file mode 100644
index 000000000000..5a57a2870636
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -0,0 +1,151 @@
+//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some functions for various memory management utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
+
+/// AllocateRWX - Allocate a slab of memory with read/write/execute
+/// permissions. This is typically used for JIT applications where we want
+/// to emit code to the memory then jump to it. Getting this type of memory
+/// is very OS specific.
+///
+llvm::sys::MemoryBlock
+llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ size_t pageSize = Process::GetPageSize();
+ size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+ int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+ static int zero_fd = open("/dev/zero", O_RDWR);
+ if (zero_fd == -1) {
+ MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
+ return MemoryBlock();
+ }
+ fd = zero_fd;
+#endif
+
+ int flags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+ MAP_ANONYMOUS
+#else
+ MAP_ANON
+#endif
+ ;
+
+ void* start = NearBlock ? (unsigned char*)NearBlock->base() +
+ NearBlock->size() : 0;
+
+#if defined(__APPLE__) && defined(__arm__)
+ void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
+ flags, fd, 0);
+#else
+ void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
+ flags, fd, 0);
+#endif
+ if (pa == MAP_FAILED) {
+ if (NearBlock) //Try again without a near hint
+ return AllocateRWX(NumBytes, 0);
+
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
+ return MemoryBlock();
+ }
+
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(pageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect max RX failed");
+ return sys::MemoryBlock();
+ }
+
+ kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(pageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect RW failed");
+ return sys::MemoryBlock();
+ }
+#endif
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*pageSize;
+
+ return result;
+}
+
+bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (0 != ::munmap(M.Address, M.Size))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
+ return false;
+}
+
+bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
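
AllocateRWX/setWritable/setExecutable/ReleaseRWX above are the primitives the JIT uses for code memory. A hedged sketch of the intended sequence (the "llvm/Support/Memory.h" header declaring sys::Memory and sys::MemoryBlock is assumed, and the payload byte is only illustrative):

  #include "llvm/Support/Memory.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cstring>
  using namespace llvm;

  int main() {
    std::string Err;
    // Rounded up to whole pages internally.
    sys::MemoryBlock MB = sys::Memory::AllocateRWX(64, 0, &Err);
    if (MB.base() == 0) {
      errs() << "AllocateRWX failed: " << Err << '\n';
      return 1;
    }

    // On ARM/Darwin the block is not writable until asked; elsewhere these
    // calls are no-ops that simply return true.
    sys::Memory::setWritable(MB, &Err);
    unsigned char Ret = 0xc3;                  // x86 'ret', stand-in payload
    std::memcpy(MB.base(), &Ret, sizeof(Ret));
    sys::Memory::setExecutable(MB, &Err);

    sys::Memory::ReleaseRWX(MB, &Err);
    return 0;
  }
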
diff --git a/contrib/llvm/lib/Support/Unix/Mutex.inc b/contrib/llvm/lib/Support/Unix/Mutex.inc
new file mode 100644
index 000000000000..fe6b17041457
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Mutex.inc
@@ -0,0 +1,43 @@
+//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+MutexImpl::MutexImpl( bool recursive)
+{
+}
+
+MutexImpl::~MutexImpl()
+{
+}
+
+bool
+MutexImpl::release()
+{
+ return true;
+}
+
+bool
+MutexImpl::tryacquire( void )
+{
+ return true;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
new file mode 100644
index 000000000000..f295b92e4a5b
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -0,0 +1,890 @@
+//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.
+#ifdef __CYGWIN__
+# undef HAVE_MKDTEMP
+#endif
+
+namespace {
+inline bool lastIsSlash(const std::string& path) {
+ return !path.empty() && path[path.length() - 1] == '/';
+}
+
+}
+
+namespace llvm {
+using namespace sys;
+
+const char sys::PathSeparator = ':';
+
+StringRef Path::GetEXESuffix() {
+ return StringRef();
+}
+
+Path::Path(StringRef p)
+ : path(p) {}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {}
+
+Path&
+Path::operator=(StringRef that) {
+ path.assign(that.data(), that.size());
+ return *this;
+}
+
+bool
+Path::isValid() const {
+ // Empty paths are considered invalid here.
+ // This code doesn't check MAXPATHLEN because there's no need. Nothing in
+ // LLVM manipulates Paths with fixed-size arrays, and if the OS can't
+ // handle names longer than some limit, it'll report this on demand using
+ // ENAMETOOLONG.
+ return !path.empty();
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ if (NameLen == 0)
+ return false;
+ return NameStart[0] == '/';
+}
+
+bool
+Path::isAbsolute() const {
+ if (path.empty())
+ return false;
+ return path[0] == '/';
+}
+
+Path
+Path::GetRootDirectory() {
+ Path result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetTemporaryDirectory(std::string *ErrMsg) {
+#if defined(HAVE_MKDTEMP)
+ // The best way is with mkdtemp, but that's not available on many systems;
+ // Linux and FreeBSD have it. Others probably won't.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ if (0 == mkdtemp(pathname)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#elif defined(HAVE_MKSTEMP)
+ // If no mkdtemp is available, mkstemp can be used to create a temporary file
+ // which is then removed and created as a directory. We prefer this over
+ // mktemp because of mktemp's inherent security and threading risks. We still
+ // have a slight race condition from the time the temporary file is created to
+ // the time it is re-created as a directory.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ int fd = 0;
+ if (-1 == (fd = mkstemp(pathname))) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ ::close(fd);
+ ::unlink(pathname); // start race condition, ignore errors
+ if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#elif defined(HAVE_MKTEMP)
+ // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
+ // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
+ // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
+ // the XXXXXX with the pid of the process and a letter. That leads to only
+ // twenty six temporary files that can be generated.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ char *TmpName = ::mktemp(pathname);
+ if (TmpName == 0) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create unique directory name");
+ return Path();
+ }
+ if (-1 == ::mkdir(TmpName, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(TmpName) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(TmpName);
+#else
+ // This is the worst case implementation. tempnam(3) leaks memory unless it's
+ // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+ // issues. The mktemp(3) function doesn't have enough variability in the
+ // temporary name generated. So, we provide our own implementation that
+ // increments an integer seeded with the current time. This
+ // should be sufficiently unique that we don't have many collisions between
+ // processes. Generally LLVM processes don't run very long and don't use very
+ // many temporary files so this shouldn't be a big issue for LLVM.
+ static time_t num = ::time(0);
+ char pathname[MAXPATHLEN];
+ do {
+ num++;
+ sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+ } while ( 0 == access(pathname, F_OK ) );
+ if (-1 == ::mkdir(pathname, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#endif
+}
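
All four fallbacks above end the same way: either a freshly created, caller-owned directory under /tmp, or an invalid Path plus an error string. A hedged usage sketch (the "llvm/Support/Path.h" header declaring sys::Path is assumed):

  #include "llvm/Support/Path.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    std::string Err;
    sys::Path TmpDir = sys::Path::GetTemporaryDirectory(&Err);
    if (!TmpDir.isValid()) {
      errs() << Err << '\n';
      return 1;
    }
    errs() << "scratch directory: " << TmpDir.str() << '\n';

    // The caller owns the directory; remove it and its contents when done.
    TmpDir.eraseFromDisk(true, &Err);
    return 0;
  }
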
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+ char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#endif
+ // FIXME: Should this look at LD_LIBRARY_PATH too?
+ Paths.push_back(sys::Path("/usr/local/lib/"));
+ Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+ Paths.push_back(sys::Path("/usr/lib/"));
+ Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+ return Path("/etc/llvm/");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ const char* home = getenv("HOME");
+ Path result;
+ if (home && result.set(home))
+ return result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAXPATHLEN];
+ if (!getcwd(pathname,MAXPATHLEN)) {
+ assert (false && "Could not query current working directory.");
+ return Path();
+ }
+
+ return Path(pathname);
+}
+
+#if defined(__FreeBSD__) || defined (__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__minix)
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+ const char *dir, const char *bin)
+{
+ struct stat sb;
+
+ snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
+ if (realpath(buf, ret) == NULL)
+ return (1);
+ if (stat(buf, &sb) != 0)
+ return (1);
+
+ return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+ char *pv, *s, *t, buf[PATH_MAX];
+
+ /* First approach: absolute path. */
+ if (bin[0] == '/') {
+ if (test_dir(buf, ret, "/", bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Second approach: relative path. */
+ if (strchr(bin, '/') != NULL) {
+ if (getcwd(buf, PATH_MAX) == NULL)
+ return (NULL);
+ if (test_dir(buf, ret, buf, bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Third approach: $PATH */
+ if ((pv = getenv("PATH")) == NULL)
+ return (NULL);
+ s = pv = strdup(pv);
+ if (pv == NULL)
+ return (NULL);
+ while ((t = strsep(&s, ":")) != NULL) {
+ if (test_dir(buf, ret, t, bin) == 0) {
+ free(pv);
+ return (ret);
+ }
+ }
+ free(pv);
+ return (NULL);
+}
+#endif // __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __minix
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+#if defined(__APPLE__)
+ // On OS X the executable path is saved to the stack by dyld. Reading it
+ // from there is much faster than calling dladdr, especially for large
+ // binaries with symbols.
+ char exe_path[MAXPATHLEN];
+ uint32_t size = sizeof(exe_path);
+ if (_NSGetExecutablePath(exe_path, &size) == 0) {
+ char link_path[MAXPATHLEN];
+ if (realpath(exe_path, link_path))
+ return Path(link_path);
+ }
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__minix)
+ char exe_path[PATH_MAX];
+
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(exe_path);
+#elif defined(__linux__) || defined(__CYGWIN__)
+ char exe_path[MAXPATHLEN];
+ ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
+ if (len >= 0)
+ return Path(StringRef(exe_path, len));
+#elif defined(HAVE_DLFCN_H)
+ // Use dladdr to get executable path if available.
+ Dl_info DLInfo;
+ int err = dladdr(MainAddr, &DLInfo);
+ if (err == 0)
+ return Path();
+
+ // If the filename is a symlink, we need to resolve and return the location of
+ // the actual executable.
+ char link_path[MAXPATHLEN];
+ if (realpath(DLInfo.dli_fname, link_path))
+ return Path(link_path);
+#else
+#error GetMainExecutable is not implemented on this host yet.
+#endif
+ return Path();
+}
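
The usual way to call GetMainExecutable is to hand it argv[0] plus the address of some symbol that lives in the main executable, so the dladdr() fallback has something to resolve. A hedged sketch following that convention:

  #include "llvm/Support/Path.h"
  #include "llvm/Support/raw_ostream.h"
  #include <stdint.h>
  using namespace llvm;

  static sys::Path getExecutablePath(const char *Argv0) {
    // Any symbol in this binary works as the anchor for the dladdr() path;
    // the address of this function is the conventional choice.
    void *P = (void*)(intptr_t)getExecutablePath;
    return sys::Path::GetMainExecutable(Argv0, P);
  }

  int main(int argc, char **argv) {
    (void)argc;
    sys::Path Exe = getExecutablePath(argv[0]);
    if (Exe.isValid())
      errs() << "running as: " << Exe.str() << '\n';
    return 0;
  }
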
+
+
+StringRef Path::getDirname() const {
+ return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef(path).substr(slash);
+ else
+ return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef();
+ else
+ return StringRef(path).substr(dot + 1);
+}
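
getDirname/getBasename/getSuffix (and getLast further down) split a path purely textually; nothing touches the filesystem. A hedged sketch of what the accessors yield:

  #include "llvm/Support/Path.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    sys::Path P("/usr/local/lib/libfoo.so");
    assert(P.getDirname()  == "/usr/local/lib");
    assert(P.getBasename() == "libfoo");      // last component minus suffix
    assert(P.getSuffix()   == "so");          // text after the final '.'
    assert(P.getLast()     == "libfoo.so");   // final component, suffix kept
    return 0;
  }
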
+
+bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char Buf[1025];
+ int fd = ::open(path.c_str(), O_RDONLY);
+ if (fd < 0)
+ return false;
+ ssize_t bytes_read = ::read(fd, Buf, len);
+ ::close(fd);
+ if (ssize_t(len) != bytes_read)
+ return false;
+ Magic.assign(Buf, len);
+ return true;
+}
+
+bool
+Path::exists() const {
+ return 0 == access(path.c_str(), F_OK );
+}
+
+bool
+Path::isDirectory() const {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ return ((buf.st_mode & S_IFMT) == S_IFDIR) ? true : false;
+}
+
+bool
+Path::isSymLink() const {
+ struct stat buf;
+ if (0 != lstat(path.c_str(), &buf))
+ return false;
+ return S_ISLNK(buf.st_mode);
+}
+
+
+bool
+Path::canRead() const {
+ return 0 == access(path.c_str(), R_OK);
+}
+
+bool
+Path::canWrite() const {
+ return 0 == access(path.c_str(), W_OK);
+}
+
+bool
+Path::isRegularFile() const {
+ // Get the status so we can determine if it's a file or directory
+ struct stat buf;
+
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+
+ if (S_ISREG(buf.st_mode))
+ return true;
+
+ return false;
+}
+
+bool
+Path::canExecute() const {
+ if (0 != access(path.c_str(), R_OK | X_OK ))
+ return false;
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ if (!S_ISREG(buf.st_mode))
+ return false;
+ return true;
+}
+
+StringRef
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash
+ if (pos == path.length()-1) {
+ // Find the second to last slash
+ size_t pos2 = path.rfind('/', pos-1);
+ if (pos2 == std::string::npos)
+ return StringRef(path).substr(0,pos);
+ else
+ return StringRef(path).substr(pos2+1,pos-pos2-1);
+ }
+ // Return everything after the last slash
+ return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return 0;
+ }
+ status.fileSize = buf.st_size;
+ status.modTime.fromEpochTime(buf.st_mtime);
+ status.mode = buf.st_mode;
+ status.user = buf.st_uid;
+ status.group = buf.st_gid;
+ status.uniqueID = uint64_t(buf.st_ino);
+ status.isDir = S_ISDIR(buf.st_mode);
+ status.isFile = S_ISREG(buf.st_mode);
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+static bool AddPermissionBits(const Path &File, int bits) {
+ // Get the umask value from the operating system. We want to use it
+ // when changing the file's permissions. Since calling umask() sets
+ // the umask and returns its old value, we must call it a second
+ // time to reset it to the user's preference.
+ int mask = umask(0777); // The arg. to umask is arbitrary.
+ umask(mask); // Restore the umask.
+
+ // Get the file's current mode.
+ struct stat buf;
+ if (0 != stat(File.c_str(), &buf))
+ return false;
+ // Change the file to have whichever permissions bits from 'bits'
+ // that the umask would not disable.
+ if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
+ return false;
+ return true;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0444))
+ return MakeErrMsg(ErrMsg, path + ": can't make file readable");
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0222))
+ return MakeErrMsg(ErrMsg, path + ": can't make file writable");
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0111))
+ return MakeErrMsg(ErrMsg, path + ": can't make file executable");
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ DIR* direntries = ::opendir(path.c_str());
+ if (direntries == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't open directory");
+
+ std::string dirPath = path;
+ if (!lastIsSlash(dirPath))
+ dirPath += '/';
+
+ result.clear();
+ struct dirent* de = ::readdir(direntries);
+ for ( ; de != 0; de = ::readdir(direntries)) {
+ if (de->d_name[0] != '.') {
+ Path aPath(dirPath + (const char*)de->d_name);
+ struct stat st;
+ if (0 != stat(aPath.path.c_str(), &st)) {
+ // stat() failed; if lstat() still succeeds this is a dangling symlink,
+ // which we silently skip. Otherwise report the error.
+ if (0 == lstat(aPath.path.c_str(), &st) && S_ISLNK(st.st_mode))
+ continue;
+ return MakeErrMsg(ErrMsg,
+ aPath.path + ": can't determine file object type");
+ }
+ result.insert(aPath);
+ }
+ }
+
+ closedir(direntries);
+ return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+ if (a_path.empty())
+ return false;
+ path = a_path;
+ return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+ if (name.empty())
+ return false;
+ if (!lastIsSlash(path))
+ path += '/';
+ path += name;
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == 0 || slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ if (slashpos == path.size() - 1)
+ slashpos = path.rfind('/',slashpos-1);
+ if (slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ path.erase(slashpos);
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ path.erase(dotpos, path.size()-dotpos);
+ return true;
+ }
+ }
+ return false;
+}
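
set/appendComponent/eraseComponent/eraseSuffix edit the stored string in place and report whether anything changed. A hedged sketch of chaining them:

  #include "llvm/Support/Path.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    sys::Path P;
    P.set("/tmp/build");
    P.appendComponent("module.bc");        // "/tmp/build/module.bc"
    assert(P.str() == "/tmp/build/module.bc");

    P.eraseSuffix();                       // "/tmp/build/module"
    assert(P.str() == "/tmp/build/module");

    P.eraseComponent();                    // "/tmp/build"
    assert(P.str() == "/tmp/build");
    return 0;
  }
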
+
+static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
+
+ if (access(beg, R_OK | W_OK) == 0)
+ return false;
+
+ if (create_parents) {
+
+ char* c = end;
+
+ for (; c != beg; --c)
+ if (*c == '/') {
+
+ // Recurse to handle the parent directory.
+ *c = '\0';
+ bool x = createDirectoryHelper(beg, c, create_parents);
+ *c = '/';
+
+ // Return if we encountered an error.
+ if (x)
+ return true;
+
+ break;
+ }
+ }
+
+ return mkdir(beg, S_IRWXU | S_IRWXG) != 0;
+}
+
+bool
+Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
+ // Get a writeable copy of the path name
+ std::string pathname(path);
+
+ // Null-terminate the last component
+ size_t lastchar = path.length() - 1 ;
+
+ if (pathname[lastchar] != '/')
+ ++lastchar;
+
+ pathname[lastchar] = '\0';
+
+ if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents))
+ return MakeErrMsg(ErrMsg, pathname + ": can't create directory");
+
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ if (makeUnique( reuse_current, ErrMsg ))
+ return true;
+
+ // create the file
+ int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create temporary file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ // Get the status so we can determine if it's a file or directory.
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return true;
+ }
+
+ // Note: this check catches strange situations. In all cases, LLVM should
+ // only be involved in the creation and deletion of regular files. This
+ // check ensures that what we're trying to erase is a regular file. It
+ // effectively prevents LLVM from erasing things like /dev/null, any block
+ // special file, or other things that aren't "regular" files.
+ if (S_ISREG(buf.st_mode)) {
+ if (unlink(path.c_str()) != 0)
+ return MakeErrMsg(ErrStr, path + ": can't destroy file");
+ return false;
+ }
+
+ if (!S_ISDIR(buf.st_mode)) {
+ if (ErrStr) *ErrStr = "not a file or directory";
+ return true;
+ }
+
+ if (remove_contents) {
+ // Recursively descend the directory to remove its contents.
+ std::string cmd = "/bin/rm -rf " + path;
+ if (system(cmd.c_str()) != 0) {
+ MakeErrMsg(ErrStr, path + ": failed to recursively remove directory.");
+ return true;
+ }
+ return false;
+ }
+
+ // Otherwise, try to just remove the one directory.
+ std::string pathname(path);
+ size_t lastchar = path.length() - 1;
+ if (pathname[lastchar] == '/')
+ pathname[lastchar] = '\0';
+ else
+ pathname[lastchar+1] = '\0';
+
+ if (rmdir(pathname.c_str()) != 0)
+ return MakeErrMsg(ErrStr, pathname + ": can't erase directory");
+ return false;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (0 != ::rename(path.c_str(), newName.c_str()))
+ return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
+ newName.str() + "'");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
+ struct utimbuf utb;
+ utb.actime = si.modTime.toPosixTime();
+ utb.modtime = utb.actime;
+ if (0 != ::utime(path.c_str(),&utb))
+ return MakeErrMsg(ErrStr, path + ": can't set file modification time");
+ if (0 != ::chmod(path.c_str(),si.mode))
+ return MakeErrMsg(ErrStr, path + ": can't set mode");
+ return false;
+}
+
+bool
+sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
+ int inFile = -1;
+ int outFile = -1;
+ inFile = ::open(Src.c_str(), O_RDONLY);
+ if (inFile == -1)
+ return MakeErrMsg(ErrMsg, Src.str() +
+ ": can't open source file to copy");
+
+ outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
+ if (outFile == -1) {
+ ::close(inFile);
+ return MakeErrMsg(ErrMsg, Dest.str() +
+ ": can't create destination file for copy");
+ }
+
+ char Buffer[16*1024];
+ while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
+ if (Amt == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
+ }
+ } else {
+ char *BufPtr = Buffer;
+ while (Amt) {
+ ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
+ if (AmtWritten == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Dest.str() +
+ ": can't write destination file");
+ }
+ } else {
+ Amt -= AmtWritten;
+ BufPtr += AmtWritten;
+ }
+ }
+ }
+ }
+ ::close(inFile);
+ ::close(outFile);
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
+ return false; // File doesn't exist already, just use it!
+
+ // Append an XXXXXX pattern to the end of the file for use with mkstemp,
+ // mktemp or our own implementation.
+ // This uses std::vector instead of SmallVector to avoid a dependence on
+ // libSupport. And performance isn't critical here.
+ std::vector<char> Buf;
+ Buf.resize(path.size()+8);
+ char *FNBuffer = &Buf[0];
+ path.copy(FNBuffer,path.size());
+ bool isdir;
+ if (!fs::is_directory(path, isdir) && isdir)
+ strcpy(FNBuffer+path.size(), "/XXXXXX");
+ else
+ strcpy(FNBuffer+path.size(), "-XXXXXX");
+
+#if defined(HAVE_MKSTEMP)
+ int TempFD;
+ if ((TempFD = mkstemp(FNBuffer)) == -1)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // We don't need to hold the temp file descriptor... we will trust that no one
+ // will overwrite/delete the file before we can open it again.
+ close(TempFD);
+
+ // Save the name
+ path = FNBuffer;
+
+ // By default mkstemp sets the mode to 0600, so update mode bits now.
+ AddPermissionBits (*this, 0666);
+#elif defined(HAVE_MKTEMP)
+ // If we don't have mkstemp, use the old and obsolete mktemp function.
+ if (mktemp(FNBuffer) == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // Save the name
+ path = FNBuffer;
+#else
+ // Okay, looks like we have to do it all by our lonesome.
+ static unsigned FCounter = 0;
+ // Try to initialize with unique value.
+ if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
+ char* pos = strstr(FNBuffer, "XXXXXX");
+ do {
+ if (++FCounter > 0xFFFFFF) {
+ return MakeErrMsg(ErrMsg,
+ path + ": can't make unique filename: too many files");
+ }
+ sprintf(pos, "%06X", FCounter);
+ path = FNBuffer;
+ } while (exists());
+ // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
+ // LLVM.
+#endif
+ return false;
+}
+
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
+ int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+ Flags |= MAP_FILE;
+#endif
+ void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset);
+ if (BasePtr == MAP_FAILED)
+ return 0;
+ return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) {
+ ::munmap((void*)BasePtr, FileSize);
+}
+
+} // end llvm namespace
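Taken together, the Path primitives above follow one convention: methods that touch the disk return true on failure and fill the optional ErrMsg string. A minimal caller might look like the sketch below (the "llvm/Support/Path.h" include and the /tmp base name are assumptions, not part of this patch):

    #include "llvm/Support/Path.h"
    #include <iostream>
    #include <string>

    int main() {
      std::string Err;
      llvm::sys::Path Tmp;
      Tmp.set("/tmp/llvm-demo");                        // base name; makeUnique appends -XXXXXX
      if (Tmp.createTemporaryFileOnDisk(false, &Err)) { // true means failure
        std::cerr << Err << '\n';
        return 1;
      }
      std::cout << "created " << Tmp.str() << '\n';
      Tmp.eraseFromDisk(false, &Err);                   // best-effort cleanup
      return 0;
    }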
diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc
new file mode 100644
index 000000000000..03ff28367e44
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,507 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+using namespace llvm;
+
+namespace {
+ /// This class automatically closes the given file descriptor when it goes out
+ /// of scope. You can take back explicit ownership of the file descriptor by
+ /// calling take(). The destructor does not verify that close was successful.
+ /// Therefore, never allow this class to call close on a file descriptor that
+ /// has been read from or written to.
+ struct AutoFD {
+ int FileDescriptor;
+
+ AutoFD(int fd) : FileDescriptor(fd) {}
+ ~AutoFD() {
+ if (FileDescriptor >= 0)
+ ::close(FileDescriptor);
+ }
+
+ int take() {
+ int ret = FileDescriptor;
+ FileDescriptor = -1;
+ return ret;
+ }
+
+ operator int() const {return FileDescriptor;}
+ };
+
+ error_code TempDir(SmallVectorImpl<char> &result) {
+ // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
+ const char *dir = 0;
+ (dir = std::getenv("TMPDIR" )) ||
+ (dir = std::getenv("TMP" )) ||
+ (dir = std::getenv("TEMP" )) ||
+ (dir = std::getenv("TEMPDIR")) ||
+#ifdef P_tmpdir
+ (dir = P_tmpdir) ||
+#endif
+ (dir = "/tmp");
+
+ result.clear();
+ StringRef d(dir);
+ result.append(d.begin(), d.end());
+ return success;
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ result.reserve(MAXPATHLEN);
+
+ while (true) {
+ if (::getcwd(result.data(), result.capacity()) == 0) {
+ // See if there was a real error.
+ if (errno != errc::not_enough_memory)
+ return error_code(errno, system_category());
+ // Otherwise there just wasn't enough space.
+ result.reserve(result.capacity() * 2);
+ } else
+ break;
+ }
+
+ result.set_size(strlen(result.data()));
+ return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ const size_t buf_sz = 32768;
+ char buffer[buf_sz];
+ int from_file = -1, to_file = -1;
+
+ // Open from.
+ if ((from_file = ::open(f.begin(), O_RDONLY)) < 0)
+ return error_code(errno, system_category());
+ AutoFD from_fd(from_file);
+
+ // Stat from.
+ struct stat from_stat;
+ if (::stat(f.begin(), &from_stat) != 0)
+ return error_code(errno, system_category());
+
+ // Setup to flags.
+ int to_flags = O_CREAT | O_WRONLY;
+ if (copt == copy_option::fail_if_exists)
+ to_flags |= O_EXCL;
+
+ // Open to.
+ if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0)
+ return error_code(errno, system_category());
+ AutoFD to_fd(to_file);
+
+ // Copy!
+ ssize_t sz, sz_read = 1, sz_write;
+ while (sz_read > 0 &&
+ (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) {
+ // Allow for partial writes - see Advanced Unix Programming (2nd Ed.),
+ // Marc Rochkind, Addison-Wesley, 2004, page 94
+ sz_write = 0;
+ do {
+ if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) {
+ sz_read = sz; // cause read loop termination.
+ break; // error.
+ }
+ sz_write += sz;
+ } while (sz_write < sz_read);
+ }
+
+ // After all the file operations above the return value of close actually
+ // matters.
+ if (::close(from_fd.take()) < 0) sz_read = -1;
+ if (::close(to_fd.take()) < 0) sz_read = -1;
+
+ // Check for errors.
+ if (sz_read < 0)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+ if (errno != errc::file_exists)
+ return error_code(errno, system_category());
+ existed = true;
+ } else
+ existed = false;
+
+ return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::link(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::symlink(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::remove(p.begin()) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ existed = false;
+ } else
+ existed = true;
+
+ return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::rename(f.begin(), t.begin()) == -1) {
+    // If it's a cross-device link, copy then delete, otherwise return the error.
+ if (errno == EXDEV) {
+ if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists))
+ return ec;
+ bool Existed;
+ if (error_code ec = remove(from, Existed))
+ return ec;
+ } else
+ return error_code(errno, system_category());
+ }
+
+ return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::truncate(p.begin(), size) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ result = false;
+ } else
+ result = true;
+
+ return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ // Get arguments.
+ SmallString<128> a_storage;
+ SmallString<128> b_storage;
+ StringRef a = A.toNullTerminatedStringRef(a_storage);
+ StringRef b = B.toNullTerminatedStringRef(b_storage);
+
+ struct stat stat_a, stat_b;
+ int error_b = ::stat(b.begin(), &stat_b);
+ int error_a = ::stat(a.begin(), &stat_a);
+
+ // If both are invalid, it's an error. If only one is, the result is false.
+ if (error_a != 0 || error_b != 0) {
+ if (error_a == error_b)
+ return error_code(errno, system_category());
+ result = false;
+ } else {
+ result =
+ stat_a.st_dev == stat_b.st_dev &&
+ stat_a.st_ino == stat_b.st_ino;
+ }
+
+ return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1)
+ return error_code(errno, system_category());
+ if (!S_ISREG(status.st_mode))
+ return make_error_code(errc::operation_not_permitted);
+
+ result = status.st_size;
+ return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) != 0) {
+ error_code ec(errno, system_category());
+ if (ec == errc::no_such_file_or_directory)
+ result = file_status(file_type::file_not_found);
+ else
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ if (S_ISDIR(status.st_mode))
+ result = file_status(file_type::directory_file);
+ else if (S_ISREG(status.st_mode))
+ result = file_status(file_type::regular_file);
+ else if (S_ISBLK(status.st_mode))
+ result = file_status(file_type::block_file);
+ else if (S_ISCHR(status.st_mode))
+ result = file_status(file_type::character_file);
+ else if (S_ISFIFO(status.st_mode))
+ result = file_status(file_type::fifo_file);
+ else if (S_ISSOCK(status.st_mode))
+ result = file_status(file_type::socket_file);
+ else
+ result = file_status(file_type::type_unknown);
+
+ return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path) {
+ SmallString<128> Model;
+ model.toVector(Model);
+ // Null terminate.
+ Model.c_str();
+
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(Twine(Model));
+ if (!absolute) {
+ SmallString<128> TDir;
+ if (error_code ec = TempDir(TDir)) return ec;
+ path::append(TDir, Twine(Model));
+ Model.swap(TDir);
+ }
+
+  // Replace '%' with random chars. From here on, DO NOT modify Model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallString<128> RandomPath;
+ RandomPath.reserve(Model.size() + 1);
+ ::srand(::time(NULL));
+
+retry_random_path:
+ // This is opened here instead of above to make it easier to track when to
+ // close it. Collisions should be rare enough for the possible extra syscalls
+ // not to matter.
+ FILE *RandomSource = ::fopen("/dev/urandom", "r");
+ RandomPath.set_size(0);
+ for (SmallVectorImpl<char>::const_iterator i = Model.begin(),
+ e = Model.end(); i != e; ++i) {
+ if (*i == '%') {
+ char val = 0;
+ if (RandomSource)
+ val = fgetc(RandomSource);
+ else
+ val = ::rand();
+ RandomPath.push_back("0123456789abcdef"[val & 15]);
+ } else
+ RandomPath.push_back(*i);
+ }
+
+ if (RandomSource)
+ ::fclose(RandomSource);
+
+ // Try to open + create the file.
+rety_open_create:
+ int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
+ if (RandomFD == -1) {
+ // If the file existed, try again, otherwise, error.
+ if (errno == errc::file_exists)
+ goto retry_random_path;
+ // The path prefix doesn't exist.
+ if (errno == errc::no_such_file_or_directory) {
+ StringRef p(RandomPath.begin(), RandomPath.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // Don't try to create network paths.
+ if (i->size() > 2 && (*i)[0] == '/' &&
+ (*i)[1] == '/' &&
+ (*i)[2] != '/')
+ return make_error_code(errc::no_such_file_or_directory);
+ if (::mkdir(dir_to_create.c_str(), 0700) == -1)
+ return error_code(errno, system_category());
+ }
+ }
+ goto rety_open_create;
+ }
+ return error_code(errno, system_category());
+ }
+
+ // Make the path absolute.
+ char real_path_buff[PATH_MAX + 1];
+ if (realpath(RandomPath.c_str(), real_path_buff) == NULL) {
+ int error = errno;
+ ::close(RandomFD);
+ ::unlink(RandomPath.c_str());
+ return error_code(error, system_category());
+ }
+
+ result_path.clear();
+ StringRef d(real_path_buff);
+ result_path.append(d.begin(), d.end());
+
+ result_fd = RandomFD;
+ return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+ SmallString<128> path_null(path);
+ DIR *directory = ::opendir(path_null.c_str());
+ if (directory == 0)
+ return error_code(errno, system_category());
+
+ it.IterationHandle = reinterpret_cast<intptr_t>(directory);
+ // Add something for replace_filename to replace.
+ path::append(path_null, ".");
+ it.CurrentEntry = directory_entry(path_null.str());
+ return directory_iterator_increment(it);
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+ if (it.IterationHandle)
+ ::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+ errno = 0;
+ dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
+ if (cur_dir == 0 && errno != 0) {
+ return error_code(errno, system_category());
+ } else if (cur_dir != 0) {
+ StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
+ if ((name.size() == 1 && name[0] == '.') ||
+ (name.size() == 2 && name[0] == '.' && name[1] == '.'))
+ return directory_iterator_increment(it);
+ it.CurrentEntry.replace_filename(name);
+ } else
+ return directory_iterator_destruct(it);
+
+ return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> PathStorage;
+ StringRef Path = path.toNullTerminatedStringRef(PathStorage);
+ result.set_size(0);
+
+ // Open path.
+ std::FILE *file = std::fopen(Path.data(), "rb");
+ if (file == 0)
+ return error_code(errno, system_category());
+
+ // Reserve storage.
+ result.reserve(len);
+
+ // Read magic!
+ size_t size = std::fread(result.data(), 1, len, file);
+ if (std::ferror(file) != 0) {
+ std::fclose(file);
+ return error_code(errno, system_category());
+ } else if (size != result.size()) {
+ if (std::feof(file) != 0) {
+ std::fclose(file);
+ result.set_size(size);
+ return make_error_code(errc::value_too_large);
+ }
+ }
+ std::fclose(file);
+ result.set_size(len);
+ return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
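The PathV2 routines above all return an error_code, and a default-constructed (success) code tests false, so callers can use the same if-initializer pattern the file itself uses. A sketch of a caller follows; the FileSystem.h/SmallString.h include paths are assumed and the file names are placeholders:

    #include "llvm/Support/FileSystem.h"
    #include "llvm/ADT/SmallString.h"
    #include <cstdio>

    using namespace llvm;

    int main() {
      // Copy a file, refusing to clobber an existing destination.
      if (error_code ec = sys::fs::copy_file("in.txt", "out.txt",
                                             sys::fs::copy_option::fail_if_exists)) {
        std::fprintf(stderr, "copy failed: %s\n", ec.message().c_str());
        return 1;
      }

      // Create a unique temporary file; '%' characters in the model are replaced
      // with random hex digits, a relative model is placed under TMPDIR (or /tmp),
      // and missing parent directories are created on demand.
      int FD;
      SmallString<128> TempPath;
      if (sys::fs::unique_file("demo-%%%%%%.tmp", FD, TempPath))
        return 1;
      std::fprintf(stderr, "temp file: %s (fd %d)\n", TempPath.c_str(), FD);
      return 0;
    }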
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
new file mode 100644
index 000000000000..5cdb11ccebc4
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -0,0 +1,295 @@
+//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead,
+// Unix.h includes this for us already.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+# include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+unsigned
+Process::GetPageSize()
+{
+#if defined(__CYGWIN__)
+ // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
+ // memory protection and mmap() is 4k.
+ // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+ const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+ const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+ long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+ return static_cast<unsigned>(page_size);
+}
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi;
+ mi = ::mallinfo();
+ return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_in_use; // darwin
+#elif defined(HAVE_SBRK)
+ // Note this is only an approximation and more closely resembles
+ // the value returned by mallinfo in the arena field.
+ static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+ char *EndOfMemory = (char*)sbrk(0);
+ if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+ return EndOfMemory - StartOfMemory;
+ else
+ return 0;
+#else
+#warning Cannot get malloc info on this platform
+ return 0;
+#endif
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi = ::mallinfo();
+ return mi.uordblks + mi.hblkhd;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_allocated; // darwin
+#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
+ struct rusage usage;
+ ::getrusage(RUSAGE_SELF, &usage);
+ return usage.ru_maxrss;
+#else
+#warning Cannot get total memory size on this platform
+ return 0;
+#endif
+}
+
+void
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+ TimeValue& sys_time)
+{
+ elapsed = TimeValue::now();
+#if defined(HAVE_GETRUSAGE)
+ struct rusage usage;
+ ::getrusage(RUSAGE_SELF, &usage);
+ user_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+ TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+ sys_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+ TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+#else
+#warning Cannot get usage times on this platform
+ user_time.seconds(0);
+ user_time.microseconds(0);
+ sys_time.seconds(0);
+ sys_time.microseconds(0);
+#endif
+}
+
+int Process::GetCurrentUserId() {
+ return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+ return getgid();
+}
+
+#ifdef HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+ struct rlimit rlim;
+ rlim.rlim_cur = rlim.rlim_max = 0;
+ setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#ifdef HAVE_MACH_MACH_H
+ // Disable crash reporting on Mac OS X 10.0-10.4
+
+ // get information about the original set of exception ports for the task
+ mach_msg_type_number_t Count = 0;
+ exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+ exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+ exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+ kern_return_t err =
+ task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+ &Count, OriginalPorts, OriginalBehaviors,
+ OriginalFlavors);
+ if (err == KERN_SUCCESS) {
+ // replace each with MACH_PORT_NULL.
+ for (unsigned i = 0; i != Count; ++i)
+ task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+ MACH_PORT_NULL, OriginalBehaviors[i],
+ OriginalFlavors[i]);
+ }
+
+ // Disable crash reporting on Mac OS X 10.5
+ signal(SIGABRT, _exit);
+ signal(SIGILL, _exit);
+ signal(SIGFPE, _exit);
+ signal(SIGSEGV, _exit);
+ signal(SIGBUS, _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+ return FileDescriptorIsDisplayed(STDIN_FILENO);
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDOUT_FILENO);
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDERR_FILENO);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+#if HAVE_ISATTY
+ return isatty(fd);
+#else
+ // If we don't have isatty, just return false.
+ return false;
+#endif
+}
+
+static unsigned getColumns(int FileID) {
+ // If COLUMNS is defined in the environment, wrap to that many columns.
+ if (const char *ColumnsStr = std::getenv("COLUMNS")) {
+ int Columns = std::atoi(ColumnsStr);
+ if (Columns > 0)
+ return Columns;
+ }
+
+ unsigned Columns = 0;
+
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+ // Try to determine the width of the terminal.
+ struct winsize ws;
+ if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
+ Columns = ws.ws_col;
+#endif
+
+ return Columns;
+}
+
+unsigned Process::StandardOutColumns() {
+ if (!StandardOutIsDisplayed())
+ return 0;
+
+ return getColumns(1);
+}
+
+unsigned Process::StandardErrColumns() {
+ if (!StandardErrIsDisplayed())
+ return 0;
+
+ return getColumns(2);
+}
+
+static bool terminalHasColors() {
+ if (const char *term = std::getenv("TERM")) {
+ // Most modern terminals support ANSI escape sequences for colors.
+ // We could check terminfo, or have a list of known terms that support
+ // colors, but that would be overkill.
+ // The user can always ask for no colors by setting TERM to dumb, or
+ // using a commandline flag.
+ return strcmp(term, "dumb") != 0;
+ }
+ return false;
+}
+
+bool Process::StandardOutHasColors() {
+ if (!StandardOutIsDisplayed())
+ return false;
+ return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+ if (!StandardErrIsDisplayed())
+ return false;
+ return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+ // No, we use ANSI escape sequences.
+ return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+ COLOR(FGBG, "0", BOLD),\
+ COLOR(FGBG, "1", BOLD),\
+ COLOR(FGBG, "2", BOLD),\
+ COLOR(FGBG, "3", BOLD),\
+ COLOR(FGBG, "4", BOLD),\
+ COLOR(FGBG, "5", BOLD),\
+ COLOR(FGBG, "6", BOLD),\
+ COLOR(FGBG, "7", BOLD)\
+ }
+
+static const char colorcodes[2][2][8][10] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+ return "\033[1m";
+}
+
+const char *Process::ResetColor() {
+ return "\033[0m";
+}
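A small sketch of the terminal queries defined above (the Process.h include path is an assumption); color code 1 selects ANSI red per the colorcodes table:

    #include "llvm/Support/Process.h"
    #include <cstdio>

    int main() {
      using llvm::sys::Process;
      unsigned Cols = Process::StandardOutColumns();   // 0 when stdout is not a terminal
      if (Process::StandardOutHasColors())
        std::printf("%swarning:%s terminal is %u columns wide\n",
                    Process::OutputColor(1, /*bold=*/true, /*bg=*/false),
                    Process::ResetColor(), Cols);
      else
        std::printf("warning: terminal is %u columns wide\n", Cols);
      return 0;
    }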
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
new file mode 100644
index 000000000000..346baf1744dc
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -0,0 +1,430 @@
+//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "llvm/Support/FileSystem.h"
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_POSIX_SPAWN
+#include <spawn.h>
+#if !defined(__APPLE__)
+ extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+#endif
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {}
+
+unsigned Program::GetPid() const {
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ return static_cast<unsigned>(pid);
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // Use the given path verbatim if it contains any slashes; this matches
+ // the behavior of sh(1) and friends.
+ if (progName.find('/') != std::string::npos)
+ return temp;
+
+ // At this point, the file name is valid and does not contain slashes. Search
+ // for it through the directories specified in the PATH environment variable.
+
+  // Get the path. If it's empty, we can't do anything to find it.
+ const char *PathStr = getenv("PATH");
+ if (PathStr == 0)
+ return Path();
+
+ // Now we have a colon separated list of directories to search; try them.
+ size_t PathLen = strlen(PathStr);
+ while (PathLen) {
+ // Find the first colon...
+ const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+
+ // Check to see if this first directory contains the executable...
+ Path FilePath;
+ if (FilePath.set(std::string(PathStr,Colon))) {
+ FilePath.appendComponent(progName);
+ if (FilePath.canExecute())
+ return FilePath; // Found the executable!
+ }
+
+ // Nope it wasn't in this directory, check the next path in the list!
+ PathLen -= Colon-PathStr;
+ PathStr = Colon;
+
+ // Advance past duplicate colons
+ while (*PathStr == ':') {
+ PathStr++;
+ PathLen--;
+ }
+ }
+ return Path();
+}
+
+static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ // Open the file
+ int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+ if (InFD == -1) {
+ MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
+ + (FD == 0 ? "input" : "output"));
+ return true;
+ }
+
+ // Install it as the requested FD
+ if (dup2(InFD, FD) == -1) {
+ MakeErrMsg(ErrMsg, "Cannot dup2");
+ close(InFD);
+ return true;
+ }
+ close(InFD); // Close the original FD
+ return false;
+}
+
+#ifdef HAVE_POSIX_SPAWN
+static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
+ posix_spawn_file_actions_t *FileActions) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ if (int Err = posix_spawn_file_actions_addopen(FileActions, FD,
+ File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
+ return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+ return false;
+}
+#endif
+
+static void TimeOutHandler(int Sig) {
+}
+
+static void SetMemoryLimits (unsigned size)
+{
+#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
+ struct rlimit r;
+ __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
+
+ // Heap size
+ getrlimit (RLIMIT_DATA, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_DATA, &r);
+#ifdef RLIMIT_RSS
+ // Resident set size.
+ getrlimit (RLIMIT_RSS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_RSS, &r);
+#endif
+#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it.
+ // Virtual memory.
+ getrlimit (RLIMIT_AS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_AS, &r);
+#endif
+#endif
+}
+
+bool
+Program::Execute(const Path &path, const char **args, const char **envp,
+ const Path **redirects, unsigned memoryLimit,
+ std::string *ErrMsg) {
+  // If this OS has posix_spawn and there is no memory limit being applied, use
+ // posix_spawn. It is more efficient than fork/exec.
+#ifdef HAVE_POSIX_SPAWN
+ if (memoryLimit == 0) {
+ posix_spawn_file_actions_t FileActionsStore;
+ posix_spawn_file_actions_t *FileActions = 0;
+
+ if (redirects) {
+ FileActions = &FileActionsStore;
+ posix_spawn_file_actions_init(FileActions);
+
+ // Redirect stdin/stdout.
+ if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
+ RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
+ return false;
+ if (redirects[1] == 0 || redirects[2] == 0 ||
+ *redirects[1] != *redirects[2]) {
+ // Just redirect stderr
+ if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
+ } else {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2))
+ return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
+ }
+ }
+
+ if (!envp)
+#if !defined(__APPLE__)
+ envp = const_cast<const char **>(environ);
+#else
+ // environ is missing in dylibs.
+ envp = const_cast<const char **>(*_NSGetEnviron());
+#endif
+
+ // Explicitly initialized to prevent what appears to be a valgrind false
+ // positive.
+ pid_t PID = 0;
+ int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0,
+ const_cast<char **>(args), const_cast<char **>(envp));
+
+ if (FileActions)
+ posix_spawn_file_actions_destroy(FileActions);
+
+ if (Err)
+ return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
+
+ Data_ = reinterpret_cast<void*>(PID);
+ return true;
+ }
+#endif
+
+ // Create a child process.
+ int child = fork();
+ switch (child) {
+ // An error occurred: Return to the caller.
+ case -1:
+ MakeErrMsg(ErrMsg, "Couldn't fork");
+ return false;
+
+ // Child process: Execute the program.
+ case 0: {
+ // Redirect file descriptors...
+ if (redirects) {
+ // Redirect stdin
+ if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
+ // Redirect stdout
+ if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
+ if (redirects[1] && redirects[2] &&
+ *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (-1 == dup2(1,2)) {
+ MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
+ return false;
+ }
+ } else {
+ // Just redirect stderr
+ if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
+ }
+ }
+
+ // Set memory limits
+ if (memoryLimit!=0) {
+ SetMemoryLimits(memoryLimit);
+ }
+
+ // Execute!
+ if (envp != 0)
+ execve(path.c_str(),
+ const_cast<char **>(args),
+ const_cast<char **>(envp));
+ else
+ execv(path.c_str(),
+ const_cast<char **>(args));
+ // If the execve() failed, we should exit. Follow Unix protocol and
+ // return 127 if the executable was not found, and 126 otherwise.
+ // Use _exit rather than exit so that atexit functions and static
+ // object destructors cloned from the parent process aren't
+ // redundantly run, and so that any data buffered in stdio buffers
+ // cloned from the parent aren't redundantly written out.
+ _exit(errno == ENOENT ? 127 : 126);
+ }
+
+ // Parent process: Break out of the switch to do our processing.
+ default:
+ break;
+ }
+
+ Data_ = reinterpret_cast<void*>(child);
+
+ return true;
+}
+
+int
+Program::Wait(const sys::Path &path,
+ unsigned secondsToWait,
+ std::string* ErrMsg)
+{
+#ifdef HAVE_SYS_WAIT_H
+ struct sigaction Act, Old;
+
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ // Install a timeout handler. The handler itself does nothing, but the simple
+ // fact of having a handler at all causes the wait below to return with EINTR,
+ // unlike if we used SIG_IGN.
+ if (secondsToWait) {
+ memset(&Act, 0, sizeof(Act));
+ Act.sa_handler = TimeOutHandler;
+ sigemptyset(&Act.sa_mask);
+ sigaction(SIGALRM, &Act, &Old);
+ alarm(secondsToWait);
+ }
+
+ // Parent process: Wait for the child process to terminate.
+ int status;
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ pid_t child = static_cast<pid_t>(pid);
+ while (waitpid(pid, &status, 0) != child)
+ if (secondsToWait && errno == EINTR) {
+ // Kill the child.
+ kill(child, SIGKILL);
+
+ // Turn off the alarm and restore the signal handler
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+
+ // Wait for child to die
+ if (wait(&status) != child)
+ MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+ else
+ MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+ return -2; // Timeout detected
+ } else if (errno != EINTR) {
+ MakeErrMsg(ErrMsg, "Error waiting for child process");
+ return -1;
+ }
+
+ // We exited normally without timeout, so turn off the timer.
+ if (secondsToWait) {
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+ }
+
+ // Return the proper exit status. Detect error conditions
+ // so we can return -1 for them and set ErrMsg informatively.
+ int result = 0;
+ if (WIFEXITED(status)) {
+ result = WEXITSTATUS(status);
+#ifdef HAVE_POSIX_SPAWN
+ // The posix_spawn child process returns 127 on any kind of error.
+ // Following the POSIX convention for command-line tools (which posix_spawn
+ // itself apparently does not), check to see if the failure was due to some
+ // reason other than the file not existing, and return 126 in this case.
+ bool Exists;
+ if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists)
+ result = 126;
+#endif
+ if (result == 127) {
+ if (ErrMsg)
+ *ErrMsg = llvm::sys::StrError(ENOENT);
+ return -1;
+ }
+ if (result == 126) {
+ if (ErrMsg)
+ *ErrMsg = "Program could not be executed";
+ return -1;
+ }
+ } else if (WIFSIGNALED(status)) {
+ if (ErrMsg) {
+ *ErrMsg = strsignal(WTERMSIG(status));
+#ifdef WCOREDUMP
+ if (WCOREDUMP(status))
+ *ErrMsg += " (core dumped)";
+#endif
+ }
+ // Return a special value to indicate that the process received an unhandled
+ // signal during execution as opposed to failing to execute.
+ return -2;
+ }
+ return result;
+#else
+ if (ErrMsg)
+ *ErrMsg = "Program::Wait is not implemented on this platform yet!";
+ return -1;
+#endif
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return true;
+ }
+
+ uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
+ pid_t pid = static_cast<pid_t>(pid64);
+
+ if (kill(pid, SIGKILL) != 0) {
+ MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+ return true;
+ }
+
+ return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+bool Program::ChangeStdoutToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+bool Program::ChangeStderrToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+}
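A sketch of driving the Program class above; note that Execute() returns true on success, the opposite of the Path convention, while Wait() returns the child's exit status. The Program.h/Path.h include paths and the choice of echo are assumptions:

    #include "llvm/Support/Program.h"
    #include "llvm/Support/Path.h"
    #include <cstdio>
    #include <string>

    int main() {
      using namespace llvm;
      sys::Path Echo = sys::Program::FindProgramByName("echo");
      if (Echo.isEmpty())                            // not found on PATH
        return 1;
      const char *Args[] = { "echo", "hello", 0 };   // argv-style, null-terminated
      std::string Err;
      sys::Program P;
      if (!P.Execute(Echo, Args, /*envp=*/0, /*redirects=*/0,
                     /*memoryLimit=*/0, &Err)) {
        std::fprintf(stderr, "%s\n", Err.c_str());
        return 1;
      }
      return P.Wait(Echo, /*secondsToWait=*/10, &Err);   // exit status, or < 0 on error
    }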
diff --git a/contrib/llvm/lib/Support/Unix/README.txt b/contrib/llvm/lib/Support/Unix/README.txt
new file mode 100644
index 000000000000..3d547c2990d5
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/Support/Unix README
+===========================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix - only code that is truly generic to all UNIX platforms
+ Posix - code that is specific to Posix variants of UNIX
+ SUS - code that is specific to the Single Unix Specification
+ SysV - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/contrib/llvm/lib/Support/Unix/RWMutex.inc b/contrib/llvm/lib/Support/Unix/RWMutex.inc
new file mode 100644
index 000000000000..40e87ff13111
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ return true;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
new file mode 100644
index 000000000000..e286869e775d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -0,0 +1,303 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+#include <algorithm>
+#if HAVE_EXECINFO_H
+# include <execinfo.h> // For backtrace().
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig); // defined below.
+
+static SmartMutex<true> SignalsMutex;
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> FilesToRemove;
+static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
+
+// IntSigs - Signals that may interrupt the program at any time.
+static const int IntSigs[] = {
+ SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+ IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that are synchronous with the program that will cause it
+// to die.
+static const int KillSigs[] = {
+ SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
+#ifdef SIGSYS
+ , SIGSYS
+#endif
+#ifdef SIGXCPU
+ , SIGXCPU
+#endif
+#ifdef SIGXFSZ
+ , SIGXFSZ
+#endif
+#ifdef SIGEMT
+ , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+ KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+ struct sigaction SA;
+ int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+ assert(NumRegisteredSignals <
+ sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+ "Out of space for signal handlers!");
+
+ struct sigaction NewHandler;
+
+ NewHandler.sa_handler = SignalHandler;
+ NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+ sigemptyset(&NewHandler.sa_mask);
+
+ // Install the new handler, save the old one in RegisteredSignalInfo.
+ sigaction(Signal, &NewHandler,
+ &RegisteredSignalInfo[NumRegisteredSignals].SA);
+ RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+ ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+ // If the handlers are already registered, we're done.
+ if (NumRegisteredSignals != 0) return;
+
+ std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+ std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+ // Restore all of the signal handlers to how they were before we showed up.
+ for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+ sigaction(RegisteredSignalInfo[i].SigNo,
+ &RegisteredSignalInfo[i].SA, 0);
+ NumRegisteredSignals = 0;
+}
+
+
+/// RemoveFilesToRemove - Process the FilesToRemove list. This function
+/// should be called with the SignalsMutex lock held.
+static void RemoveFilesToRemove() {
+ while (!FilesToRemove.empty()) {
+ FilesToRemove.back().eraseFromDisk(true);
+ FilesToRemove.pop_back();
+ }
+}
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+ // Restore the signal behavior to default, so that the program actually
+ // crashes when we return and the signal reissues. This also ensures that if
+ // we crash in our signal handler that the program will terminate immediately
+ // instead of recursing in the signal handler.
+ UnregisterHandlers();
+
+ // Unmask all potentially blocked kill signals.
+ sigset_t SigMask;
+ sigfillset(&SigMask);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+
+ if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+ if (InterruptFunction) {
+ void (*IF)() = InterruptFunction;
+ SignalsMutex.release();
+ InterruptFunction = 0;
+ IF(); // run the interrupt function.
+ return;
+ }
+
+ SignalsMutex.release();
+ raise(Sig); // Execute the default handler.
+ return;
+ }
+
+ SignalsMutex.release();
+
+ // Otherwise if it is a fault (like SEGV) run any handler.
+ for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
+ CallBacksToRun[i].first(CallBacksToRun[i].second);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+ SignalsMutex.release();
+}
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+ SignalsMutex.acquire();
+ InterruptFunction = IF;
+ SignalsMutex.release();
+ RegisterHandlers();
+}
+
+// RemoveFileOnSignal - The public API
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+ std::string* ErrMsg) {
+ SignalsMutex.acquire();
+ FilesToRemove.push_back(Filename);
+
+ SignalsMutex.release();
+
+ RegisterHandlers();
+ return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ SignalsMutex.acquire();
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
+ if (I != FilesToRemove.rend())
+ FilesToRemove.erase(I.base()-1);
+ SignalsMutex.release();
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+static void PrintStackTrace(void *) {
+#ifdef HAVE_BACKTRACE
+ static void* StackTrace[256];
+ // Use backtrace() to output a backtrace on Linux systems with glibc.
+ int depth = backtrace(StackTrace,
+ static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+ int width = 0;
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+
+ int nwidth;
+ if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+ else nwidth = strlen(name) - 1;
+
+ if (nwidth > width) width = nwidth;
+ }
+
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+
+ fprintf(stderr, "%-2d", i);
+
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+ if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
+ else fprintf(stderr, " %-*s", width, name+1);
+
+ fprintf(stderr, " %#0*lx",
+ (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+ if (dlinfo.dli_sname != NULL) {
+ int res;
+ fputc(' ', stderr);
+ char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+ if (d == NULL) fputs(dlinfo.dli_sname, stderr);
+ else fputs(d, stderr);
+ free(d);
+
+ fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+ }
+ fputc('\n', stderr);
+ }
+#else
+ backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+ AddSignalHandler(PrintStackTrace, 0);
+}
+
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end up
+// bypassing our crash recovery attempts. We work around this for anything in
+// the same linkage unit by just defining our own versions of the assert handler
+// and abort.
+
+#ifdef __APPLE__
+
+#include <signal.h>
+#include <pthread.h>
+
+int raise(int sig) {
+ return pthread_kill(pthread_self(), sig);
+}
+
+void __assert_rtn(const char *func,
+ const char *file,
+ int line,
+ const char *expr) {
+ if (func)
+ fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+ expr, func, file, line);
+ else
+ fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+ expr, file, line);
+ abort();
+}
+
+void abort() {
+ raise(SIGABRT);
+ usleep(1000);
+ __builtin_trap();
+}
+
+#endif
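A sketch of wiring up the signal helpers above in a tool's main(); per the handler logic, files registered with RemoveFileOnSignal are deleted before the interrupt function runs. The Signals.h/Path.h include paths and the output file name are assumptions:

    #include "llvm/Support/Signals.h"
    #include "llvm/Support/Path.h"

    static void onInterrupt() {
      // Runs on SIGINT/SIGTERM etc.; registered files are already removed by now.
    }

    int main() {
      llvm::sys::PrintStackTraceOnErrorSignal();     // backtrace on SIGSEGV, SIGABRT, ...
      llvm::sys::SetInterruptFunction(onInterrupt);

      llvm::sys::Path Output;
      Output.set("/tmp/demo.o");                     // placeholder path
      llvm::sys::RemoveFileOnSignal(Output, 0);      // delete it if we get killed

      // ... do work that writes Output ...

      llvm::sys::DontRemoveFileOnSignal(Output);     // keep it once finished
      return 0;
    }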
diff --git a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
new file mode 100644
index 000000000000..2b4c9017cd91
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -0,0 +1,26 @@
+//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
+}
diff --git a/contrib/llvm/lib/Support/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc
new file mode 100644
index 000000000000..5cf5a9d44ed6
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/TimeValue.inc
@@ -0,0 +1,56 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+ using namespace sys;
+
+std::string TimeValue::str() const {
+ char buffer[32];
+
+ time_t ourTime = time_t(this->toEpochTime());
+#ifdef __hpux
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+ asctime_r(localtime(&ourTime), buffer);
+#else
+ ::asctime_r(::localtime(&ourTime), buffer);
+#endif
+
+ std::string result(buffer);
+ return result.substr(0,24);
+}
+
+TimeValue TimeValue::now() {
+ struct timeval the_time;
+ timerclear(&the_time);
+ if (0 != ::gettimeofday(&the_time,0)) {
+ // This is *really* unlikely to occur because the only gettimeofday
+ // errors concern the timezone parameter which we're passing in as 0.
+ // In the unlikely case it does happen, just return MinTime, no error
+ // message needed.
+ return MinTime;
+ }
+
+ return TimeValue(
+ static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ),
+ static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+ NANOSECONDS_PER_MICROSECOND ) );
+}
+
+}
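For completeness, a tiny sketch of the TimeValue::now()/str() pair defined above (the TimeValue.h include path is an assumption):

    #include "llvm/Support/TimeValue.h"
    #include <cstdio>

    int main() {
      llvm::sys::TimeValue T = llvm::sys::TimeValue::now();
      std::printf("%s\n", T.str().c_str());   // asctime-style text, truncated to 24 chars
      return 0;
    }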
diff --git a/contrib/llvm/lib/Support/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h
new file mode 100644
index 000000000000..b7be3111d431
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Unix.h
@@ -0,0 +1,87 @@
+//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Unix implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_UNIX_UNIX_H
+#define LLVM_SYSTEM_UNIX_UNIX_H
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on all UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/Errno.h"
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <string>
+#include <algorithm>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+#include <assert.h>
+#endif
+
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+/// This function builds an error message into \p ErrMsg using the \p prefix
+/// string and the Unix error number given by \p errnum. If \p errnum is -1
+/// (the default), the value of errno is used.
+/// @brief Make an error message
+///
+/// If the error number can be converted to a string, it will be
+/// separated from prefix by ": ".
+static inline bool MakeErrMsg(
+ std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
+ if (!ErrMsg)
+ return true;
+ if (errnum == -1)
+ errnum = errno;
+ *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
+ return true;
+}
+
+#endif
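A minimal sketch of the intended calling pattern for MakeErrMsg (hypothetical caller; assumes this header is already included, plus <fcntl.h> for open):

    #include <fcntl.h>

    // Report a failed open(2); leaving errnum at its default of -1 makes
    // MakeErrMsg pick up errno as set by the syscall.
    static bool openForRead(const std::string &Path, int &FD,
                            std::string *ErrMsg) {
      FD = ::open(Path.c_str(), O_RDONLY);
      if (FD < 0)
        return MakeErrMsg(ErrMsg, Path + ": can't open file");
      return false;
    }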
diff --git a/contrib/llvm/lib/Support/Unix/system_error.inc b/contrib/llvm/lib/Support/Unix/system_error.inc
new file mode 100644
index 000000000000..681e919edb4e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/system_error.inc
@@ -0,0 +1,34 @@
+//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ return _do_message::message(ev);
+}
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return error_condition(ev, system_category());
+#endif // ELAST
+ return error_condition(ev, generic_category());
+}
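The standard <system_error> facility applies the same ELAST-style split between system-specific and portable conditions; a small sketch in terms of the standard library (not LLVM's copy), assuming a POSIX system:

    #include <cassert>
    #include <cerrno>
    #include <system_error>

    // ENOENT sits below ELAST, so it maps onto the generic condition.
    void checkMapping() {
      std::error_code ec(ENOENT, std::system_category());
      assert(ec.default_error_condition() ==
             std::errc::no_such_file_or_directory);
    }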
diff --git a/contrib/llvm/lib/Support/Valgrind.cpp b/contrib/llvm/lib/Support/Valgrind.cpp
new file mode 100644
index 000000000000..703448524ed9
--- /dev/null
+++ b/contrib/llvm/lib/Support/Valgrind.cpp
@@ -0,0 +1,54 @@
+//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
+// defined. If we have valgrind.h but valgrind isn't running, its macros are
+// no-ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+
+static bool InitNotUnderValgrind() {
+ return !RUNNING_ON_VALGRIND;
+}
+
+// This bool is negated from what we'd expect because code may run before it
+// gets initialized. If that happens, it will appear to be 0 (false), and we
+// want that to cause the rest of the code in this file to run the
+// Valgrind-provided macros.
+static const bool NotUnderValgrind = InitNotUnderValgrind();
+
+bool llvm::sys::RunningOnValgrind() {
+ if (NotUnderValgrind)
+ return false;
+ return RUNNING_ON_VALGRIND;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+ if (NotUnderValgrind)
+ return;
+
+ VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
+}
+
+#else // !HAVE_VALGRIND_VALGRIND_H
+
+bool llvm::sys::RunningOnValgrind() {
+ return false;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+}
+
+#endif // !HAVE_VALGRIND_VALGRIND_H
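A minimal sketch of a call site (hypothetical JIT helper, assuming the llvm/Support/Valgrind.h header):

    #include "llvm/Support/Valgrind.h"

    // After rewriting code in place, tell Valgrind to drop any cached
    // translations for that range; this is a no-op outside Valgrind.
    void patchAndFlush(void *CodeStart, size_t CodeSize) {
      // ... rewrite the machine code in [CodeStart, CodeStart + CodeSize) ...
      llvm::sys::ValgrindDiscardTranslations(CodeStart, CodeSize);
    }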
diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
new file mode 100644
index 000000000000..fc5f5809cb40
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
@@ -0,0 +1,137 @@
+//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+
+#ifdef _MSC_VER
+ #include <ntverp.h>
+#endif
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBIMAGEHLP != 1)
+ #error "libimagehlp.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<HMODULE> OpenedHandles;
+
+extern "C" {
+
+ static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName,
+ ULONG_PTR ModuleBase,
+ ULONG ModuleSize,
+ PVOID UserContext)
+ {
+ // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded
+ // into the process.
+ if (stricmp(ModuleName, "msvci70") != 0 &&
+ stricmp(ModuleName, "msvcirt") != 0 &&
+ stricmp(ModuleName, "msvcp50") != 0 &&
+ stricmp(ModuleName, "msvcp60") != 0 &&
+ stricmp(ModuleName, "msvcp70") != 0 &&
+ stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+ // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+               // Otherwise, the user should know what they are doing :)
+ stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+ stricmp(ModuleName, "msvcrt20") != 0 &&
+ stricmp(ModuleName, "msvcrt40") != 0) {
+ OpenedHandles.push_back((HMODULE)ModuleBase);
+ }
+ return TRUE;
+ }
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
+ std::string *ErrMsg) {
+ if (filename) {
+ HMODULE a_handle = LoadLibrary(filename);
+
+ if (a_handle == 0)
+ return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+
+ OpenedHandles.push_back(a_handle);
+ } else {
+ // When no file is specified, enumerate all DLLs and EXEs in the
+ // process.
+ EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+ }
+
+ // Because we don't remember the handle, we will never free it; hence,
+ // it is loaded permanently.
+ return false;
+}
+
+// Stack probing routines are in the support library (e.g. libgcc), but we don't
+// have dynamic linking on Windows. Provide a hook.
+#define EXPLICIT_SYMBOL(SYM) \
+ extern "C" { extern void *SYM; }
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+
+#include "explicit_symbols.inc"
+
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ // First check symbols added via AddSymbol().
+ if (ExplicitSymbols) {
+ std::map<std::string, void *>::iterator I =
+ ExplicitSymbols->find(symbolName);
+ std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+ if (I != E)
+ return I->second;
+ }
+
+ // Now search the libraries.
+ for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+ E = OpenedHandles.end(); I != E; ++I) {
+ FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+ if (ptr) {
+ return (void *)(intptr_t)ptr;
+ }
+ }
+
+ #define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
+ #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
+ if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+
+ {
+ #include "explicit_symbols.inc"
+ }
+
+ #undef EXPLICIT_SYMBOL
+ #undef EXPLICIT_SYMBOL2
+
+ return 0;
+}
+
+}
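A minimal usage sketch of the two entry points implemented above (hypothetical caller, assuming the llvm/Support/DynamicLibrary.h header):

    #include "llvm/Support/DynamicLibrary.h"
    #include <cstdio>

    // A null filename enumerates every module already loaded into the
    // process; the symbol is then resolved against all opened handles.
    void dumpMallocAddress() {
      llvm::sys::DynamicLibrary::LoadLibraryPermanently(0);
      if (void *P =
            llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("malloc"))
        std::printf("malloc is at %p\n", P);
    }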
diff --git a/contrib/llvm/lib/Support/Windows/Host.inc b/contrib/llvm/lib/Support/Windows/Host.inc
new file mode 100644
index 000000000000..733830e82f08
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Host.inc
@@ -0,0 +1,23 @@
+//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getHostTriple() {
+ // FIXME: Adapt to running version.
+ return LLVM_HOSTTRIPLE;
+}
diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc
new file mode 100644
index 000000000000..9f69e7367e6f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Memory.inc
@@ -0,0 +1,73 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of various Memory
+// management utilities
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
+ const MemoryBlock *NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ static const size_t pageSize = Process::GetPageSize();
+ size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+  // FIXME: support NearBlock if ever needed on Win64.
+
+ void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
+ PAGE_EXECUTE_READWRITE);
+ if (pa == NULL) {
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
+ return MemoryBlock();
+ }
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*pageSize;
+ return result;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (!VirtualFree(M.Address, 0, MEM_RELEASE))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
+ return false;
+}
+
+bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+ return true;
+}
+
+bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
+ return false;
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+ return true;
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+ return false;
+}
+
+}
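A minimal round trip through the allocator implemented above (hypothetical caller, assuming the llvm/Support/Memory.h header):

    #include "llvm/Support/Memory.h"
    #include <string>

    // Allocate one RWX block for JITed code, then release it; Err receives
    // the OS error text on failure.
    bool roundTripRWX() {
      std::string Err;
      llvm::sys::MemoryBlock MB =
          llvm::sys::Memory::AllocateRWX(4096, 0, &Err);
      if (MB.base() == 0)
        return false;
      return !llvm::sys::Memory::ReleaseRWX(MB, &Err);
    }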
diff --git a/contrib/llvm/lib/Support/Windows/Mutex.inc b/contrib/llvm/lib/Support/Windows/Mutex.inc
new file mode 100644
index 000000000000..583dc6359a16
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Mutex.inc
@@ -0,0 +1,58 @@
+//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
+MutexImpl::MutexImpl(bool /*recursive*/)
+{
+ data_ = new CRITICAL_SECTION;
+ InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+MutexImpl::~MutexImpl()
+{
+ DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+ delete (LPCRITICAL_SECTION)data_;
+ data_ = 0;
+}
+
+bool
+MutexImpl::acquire()
+{
+ EnterCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+MutexImpl::release()
+{
+ LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+ return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
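A minimal sketch of how this implementation is normally reached (hypothetical counter, assuming the sys::Mutex and sys::ScopedLock wrappers from llvm/Support/Mutex.h):

    #include "llvm/Support/Mutex.h"

    static llvm::sys::Mutex CounterLock;
    static int Counter;

    // ScopedLock pairs acquire() with release() over the enclosing scope,
    // which on Windows ends up in the critical-section code above.
    void bump() {
      llvm::sys::ScopedLock Guard(CounterLock);
      ++Counter;
    }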
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
new file mode 100644
index 000000000000..42a92f9c6dfe
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -0,0 +1,931 @@
+//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <malloc.h>
+#include <cstdio>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes. As LLVM code basically
+// assumes forward slashes are used, backward slashs are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory. Any other use of a trailing slash is stripped. Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simplify the code.
+
+static void FlipBackSlashes(std::string& s) {
+ for (size_t i = 0; i < s.size(); i++)
+ if (s[i] == '\\')
+ s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+
+const char PathSeparator = ';';
+
+StringRef Path::GetEXESuffix() {
+ return "exe";
+}
+
+Path::Path(llvm::StringRef p)
+ : path(p) {
+ FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {
+ FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(StringRef that) {
+ path.assign(that.data(), that.size());
+ FlipBackSlashes(path);
+ return *this;
+}
+
+// push_back 0 on create, and pop_back on delete.
+struct ScopedNullTerminator {
+ std::string &str;
+ ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
+ ~ScopedNullTerminator() {
+ // str.pop_back(); But wait, C++03 doesn't have this...
+ assert(!str.empty() && str[str.size() - 1] == 0
+ && "Null char not present!");
+ str.resize(str.size() - 1);
+ }
+};
+
+bool
+Path::isValid() const {
+ if (path.empty())
+ return false;
+
+ // If there is a colon, it must be the second character, preceded by a letter
+ // and followed by something.
+ size_t len = path.size();
+ // This code assumes that path is null terminated, so make sure it is.
+ ScopedNullTerminator snt(path);
+ size_t pos = path.rfind(':',len);
+ size_t rootslash = 0;
+ if (pos != std::string::npos) {
+ if (pos != 1 || !isalpha(path[0]) || len < 3)
+ return false;
+ rootslash = 2;
+ }
+
+ // Look for a UNC path, and if found adjust our notion of the root slash.
+ if (len > 3 && path[0] == '/' && path[1] == '/') {
+ rootslash = path.find('/', 2);
+ if (rootslash == std::string::npos)
+ rootslash = 0;
+ }
+
+ // Check for illegal characters.
+ if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+ "\013\014\015\016\017\020\021\022\023\024\025\026"
+ "\027\030\031\032\033\034\035\036\037")
+ != std::string::npos)
+ return false;
+
+ // Remove trailing slash, unless it's a root slash.
+ if (len > rootslash+1 && path[len-1] == '/')
+ path.erase(--len);
+
+ // Check each component for legality.
+ for (pos = 0; pos < len; ++pos) {
+ // A component may not end in a space.
+ if (path[pos] == ' ') {
+ if (path[pos+1] == '/' || path[pos+1] == '\0')
+ return false;
+ }
+
+ // A component may not end in a period.
+ if (path[pos] == '.') {
+ if (path[pos+1] == '/' || path[pos+1] == '\0') {
+ // Unless it is the pseudo-directory "."...
+ if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+ return true;
+ // or "..".
+ if (pos > 0 && path[pos-1] == '.') {
+ if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+ return true;
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void Path::makeAbsolute() {
+  char  FullPath[MAX_PATH + 1] = {0};
+  LPSTR FilePart = NULL;
+
+ DWORD RetLength = ::GetFullPathNameA(path.c_str(),
+ sizeof(FullPath)/sizeof(FullPath[0]),
+ FullPath, &FilePart);
+
+ if (0 == RetLength) {
+ // FIXME: Report the error GetLastError()
+ assert(0 && "Unable to make absolute path!");
+ } else if (RetLength > MAX_PATH) {
+ // FIXME: Report too small buffer (needed RetLength bytes).
+ assert(0 && "Unable to make absolute path!");
+ } else {
+ path = FullPath;
+ }
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ // FIXME: This does not handle correctly an absolute path starting from
+ // a drive letter or in UNC format.
+ switch (NameLen) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return NameStart[0] == '/';
+ default:
+ return
+ (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+ (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
+ }
+}
+
+bool
+Path::isAbsolute() const {
+ // FIXME: This does not handle correctly an absolute path starting from
+ // a drive letter or in UNC format.
+ switch (path.length()) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return path[0] == '/';
+ default:
+ return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+ }
+}
+
+static Path *TempDirectory;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+ if (TempDirectory)
+ return *TempDirectory;
+
+ char pathname[MAX_PATH];
+ if (!GetTempPath(MAX_PATH, pathname)) {
+ if (ErrMsg)
+ *ErrMsg = "Can't determine temporary directory";
+ return Path();
+ }
+
+ Path result;
+ result.set(pathname);
+
+  // Append a subdirectory based on our process id so multiple LLVMs don't
+ // step on each other's toes.
+#ifdef __MINGW32__
+ // Mingw's Win32 header files are broken.
+ sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+ sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+ result.appendComponent(pathname);
+
+ // If there's a directory left over from a previous LLVM execution that
+ // happened to have the same process id, get rid of it.
+ result.eraseFromDisk(true);
+
+ // And finally (re-)create the empty directory.
+ result.createDirectoryOnDisk(false);
+ TempDirectory = new Path(result);
+ return *TempDirectory;
+}
+
+// FIXME: the following set of functions doesn't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+  // This is the only notion that Windows has of a root directory. Nothing
+ // is here except for drives.
+ return Path("file:///");
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+ char buff[MAX_PATH];
+ // Generic form of C:\Windows\System32
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_SYSTEM,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get system directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+
+ // Reset buff.
+ buff[0] = 0;
+ // Generic form of C:\Windows
+ res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_WINDOWS,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get windows directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+ Path ret = GetUserHomeDirectory();
+ if (!ret.appendComponent(".llvm"))
+ assert(0 && "Failed to append .llvm");
+ return ret;
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ char buff[MAX_PATH];
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_APPDATA,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK)
+ assert(0 && "Failed to get user home directory");
+ return Path(buff);
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAX_PATH];
+ ::GetCurrentDirectoryA(MAX_PATH,pathname);
+ return Path(pathname);
+}
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+ char pathname[MAX_PATH];
+ DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+ return ret != MAX_PATH ? Path(pathname) : Path();
+}
+
+
+// FIXME: the above set of functions doesn't map to Windows very well.
+
+
+StringRef Path::getDirname() const {
+ return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef(path).substr(slash);
+ else
+ return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef("");
+ else
+ return StringRef(path).substr(dot + 1);
+}
+
+bool
+Path::exists() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isDirectory() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) &&
+ (attr & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool
+Path::isSymLink() const {
+ DWORD attributes = GetFileAttributes(path.c_str());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES)
+ // There's no sane way to report this :(.
+ assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES");
+
+  // This isn't exactly what defines an NTFS symlink, but it is only true for
+ // paths that act like a symlink.
+ return attributes & FILE_ATTRIBUTE_REPARSE_POINT;
+}
+
+bool
+Path::canRead() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::canWrite() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
+}
+
+bool
+Path::canExecute() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isRegularFile() const {
+ bool res;
+ if (fs::is_regular_file(path, res))
+ return false;
+ return res;
+}
+
+StringRef
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash, we have a root directory
+ if (pos == path.length()-1)
+ return path;
+
+ // Return everything after the last slash
+ return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
+ ": Can't get status: ");
+ return 0;
+ }
+
+ status.fileSize = fi.nFileSizeHigh;
+ status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
+ status.fileSize += fi.nFileSizeLow;
+
+ status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
+ status.user = 9999; // Not applicable to Windows, so...
+ status.group = 9999; // Not applicable to Windows, so...
+
+ // FIXME: this is only unique if the file is accessed by the same file path.
+ // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
+ // numbers, but the concept doesn't exist in Windows.
+ status.uniqueID = 0;
+ for (unsigned i = 0; i < path.length(); ++i)
+ status.uniqueID += path[i];
+
+ ULARGE_INTEGER ui;
+ ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+ ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+ status.modTime.fromWin32Time(ui.QuadPart);
+
+ status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ // All files are readable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ DWORD attr = GetFileAttributes(path.c_str());
+
+ // If it doesn't exist, we're done.
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ return false;
+
+ if (attr & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+ MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ // All files are executable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrMsg, path + ": can't get status of file");
+ return true;
+ }
+
+ if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ if (ErrMsg)
+ *ErrMsg = path + ": not a directory";
+ return true;
+ }
+
+ result.clear();
+ WIN32_FIND_DATA fd;
+ std::string searchpath = path;
+ if (path.size() == 0 || searchpath[path.size()-1] == '/')
+ searchpath += "*";
+ else
+ searchpath += "/*";
+
+ HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+ if (h == INVALID_HANDLE_VALUE) {
+ if (GetLastError() == ERROR_FILE_NOT_FOUND)
+ return true; // not really an error, now is it?
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+
+ do {
+ if (fd.cFileName[0] == '.')
+ continue;
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ result.insert(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+ return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+ if (a_path.empty())
+ return false;
+ std::string save(path);
+ path = a_path;
+ FlipBackSlashes(path);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+ if (name.empty())
+ return false;
+ std::string save(path);
+ if (!path.empty()) {
+ size_t last = path.size() - 1;
+ if (path[last] != '/')
+ path += '/';
+ }
+ path += name;
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == path.size() - 1 || slashpos == std::string::npos)
+ return false;
+ std::string save(path);
+ path.erase(slashpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ std::string save(path);
+ path.erase(dotpos, path.size()-dotpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char* msg) {
+ if (ErrMsg)
+ *ErrMsg = std::string(pathname) + ": " + std::string(msg);
+ return true;
+}
+
+bool
+Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
+ // Get a writeable copy of the path name
+ size_t len = path.length();
+ char *pathname = reinterpret_cast<char *>(_alloca(len+2));
+ path.copy(pathname, len);
+ pathname[len] = 0;
+
+ // Make sure it ends with a slash.
+ if (len == 0 || pathname[len - 1] != '/') {
+ pathname[len] = '/';
+ pathname[++len] = 0;
+ }
+
+ // Determine starting point for initial / search.
+ char *next = pathname;
+ if (pathname[0] == '/' && pathname[1] == '/') {
+ // Skip host name.
+ next = strchr(pathname+2, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ // Skip share name.
+ next = strchr(next+1, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname,"badly formed remote directory");
+
+ next++;
+ if (*next == 0)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ } else {
+ if (pathname[1] == ':')
+ next += 2; // skip drive letter
+ if (*next == '/')
+ next++; // skip root directory
+ }
+
+ // If we're supposed to create intermediate directories
+ if (create_parents) {
+ // Loop through the directory components until we're done
+ while (*next) {
+ next = strchr(next, '/');
+ *next = 0;
+ if (!CreateDirectory(pathname, NULL) &&
+ GetLastError() != ERROR_ALREADY_EXISTS)
+ return MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": Can't create directory: ");
+ *next++ = '/';
+ }
+ } else {
+ // Drop trailing slash.
+ pathname[len-1] = 0;
+ if (!CreateDirectory(pathname, NULL) &&
+ GetLastError() != ERROR_ALREADY_EXISTS) {
+ return MakeErrMsg(ErrMsg, std::string(pathname) +
+ ": Can't create directory: ");
+ }
+ }
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+ CloseHandle(h);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+ return true;
+
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+ // If it doesn't exist, we're done.
+ bool Exists;
+ if (fs::exists(path, Exists) || !Exists)
+ return false;
+
+ char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
+ int lastchar = path.length() - 1 ;
+ path.copy(pathname, lastchar+1);
+
+ // Make path end with '/*'.
+ if (pathname[lastchar] != '/')
+ pathname[++lastchar] = '/';
+ pathname[lastchar+1] = '*';
+ pathname[lastchar+2] = 0;
+
+ if (remove_contents) {
+ WIN32_FIND_DATA fd;
+ HANDLE h = FindFirstFile(pathname, &fd);
+
+ // It's a bad idea to alter the contents of a directory while enumerating
+ // its contents. So build a list of its contents first, then destroy them.
+
+ if (h != INVALID_HANDLE_VALUE) {
+ std::vector<Path> list;
+
+ do {
+ if (strcmp(fd.cFileName, ".") == 0)
+ continue;
+ if (strcmp(fd.cFileName, "..") == 0)
+ continue;
+
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ list.push_back(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+
+ for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+ ++I) {
+ Path &aPath = *I;
+ aPath.eraseFromDisk(true);
+ }
+ } else {
+ if (GetLastError() != ERROR_FILE_NOT_FOUND)
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+ }
+
+ pathname[lastchar] = 0;
+ if (!RemoveDirectory(pathname))
+ return MakeErrMsg(ErrStr,
+ std::string(pathname) + ": Can't destroy directory: ");
+ return false;
+ } else {
+ // Read-only files cannot be deleted on Windows. Must remove the read-only
+ // attribute first.
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ }
+
+ if (!DeleteFile(path.c_str()))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ return false;
+ }
+}
+
+bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char* buf = reinterpret_cast<char*>(alloca(len));
+
+ HANDLE h = CreateFile(path.c_str(),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return false;
+
+ DWORD nRead = 0;
+ BOOL ret = ReadFile(h, buf, len, &nRead, NULL);
+ CloseHandle(h);
+
+ if (!ret || nRead != len)
+ return false;
+
+ Magic = std::string(buf, len);
+ return true;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
+ return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
+ + "': ");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
+ // FIXME: should work on directories also.
+ if (!si.isFile) {
+ return true;
+ }
+
+ HANDLE h = CreateFile(path.c_str(),
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return true;
+
+ BY_HANDLE_FILE_INFORMATION bhfi;
+ if (!GetFileInformationByHandle(h, &bhfi)) {
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
+ }
+
+ ULARGE_INTEGER ui;
+ ui.QuadPart = si.modTime.toWin32Time();
+ FILETIME ft;
+ ft.dwLowDateTime = ui.LowPart;
+ ft.dwHighDateTime = ui.HighPart;
+ BOOL ret = SetFileTime(h, NULL, &ft, &ft);
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ if (!ret) {
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": SetFileTime: ");
+ }
+
+ // Best we can do with Unix permission bits is to interpret the owner
+ // writable bit.
+ if (si.mode & 0200) {
+ if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ } else {
+ if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ }
+
+ return false;
+}
+
+bool
+CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
+ // Can't use CopyFile macro defined in Windows.h because it would mess up the
+ // above line. We use the expansion it would have in a non-UNICODE build.
+ if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
+ return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() +
+ "' to '" + Dest.str() + "': ");
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
+ return false; // File doesn't exist already, just use it!
+
+ // Reserve space for -XXXXXX at the end.
+ char *FNBuffer = (char*) alloca(path.size()+8);
+ unsigned offset = path.size();
+ path.copy(FNBuffer, offset);
+
+ // Find a numeric suffix that isn't used by an existing file. Assume there
+ // won't be more than 1 million files with the same prefix. Probably a safe
+ // bet.
+ static int FCounter = -1;
+ if (FCounter < 0) {
+ // Give arbitrary initial seed.
+ // FIXME: We should use sys::fs::unique_file() in future.
+ LARGE_INTEGER cnt64;
+ DWORD x = GetCurrentProcessId();
+ x = (x << 16) | (x >> 16);
+ if (QueryPerformanceCounter(&cnt64)) // RDTSC
+ x ^= cnt64.HighPart ^ cnt64.LowPart;
+ FCounter = x % 1000000;
+ }
+ do {
+ sprintf(FNBuffer+offset, "-%06u", FCounter);
+ if (++FCounter > 999999)
+ FCounter = 0;
+ path = FNBuffer;
+ } while (!fs::exists(path, Exists) && Exists);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ makeUnique(reuse_current, ErrMsg);
+
+ // Now go and create it
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+ CloseHandle(h);
+ return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
+ return 0;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, size_t FileSize) {
+ assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
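A minimal round trip through the accessors defined above (hypothetical values, assuming the llvm/Support/Path.h header):

    #include "llvm/Support/Path.h"
    #include <cassert>

    // The constructor flips backslashes to forward slashes before the
    // accessors ever see the string.
    void pathAccessorDemo() {
      llvm::sys::Path P("C:\\tools\\clang.exe");
      assert(P.getLast() == "clang.exe");
      assert(P.getBasename() == "clang");
      assert(P.getSuffix() == "exe");
    }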
diff --git a/contrib/llvm/lib/Support/Windows/PathV2.inc b/contrib/llvm/lib/Support/Windows/PathV2.inc
new file mode 100644
index 000000000000..af71b73cd693
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,757 @@
+//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <wincrypt.h>
+#include <fcntl.h>
+#include <io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// MinGW doesn't define this.
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+using namespace llvm;
+
+namespace {
+ typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
+ /*__in*/ LPCWSTR lpSymlinkFileName,
+ /*__in*/ LPCWSTR lpTargetFileName,
+ /*__in*/ DWORD dwFlags);
+
+ PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
+ ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
+ "CreateSymbolicLinkW"));
+
+ error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
+ int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), 0);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf16.reserve(len + 1);
+ utf16.set_size(len);
+
+ len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), utf16.size());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf16 null terminated.
+ utf16.push_back(0);
+ utf16.pop_back();
+
+ return success;
+ }
+
+ error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8) {
+ // Get length.
+ int len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.begin(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf8.reserve(len);
+ utf8.set_size(len);
+
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.data(), utf8.size(),
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf8 null terminated.
+ utf8.push_back(0);
+ utf8.pop_back();
+
+ return success;
+ }
+
+ error_code TempDir(SmallVectorImpl<wchar_t> &result) {
+ retry_temp_dir:
+ DWORD len = ::GetTempPathW(result.capacity(), result.begin());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ if (len > result.capacity()) {
+ result.reserve(len);
+ goto retry_temp_dir;
+ }
+
+ result.set_size(len);
+ return success;
+ }
+
+ // Forwarder for ScopedHandle.
+ BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
+ return ::CryptReleaseContext(Provider, 0);
+ }
+
+ typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
+ BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
+ ScopedCryptContext;
+ bool is_separator(const wchar_t value) {
+ switch (value) {
+ case L'\\':
+ case L'/':
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ SmallVector<wchar_t, 128> cur_path;
+ cur_path.reserve(128);
+retry_cur_dir:
+ DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
+
+ // A zero return value indicates a failure other than insufficient space.
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // If there's insufficient space, the len returned is larger than the len
+ // given.
+ if (len > cur_path.capacity()) {
+ cur_path.reserve(len);
+ goto retry_cur_dir;
+ }
+
+ cur_path.set_size(len);
+ // cur_path now holds the current directory in utf-16. Convert to utf-8.
+
+ // Find out how much space we need. Sadly, this function doesn't return the
+ // size needed unless you tell it the result size is 0, which means you
+ // _always_ have to call it twice.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return make_error_code(windows_error(::GetLastError()));
+
+ result.reserve(len);
+ result.set_size(len);
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), result.size(),
+ NULL, NULL);
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ // Copy the file.
+ BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(),
+ copt != copy_option::overwrite_if_exists);
+
+ if (res == 0)
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::already_exists)
+ existed = true;
+ else
+ return ec;
+ } else
+ existed = false;
+
+ return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Only do it if the function is available at runtime.
+ if (!create_symbolic_link_api)
+ return make_error_code(errc::function_not_supported);
+
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (st.type() == file_type::directory_file) {
+ if (!::RemoveDirectoryW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ } else {
+ if (!::DeleteFileW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ }
+
+ return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::MoveFileExW(wide_from.begin(), wide_to.begin(),
+ MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE);
+ if (fd == -1)
+ return error_code(errno, generic_category());
+#ifdef HAVE__CHSIZE_S
+ errno_t error = ::_chsize_s(fd, size);
+#else
+ errno_t error = ::_chsize(fd, size);
+#endif
+ ::close(fd);
+ return error_code(error, generic_category());
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES) {
+ // See if the file didn't actually exist.
+ error_code ec = make_error_code(windows_error(::GetLastError()));
+ if (ec != windows_error::file_not_found &&
+ ec != windows_error::path_not_found)
+ return ec;
+ result = false;
+ } else
+ result = true;
+ return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ // Get arguments.
+ SmallString<128> a_storage;
+ SmallString<128> b_storage;
+ StringRef a = A.toStringRef(a_storage);
+ StringRef b = B.toStringRef(b_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_a;
+ SmallVector<wchar_t, 128> wide_b;
+ if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
+ if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
+
+ AutoHandle HandleB(
+ ::CreateFileW(wide_b.begin(),
+ 0,
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ 0,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+
+ AutoHandle HandleA(
+ ::CreateFileW(wide_a.begin(),
+ 0,
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ 0,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+
+ // If both handles are invalid, it's an error.
+ if (HandleA == INVALID_HANDLE_VALUE &&
+ HandleB == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // If only one is invalid, it's false.
+  if (HandleA == INVALID_HANDLE_VALUE ||
+      HandleB == INVALID_HANDLE_VALUE) {
+ result = false;
+ return success;
+ }
+
+ // Get file information.
+ BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
+ if (!::GetFileInformationByHandle(HandleA, &InfoA))
+ return windows_error(::GetLastError());
+ if (!::GetFileInformationByHandle(HandleB, &InfoB))
+ return windows_error(::GetLastError());
+
+ // See if it's all the same.
+ result =
+ InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
+ InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
+ InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
+ InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
+ InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
+ InfoA.ftLastWriteTime.dwLowDateTime ==
+ InfoB.ftLastWriteTime.dwLowDateTime &&
+ InfoA.ftLastWriteTime.dwHighDateTime ==
+ InfoB.ftLastWriteTime.dwHighDateTime;
+
+ return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ WIN32_FILE_ATTRIBUTE_DATA FileData;
+ if (!::GetFileAttributesExW(path_utf16.begin(),
+ ::GetFileExInfoStandard,
+ &FileData))
+ return windows_error(::GetLastError());
+
+ result =
+ (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+ + FileData.nFileSizeLow;
+
+ return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ StringRef path8 = path.toStringRef(path_storage);
+ // FIXME: We should detect as many "special file name" as possible.
+ if (path8.compare_lower("nul") == 0) {
+ result = file_status(file_type::character_file);
+ return success;
+ }
+
+ if (error_code ec = UTF8ToUTF16(path8,
+ path_utf16))
+ return ec;
+
+ DWORD attr = ::GetFileAttributesW(path_utf16.begin());
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ goto handle_status_error;
+
+ // Handle reparse points.
+ if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
+ AutoHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (h == INVALID_HANDLE_VALUE)
+ goto handle_status_error;
+ }
+
+ if (attr & FILE_ATTRIBUTE_DIRECTORY)
+ result = file_status(file_type::directory_file);
+ else
+ result = file_status(file_type::regular_file);
+
+ return success;
+
+handle_status_error:
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_not_found ||
+ ec == windows_error::path_not_found)
+ result = file_status(file_type::file_not_found);
+ else if (ec == windows_error::sharing_violation)
+ result = file_status(file_type::type_unknown);
+ else {
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path) {
+ // Use result_path as temp storage.
+ result_path.set_size(0);
+ StringRef m = model.toStringRef(result_path);
+
+ SmallVector<wchar_t, 128> model_utf16;
+ if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec;
+
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(m);
+
+ if (!absolute) {
+ SmallVector<wchar_t, 64> temp_dir;
+ if (error_code ec = TempDir(temp_dir)) return ec;
+ // Handle c: by removing it.
+ if (model_utf16.size() > 2 && model_utf16[1] == L':') {
+ model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+ }
+ model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
+ }
+
+ // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallVector<wchar_t, 128> random_path_utf16;
+
+ // Get a Crypto Provider for CryptGenRandom.
+ HCRYPTPROV HCPC;
+ if (!::CryptAcquireContextW(&HCPC,
+ NULL,
+ NULL,
+ PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT))
+ return windows_error(::GetLastError());
+ ScopedCryptContext CryptoProvider(HCPC);
+
+retry_random_path:
+ random_path_utf16.set_size(0);
+ for (SmallVectorImpl<wchar_t>::const_iterator i = model_utf16.begin(),
+ e = model_utf16.end();
+ i != e; ++i) {
+ if (*i == L'%') {
+ BYTE val = 0;
+ if (!::CryptGenRandom(CryptoProvider, 1, &val))
+ return windows_error(::GetLastError());
+ random_path_utf16.push_back("0123456789abcdef"[val & 15]);
+ }
+ else
+ random_path_utf16.push_back(*i);
+ }
+ // Make random_path_utf16 null terminated.
+ random_path_utf16.push_back(0);
+ random_path_utf16.pop_back();
+
+ // Try to create + open the path.
+retry_create_file:
+ HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
+ GENERIC_READ | GENERIC_WRITE,
+ FILE_SHARE_READ,
+ NULL,
+ // Return ERROR_FILE_EXISTS if the file
+ // already exists.
+ CREATE_NEW,
+ FILE_ATTRIBUTE_TEMPORARY,
+ NULL);
+ if (TempFileHandle == INVALID_HANDLE_VALUE) {
+ // If the file existed, try again, otherwise, error.
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_exists)
+ goto retry_random_path;
+ // Check for non-existing parent directories.
+ if (ec == windows_error::path_not_found) {
+ // Create the directories using result_path as temp storage.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path))
+ return ec;
+ StringRef p(result_path.begin(), result_path.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // If c: doesn't exist, bail.
+ if (i->endswith(":"))
+ return ec;
+
+ SmallVector<wchar_t, 64> dir_to_create_utf16;
+ if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16))
+ return ec;
+
+ // Create the directory.
+ if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL))
+ return windows_error(::GetLastError());
+ }
+ }
+ goto retry_create_file;
+ }
+ return ec;
+ }
+
+ // Set result_path to the utf-8 representation of the path.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path)) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ return ec;
+ }
+
+ // Convert the Windows API file handle into a C-runtime handle.
+ int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0);
+ if (fd == -1) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ // MSDN doesn't say anything about _open_osfhandle setting errno or
+ // GetLastError(), so just return invalid_handle.
+ return windows_error::invalid_handle;
+ }
+
+ result_fd = fd;
+ return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+ result.set_size(0);
+
+ // Convert path to UTF-16.
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ // Open file.
+ HANDLE file = ::CreateFileW(c_str(path_utf16),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_READONLY,
+ NULL);
+ if (file == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // Allocate buffer.
+ result.reserve(len);
+
+ // Get magic!
+ DWORD bytes_read = 0;
+ BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL);
+ error_code ec = windows_error(::GetLastError());
+ ::CloseHandle(file);
+ if (!read_success || (bytes_read != len)) {
+ // Set result size to the number of bytes read if it's valid.
+ if (bytes_read <= len)
+ result.set_size(bytes_read);
+ // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
+ return ec;
+ }
+
+ result.set_size(len);
+ return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path,
+ path_utf16))
+ return ec;
+
+ // Convert path to the format that Windows is happy with.
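+ // For example, "C:\foo" becomes "C:\foo\*" and "C:\foo\" becomes "C:\foo\*",
+ // which is the wildcard pattern FindFirstFileW expects when listing a
+ // directory's entries.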
+ if (path_utf16.size() > 0 &&
+ !is_separator(path_utf16[path.size() - 1]) &&
+ path_utf16[path.size() - 1] != L':') {
+ path_utf16.push_back(L'\\');
+ path_utf16.push_back(L'*');
+ } else {
+ path_utf16.push_back(L'*');
+ }
+
+ // Get the first directory entry.
+ WIN32_FIND_DATAW FirstFind;
+ ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
+ if (!FindHandle)
+ return windows_error(::GetLastError());
+
+ size_t FilenameLen = ::wcslen(FirstFind.cFileName);
+ while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
+ FirstFind.cFileName[1] == L'.'))
+ if (!::FindNextFileW(FindHandle, &FirstFind)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return directory_iterator_destruct(it);
+ return ec;
+ } else
+ FilenameLen = ::wcslen(FirstFind.cFileName);
+
+ // Construct the current directory entry.
+ SmallString<128> directory_entry_name_utf8;
+ if (error_code ec = UTF16ToUTF8(FirstFind.cFileName,
+ ::wcslen(FirstFind.cFileName),
+ directory_entry_name_utf8))
+ return ec;
+
+ it.IterationHandle = intptr_t(FindHandle.take());
+ SmallString<128> directory_entry_path(path);
+ path::append(directory_entry_path, directory_entry_name_utf8.str());
+ it.CurrentEntry = directory_entry(directory_entry_path.str());
+
+ return success;
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+ if (it.IterationHandle != 0)
+ // Closes the handle if it's valid.
+ ScopedFindHandle close(HANDLE(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+ WIN32_FIND_DATAW FindData;
+ if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return directory_iterator_destruct(it);
+ return ec;
+ }
+
+ size_t FilenameLen = ::wcslen(FindData.cFileName);
+ if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
+ FindData.cFileName[1] == L'.'))
+ return directory_iterator_increment(it);
+
+ SmallString<128> directory_entry_path_utf8;
+ if (error_code ec = UTF16ToUTF8(FindData.cFileName,
+ ::wcslen(FindData.cFileName),
+ directory_entry_path_utf8))
+ return ec;
+
+ it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
+ return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
new file mode 100644
index 000000000000..06a7f0054d50
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -0,0 +1,222 @@
+//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <psapi.h>
+#include <malloc.h>
+#include <io.h>
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBPSAPI != 1)
+ #error "libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef __MINGW32__
+// This ban should be lifted when MinGW 1.0+ has defined this value.
+# define _HEAPOK (-2)
+#endif
+
+namespace llvm {
+using namespace sys;
+
+// This function retrieves the page size using GetSystemInfo and is present
+// solely so it can be called once in Process::GetPageSize to initialize the
+// static variable PageSize.
+inline unsigned GetPageSizeOnce() {
+ // NOTE: A 32-bit application running under WOW64 is supposed to use
+ // GetNativeSystemInfo. However, this interface is not present prior
+ // to Windows XP, so using it requires dynamic linking. It is not clear
+ // how this affects the reported page size, if at all. One could argue
+ // that LLVM ought to run as a 64-bit application on a 64-bit system, anyway.
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return static_cast<unsigned>(info.dwPageSize);
+}
+
+unsigned
+Process::GetPageSize() {
+ static const unsigned PageSize = GetPageSizeOnce();
+ return PageSize;
+}
+
+size_t
+Process::GetMallocUsage()
+{
+ _HEAPINFO hinfo;
+ hinfo._pentry = NULL;
+
+ size_t size = 0;
+
+ while (_heapwalk(&hinfo) == _HEAPOK)
+ size += hinfo._size;
+
+ return size;
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+ PROCESS_MEMORY_COUNTERS pmc;
+ GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
+ return pmc.PagefileUsage;
+}
+
+void
+Process::GetTimeUsage(
+ TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time)
+{
+ elapsed = TimeValue::now();
+
+ uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
+ GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
+ (FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
+ (FILETIME*)&UserTime);
+
+ // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
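+ // For example, UserTime == 12,345,678 ticks is 1 second plus
+ // 2,345,678 * 100 = 234,567,800 nanoseconds.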
+ user_time.seconds( UserTime / 10000000 );
+ user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 );
+ sys_time.seconds( KernelTime / 10000000 );
+ sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 );
+}
+
+int Process::GetCurrentUserId()
+{
+ return 65536;
+}
+
+int Process::GetCurrentGroupId()
+{
+ return 65536;
+}
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this configuration item
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+ // Windows doesn't do core files, but it does do modal pop-up message
+ // boxes. As this method is used by bugpoint, preventing these pop-ups
+ // is the moral equivalent of suppressing core files.
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+}
+
+bool Process::StandardInIsUserInput() {
+ return FileDescriptorIsDisplayed(0);
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return FileDescriptorIsDisplayed(1);
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(2);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+ DWORD Mode; // Unused
+ return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
+}
+
+unsigned Process::StandardOutColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+unsigned Process::StandardErrColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+// It always has colors.
+bool Process::StandardErrHasColors() {
+ return StandardErrIsDisplayed();
+}
+
+bool Process::StandardOutHasColors() {
+ return StandardOutIsDisplayed();
+}
+
+namespace {
+class DefaultColors
+{
+ private:
+ WORD defaultColor;
+ public:
+ DefaultColors()
+ :defaultColor(GetCurrentColor()) {}
+ static unsigned GetCurrentColor() {
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ return csbi.wAttributes;
+ return 0;
+ }
+ WORD operator()() const { return defaultColor; }
+};
+
+DefaultColors defaultColors;
+}
+
+bool Process::ColorNeedsFlush() {
+ return true;
+}
+
+const char *Process::OutputBold(bool bg) {
+ WORD colors = DefaultColors::GetCurrentColor();
+ if (bg)
+ colors |= BACKGROUND_INTENSITY;
+ else
+ colors |= FOREGROUND_INTENSITY;
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ WORD colors;
+ if (bg) {
+ colors = ((code&1) ? BACKGROUND_RED : 0) |
+ ((code&2) ? BACKGROUND_GREEN : 0 ) |
+ ((code&4) ? BACKGROUND_BLUE : 0);
+ if (bold)
+ colors |= BACKGROUND_INTENSITY;
+ } else {
+ colors = ((code&1) ? FOREGROUND_RED : 0) |
+ ((code&2) ? FOREGROUND_GREEN : 0 ) |
+ ((code&4) ? FOREGROUND_BLUE : 0);
+ if (bold)
+ colors |= FOREGROUND_INTENSITY;
+ }
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::ResetColor() {
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
+ return 0;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
new file mode 100644
index 000000000000..e486e6ec2381
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -0,0 +1,405 @@
+//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct Win32ProcessInfo {
+ HANDLE hProcess;
+ DWORD dwProcessId;
+ };
+}
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+}
+
+unsigned Program::GetPid() const {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ return wpi->dwProcessId;
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // Return paths with slashes verbatim.
+ if (progName.find('\\') != std::string::npos ||
+ progName.find('/') != std::string::npos)
+ return temp;
+
+ // At this point, the file name is valid and does not contain slashes.
+ // Let Windows search for it.
+ char buffer[MAX_PATH];
+ char *dummy = NULL;
+ DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
+ buffer, &dummy);
+
+ // See if it wasn't found.
+ if (len == 0)
+ return Path();
+
+ // See if we got the entire path.
+ if (len < MAX_PATH)
+ return Path(buffer);
+
+ // Buffer was too small; grow and retry.
+ while (true) {
+ char *b = reinterpret_cast<char *>(_alloca(len+1));
+ DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
+
+ // It is unlikely the search failed, but it's always possible some file
+ // was added or removed since the last search, so be paranoid...
+ if (len2 == 0)
+ return Path();
+ else if (len2 <= len)
+ return Path(b);
+
+ len = len2;
+ }
+}
+
+static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
+ HANDLE h;
+ if (path == 0) {
+ DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+ GetCurrentProcess(), &h,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ return h;
+ }
+
+ const char *fname;
+ if (path->isEmpty())
+ fname = "NUL";
+ else
+ fname = path->c_str();
+
+ SECURITY_ATTRIBUTES sa;
+ sa.nLength = sizeof(sa);
+ sa.lpSecurityDescriptor = 0;
+ sa.bInheritHandle = TRUE;
+
+ h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
+ &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
+ (fd ? "input: " : "output: "));
+ }
+
+ return h;
+}
+
+/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess.
+static bool ArgNeedsQuotes(const char *Str) {
+ return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
+}
+
+
+/// ArgLenWithQuotes - Compute the length the argument will occupy on the
+/// CreateProcess command line, including surrounding quotes (if needed) and
+/// any escaped embedded quotes.
+static unsigned int ArgLenWithQuotes(const char *Str) {
+ unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+
+ while (*Str != '\0') {
+ if (*Str == '\"')
+ ++len;
+
+ ++len;
+ ++Str;
+ }
+
+ return len;
+}
+
+
+bool
+Program::Execute(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+
+ if (!path.canExecute()) {
+ if (ErrMsg)
+ *ErrMsg = "program not executable";
+ return false;
+ }
+
+ // Windows wants a command line, not an array of args, to pass to the new
+ // process. We have to concatenate them all, while quoting the args that
+ // have embedded spaces (or are empty).
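+ // For illustration (hypothetical args): the argument vector
+ // {"clang", "a b", "say \"hi\""} is flattened below into the single string
+ // clang "a b" "say \"hi\"" (each argument followed by a space).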
+
+ // First, determine the length of the command line.
+ unsigned len = 0;
+ for (unsigned i = 0; args[i]; i++) {
+ len += ArgLenWithQuotes(args[i]) + 1;
+ }
+
+ // Now build the command line.
+ char *command = reinterpret_cast<char *>(_alloca(len+1));
+ char *p = command;
+
+ for (unsigned i = 0; args[i]; i++) {
+ const char *arg = args[i];
+
+ bool needsQuoting = ArgNeedsQuotes(arg);
+ if (needsQuoting)
+ *p++ = '"';
+
+ while (*arg != '\0') {
+ if (*arg == '\"')
+ *p++ = '\\';
+
+ *p++ = *arg++;
+ }
+
+ if (needsQuoting)
+ *p++ = '"';
+ *p++ = ' ';
+ }
+
+ *p = 0;
+
+ // The pointer to the environment block for the new process.
+ char *envblock = 0;
+
+ if (envp) {
+ // An environment block consists of a null-terminated block of
+ // null-terminated strings. Convert the array of environment variables to
+ // an environment block by concatenating them.
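+ // For illustration (hypothetical variables): { "PATH=C:\bin", "TMP=C:\tmp" }
+ // is laid out as "PATH=C:\bin\0TMP=C:\tmp\0\0", where the final extra NUL
+ // terminates the block as a whole.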
+
+ // First, determine the length of the environment block.
+ len = 0;
+ for (unsigned i = 0; envp[i]; i++)
+ len += strlen(envp[i]) + 1;
+
+ // Now build the environment block.
+ envblock = reinterpret_cast<char *>(_alloca(len+1));
+ p = envblock;
+
+ for (unsigned i = 0; envp[i]; i++) {
+ const char *ev = envp[i];
+ size_t len = strlen(ev) + 1;
+ memcpy(p, ev, len);
+ p += len;
+ }
+
+ *p = 0;
+ }
+
+ // Create a child process.
+ STARTUPINFO si;
+ memset(&si, 0, sizeof(si));
+ si.cb = sizeof(si);
+ si.hStdInput = INVALID_HANDLE_VALUE;
+ si.hStdOutput = INVALID_HANDLE_VALUE;
+ si.hStdError = INVALID_HANDLE_VALUE;
+
+ if (redirects) {
+ si.dwFlags = STARTF_USESTDHANDLES;
+
+ si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
+ if (si.hStdInput == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, "can't redirect stdin");
+ return false;
+ }
+ si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
+ if (si.hStdOutput == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ MakeErrMsg(ErrMsg, "can't redirect stdout");
+ return false;
+ }
+ if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the handle already open for stdout.
+ DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+ GetCurrentProcess(), &si.hStdError,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ } else {
+ // Just redirect stderr
+ si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
+ if (si.hStdError == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ MakeErrMsg(ErrMsg, "can't redirect stderr");
+ return false;
+ }
+ }
+ }
+
+ PROCESS_INFORMATION pi;
+ memset(&pi, 0, sizeof(pi));
+
+ fflush(stdout);
+ fflush(stderr);
+ BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
+ envblock, NULL, &si, &pi);
+ DWORD err = GetLastError();
+
+ // Regardless of whether the process got created or not, we are done with
+ // the handles we created for it to inherit.
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ CloseHandle(si.hStdError);
+
+ // Now return an error if the process didn't get created.
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
+ path.str() + "'");
+ return false;
+ }
+ Win32ProcessInfo* wpi = new Win32ProcessInfo;
+ wpi->hProcess = pi.hProcess;
+ wpi->dwProcessId = pi.dwProcessId;
+ Data_ = wpi;
+
+ // Make sure these get closed no matter what.
+ AutoHandle hThread(pi.hThread);
+
+ // Assign the process to a job if a memory limit is defined.
+ AutoHandle hJob(0);
+ if (memoryLimit != 0) {
+ hJob = CreateJobObject(0, 0);
+ bool success = false;
+ if (hJob != 0) {
+ JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+ memset(&jeli, 0, sizeof(jeli));
+ jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
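+ // memoryLimit is given in megabytes; 1048576 (2^20) converts it to the
+ // byte count the job object expects (e.g. 256 -> 268435456).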
+ jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
+ if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
+ &jeli, sizeof(jeli))) {
+ if (AssignProcessToJobObject(hJob, pi.hProcess))
+ success = true;
+ }
+ }
+ if (!success) {
+ SetLastError(GetLastError());
+ MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
+ TerminateProcess(pi.hProcess, 1);
+ WaitForSingleObject(pi.hProcess, INFINITE);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int
+Program::Wait(const Path &path,
+ unsigned secondsToWait,
+ std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+
+ // Wait for the process to terminate.
+ DWORD millisecondsToWait = INFINITE;
+ if (secondsToWait > 0)
+ millisecondsToWait = secondsToWait * 1000;
+
+ if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+ if (!TerminateProcess(hProcess, 1)) {
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ return -2;
+ }
+ WaitForSingleObject(hProcess, INFINITE);
+ }
+
+ // Get its exit status.
+ DWORD status;
+ BOOL rc = GetExitCodeProcess(hProcess, &status);
+ DWORD err = GetLastError();
+
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, "Failed getting status for program.");
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ return -2;
+ }
+
+ return status;
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return true;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+ if (TerminateProcess(hProcess, 1) == 0) {
+ MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+ return true;
+ }
+
+ return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+ int result = _setmode( _fileno(stdin), _O_BINARY );
+ return result == -1;
+}
+
+bool Program::ChangeStdoutToBinary(){
+ int result = _setmode( _fileno(stdout), _O_BINARY );
+ return result == -1;
+}
+
+bool Program::ChangeStderrToBinary(){
+ int result = _setmode( _fileno(stderr), _O_BINARY );
+ return result == -1;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/RWMutex.inc b/contrib/llvm/lib/Support/Windows/RWMutex.inc
new file mode 100644
index 000000000000..471f8fa294be
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/RWMutex.inc
@@ -0,0 +1,58 @@
+//= llvm/Support/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+// FIXME: Windows does not have reader-writer locks pre-Vista. If you want
+// real reader-writer locks, you need a threads implementation for Windows.
+
+namespace llvm {
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() {
+ data_ = calloc(1, sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+}
+
+RWMutexImpl::~RWMutexImpl() {
+ DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ free(data_);
+}
+
+bool RWMutexImpl::reader_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
new file mode 100644
index 000000000000..14f3f21f02a1
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -0,0 +1,328 @@
+//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+ #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static bool ExitOnUnhandledExceptions = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs. This causes concurrency issues with the above
+// globals, which this critical section addresses.
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef _MSC_VER
+/// CRTReportHook - Function called on a CRT debugging event.
+static int CRTReportHook(int ReportType, char *Message, int *Return) {
+ // Don't cause a DebugBreak() on return.
+ if (Return)
+ *Return = 0;
+
+ switch (ReportType) {
+ default:
+ case _CRT_ASSERT:
+ fprintf(stderr, "CRT assert: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_ERROR:
+ fprintf(stderr, "CRT error: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_WARN:
+ fprintf(stderr, "CRT warn: %s\n", Message);
+ break;
+ }
+
+ // Don't call _CrtDbgReport.
+ return TRUE;
+}
+#endif
+
+static void RegisterHandler() {
+ if (RegisteredUnhandledExceptionFilter) {
+ EnterCriticalSection(&CriticalSection);
+ return;
+ }
+
+ // Now's the time to create the critical section. This is the first time
+ // through here, and there's only one thread.
+ InitializeCriticalSection(&CriticalSection);
+
+ // Enter it immediately. Now if someone hits CTRL/C, the console handler
+ // can't proceed until the globals are updated.
+ EnterCriticalSection(&CriticalSection);
+
+ RegisteredUnhandledExceptionFilter = true;
+ OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+ SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+ // Environment variable to disable any kind of crash dialog.
+ if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+#ifdef _MSC_VER
+ _CrtSetReportHook(CRTReportHook);
+#endif
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+ ExitOnUnhandledExceptions = true;
+ }
+
+ // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+ // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+ RegisterHandler();
+
+ if (CleanupExecuted) {
+ if (ErrMsg)
+ *ErrMsg = "Process terminating -- cannot register for removal";
+ return true;
+ }
+
+ if (FilesToRemove == NULL)
+ FilesToRemove = new std::vector<sys::Path>;
+
+ FilesToRemove->push_back(Filename);
+
+ LeaveCriticalSection(&CriticalSection);
+ return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ if (FilesToRemove == NULL)
+ return;
+
+ RegisterHandler();
+
+ FilesToRemove->push_back(Filename);
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ if (I != FilesToRemove->rend())
+ FilesToRemove->erase(I.base()-1);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+ RegisterHandler();
+ InterruptFunction = IF;
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ if (CallBacksToRun == 0)
+ CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+ CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+}
+
+static void Cleanup() {
+ EnterCriticalSection(&CriticalSection);
+
+ // Prevent other threads from registering new files and directories for
+ // removal, should we be executing because of the console handler callback.
+ CleanupExecuted = true;
+
+ // FIXME: open files cannot be deleted.
+
+ if (FilesToRemove != NULL)
+ while (!FilesToRemove->empty()) {
+ FilesToRemove->back().eraseFromDisk();
+ FilesToRemove->pop_back();
+ }
+
+ if (CallBacksToRun)
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+ Cleanup();
+}
+
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
+ Cleanup();
+
+#ifdef _WIN64
+ // TODO: provide an x64-friendly version of the following
+#else
+
+ // Initialize the STACKFRAME structure.
+ STACKFRAME StackFrame;
+ memset(&StackFrame, 0, sizeof(StackFrame));
+
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+
+ HANDLE hProcess = GetCurrentProcess();
+ HANDLE hThread = GetCurrentThread();
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
+ ep->ContextRecord, NULL, SymFunctionTableAccess,
+ SymGetModuleBase, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ // Print the PC in hexadecimal.
+ DWORD PC = StackFrame.AddrPC.Offset;
+ fprintf(stderr, "%08lX", PC);
+
+ // Print the parameters. Assume there are four.
+ fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+ StackFrame.Params[0],
+ StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
+
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase(hProcess, PC)) {
+ fputs(" <unknown module>\n", stderr);
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+
+ DWORD dwDisp;
+ if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
+ fputc('\n', stderr);
+ continue;
+ }
+
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
+ else
+ fprintf(stderr, ", %s", symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE line;
+ memset(&line, 0, sizeof(line));
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+ fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwDisp > 0)
+ fprintf(stderr, "+%04lu byte(s)", dwDisp);
+ }
+
+ fputc('\n', stderr);
+ }
+
+#endif
+
+ if (ExitOnUnhandledExceptions)
+ _exit(-3);
+
+ // Allow a dialog box to pop up, offering the choice to start a debugger.
+ if (OldFilter)
+ return (*OldFilter)(ep);
+ else
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
+ // We are running in our very own thread, courtesy of Windows.
+ EnterCriticalSection(&CriticalSection);
+ Cleanup();
+
+ // If an interrupt function has been set, go and run it; otherwise,
+ // the process dies.
+ void (*IF)() = InterruptFunction;
+ InterruptFunction = 0; // Don't run it on another CTRL-C.
+
+ if (IF) {
+ // Note: if the interrupt function throws an exception, there is nothing
+ // to catch it in this thread so it will kill the process.
+ IF(); // Run it now.
+ LeaveCriticalSection(&CriticalSection);
+ return TRUE; // Don't kill the process.
+ }
+
+ // Allow normal processing to take place; i.e., the process dies.
+ LeaveCriticalSection(&CriticalSection);
+ return FALSE;
+}
diff --git a/contrib/llvm/lib/Support/Windows/ThreadLocal.inc b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
new file mode 100644
index 000000000000..512462d89005
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
@@ -0,0 +1,54 @@
+//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/ThreadLocal.h"
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() {
+ DWORD* tls = new DWORD;
+ *tls = TlsAlloc();
+ assert(*tls != TLS_OUT_OF_INDEXES);
+ data = tls;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+ DWORD* tls = static_cast<DWORD*>(data);
+ TlsFree(*tls);
+ delete tls;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+ DWORD* tls = static_cast<DWORD*>(data);
+ return TlsGetValue(*tls);
+}
+
+void ThreadLocalImpl::setInstance(const void* d){
+ DWORD* tls = static_cast<DWORD*>(data);
+ int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
+ assert(errorcode != 0);
+ (void)errorcode;
+}
+
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc
new file mode 100644
index 000000000000..12275526f1c8
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/TimeValue.inc
@@ -0,0 +1,51 @@
+//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 implementation of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <time.h>
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code.
+//===----------------------------------------------------------------------===//
+
+TimeValue TimeValue::now() {
+ uint64_t ft;
+ GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
+
+ TimeValue t(0, 0);
+ t.fromWin32Time(ft);
+ return t;
+}
+
+std::string TimeValue::str() const {
+#ifdef __MINGW32__
+ // This ban may be lifted by either:
+ // (i) a future MinGW version other than 1.0 that inherits the __time64_t type, or
+ // (ii) configure tests for either the time_t or __time64_t type.
+ time_t ourTime = time_t(this->toEpochTime());
+ struct tm *lt = ::localtime(&ourTime);
+#else
+ __time64_t ourTime = this->toEpochTime();
+ struct tm *lt = ::_localtime64(&ourTime);
+#endif
+
+ char buffer[25];
+ strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
+ return std::string(buffer);
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Windows.h b/contrib/llvm/lib/Support/Windows/Windows.h
new file mode 100644
index 000000000000..4a1553b599d7
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Windows.h
@@ -0,0 +1,120 @@
+//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// mingw-w64 tends to define it as 0x0502 in its headers.
+#undef _WIN32_WINNT
+
+// Require at least Windows 2000 API.
+#define _WIN32_WINNT 0x0500
+#define _WIN32_IE 0x0500 // MinGW at it again.
+#define WIN32_LEAN_AND_MEAN
+
+#include "llvm/Config/config.h" // Get build system configuration settings
+#include <windows.h>
+#include <shlobj.h>
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ *ErrMsg = prefix + buffer;
+ LocalFree(buffer);
+ return true;
+}
+
+class AutoHandle {
+ HANDLE handle;
+
+public:
+ AutoHandle(HANDLE h) : handle(h) {}
+
+ ~AutoHandle() {
+ if (handle)
+ CloseHandle(handle);
+ }
+
+ operator HANDLE() {
+ return handle;
+ }
+
+ AutoHandle &operator=(HANDLE h) {
+ handle = h;
+ return *this;
+ }
+};
+
+template <class HandleType, uintptr_t InvalidHandle,
+ class DeleterType, DeleterType D>
+class ScopedHandle {
+ HandleType Handle;
+
+public:
+ ScopedHandle() : Handle(InvalidHandle) {}
+ ScopedHandle(HandleType handle) : Handle(handle) {}
+
+ ~ScopedHandle() {
+ if (Handle != HandleType(InvalidHandle))
+ D(Handle);
+ }
+
+ HandleType take() {
+ HandleType temp = Handle;
+ Handle = HandleType(InvalidHandle);
+ return temp;
+ }
+
+ operator HandleType() const { return Handle; }
+
+ ScopedHandle &operator=(HandleType handle) {
+ Handle = handle;
+ return *this;
+ }
+
+ typedef void (*unspecified_bool_type)();
+ static void unspecified_bool_true() {}
+
+ // True if Handle is valid.
+ operator unspecified_bool_type() const {
+ return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ }
+
+ bool operator!() const {
+ return Handle == HandleType(InvalidHandle);
+ }
+};
+
+typedef ScopedHandle<HANDLE, uintptr_t(-1),
+ BOOL (WINAPI*)(HANDLE), ::FindClose>
+ ScopedFindHandle;
+
+namespace llvm {
+template <class T>
+class SmallVectorImpl;
+
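+// c_str - Return a NUL-terminated pointer to the vector's data without
+// changing its size: push a trailing 0 so the storage is terminated, then
+// pop it back off before returning data().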
+template <class T>
+typename SmallVectorImpl<T>::const_pointer
+c_str(SmallVectorImpl<T> &str) {
+ str.push_back(0);
+ str.pop_back();
+ return str.data();
+}
+} // end namespace llvm.
diff --git a/contrib/llvm/lib/Support/Windows/explicit_symbols.inc b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
new file mode 100644
index 000000000000..379645d2ff60
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
@@ -0,0 +1,66 @@
+/* in libgcc.a */
+
+#ifdef HAVE__ALLOCA
+ EXPLICIT_SYMBOL(_alloca)
+ EXPLICIT_SYMBOL2(alloca, _alloca)
+#endif
+#ifdef HAVE___ALLOCA
+ EXPLICIT_SYMBOL(__alloca)
+#endif
+#ifdef HAVE___CHKSTK
+ EXPLICIT_SYMBOL(__chkstk)
+#endif
+#ifdef HAVE____CHKSTK
+ EXPLICIT_SYMBOL(___chkstk)
+#endif
+#ifdef HAVE___MAIN
+ EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
+#endif
+
+#ifdef HAVE___ASHLDI3
+ EXPLICIT_SYMBOL(__ashldi3)
+#endif
+#ifdef HAVE___ASHRDI3
+ EXPLICIT_SYMBOL(__ashrdi3)
+#endif
+#ifdef HAVE___CMPDI2 // FIXME: unused
+ EXPLICIT_SYMBOL(__cmpdi2)
+#endif
+#ifdef HAVE___DIVDI3
+ EXPLICIT_SYMBOL(__divdi3)
+#endif
+#ifdef HAVE___FIXDFDI
+ EXPLICIT_SYMBOL(__fixdfdi)
+#endif
+#ifdef HAVE___FIXSFDI
+ EXPLICIT_SYMBOL(__fixsfdi)
+#endif
+#ifdef HAVE___FIXUNSDFDI
+ EXPLICIT_SYMBOL(__fixunsdfdi)
+#endif
+#ifdef HAVE___FIXUNSSFDI
+ EXPLICIT_SYMBOL(__fixunssfdi)
+#endif
+#ifdef HAVE___FLOATDIDF
+ EXPLICIT_SYMBOL(__floatdidf)
+#endif
+#ifdef HAVE___FLOATDISF
+ EXPLICIT_SYMBOL(__floatdisf)
+#endif
+#ifdef HAVE___LSHRDI3
+ EXPLICIT_SYMBOL(__lshrdi3)
+#endif
+#ifdef HAVE___MODDI3
+ EXPLICIT_SYMBOL(__moddi3)
+#endif
+#ifdef HAVE___UDIVDI3
+ EXPLICIT_SYMBOL(__udivdi3)
+#endif
+#ifdef HAVE___UMODDI3
+ EXPLICIT_SYMBOL(__umoddi3)
+#endif
+
+/* msvcrt */
+#if defined(_MSC_VER)
+ EXPLICIT_SYMBOL2(alloca, _alloca_probe)
+#endif
diff --git a/contrib/llvm/lib/Support/Windows/system_error.inc b/contrib/llvm/lib/Support/Windows/system_error.inc
new file mode 100644
index 000000000000..37ec81dd363c
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/system_error.inc
@@ -0,0 +1,142 @@
+//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include <windows.h>
+#include <winerror.h>
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ LPVOID lpMsgBuf = 0;
+ DWORD retval = ::FormatMessageA(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL,
+ ev,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
+ (LPSTR) &lpMsgBuf,
+ 0,
+ NULL);
+ if (retval == 0) {
+ ::LocalFree(lpMsgBuf);
+ return std::string("Unknown error");
+ }
+
+ std::string str( static_cast<LPCSTR>(lpMsgBuf) );
+ ::LocalFree(lpMsgBuf);
+
+ while (str.size()
+ && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r'))
+ str.erase( str.size()-1 );
+ if (str.size() && str[str.size()-1] == '.')
+ str.erase( str.size()-1 );
+ return str;
+}
+
+// I'd rather not double the line count of the following.
+#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y)
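+// For example, MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied) expands
+// to: case ERROR_ACCESS_DENIED: return make_error_condition(errc::permission_denied);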
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+ switch (ev) {
+ MAP_ERR_TO_COND(0, success);
+ // Windows system -> posix_errno decode table ---------------------------//
+ // see WinError.h comments for descriptions of errors
+ MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
+ MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
+ MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
+ MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
+ MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
+ MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
+ MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
+ MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large);
+ MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
+ MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
+ MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link);
+ MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
+ MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled);
+ MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_SEEK, io_error);
+ MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
+ MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
+ MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
+ MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEACCES, permission_denied);
+ MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use);
+ MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available);
+ MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported);
+ MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress);
+ MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
+ MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted);
+ MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused);
+ MAP_ERR_TO_COND(WSAECONNRESET, connection_reset);
+ MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required);
+ MAP_ERR_TO_COND(WSAEFAULT, bad_address);
+ MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable);
+ MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress);
+ MAP_ERR_TO_COND(WSAEINTR, interrupted);
+ MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
+ MAP_ERR_TO_COND(WSAEISCONN, already_connected);
+ MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
+ MAP_ERR_TO_COND(WSAEMSGSIZE, message_size);
+ MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
+ MAP_ERR_TO_COND(WSAENETDOWN, network_down);
+ MAP_ERR_TO_COND(WSAENETRESET, network_reset);
+ MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable);
+ MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space);
+ MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option);
+ MAP_ERR_TO_COND(WSAENOTCONN, not_connected);
+ MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket);
+ MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type);
+ MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block);
+ default: return error_condition(ev, system_category());
+ }
+}
diff --git a/contrib/llvm/lib/Support/circular_raw_ostream.cpp b/contrib/llvm/lib/Support/circular_raw_ostream.cpp
new file mode 100644
index 000000000000..ca0d30db388c
--- /dev/null
+++ b/contrib/llvm/lib/Support/circular_raw_ostream.cpp
@@ -0,0 +1,45 @@
+//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for circular buffered streams.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/circular_raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ if (BufferSize == 0) {
+ TheStream->write(Ptr, Size);
+ return;
+ }
+
+ // Write into the buffer, wrapping if necessary.
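+ // For example, with BufferSize == 8, Cur at BufferArray, and Size == 11, the
+ // first pass copies 8 bytes and wraps Cur back to BufferArray; the second
+ // pass copies the remaining 3 bytes over the oldest data.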
+ while (Size != 0) {
+ unsigned Bytes =
+ std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray)));
+ memcpy(Cur, Ptr, Bytes);
+ Size -= Bytes;
+ Cur += Bytes;
+ if (Cur == BufferArray + BufferSize) {
+ // Reset the output pointer to the start of the buffer.
+ Cur = BufferArray;
+ Filled = true;
+ }
+ }
+}
+
+void circular_raw_ostream::flushBufferWithBanner() {
+ if (BufferSize != 0) {
+ // Write out the buffer
+ TheStream->write(Banner, std::strlen(Banner));
+ flushBuffer();
+ }
+}
diff --git a/contrib/llvm/lib/Support/raw_os_ostream.cpp b/contrib/llvm/lib/Support/raw_os_ostream.cpp
new file mode 100644
index 000000000000..44f2325d7f8a
--- /dev/null
+++ b/contrib/llvm/lib/Support/raw_os_ostream.cpp
@@ -0,0 +1,30 @@
+//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support adapting raw_ostream to std::ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_os_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// raw_os_ostream
+//===----------------------------------------------------------------------===//
+
+raw_os_ostream::~raw_os_ostream() {
+ flush();
+}
+
+void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.write(Ptr, Size);
+}
+
+uint64_t raw_os_ostream::current_pos() const { return OS.tellp(); }
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
new file mode 100644
index 000000000000..5a71fa3d8cea
--- /dev/null
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -0,0 +1,763 @@
+//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for bulk buffered stream output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Process.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cctype>
+#include <cerrno>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(HAVE_FCNTL_H)
+# include <fcntl.h>
+#endif
+#if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV)
+# include <sys/uio.h>
+#endif
+
+#if defined(__CYGWIN__)
+#include <io.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <io.h>
+#include <fcntl.h>
+#ifndef STDIN_FILENO
+# define STDIN_FILENO 0
+#endif
+#ifndef STDOUT_FILENO
+# define STDOUT_FILENO 1
+#endif
+#ifndef STDERR_FILENO
+# define STDERR_FILENO 2
+#endif
+#endif
+
+using namespace llvm;
+
+raw_ostream::~raw_ostream() {
+ // raw_ostream's subclasses should take care to flush the buffer
+ // in their destructors.
+ assert(OutBufCur == OutBufStart &&
+ "raw_ostream destructor called with non-empty buffer!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+}
+
+// An out of line virtual method to provide a home for the class vtable.
+void raw_ostream::handle() {}
+
+size_t raw_ostream::preferred_buffer_size() const {
+ // BUFSIZ is intended to be a reasonable default.
+ return BUFSIZ;
+}
+
+void raw_ostream::SetBuffered() {
+ // Ask the subclass to determine an appropriate buffer size.
+ if (size_t Size = preferred_buffer_size())
+ SetBufferSize(Size);
+ else
+ // It may return 0, meaning this stream should be unbuffered.
+ SetUnbuffered();
+}
+
+void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
+ BufferKind Mode) {
+ assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
+ (Mode != Unbuffered && BufferStart && Size)) &&
+ "stream must be unbuffered or have at least one byte");
+ // Make sure the current buffer is free of content (we can't flush here; the
+ // child buffer management logic will be in write_impl).
+ assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+ OutBufStart = BufferStart;
+ OutBufEnd = OutBufStart+Size;
+ OutBufCur = OutBufStart;
+ BufferMode = Mode;
+
+ assert(OutBufStart <= OutBufEnd && "Invalid size!");
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long N) {
+ if (N < 0) {
+ *this << '-';
+ N = -N;
+ }
+
+ return this->operator<<(static_cast<unsigned long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long long N) {
+ // Output using 32-bit div/mod when possible.
+ if (N == static_cast<unsigned long>(N))
+ return this->operator<<(static_cast<unsigned long>(N));
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long long N) {
+ if (N < 0) {
+ *this << '-';
+ // Avoid undefined behavior on INT64_MIN with a cast.
+ N = -(unsigned long long)N;
+ }
+
+ return this->operator<<(static_cast<unsigned long long>(N));
+}
+
+raw_ostream &raw_ostream::write_hex(unsigned long long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ uintptr_t x = N % 16;
+ *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
+ N /= 16;
+ }
+
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::write_escaped(StringRef Str,
+ bool UseHexEscapes) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char c = Str[i];
+
+ switch (c) {
+ case '\\':
+ *this << '\\' << '\\';
+ break;
+ case '\t':
+ *this << '\\' << 't';
+ break;
+ case '\n':
+ *this << '\\' << 'n';
+ break;
+ case '"':
+ *this << '\\' << '"';
+ break;
+ default:
+ if (std::isprint(c)) {
+ *this << c;
+ break;
+ }
+
+ // Write out the escaped representation.
+ if (UseHexEscapes) {
+ *this << '\\' << 'x';
+ *this << hexdigit((c >> 4 & 0xF));
+ *this << hexdigit((c >> 0) & 0xF);
+ } else {
+ // Always use a full 3-character octal escape.
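+ // For example, the BEL character 0x07 is written as \007.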
+ *this << '\\';
+ *this << char('0' + ((c >> 6) & 7));
+ *this << char('0' + ((c >> 3) & 7));
+ *this << char('0' + ((c >> 0) & 7));
+ }
+ }
+ }
+
+ return *this;
+}
+
+raw_ostream &raw_ostream::operator<<(const void *P) {
+ *this << '0' << 'x';
+
+ return write_hex((uintptr_t) P);
+}
+
+raw_ostream &raw_ostream::operator<<(double N) {
+#ifdef _WIN32
+ // On MSVCRT and compatible, output of %e is incompatible with POSIX by
+ // default: POSIX wants at least two exponent digits ("%+03d"), while MSVCRT
+ // prints three, so the extra leading zero is trimmed below.
+ // FIXME: Implement our own formatter here or in Support/Format.h!
+ int fpcl = _fpclass(N);
+
+ // negative zero
+ if (fpcl == _FPCLASS_NZ)
+ return *this << "-0.000000e+00";
+
+ char buf[16];
+ unsigned len;
+ len = snprintf(buf, sizeof(buf), "%e", N);
+ if (len <= sizeof(buf) - 2) {
+ if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
+ int cs = buf[len - 4];
+ if (cs == '+' || cs == '-') {
+ int c1 = buf[len - 2];
+ int c0 = buf[len - 1];
+ if (isdigit(c1) && isdigit(c0)) {
+ // Trim leading '0': "...e+012" -> "...e+12\0"
+ buf[len - 3] = c1;
+ buf[len - 2] = c0;
+ buf[--len] = 0;
+ }
+ }
+ }
+ return this->operator<<(buf);
+ }
+#endif
+ return this->operator<<(format("%e", N));
+}
+
+
+
+void raw_ostream::flush_nonempty() {
+ assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
+ size_t Length = OutBufCur - OutBufStart;
+ OutBufCur = OutBufStart;
+ write_impl(OutBufStart, Length);
+}
+
+raw_ostream &raw_ostream::write(unsigned char C) {
+ // Group exceptional cases into a single branch.
+ if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) {
+ if (BUILTIN_EXPECT(!OutBufStart, false)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(reinterpret_cast<char*>(&C), 1);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(C);
+ }
+
+ flush_nonempty();
+ }
+
+ *OutBufCur++ = C;
+ return *this;
+}
+
+raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
+ // Group exceptional cases into a single branch.
+ if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) {
+ if (BUILTIN_EXPECT(!OutBufStart, false)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(Ptr, Size);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(Ptr, Size);
+ }
+
+ size_t NumBytes = OutBufEnd - OutBufCur;
+
+ // If the buffer is empty at this point we have a string that is larger
+ // than the buffer. Directly write the chunk that is a multiple of the
+ // preferred buffer size and put the remainder in the buffer.
+ if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) {
+ size_t BytesToWrite = Size - (Size % NumBytes);
+ write_impl(Ptr, BytesToWrite);
+ copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+ return *this;
+ }
+
+ // We don't have enough space in the buffer to fit the string in. Insert as
+ // much as possible, flush and start over with the remainder.
+ copy_to_buffer(Ptr, NumBytes);
+ flush_nonempty();
+ return write(Ptr + NumBytes, Size - NumBytes);
+ }
+
+ copy_to_buffer(Ptr, Size);
+
+ return *this;
+}
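+
+// Illustrative trace of the oversized-write path, assuming a 4096-byte
+// buffer that is currently empty: write(Ptr, 10000) sees NumBytes = 4096,
+// computes BytesToWrite = 10000 - (10000 % 4096) = 8192, hands those 8192
+// bytes straight to write_impl(), and buffers the remaining 1808 bytes.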
+
+void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
+ assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");
+
+ // Handle short strings specially; memcpy isn't very good at very short
+ // strings.
+ switch (Size) {
+ case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
+ case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
+ case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
+ case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
+ case 0: break;
+ default:
+ memcpy(OutBufCur, Ptr, Size);
+ break;
+ }
+
+ OutBufCur += Size;
+}
+
+// Formatted output.
+raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
+ // If we have more than a few bytes left in our output buffer, try
+ // formatting directly onto its end.
+ size_t NextBufferSize = 127;
+ size_t BufferBytesLeft = OutBufEnd - OutBufCur;
+ if (BufferBytesLeft > 3) {
+ size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
+
+ // Common case is that we have plenty of space.
+ if (BytesUsed <= BufferBytesLeft) {
+ OutBufCur += BytesUsed;
+ return *this;
+ }
+
+ // Otherwise, we overflowed and the return value tells us the size to try
+ // again with.
+ NextBufferSize = BytesUsed;
+ }
+
+ // If we got here, we didn't have enough space in the output buffer for the
+ // string. Try printing into a SmallVector that is resized to have enough
+ // space. Iterate until we win.
+ SmallVector<char, 128> V;
+
+ while (1) {
+ V.resize(NextBufferSize);
+
+ // Try formatting into the SmallVector.
+ size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
+
+ // If BytesUsed fit into the vector, we win.
+ if (BytesUsed <= NextBufferSize)
+ return write(V.data(), BytesUsed);
+
+ // Otherwise, try again with a new size.
+ assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
+ NextBufferSize = BytesUsed;
+ }
+}
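+
+// Example use (illustrative; format() is the helper from Support/Format.h
+// already used above, and PassName/Percent are hypothetical values):
+//   OS << format("%-10s %5.2f%%\n", PassName, Percent);
+// If the formatted text does not fit in the remaining buffer space, it is
+// retried into a growing SmallVector, so long results are never truncated.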
+
+/// indent - Insert 'NumSpaces' spaces.
+raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
+ static const char Spaces[] = " "
+ " "
+ " ";
+
+ // Usually the indentation is small, handle it with a fastpath.
+ if (NumSpaces < array_lengthof(Spaces))
+ return write(Spaces, NumSpaces);
+
+ while (NumSpaces) {
+ unsigned NumToWrite = std::min(NumSpaces,
+ (unsigned)array_lengthof(Spaces)-1);
+ write(Spaces, NumToWrite);
+ NumSpaces -= NumToWrite;
+ }
+ return *this;
+}
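+
+// Example use (illustrative sketch; Depth is a hypothetical nesting level):
+//   OS.indent(Depth * 2) << "visiting node\n";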
+
+
+//===----------------------------------------------------------------------===//
+// Formatted Output
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void format_object_base::home() {
+}
+
+//===----------------------------------------------------------------------===//
+// raw_fd_ostream
+//===----------------------------------------------------------------------===//
+
+/// raw_fd_ostream - Open the specified file for writing. If an error
+/// occurs, information about the error is put into ErrorInfo, and the
+/// stream should be immediately destroyed; the string will be empty
+/// if no error occurred.
+raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Error(false), UseAtomicWrites(false), pos(0)
+{
+ assert(Filename != 0 && "Filename is null");
+ // Verify that we don't have both "append" and "excl".
+ assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
+ "Cannot specify both 'excl' and 'append' file creation flags!");
+
+ ErrorInfo.clear();
+
+ // Handle "-" as stdout. Note that when we do this, we consider ourselves
+ // the owner of stdout. This means that we can do things like close the
+ // file descriptor when we're done and set the "binary" flag globally.
+ if (Filename[0] == '-' && Filename[1] == 0) {
+ FD = STDOUT_FILENO;
+ // If the user requested binary output, put stdout into binary mode if
+ // possible.
+ if (Flags & F_Binary)
+ sys::Program::ChangeStdoutToBinary();
+ // Close stdout when we're done, to detect any output errors.
+ ShouldClose = true;
+ return;
+ }
+
+ int OpenFlags = O_WRONLY|O_CREAT;
+#ifdef O_BINARY
+ if (Flags & F_Binary)
+ OpenFlags |= O_BINARY;
+#endif
+
+ if (Flags & F_Append)
+ OpenFlags |= O_APPEND;
+ else
+ OpenFlags |= O_TRUNC;
+ if (Flags & F_Excl)
+ OpenFlags |= O_EXCL;
+
+ while ((FD = open(Filename, OpenFlags, 0664)) < 0) {
+ if (errno != EINTR) {
+ ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+ ShouldClose = false;
+ return;
+ }
+ }
+
+ // Ok, we successfully opened the file, so it'll need to be closed.
+ ShouldClose = true;
+}
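+
+// Example use (an illustrative sketch, not taken from this file):
+//   std::string Err;
+//   raw_fd_ostream Out("results.txt", Err, /*Flags=*/0);
+//   if (!Err.empty()) {
+//     // Open failed; report Err and bail out.
+//   }
+//   Out << "done\n";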
+
+/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
+/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
+ : raw_ostream(unbuffered), FD(fd),
+ ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
+#ifdef O_BINARY
+ // Setting STDOUT and STDERR to binary mode is necessary in Win32
+ // to avoid undesirable linefeed conversion.
+ if (fd == STDOUT_FILENO || fd == STDERR_FILENO)
+ setmode(fd, O_BINARY);
+#endif
+
+ // Get the starting position.
+ off_t loc = ::lseek(FD, 0, SEEK_CUR);
+ if (loc == (off_t)-1)
+ pos = 0;
+ else
+ pos = static_cast<uint64_t>(loc);
+}
+
+raw_fd_ostream::~raw_fd_ostream() {
+ if (FD >= 0) {
+ flush();
+ if (ShouldClose)
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ }
+
+#ifdef __MINGW32__
+ // On mingw, global dtors should not call exit(), and
+ // report_fatal_error() invokes exit(). In addition, report_fatal_error()
+ // might not be able to write its message to stderr when errors were
+ // detected on FD == 2, so skip the error check for stderr here.
+ if (FD == 2) return;
+#endif
+
+ // If there are any pending errors, report them now. Clients wishing
+ // to avoid report_fatal_error calls should check for errors with
+ // has_error() and clear the error flag with clear_error() before
+ // destructing raw_ostream objects which may have errors.
+ if (has_error())
+ report_fatal_error("IO failure on output stream.");
+}
+
+
+void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
+ assert(FD >= 0 && "File already closed.");
+ pos += Size;
+
+ do {
+ ssize_t ret;
+
+ // Check whether we should attempt to use atomic writes.
+ if (BUILTIN_EXPECT(!UseAtomicWrites, true)) {
+ ret = ::write(FD, Ptr, Size);
+ } else {
+ // Use ::writev() where available.
+#if defined(HAVE_WRITEV)
+ struct iovec IOV = { (void*) Ptr, Size };
+ ret = ::writev(FD, &IOV, 1);
+#else
+ ret = ::write(FD, Ptr, Size);
+#endif
+ }
+
+ if (ret < 0) {
+ // If it's a recoverable error, swallow it and retry the write.
+ //
+ // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
+ // raw_ostream isn't designed to do non-blocking I/O. However, some
+ // programs, such as old versions of bjam, have mistakenly used
+ // O_NONBLOCK. For compatibility, emulate blocking semantics by
+ // spinning until the write succeeds. If you don't want spinning,
+ // don't use O_NONBLOCK file descriptors with raw_ostream.
+ if (errno == EINTR || errno == EAGAIN
+#ifdef EWOULDBLOCK
+ || errno == EWOULDBLOCK
+#endif
+ )
+ continue;
+
+ // Otherwise it's a non-recoverable error. Note it and quit.
+ error_detected();
+ break;
+ }
+
+ // The write may have written some or all of the data. Update the
+ // size and buffer pointer to reflect the remainder that needs
+ // to be written. If there are no bytes left, we're done.
+ Ptr += ret;
+ Size -= ret;
+ } while (Size > 0);
+}
+
+void raw_fd_ostream::close() {
+ assert(ShouldClose);
+ ShouldClose = false;
+ flush();
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ FD = -1;
+}
+
+uint64_t raw_fd_ostream::seek(uint64_t off) {
+ flush();
+ pos = ::lseek(FD, off, SEEK_SET);
+ if (pos != off)
+ error_detected();
+ return pos;
+}
+
+size_t raw_fd_ostream::preferred_buffer_size() const {
+#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
+ // Windows and Minix have no st_blksize.
+ assert(FD >= 0 && "File not yet open!");
+ struct stat statbuf;
+ if (fstat(FD, &statbuf) != 0)
+ return 0;
+
+ // If this is a terminal, don't use buffering. Line buffering
+ // would be a more traditional thing to do, but it's not worth
+ // the complexity.
+ if (S_ISCHR(statbuf.st_mode) && isatty(FD))
+ return 0;
+ // Return the preferred block size.
+ return statbuf.st_blksize;
+#else
+ return raw_ostream::preferred_buffer_size();
+#endif
+}
+
+raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
+ bool bg) {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode =
+ (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
+ : sys::Process::OutputColor(colors, bold, bg);
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // Don't count color codes towards output characters.
+ pos -= len;
+ }
+ return *this;
+}
+
+raw_ostream &raw_fd_ostream::resetColor() {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode = sys::Process::ResetColor();
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // Don't count color codes towards output characters.
+ pos -= len;
+ }
+ return *this;
+}
+
+bool raw_fd_ostream::is_displayed() const {
+ return sys::Process::FileDescriptorIsDisplayed(FD);
+}
+
+//===----------------------------------------------------------------------===//
+// outs(), errs(), nulls()
+//===----------------------------------------------------------------------===//
+
+/// outs() - This returns a reference to a raw_ostream for standard output.
+/// Use it like: outs() << "foo" << "bar";
+raw_ostream &llvm::outs() {
+ // Set buffer settings to model stdout behavior.
+ // Delete the file descriptor when the program exits, forcing error
+ // detection. If you don't want this behavior, don't use outs().
+ static raw_fd_ostream S(STDOUT_FILENO, true);
+ return S;
+}
+
+/// errs() - This returns a reference to a raw_ostream for standard error.
+/// Use it like: errs() << "foo" << "bar";
+raw_ostream &llvm::errs() {
+ // Set standard error to be unbuffered by default.
+ static raw_fd_ostream S(STDERR_FILENO, false, true);
+ return S;
+}
+
+/// nulls() - This returns a reference to a raw_ostream which discards output.
+raw_ostream &llvm::nulls() {
+ static raw_null_ostream S;
+ return S;
+}
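+
+// Example use (illustrative): outs() is buffered like stdout, errs() is
+// unbuffered so diagnostics appear immediately, and nulls() swallows
+// everything it is given:
+//   outs() << "result: " << 42 << '\n';
+//   errs() << "warning: something looks off\n";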
+
+
+//===----------------------------------------------------------------------===//
+// raw_string_ostream
+//===----------------------------------------------------------------------===//
+
+raw_string_ostream::~raw_string_ostream() {
+ flush();
+}
+
+void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.append(Ptr, Size);
+}
+
+//===----------------------------------------------------------------------===//
+// raw_svector_ostream
+//===----------------------------------------------------------------------===//
+
+// The raw_svector_ostream implementation uses the SmallVector itself as the
+// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is
+// always pointing past the end of the vector, but within the vector
+// capacity. This allows raw_ostream to write directly into the correct place,
+// and we only need to set the vector size when the data is flushed.
+
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ // Set up the initial external buffer. We make sure that the buffer has at
+ // least 128 bytes free; raw_ostream itself only requires 64, but we want to
+ // make sure that we don't grow the buffer unnecessarily on destruction (when
+ // the data is flushed). See the FIXME below.
+ OS.reserve(OS.size() + 128);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+raw_svector_ostream::~raw_svector_ostream() {
+ // FIXME: Prevent resizing during this flush().
+ flush();
+}
+
+/// resync - This is called when the SmallVector we're appending to is changed
+/// outside of the raw_svector_ostream's control. It is only safe to do this
+/// if the raw_svector_ostream has previously been flushed.
+void raw_svector_ostream::resync() {
+ assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector");
+
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
+ // If we're writing bytes from the end of the buffer into the smallvector, we
+ // don't need to copy the bytes, just commit the bytes because they are
+ // already in the right place.
+ if (Ptr == OS.end()) {
+ assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!");
+ OS.set_size(OS.size() + Size);
+ } else {
+ assert(GetNumBytesInBuffer() == 0 &&
+ "Should be writing from buffer if some bytes in it");
+ // Otherwise, do copy the bytes.
+ OS.append(Ptr, Ptr+Size);
+ }
+
+ // Grow the vector if necessary.
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+
+ // Update the buffer position.
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+uint64_t raw_svector_ostream::current_pos() const {
+ return OS.size();
+}
+
+StringRef raw_svector_ostream::str() {
+ flush();
+ return StringRef(OS.begin(), OS.size());
+}
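+
+// Example use (an illustrative sketch of the usual idiom):
+//   SmallString<128> Buf;
+//   raw_svector_ostream OS(Buf);
+//   OS << "block." << 7;
+//   StringRef Name = OS.str();  // flushes; Name points into Buf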
+
+//===----------------------------------------------------------------------===//
+// raw_null_ostream
+//===----------------------------------------------------------------------===//
+
+raw_null_ostream::~raw_null_ostream() {
+#ifndef NDEBUG
+ // ~raw_ostream asserts that the buffer is empty. This isn't necessary
+ // with raw_null_ostream, but it's better to have raw_null_ostream follow
+ // the rules than to change the rules just for raw_null_ostream.
+ flush();
+#endif
+}
+
+void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
+}
+
+uint64_t raw_null_ostream::current_pos() const {
+ return 0;
+}
diff --git a/contrib/llvm/lib/Support/regcclass.h b/contrib/llvm/lib/Support/regcclass.h
new file mode 100644
index 000000000000..2cea3e4e5406
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcclass.h
@@ -0,0 +1,70 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cclass.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-class table */
+static struct cclass {
+ const char *name;
+ const char *chars;
+ const char *multis;
+} cclasses[] = {
+ { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", ""} ,
+ { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "blank", " \t", ""} ,
+ { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
+ { "digit", "0123456789", ""} ,
+ { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "lower", "abcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+ ""} ,
+ { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "space", "\t\n\v\f\r ", ""} ,
+ { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ ""} ,
+ { "xdigit", "0123456789ABCDEFabcdef",
+ ""} ,
+ { NULL, 0, "" }
+};
diff --git a/contrib/llvm/lib/Support/regcname.h b/contrib/llvm/lib/Support/regcname.h
new file mode 100644
index 000000000000..3c0bb248ffa7
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcname.h
@@ -0,0 +1,139 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cname.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-name table */
+static struct cname {
+ const char *name;
+ char code;
+} cnames[] = {
+ { "NUL", '\0' },
+ { "SOH", '\001' },
+ { "STX", '\002' },
+ { "ETX", '\003' },
+ { "EOT", '\004' },
+ { "ENQ", '\005' },
+ { "ACK", '\006' },
+ { "BEL", '\007' },
+ { "alert", '\007' },
+ { "BS", '\010' },
+ { "backspace", '\b' },
+ { "HT", '\011' },
+ { "tab", '\t' },
+ { "LF", '\012' },
+ { "newline", '\n' },
+ { "VT", '\013' },
+ { "vertical-tab", '\v' },
+ { "FF", '\014' },
+ { "form-feed", '\f' },
+ { "CR", '\015' },
+ { "carriage-return", '\r' },
+ { "SO", '\016' },
+ { "SI", '\017' },
+ { "DLE", '\020' },
+ { "DC1", '\021' },
+ { "DC2", '\022' },
+ { "DC3", '\023' },
+ { "DC4", '\024' },
+ { "NAK", '\025' },
+ { "SYN", '\026' },
+ { "ETB", '\027' },
+ { "CAN", '\030' },
+ { "EM", '\031' },
+ { "SUB", '\032' },
+ { "ESC", '\033' },
+ { "IS4", '\034' },
+ { "FS", '\034' },
+ { "IS3", '\035' },
+ { "GS", '\035' },
+ { "IS2", '\036' },
+ { "RS", '\036' },
+ { "IS1", '\037' },
+ { "US", '\037' },
+ { "space", ' ' },
+ { "exclamation-mark", '!' },
+ { "quotation-mark", '"' },
+ { "number-sign", '#' },
+ { "dollar-sign", '$' },
+ { "percent-sign", '%' },
+ { "ampersand", '&' },
+ { "apostrophe", '\'' },
+ { "left-parenthesis", '(' },
+ { "right-parenthesis", ')' },
+ { "asterisk", '*' },
+ { "plus-sign", '+' },
+ { "comma", ',' },
+ { "hyphen", '-' },
+ { "hyphen-minus", '-' },
+ { "period", '.' },
+ { "full-stop", '.' },
+ { "slash", '/' },
+ { "solidus", '/' },
+ { "zero", '0' },
+ { "one", '1' },
+ { "two", '2' },
+ { "three", '3' },
+ { "four", '4' },
+ { "five", '5' },
+ { "six", '6' },
+ { "seven", '7' },
+ { "eight", '8' },
+ { "nine", '9' },
+ { "colon", ':' },
+ { "semicolon", ';' },
+ { "less-than-sign", '<' },
+ { "equals-sign", '=' },
+ { "greater-than-sign", '>' },
+ { "question-mark", '?' },
+ { "commercial-at", '@' },
+ { "left-square-bracket", '[' },
+ { "backslash", '\\' },
+ { "reverse-solidus", '\\' },
+ { "right-square-bracket", ']' },
+ { "circumflex", '^' },
+ { "circumflex-accent", '^' },
+ { "underscore", '_' },
+ { "low-line", '_' },
+ { "grave-accent", '`' },
+ { "left-brace", '{' },
+ { "left-curly-bracket", '{' },
+ { "vertical-line", '|' },
+ { "right-brace", '}' },
+ { "right-curly-bracket", '}' },
+ { "tilde", '~' },
+ { "DEL", '\177' },
+ { NULL, 0 }
+};
diff --git a/contrib/llvm/lib/Support/regcomp.c b/contrib/llvm/lib/Support/regcomp.c
new file mode 100644
index 000000000000..46c91a9c497c
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcomp.c
@@ -0,0 +1,1525 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+#include "regcclass.h"
+#include "regcname.h"
+
+/*
+ * parse structure, passed up and down to avoid global variables and
+ * other clumsinesses
+ */
+struct parse {
+ char *next; /* next character in RE */
+ char *end; /* end of string (-> NUL normally) */
+ int error; /* has an error been seen? */
+ sop *strip; /* malloced strip */
+ sopno ssize; /* malloced strip size (allocated) */
+ sopno slen; /* malloced strip length (used) */
+ int ncsalloc; /* number of csets allocated */
+ struct re_guts *g;
+# define NPAREN 10 /* we need to remember () 1-9 for back refs */
+ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
+ sopno pend[NPAREN]; /* -> ) ([0] unused) */
+};
+
+static void p_ere(struct parse *, int);
+static void p_ere_exp(struct parse *);
+static void p_str(struct parse *);
+static void p_bre(struct parse *, int, int);
+static int p_simp_re(struct parse *, int);
+static int p_count(struct parse *);
+static void p_bracket(struct parse *);
+static void p_b_term(struct parse *, cset *);
+static void p_b_cclass(struct parse *, cset *);
+static void p_b_eclass(struct parse *, cset *);
+static char p_b_symbol(struct parse *);
+static char p_b_coll_elem(struct parse *, int);
+static char othercase(int);
+static void bothcases(struct parse *, int);
+static void ordinary(struct parse *, int);
+static void nonnewline(struct parse *);
+static void repeat(struct parse *, sopno, int, int);
+static int seterr(struct parse *, int);
+static cset *allocset(struct parse *);
+static void freeset(struct parse *, cset *);
+static int freezeset(struct parse *, cset *);
+static int firstch(struct parse *, cset *);
+static int nch(struct parse *, cset *);
+static void mcadd(struct parse *, cset *, const char *);
+static void mcinvert(struct parse *, cset *);
+static void mccase(struct parse *, cset *);
+static int isinsets(struct re_guts *, int);
+static int samesets(struct re_guts *, int, int);
+static void categorize(struct parse *, struct re_guts *);
+static sopno dupl(struct parse *, sopno, sopno);
+static void doemit(struct parse *, sop, size_t);
+static void doinsert(struct parse *, sop, size_t, sopno);
+static void dofwd(struct parse *, sopno, sop);
+static void enlarge(struct parse *, sopno);
+static void stripsnug(struct parse *, struct re_guts *);
+static void findmust(struct parse *, struct re_guts *);
+static sopno pluscount(struct parse *, struct re_guts *);
+
+static char nuls[10]; /* place to point scanner in event of error */
+
+/*
+ * macros for use with parse structure
+ * BEWARE: these know that the parse structure is named `p' !!!
+ */
+#define PEEK() (*p->next)
+#define PEEK2() (*(p->next+1))
+#define MORE() (p->next < p->end)
+#define MORE2() (p->next+1 < p->end)
+#define SEE(c) (MORE() && PEEK() == (c))
+#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT() (p->next++)
+#define NEXT2() (p->next += 2)
+#define NEXTn(n) (p->next += (n))
+#define GETNEXT() (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e) (void)((co) || SETERROR(e))
+#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
+#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
+#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
+#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
+#define HERE() (p->slen)
+#define THERE() (p->slen - 1)
+#define THERETHERE() (p->slen - 2)
+#define DROP(n) (p->slen -= (n))
+
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
+#endif
+#define INFINITY (DUPMAX + 1)
+
+#ifndef NDEBUG
+static int never = 0; /* for use in asserts; shuts lint up */
+#else
+#define never 0 /* some <assert.h>s have bugs too */
+#endif
+
+/*
+ - llvm_regcomp - interface for parser and compilation
+ */
+int /* 0 success, otherwise REG_something */
+llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
+{
+ struct parse pa;
+ struct re_guts *g;
+ struct parse *p = &pa;
+ int i;
+ size_t len;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#endif
+
+ cflags = GOODFLAGS(cflags);
+ if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
+ return(REG_INVARG);
+
+ if (cflags&REG_PEND) {
+ if (preg->re_endp < pattern)
+ return(REG_INVARG);
+ len = preg->re_endp - pattern;
+ } else
+ len = strlen((const char *)pattern);
+
+ /* do the mallocs early so failure handling is easy */
+ g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+ (NC-1)*sizeof(cat_t));
+ if (g == NULL)
+ return(REG_ESPACE);
+ p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
+ p->strip = (sop *)calloc(p->ssize, sizeof(sop));
+ p->slen = 0;
+ if (p->strip == NULL) {
+ free((char *)g);
+ return(REG_ESPACE);
+ }
+
+ /* set things up */
+ p->g = g;
+ p->next = (char *)pattern; /* convenience; we do not modify it */
+ p->end = p->next + len;
+ p->error = 0;
+ p->ncsalloc = 0;
+ for (i = 0; i < NPAREN; i++) {
+ p->pbegin[i] = 0;
+ p->pend[i] = 0;
+ }
+ g->csetsize = NC;
+ g->sets = NULL;
+ g->setbits = NULL;
+ g->ncsets = 0;
+ g->cflags = cflags;
+ g->iflags = 0;
+ g->nbol = 0;
+ g->neol = 0;
+ g->must = NULL;
+ g->mlen = 0;
+ g->nsub = 0;
+ g->ncategories = 1; /* category 0 is "everything else" */
+ g->categories = &g->catspace[-(CHAR_MIN)];
+ (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
+ g->backrefs = 0;
+
+ /* do it */
+ EMIT(OEND, 0);
+ g->firststate = THERE();
+ if (cflags&REG_EXTENDED)
+ p_ere(p, OUT);
+ else if (cflags&REG_NOSPEC)
+ p_str(p);
+ else
+ p_bre(p, OUT, OUT);
+ EMIT(OEND, 0);
+ g->laststate = THERE();
+
+ /* tidy up loose ends and fill things in */
+ categorize(p, g);
+ stripsnug(p, g);
+ findmust(p, g);
+ g->nplus = pluscount(p, g);
+ g->magic = MAGIC2;
+ preg->re_nsub = g->nsub;
+ preg->re_g = g;
+ preg->re_magic = MAGIC1;
+#ifndef REDEBUG
+ /* not debugging, so can't rely on the assert() in llvm_regexec() */
+ if (g->iflags&REGEX_BAD)
+ SETERROR(REG_ASSERT);
+#endif
+
+ /* win or lose, we're done */
+ if (p->error != 0) /* lose */
+ llvm_regfree(preg);
+ return(p->error);
+}
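+
+/*
+ * Example use (an illustrative sketch; llvm_regexec and llvm_regfree are
+ * the companion entry points of this package):
+ *    llvm_regex_t re;
+ *    if (llvm_regcomp(&re, "foo(bar|baz)", REG_EXTENDED) != 0)
+ *        ... report the error ...
+ *    ... match with llvm_regexec(), then release with llvm_regfree(&re) ...
+ */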
+
+/*
+ - p_ere - ERE parser top level, concatenation and alternation
+ */
+static void
+p_ere(struct parse *p, int stop) /* character this ERE should end at */
+{
+ char c;
+ sopno prevback = 0;
+ sopno prevfwd = 0;
+ sopno conc;
+ int first = 1; /* is this the first alternative? */
+
+ for (;;) {
+ /* do a bunch of concatenated expressions */
+ conc = HERE();
+ while (MORE() && (c = PEEK()) != '|' && c != stop)
+ p_ere_exp(p);
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
+
+ if (!EAT('|'))
+ break; /* NOTE BREAK OUT */
+
+ if (first) {
+ INSERT(OCH_, conc); /* offset is wrong */
+ prevfwd = conc;
+ prevback = conc;
+ first = 0;
+ }
+ ASTERN(OOR1, prevback);
+ prevback = THERE();
+ AHEAD(prevfwd); /* fix previous offset */
+ prevfwd = HERE();
+ EMIT(OOR2, 0); /* offset is very wrong */
+ }
+
+ if (!first) { /* tail-end fixups */
+ AHEAD(prevfwd);
+ ASTERN(O_CH, prevback);
+ }
+
+ assert(!MORE() || SEE(stop));
+}
+
+/*
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ */
+static void
+p_ere_exp(struct parse *p)
+{
+ char c;
+ sopno pos;
+ int count;
+ int count2;
+ sopno subno;
+ int wascaret = 0;
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+
+ pos = HERE();
+ switch (c) {
+ case '(':
+ REQUIRE(MORE(), REG_EPAREN);
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ if (!SEE(')'))
+ p_ere(p, ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ MUSTEAT(')', REG_EPAREN);
+ break;
+#ifndef POSIX_MISTAKE
+ case ')': /* happens only if no current unmatched ( */
+ /*
+ * You may ask, why the ifndef? Because I didn't notice
+ * this until slightly too late for 1003.2, and none of the
+ * other 1003.2 regular-expression reviewers noticed it at
+ * all. So an unmatched ) is legal POSIX, at least until
+ * we can get it fixed.
+ */
+ SETERROR(REG_EPAREN);
+ break;
+#endif
+ case '^':
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ wascaret = 1;
+ break;
+ case '$':
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ break;
+ case '|':
+ SETERROR(REG_EMPTY);
+ break;
+ case '*':
+ case '+':
+ case '?':
+ SETERROR(REG_BADRPT);
+ break;
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case '\\':
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = GETNEXT();
+ ordinary(p, c);
+ break;
+ case '{': /* okay as ordinary except if digit follows */
+ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, c);
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ /* we call { a repetition if followed by a digit */
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
+ return; /* no repetition, we're done */
+ NEXT();
+
+ REQUIRE(!wascaret, REG_BADRPT);
+ switch (c) {
+ case '*': /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ break;
+ case '+':
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ break;
+ case '?':
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, pos); /* offset slightly wrong */
+ ASTERN(OOR1, pos); /* this one's right */
+ AHEAD(pos); /* fix the OCH_ */
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case '{':
+ count = p_count(p);
+ if (EAT(',')) {
+ if (isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EAT('}')) { /* error heuristics */
+ while (MORE() && PEEK() != '}')
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
+ return;
+ SETERROR(REG_BADRPT);
+}
+
+/*
+ - p_str - string (no metacharacters) "parser"
+ */
+static void
+p_str(struct parse *p)
+{
+ REQUIRE(MORE(), REG_EMPTY);
+ while (MORE())
+ ordinary(p, GETNEXT());
+}
+
+/*
+ - p_bre - BRE parser top level, anchoring and concatenation
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor. The
+ * only undesirable side effect is that '$' gets included as a character
+ * category in such cases. This is fairly harmless; not worth fixing.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_bre(struct parse *p,
+ int end1, /* first terminating character */
+ int end2) /* second terminating character */
+{
+ sopno start = HERE();
+ int first = 1; /* first subexpression? */
+ int wasdollar = 0;
+
+ if (EAT('^')) {
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ }
+ while (MORE() && !SEETWO(end1, end2)) {
+ wasdollar = p_simp_re(p, first);
+ first = 0;
+ }
+ if (wasdollar) { /* oops, that was a trailing anchor */
+ DROP(1);
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ }
+
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
+}
+
+/*
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ */
+static int /* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p,
+ int starordinary) /* is a leading * an ordinary character? */
+{
+ int c;
+ int count;
+ int count2;
+ sopno pos;
+ int i;
+ sopno subno;
+# define BACKSL (1<<CHAR_BIT)
+
+ pos = HERE(); /* repetition op, if any, covers from here */
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+ if (c == '\\') {
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = BACKSL | GETNEXT();
+ }
+ switch (c) {
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case BACKSL|'{':
+ SETERROR(REG_BADRPT);
+ break;
+ case BACKSL|'(':
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ /* the MORE here is an error heuristic */
+ if (MORE() && !SEETWO('\\', ')'))
+ p_bre(p, '\\', ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+ break;
+ case BACKSL|')': /* should not get here -- must be user */
+ case BACKSL|'}':
+ SETERROR(REG_EPAREN);
+ break;
+ case BACKSL|'1':
+ case BACKSL|'2':
+ case BACKSL|'3':
+ case BACKSL|'4':
+ case BACKSL|'5':
+ case BACKSL|'6':
+ case BACKSL|'7':
+ case BACKSL|'8':
+ case BACKSL|'9':
+ i = (c&~BACKSL) - '0';
+ assert(i < NPAREN);
+ if (p->pend[i] != 0) {
+ assert(i <= p->g->nsub);
+ EMIT(OBACK_, i);
+ assert(p->pbegin[i] != 0);
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+ (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+ EMIT(O_BACK, i);
+ } else
+ SETERROR(REG_ESUBREG);
+ p->g->backrefs = 1;
+ break;
+ case '*':
+ REQUIRE(starordinary, REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, (char)c);
+ break;
+ }
+
+ if (EAT('*')) { /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ } else if (EATTWO('\\', '{')) {
+ count = p_count(p);
+ if (EAT(',')) {
+ if (MORE() && isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EATTWO('\\', '}')) { /* error heuristics */
+ while (MORE() && !SEETWO('\\', '}'))
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ } else if (c == '$') /* $ (but not \$) ends it */
+ return(1);
+
+ return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ */
+static int /* the value */
+p_count(struct parse *p)
+{
+ int count = 0;
+ int ndigits = 0;
+
+ while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+ count = count*10 + (GETNEXT() - '0');
+ ndigits++;
+ }
+
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+ return(count);
+}
+
+/*
+ - p_bracket - parse a bracketed character list
+ *
+ * Note a significant property of this code: if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(struct parse *p)
+{
+ cset *cs;
+ int invert = 0;
+
+ /* Dept of Truly Sickening Special-Case Kludges */
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
+
+ if ((cs = allocset(p)) == NULL) {
+ /* allocset did set error status in p */
+ return;
+ }
+
+ if (EAT('^'))
+ invert++; /* make note to invert set at end */
+ if (EAT(']'))
+ CHadd(cs, ']');
+ else if (EAT('-'))
+ CHadd(cs, '-');
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+ p_b_term(p, cs);
+ if (EAT('-'))
+ CHadd(cs, '-');
+ MUSTEAT(']', REG_EBRACK);
+
+ if (p->error != 0) { /* don't mess things up further */
+ freeset(p, cs);
+ return;
+ }
+
+ if (p->g->cflags&REG_ICASE) {
+ int i;
+ int ci;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i) && isalpha(i)) {
+ ci = othercase(i);
+ if (ci != i)
+ CHadd(cs, ci);
+ }
+ if (cs->multis != NULL)
+ mccase(p, cs);
+ }
+ if (invert) {
+ int i;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i))
+ CHsub(cs, i);
+ else
+ CHadd(cs, i);
+ if (p->g->cflags&REG_NEWLINE)
+ CHsub(cs, '\n');
+ if (cs->multis != NULL)
+ mcinvert(p, cs);
+ }
+
+ assert(cs->multis == NULL); /* xxx */
+
+ if (nch(p, cs) == 1) { /* optimize singleton sets */
+ ordinary(p, firstch(p, cs));
+ freeset(p, cs);
+ } else
+ EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ */
+static void
+p_b_term(struct parse *p, cset *cs)
+{
+ char c;
+ char start, finish;
+ int i;
+
+ /* classify what we've got */
+ switch ((MORE()) ? PEEK() : '\0') {
+ case '[':
+ c = (MORE2()) ? PEEK2() : '\0';
+ break;
+ case '-':
+ SETERROR(REG_ERANGE);
+ return; /* NOTE RETURN */
+ break;
+ default:
+ c = '\0';
+ break;
+ }
+
+ switch (c) {
+ case ':': /* character class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+ p_b_cclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+ break;
+ case '=': /* equivalence class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+ p_b_eclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+ break;
+ default: /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+ start = p_b_symbol(p);
+ if (SEE('-') && MORE2() && PEEK2() != ']') {
+ /* range */
+ NEXT();
+ if (EAT('-'))
+ finish = '-';
+ else
+ finish = p_b_symbol(p);
+ } else
+ finish = start;
+/* xxx what about signed chars here... */
+ REQUIRE(start <= finish, REG_ERANGE);
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ break;
+ }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ */
+static void
+p_b_cclass(struct parse *p, cset *cs)
+{
+ char *sp = p->next;
+ struct cclass *cp;
+ size_t len;
+ const char *u;
+ char c;
+
+ while (MORE() && isalpha((uch)PEEK()))
+ NEXT();
+ len = p->next - sp;
+ for (cp = cclasses; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ break;
+ if (cp->name == NULL) {
+ /* oops, didn't find it */
+ SETERROR(REG_ECTYPE);
+ return;
+ }
+
+ u = cp->chars;
+ while ((c = *u++) != '\0')
+ CHadd(cs, c);
+ for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+ MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(struct parse *p, cset *cs)
+{
+ char c;
+
+ c = p_b_coll_elem(p, '=');
+ CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ */
+static char /* value of symbol */
+p_b_symbol(struct parse *p)
+{
+ char value;
+
+ REQUIRE(MORE(), REG_EBRACK);
+ if (!EATTWO('[', '.'))
+ return(GETNEXT());
+
+ /* collating symbol */
+ value = p_b_coll_elem(p, '.');
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+ return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ */
+static char /* value of collating element */
+p_b_coll_elem(struct parse *p,
+ int endc) /* name ended by endc,']' */
+{
+ char *sp = p->next;
+ struct cname *cp;
+ int len;
+
+ while (MORE() && !SEETWO(endc, ']'))
+ NEXT();
+ if (!MORE()) {
+ SETERROR(REG_EBRACK);
+ return(0);
+ }
+ len = p->next - sp;
+ for (cp = cnames; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ return(cp->code); /* known name */
+ if (len == 1)
+ return(*sp); /* single character */
+ SETERROR(REG_ECOLLATE); /* neither */
+ return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ */
+static char /* if no counterpart, return ch */
+othercase(int ch)
+{
+ ch = (uch)ch;
+ assert(isalpha(ch));
+ if (isupper(ch))
+ return ((uch)tolower(ch));
+ else if (islower(ch))
+ return ((uch)toupper(ch));
+ else /* peculiar, but could happen */
+ return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+bothcases(struct parse *p, int ch)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[3];
+
+ ch = (uch)ch;
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */
+ p->next = bracket;
+ p->end = bracket+2;
+ bracket[0] = ch;
+ bracket[1] = ']';
+ bracket[2] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+2);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ */
+static void
+ordinary(struct parse *p, int ch)
+{
+ cat_t *cap = p->g->categories;
+
+ if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+ bothcases(p, ch);
+ else {
+ EMIT(OCHAR, (uch)ch);
+ if (cap[ch] == 0)
+ cap[ch] = p->g->ncategories++;
+ }
+}
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+nonnewline(struct parse *p)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[4];
+
+ p->next = bracket;
+ p->end = bracket+3;
+ bracket[0] = '^';
+ bracket[1] = '\n';
+ bracket[2] = ']';
+ bracket[3] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+3);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - repeat - generate code for a bounded repetition, recursively if needed
+ */
+static void
+repeat(struct parse *p,
+ sopno start, /* operand from here to end of strip */
+ int from, /* repeated from this number */
+ int to) /* to this number of times (maybe INFINITY) */
+{
+ sopno finish = HERE();
+# define N 2
+# define INF 3
+# define REP(f, t) ((f)*8 + (t))
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+ sopno copy;
+
+ if (p->error != 0) /* head off possible runaway recursion */
+ return;
+
+ assert(from <= to);
+
+ switch (REP(MAP(from), MAP(to))) {
+ case REP(0, 0): /* must be user doing this */
+ DROP(finish-start); /* drop the operand */
+ break;
+ case REP(0, 1): /* as x{1,1}? */
+ case REP(0, N): /* as x{1,n}? */
+ case REP(0, INF): /* as x{1,}? */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start); /* offset is wrong... */
+ repeat(p, start+1, 1, to);
+ ASTERN(OOR1, start);
+ AHEAD(start); /* ... fix it */
+ EMIT(OOR2, 0);
+ AHEAD(THERE());
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case REP(1, 1): /* trivial case */
+ /* done */
+ break;
+ case REP(1, N): /* as x?x{1,n-1} */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start);
+ ASTERN(OOR1, start);
+ AHEAD(start);
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ copy = dupl(p, start+1, finish+1);
+ assert(copy == finish+4);
+ repeat(p, copy, 1, to-1);
+ break;
+ case REP(1, INF): /* as x+ */
+ INSERT(OPLUS_, start);
+ ASTERN(O_PLUS, start);
+ break;
+ case REP(N, N): /* as xx{m-1,n-1} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to-1);
+ break;
+ case REP(N, INF): /* as xx{n-1,INF} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to);
+ break;
+ default: /* "can't happen" */
+ SETERROR(REG_ASSERT); /* just in case */
+ break;
+ }
+}
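+
+/*
+ * Worked example of the REP/MAP encoding above: MAP(0) = 0, MAP(1) = 1,
+ * MAP(3) = N and MAP(INFINITY) = INF, so x{1,3} selects REP(1, N) and is
+ * rewritten as x?x{1,2}; the recursive call then reduces x{1,2} the same
+ * way until the trivial REP(1, 1) case is reached.
+ */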
+
+/*
+ - seterr - set an error condition
+ */
+static int /* useless but makes type checking happy */
+seterr(struct parse *p, int e)
+{
+ if (p->error == 0) /* keep earliest error condition */
+ p->error = e;
+ p->next = nuls; /* try to bring things to a halt */
+ p->end = nuls;
+ return(0); /* make the return value well-defined */
+}
+
+/*
+ - allocset - allocate a set of characters for []
+ */
+static cset *
+allocset(struct parse *p)
+{
+ int no = p->g->ncsets++;
+ size_t nc;
+ size_t nbytes;
+ cset *cs;
+ size_t css = (size_t)p->g->csetsize;
+ int i;
+
+ if (no >= p->ncsalloc) { /* need another column of space */
+ void *ptr;
+
+ p->ncsalloc += CHAR_BIT;
+ nc = p->ncsalloc;
+ assert(nc % CHAR_BIT == 0);
+ nbytes = nc / CHAR_BIT * css;
+
+ ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
+ if (ptr == NULL)
+ goto nomem;
+ p->g->sets = ptr;
+
+ ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
+ if (ptr == NULL)
+ goto nomem;
+ p->g->setbits = ptr;
+
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+
+ (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+ }
+ /* XXX should not happen */
+ if (p->g->sets == NULL || p->g->setbits == NULL)
+ goto nomem;
+
+ cs = &p->g->sets[no];
+ cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->hash = 0;
+ cs->smultis = 0;
+ cs->multis = NULL;
+
+ return(cs);
+nomem:
+ free(p->g->sets);
+ p->g->sets = NULL;
+ free(p->g->setbits);
+ p->g->setbits = NULL;
+
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ return(NULL);
+}
+
+/*
+ - freeset - free a now-unused set
+ */
+static void
+freeset(struct parse *p, cset *cs)
+{
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, i);
+ if (cs == top-1) /* recover only the easy case */
+ p->g->ncsets--;
+}
+
+/*
+ - freezeset - final processing on a set of characters
+ *
+ * The main task here is merging identical sets. This is usually a waste
+ * of time (although the hash code minimizes the overhead), but can win
+ * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to
+ * the same value!
+ */
+static int /* set number */
+freezeset(struct parse *p, cset *cs)
+{
+ uch h = cs->hash;
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ cset *cs2;
+ size_t css = (size_t)p->g->csetsize;
+
+ /* look for an earlier one which is the same */
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
+ if (cs2->hash == h && cs2 != cs) {
+ /* maybe */
+ for (i = 0; i < css; i++)
+ if (!!CHIN(cs2, i) != !!CHIN(cs, i))
+ break; /* no */
+ if (i == css)
+ break; /* yes */
+ }
+
+ if (cs2 < top) { /* found one */
+ freeset(p, cs);
+ cs = cs2;
+ }
+
+ return((int)(cs - p->g->sets));
+}
+
+/*
+ - firstch - return first character in a set (which must have at least one)
+ */
+static int /* character; there is no "none" value */
+firstch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ return((char)i);
+ assert(never);
+ return(0); /* arbitrary */
+}
+
+/*
+ - nch - number of characters in a set
+ */
+static int
+nch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+ int n = 0;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ n++;
+ return(n);
+}
+
+/*
+ - mcadd - add a collating element to a cset
+ */
+static void
+mcadd( struct parse *p, cset *cs, const char *cp)
+{
+ size_t oldend = cs->smultis;
+ void *np;
+
+ cs->smultis += strlen(cp) + 1;
+ np = realloc(cs->multis, cs->smultis);
+ if (np == NULL) {
+ if (cs->multis)
+ free(cs->multis);
+ cs->multis = NULL;
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ cs->multis = np;
+
+ llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
+}
+
+/*
+ - mcinvert - invert the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mcinvert(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - mccase - add case counterparts of the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mccase(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - isinsets - is this character in any sets?
+ */
+static int /* predicate */
+isinsets(struct re_guts *g, int c)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc = (uch)c;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc] != 0)
+ return(1);
+ return(0);
+}
+
+/*
+ - samesets - are these two characters in exactly the same sets?
+ */
+static int /* predicate */
+samesets(struct re_guts *g, int c1, int c2)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc1 = (uch)c1;
+ unsigned uc2 = (uch)c2;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc1] != col[uc2])
+ return(0);
+ return(1);
+}
+
+/*
+ - categorize - sort out character categories
+ */
+static void
+categorize(struct parse *p, struct re_guts *g)
+{
+ cat_t *cats = g->categories;
+ int c;
+ int c2;
+ cat_t cat;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (cats[c] == 0 && isinsets(g, c)) {
+ cat = g->ncategories++;
+ cats[c] = cat;
+ for (c2 = c+1; c2 <= CHAR_MAX; c2++)
+ if (cats[c2] == 0 && samesets(g, c, c2))
+ cats[c2] = cat;
+ }
+}
+
+/*
+ - dupl - emit a duplicate of a bunch of sops
+ */
+static sopno /* start of duplicate */
+dupl(struct parse *p,
+ sopno start, /* from here */
+ sopno finish) /* to this less one */
+{
+ sopno ret = HERE();
+ sopno len = finish - start;
+
+ assert(finish >= start);
+ if (len == 0)
+ return(ret);
+ enlarge(p, p->ssize + len); /* this many unexpected additions */
+ assert(p->ssize >= p->slen + len);
+ (void) memmove((char *)(p->strip + p->slen),
+ (char *)(p->strip + start), (size_t)len*sizeof(sop));
+ p->slen += len;
+ return(ret);
+}
+
+/*
+ - doemit - emit a strip operator
+ *
+ * It might seem better to implement this as a macro with a function as
+ * hard-case backup, but it's just too big and messy unless there are
+ * some changes to the data structures. Maybe later.
+ */
+static void
+doemit(struct parse *p, sop op, size_t opnd)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* deal with oversize operands ("can't happen", more or less) */
+ assert(opnd < 1<<OPSHIFT);
+
+ /* deal with undersized strip */
+ if (p->slen >= p->ssize)
+ enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
+ assert(p->slen < p->ssize);
+
+ /* finally, it's all reduced to the easy case */
+ p->strip[p->slen++] = SOP(op, opnd);
+}
+
+/*
+ - doinsert - insert a sop into the strip
+ */
+static void
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+{
+ sopno sn;
+ sop s;
+ int i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ sn = HERE();
+ EMIT(op, opnd); /* do checks, ensure space */
+ assert(HERE() == sn+1);
+ s = p->strip[sn];
+
+ /* adjust paren pointers */
+ assert(pos > 0);
+ for (i = 1; i < NPAREN; i++) {
+ if (p->pbegin[i] >= pos) {
+ p->pbegin[i]++;
+ }
+ if (p->pend[i] >= pos) {
+ p->pend[i]++;
+ }
+ }
+
+ memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+ (HERE()-pos-1)*sizeof(sop));
+ p->strip[pos] = s;
+}
+
+/*
+ - dofwd - complete a forward reference
+ */
+static void
+dofwd(struct parse *p, sopno pos, sop value)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ assert(value < 1<<OPSHIFT);
+ p->strip[pos] = OP(p->strip[pos]) | value;
+}
+
+/*
+ - enlarge - enlarge the strip
+ */
+static void
+enlarge(struct parse *p, sopno size)
+{
+ sop *sp;
+
+ if (p->ssize >= size)
+ return;
+
+ sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ if (sp == NULL) {
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ p->strip = sp;
+ p->ssize = size;
+}
+
+/*
+ - stripsnug - compact the strip
+ */
+static void
+stripsnug(struct parse *p, struct re_guts *g)
+{
+ g->nstates = p->slen;
+ g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+ if (g->strip == NULL) {
+ SETERROR(REG_ESPACE);
+ g->strip = p->strip;
+ }
+}
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ *
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences. Someday. This code is simple and finds most
+ * of the interesting cases.
+ *
+ * Note that must and mlen got initialized during setup.
+ */
+static void
+findmust(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop *start = 0; /* start of longest sequence; set in the default case below */
+ sop *newstart = 0; /* start of current sequence; set in the OCHAR case */
+ sopno newlen;
+ sop s;
+ char *cp;
+ sopno i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* find the longest OCHAR sequence in strip */
+ newlen = 0;
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OCHAR: /* sequence member */
+ if (newlen == 0) /* new sequence */
+ newstart = scan - 1;
+ newlen++;
+ break;
+ case OPLUS_: /* things that don't break one */
+ case OLPAREN:
+ case ORPAREN:
+ break;
+ case OQUEST_: /* things that must be skipped */
+ case OCH_:
+ scan--;
+ do {
+ scan += OPND(s);
+ s = *scan;
+ /* assert() interferes with debug printouts */
+ if (OP(s) != O_QUEST && OP(s) != O_CH &&
+ OP(s) != OOR2) {
+ g->iflags |= REGEX_BAD;
+ return;
+ }
+ } while (OP(s) != O_QUEST && OP(s) != O_CH);
+ /* fallthrough */
+ default: /* things that break a sequence */
+ if (newlen > g->mlen) { /* ends one */
+ start = newstart;
+ g->mlen = newlen;
+ }
+ newlen = 0;
+ break;
+ }
+ } while (OP(s) != OEND);
+
+ if (g->mlen == 0) /* there isn't one */
+ return;
+
+ /* turn it into a character string */
+ g->must = malloc((size_t)g->mlen + 1);
+ if (g->must == NULL) { /* argh; just forget it */
+ g->mlen = 0;
+ return;
+ }
+ cp = g->must;
+ scan = start;
+ for (i = g->mlen; i > 0; i--) {
+ while (OP(s = *scan++) != OCHAR)
+ continue;
+ assert(cp < g->must + g->mlen);
+ *cp++ = (char)OPND(s);
+ }
+ assert(cp == g->must + g->mlen);
+ *cp++ = '\0'; /* just on general principles */
+}
+
+/*
+ - pluscount - count + nesting
+ */
+static sopno /* nesting depth */
+pluscount(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop s;
+ sopno plusnest = 0;
+ sopno maxnest = 0;
+
+ if (p->error != 0)
+ return(0); /* there may not be an OEND */
+
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OPLUS_:
+ plusnest++;
+ break;
+ case O_PLUS:
+ if (plusnest > maxnest)
+ maxnest = plusnest;
+ plusnest--;
+ break;
+ }
+ } while (OP(s) != OEND);
+ if (plusnest != 0)
+ g->iflags |= REGEX_BAD;
+ return(maxnest);
+}
diff --git a/contrib/llvm/lib/Support/regengine.inc b/contrib/llvm/lib/Support/regengine.inc
new file mode 100644
index 000000000000..7e41f96f359d
--- /dev/null
+++ b/contrib/llvm/lib/Support/regengine.inc
@@ -0,0 +1,1034 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#define nope snope
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#define nope lnope
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+ struct re_guts *g;
+ int eflags;
+ llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+ const char *offp; /* offsets work from here */
+ const char *beginp; /* start of string -- virtual NUL precedes */
+ const char *endp; /* end of string -- virtual NUL here */
+ const char *coldp; /* can be no match starting before here */
+ const char **lastpos; /* [nplus+1] */
+ STATEVARS;
+ states st; /* current states */
+ states fresh; /* states for a fresh start */
+ states tmp; /* temporary */
+ states empty; /* empty set of states */
+};
+
+static int matcher(struct re_guts *, const char *, size_t,
+ llvm_regmatch_t[], int);
+static const char *dissect(struct match *, const char *, const char *, sopno,
+ sopno);
+static const char *backref(struct match *, const char *, const char *, sopno,
+ sopno, sopno, int);
+static const char *fast(struct match *, const char *, const char *, sopno, sopno);
+static const char *slow(struct match *, const char *, const char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION 100
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5) /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+#endif
+#ifdef REDEBUG
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+#endif
+#ifdef REDEBUG
+static char *pchar(int);
+#endif
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ */
+static int /* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[],
+ int eflags)
+{
+ const char *endp;
+ size_t i;
+ struct match mv;
+ struct match *m = &mv;
+ const char *dp;
+ const sopno gf = g->firststate+1; /* +1 for OEND */
+ const sopno gl = g->laststate;
+ const char *start;
+ const char *stop;
+
+ /* simplify the situation where possible */
+ if (g->cflags&REG_NOSUB)
+ nmatch = 0;
+ if (eflags&REG_STARTEND) {
+ start = string + pmatch[0].rm_so;
+ stop = string + pmatch[0].rm_eo;
+ } else {
+ start = string;
+ stop = start + strlen(start);
+ }
+ if (stop < start)
+ return(REG_INVARG);
+
+ /* prescreening; this does wonders for this rather slow code */
+ if (g->must != NULL) {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
+
+ /* match struct setup */
+ m->g = g;
+ m->eflags = eflags;
+ m->pmatch = NULL;
+ m->lastpos = NULL;
+ m->offp = string;
+ m->beginp = start;
+ m->endp = stop;
+ STATESETUP(m, 4);
+ SETUP(m->st);
+ SETUP(m->fresh);
+ SETUP(m->tmp);
+ SETUP(m->empty);
+ CLEAR(m->empty);
+
+ /* this loop does only one repetition except for backrefs */
+ for (;;) {
+ endp = fast(m, start, stop, gf, gl);
+ if (endp == NULL) { /* a miss */
+ free(m->pmatch);
+ free((void*)m->lastpos);
+ STATETEARDOWN(m);
+ return(REG_NOMATCH);
+ }
+ if (nmatch == 0 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* where? */
+ assert(m->coldp != NULL);
+ for (;;) {
+ NOTE("finding start");
+ endp = slow(m, m->coldp, stop, gf, gl);
+ if (endp != NULL)
+ break;
+ assert(m->coldp < m->endp);
+ m->coldp++;
+ }
+ if (nmatch == 1 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* oh my, he wants the subexpressions... */
+ if (m->pmatch == NULL)
+ m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+ sizeof(llvm_regmatch_t));
+ if (m->pmatch == NULL) {
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+ NOTE("dissecting");
+ dp = dissect(m, m->coldp, endp, gf, gl);
+ } else {
+ if (g->nplus > 0 && m->lastpos == NULL)
+ m->lastpos = (const char **)malloc((g->nplus+1) *
+ sizeof(char *));
+ if (g->nplus > 0 && m->lastpos == NULL) {
+ free(m->pmatch);
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ NOTE("backref dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ if (dp != NULL)
+ break;
+
+ /* uh-oh... we couldn't find a subexpression-level match */
+ assert(g->backrefs); /* must be back references doing it */
+ assert(g->nplus == 0 || m->lastpos != NULL);
+ for (;;) {
+ if (dp != NULL || endp <= m->coldp)
+ break; /* defeat */
+ NOTE("backoff");
+ endp = slow(m, m->coldp, endp-1, gf, gl);
+ if (endp == NULL)
+ break; /* defeat */
+ /* try it on a shorter possibility */
+#ifndef NDEBUG
+ for (i = 1; i <= m->g->nsub; i++) {
+ assert(m->pmatch[i].rm_so == -1);
+ assert(m->pmatch[i].rm_eo == -1);
+ }
+#endif
+ NOTE("backoff dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ assert(dp == NULL || dp == endp);
+ if (dp != NULL) /* found a shorter one */
+ break;
+
+ /* despite initial appearances, there is no match here */
+ NOTE("false alarm");
+ if (m->coldp == stop)
+ break;
+ start = m->coldp + 1; /* recycle starting later */
+ }
+
+ /* fill in the details if requested */
+ if (nmatch > 0) {
+ pmatch[0].rm_so = m->coldp - m->offp;
+ pmatch[0].rm_eo = endp - m->offp;
+ }
+ if (nmatch > 1) {
+ assert(m->pmatch != NULL);
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
+ pmatch[i] = m->pmatch[i];
+ else {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+ }
+
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
+ STATETEARDOWN(m);
+ return(0);
+}
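The prescreening loop near the top of matcher() is the cheap filter that makes this engine tolerable: when findmust() recovered a mandatory literal, a plain substring scan can reject most subjects before the NFA simulation ever runs. A self-contained sketch of that idea follows; the names are illustrative, not the real re_guts fields.

    #include <stdio.h>
    #include <string.h>

    /* Reject a subject early if the mandatory literal is absent. */
    static int prescreen(const char *start, const char *stop,
                         const char *must, size_t mlen) {
        const char *dp;

        if (must == NULL)                  /* nothing mandatory: cannot reject */
            return 1;
        for (dp = start; dp + mlen <= stop; dp++)
            if (*dp == must[0] && memcmp(dp, must, mlen) == 0)
                return 1;                  /* literal present: worth matching */
        return 0;                          /* literal absent: guaranteed no match */
    }

    int main(void) {
        const char *s = "why hello there";

        printf("%d\n", prescreen(s, s + strlen(s), "hello", 5));  /* prints 1 */
        printf("%d\n", prescreen(s, s + strlen(s), "xyzzy", 5));  /* prints 0 */
        return 0;
    }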
+
+/*
+ - dissect - figure out what matched what, no back references
+ */
+static const char * /* == stop (success) always */
+dissect(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ sopno es; /* end sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ const char *stp; /* string matched by it cannot pass here */
+ const char *rest; /* start of rest of string */
+ const char *tail; /* string unmatched by rest of RE */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *sep; /* end of string matched by subsubRE */
+ const char *oldssp; /* previous ssp */
+
+ AT("diss", start, stop, startst, stopst);
+ sp = start;
+ for (ss = startst; ss < stopst; ss = es) {
+ /* identify end of subRE */
+ es = ss;
+ switch (OP(m->g->strip[es])) {
+ case OPLUS_:
+ case OQUEST_:
+ es += OPND(m->g->strip[es]);
+ break;
+ case OCH_:
+ while (OP(m->g->strip[es]) != O_CH)
+ es += OPND(m->g->strip[es]);
+ break;
+ }
+ es++;
+
+ /* figure out what it matched */
+ switch (OP(m->g->strip[ss])) {
+ case OEND:
+ assert(nope);
+ break;
+ case OCHAR:
+ sp++;
+ break;
+ case OBOL:
+ case OEOL:
+ case OBOW:
+ case OEOW:
+ break;
+ case OANY:
+ case OANYOF:
+ sp++;
+ break;
+ case OBACK_:
+ case O_BACK:
+ assert(nope);
+ break;
+ /* cases where length of match is hard to find */
+ case OQUEST_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ /* did innards match? */
+ if (slow(m, sp, rest, ssub, esub) != NULL) {
+ const char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ } else /* no */
+ assert(sp == rest);
+ sp = rest;
+ break;
+ case OPLUS_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ ssp = sp;
+ oldssp = ssp;
+ for (;;) { /* find last match of innards */
+ sep = slow(m, ssp, rest, ssub, esub);
+ if (sep == NULL || sep == ssp)
+ break; /* failed or matched null */
+ oldssp = ssp; /* on to next try */
+ ssp = sep;
+ }
+ if (sep == NULL) {
+ /* last successful match */
+ sep = ssp;
+ ssp = oldssp;
+ }
+ assert(sep == rest); /* must exhaust substring */
+ assert(slow(m, ssp, sep, ssub, esub) == rest);
+ {
+ const char *dp = dissect(m, ssp, sep, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == sep);
+ }
+ sp = rest;
+ break;
+ case OCH_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = ss + OPND(m->g->strip[ss]) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ if (slow(m, sp, rest, ssub, esub) == rest)
+ break; /* it matched all of it */
+ /* that one missed, try next one */
+ assert(OP(m->g->strip[esub]) == OOR1);
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ {
+ const char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ }
+ sp = rest;
+ break;
+ case O_PLUS:
+ case O_QUEST:
+ case OOR1:
+ case OOR2:
+ case O_CH:
+ assert(nope);
+ break;
+ case OLPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_so = sp - m->offp;
+ break;
+ case ORPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_eo = sp - m->offp;
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+ }
+
+ assert(sp == stop);
+ return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ */
+static const char * /* == stop (success) or NULL (failure) */
+backref(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst, sopno lev, int rec) /* PLUS nesting level */
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *dp;
+ size_t len;
+ int hard;
+ sop s;
+ llvm_regoff_t offsave;
+ cset *cs;
+
+ AT("back", start, stop, startst, stopst);
+ sp = start;
+
+ /* get as far as we can with easy stuff */
+ hard = 0;
+ for (ss = startst; !hard && ss < stopst; ss++)
+ switch (OP(s = m->g->strip[ss])) {
+ case OCHAR:
+ if (sp == stop || *sp++ != (char)OPND(s))
+ return(NULL);
+ break;
+ case OANY:
+ if (sp == stop)
+ return(NULL);
+ sp++;
+ break;
+ case OANYOF:
+ cs = &m->g->sets[OPND(s)];
+ if (sp == stop || !CHIN(cs, *sp++))
+ return(NULL);
+ break;
+ case OBOL:
+ if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOL:
+ if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OBOW:
+ if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp > m->beginp &&
+ !ISWORD(*(sp-1))) ) &&
+ (sp < m->endp && ISWORD(*sp)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOW:
+ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp < m->endp && !ISWORD(*sp)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1))) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case O_QUEST:
+ break;
+ case OOR1: /* matches null but needs to skip */
+ ss++;
+ s = m->g->strip[ss];
+ do {
+ assert(OP(s) == OOR2);
+ ss += OPND(s);
+ } while (OP(s = m->g->strip[ss]) != O_CH);
+ /* note that the ss++ gets us past the O_CH */
+ break;
+ default: /* have to make a choice */
+ hard = 1;
+ break;
+ }
+ if (!hard) { /* that was it! */
+ if (sp != stop)
+ return(NULL);
+ return(sp);
+ }
+ ss--; /* adjust for the for's final increment */
+
+ /* the hard stuff */
+ AT("hard", sp, stop, ss, stopst);
+ s = m->g->strip[ss];
+ switch (OP(s)) {
+ case OBACK_: /* the vilest depths */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ if (m->pmatch[i].rm_eo == -1)
+ return(NULL);
+ assert(m->pmatch[i].rm_so != -1);
+ len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ if (len == 0 && rec++ > MAX_RECURSION)
+ return(NULL);
+ assert(stop - m->beginp >= len);
+ if (sp > stop - len)
+ return(NULL); /* not enough left to match */
+ ssp = m->offp + m->pmatch[i].rm_so;
+ if (memcmp(sp, ssp, len) != 0)
+ return(NULL);
+ while (m->g->strip[ss] != SOP(O_BACK, i))
+ ss++;
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+ break;
+ case OQUEST_: /* to null or not */
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp); /* not */
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+ break;
+ case OPLUS_:
+ assert(m->lastpos != NULL);
+ assert(lev+1 <= m->g->nplus);
+ m->lastpos[lev+1] = sp;
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+ break;
+ case O_PLUS:
+ if (sp == m->lastpos[lev]) /* last pass matched null */
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ /* try another pass */
+ m->lastpos[lev] = sp;
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+ if (dp == NULL)
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ else
+ return(dp);
+ break;
+ case OCH_: /* find the right one, if any */
+ ssub = ss + 1;
+ esub = ss + OPND(s) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ dp = backref(m, sp, stop, ssub, esub, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ /* that one missed, try next one */
+ if (OP(m->g->strip[esub]) == O_CH)
+ return(NULL); /* there is none */
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ break;
+ case OLPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_so;
+ m->pmatch[i].rm_so = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_so = offsave;
+ return(NULL);
+ break;
+ case ORPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_eo;
+ m->pmatch[i].rm_eo = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_eo = offsave;
+ return(NULL);
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+
+ /* "can't happen" */
+ assert(nope);
+ /* NOTREACHED */
+ return NULL;
+}
+
+/*
+ - fast - step through the string at top speed
+ */
+static const char * /* where tentative match ended, or NULL */
+fast(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ states st = m->st;
+ states fresh = m->fresh;
+ states tmp = m->tmp;
+ const char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ const char *coldp; /* last p after which no match was underway */
+
+ CLEAR(st);
+ SET1(st, startst);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ ASSIGN(fresh, st);
+ SP("start", st, *p);
+ coldp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+ if (EQ(st, fresh))
+ coldp = p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, fresh);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("aft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ assert(coldp != NULL);
+ m->coldp = coldp;
+ if (ISSET(st, stopst))
+ return(p+1);
+ else
+ return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ */
+static const char * /* where it ended */
+slow(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ states st = m->st;
+ states empty = m->empty;
+ states tmp = m->tmp;
+ const char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ const char *matchp; /* last p at which a match ended */
+
+ AT("slow", start, stop, startst, stopst);
+ CLEAR(st);
+ SET1(st, startst);
+ SP("sstart", st, *p);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ matchp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst))
+ matchp = p;
+ if (EQ(st, empty) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, empty);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("saft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ */
+static states
+step(struct re_guts *g,
+ sopno start, /* start state within strip */
+ sopno stop, /* state after stop state within strip */
+ states bef, /* states reachable before */
+ int ch, /* character or NONCHAR code */
+ states aft) /* states already known reachable after */
+{
+ cset *cs;
+ sop s;
+ sopno pc;
+ onestate here; /* note, macros know this name */
+ sopno look;
+ int i;
+
+ for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+ s = g->strip[pc];
+ switch (OP(s)) {
+ case OEND:
+ assert(pc == stop-1);
+ break;
+ case OCHAR:
+ /* only characters can match */
+ assert(!NONCHAR(ch) || ch != (char)OPND(s));
+ if (ch == (char)OPND(s))
+ FWD(aft, bef, 1);
+ break;
+ case OBOL:
+ if (ch == BOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OEOL:
+ if (ch == EOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OBOW:
+ if (ch == BOW)
+ FWD(aft, bef, 1);
+ break;
+ case OEOW:
+ if (ch == EOW)
+ FWD(aft, bef, 1);
+ break;
+ case OANY:
+ if (!NONCHAR(ch))
+ FWD(aft, bef, 1);
+ break;
+ case OANYOF:
+ cs = &g->sets[OPND(s)];
+ if (!NONCHAR(ch) && CHIN(cs, ch))
+ FWD(aft, bef, 1);
+ break;
+ case OBACK_: /* ignored here */
+ case O_BACK:
+ FWD(aft, aft, 1);
+ break;
+ case OPLUS_: /* forward, this is just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case O_PLUS: /* both forward and back */
+ FWD(aft, aft, 1);
+ i = ISSETBACK(aft, OPND(s));
+ BACK(aft, aft, OPND(s));
+ if (!i && ISSETBACK(aft, OPND(s))) {
+ /* oho, must reconsider loop body */
+ pc -= OPND(s) + 1;
+ INIT(here, pc);
+ }
+ break;
+ case OQUEST_: /* two branches, both forward */
+ FWD(aft, aft, 1);
+ FWD(aft, aft, OPND(s));
+ break;
+ case O_QUEST: /* just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case OLPAREN: /* not significant here */
+ case ORPAREN:
+ FWD(aft, aft, 1);
+ break;
+ case OCH_: /* mark the first two branches */
+ FWD(aft, aft, 1);
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ break;
+ case OOR1: /* done a branch, find the O_CH */
+ if (ISSTATEIN(aft, here)) {
+ for (look = 1;
+ OP(s = g->strip[pc+look]) != O_CH;
+ look += OPND(s))
+ assert(OP(s) == OOR2);
+ FWD(aft, aft, look);
+ }
+ break;
+ case OOR2: /* propagate OCH_'s marking */
+ FWD(aft, aft, 1);
+ if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ }
+ break;
+ case O_CH: /* just empty */
+ FWD(aft, aft, 1);
+ break;
+ default: /* ooooops... */
+ assert(nope);
+ break;
+ }
+ }
+
+ return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+ struct re_guts *g = m->g;
+ int i;
+ int first = 1;
+
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)fprintf(d, "%s", caption);
+ if (ch != '\0')
+ (void)fprintf(d, " %s", pchar(ch));
+ for (i = 0; i < g->nstates; i++)
+ if (ISSET(st, i)) {
+ (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+ first = 0;
+ }
+ (void)fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+ sopno stopst)
+{
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)printf("%s %s-", title, pchar(*start));
+ (void)printf("%s ", pchar(*stop));
+ (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE /* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c? Well, yes. But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient. It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char * /* -> representation */
+pchar(int ch)
+{
+ static char pbuf[10];
+
+ if (isprint(ch) || ch == ' ')
+ (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+ else
+ (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+ return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
+#undef nope
diff --git a/contrib/llvm/lib/Support/regerror.c b/contrib/llvm/lib/Support/regerror.c
new file mode 100644
index 000000000000..1d67c9a2b03b
--- /dev/null
+++ b/contrib/llvm/lib/Support/regerror.c
@@ -0,0 +1,135 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+static const char *regatoi(const llvm_regex_t *, char *, int);
+
+static struct rerr {
+ int code;
+ const char *name;
+ const char *explain;
+} rerrs[] = {
+ { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+ { REG_ESPACE, "REG_ESPACE", "out of memory" },
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+ { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ - llvm_regerror - the interface to error numbers
+ = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+ */
+/* ARGSUSED */
+size_t
+llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+ struct rerr *r;
+ size_t len;
+ int target = errcode &~ REG_ITOA;
+ const char *s;
+ char convbuf[50];
+
+ if (errcode == REG_ATOI)
+ s = regatoi(preg, convbuf, sizeof convbuf);
+ else {
+ for (r = rerrs; r->code != 0; r++)
+ if (r->code == target)
+ break;
+
+ if (errcode&REG_ITOA) {
+ if (r->code != 0) {
+ assert(strlen(r->name) < sizeof(convbuf));
+ (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
+ } else
+ (void)snprintf(convbuf, sizeof convbuf,
+ "REG_0x%x", target);
+ s = convbuf;
+ } else
+ s = r->explain;
+ }
+
+ len = strlen(s) + 1;
+ if (errbuf_size > 0) {
+ llvm_strlcpy(errbuf, s, errbuf_size);
+ }
+
+ return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ */
+static const char *
+regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
+{
+ struct rerr *r;
+
+ for (r = rerrs; r->code != 0; r++)
+ if (strcmp(r->name, preg->re_endp) == 0)
+ break;
+ if (r->code == 0)
+ return("0");
+
+ (void)snprintf(localbuf, localbufsize, "%d", r->code);
+ return(localbuf);
+}
diff --git a/contrib/llvm/lib/Support/regex2.h b/contrib/llvm/lib/Support/regex2.h
new file mode 100644
index 000000000000..21659c34449a
--- /dev/null
+++ b/contrib/llvm/lib/Support/regex2.h
@@ -0,0 +1,157 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex2.h 8.4 (Berkeley) 3/20/94
+ */
+
+/*
+ * internals of regex_t
+ */
+#define MAGIC1 ((('r'^0200)<<8) | 'e')
+
+/*
+ * The internal representation is a *strip*, a sequence of
+ * operators ending with an endmarker. (Some terminology etc. is a
+ * historical relic of earlier versions which used multiple strips.)
+ * Certain oddities in the representation are there to permit running
+ * the machinery backwards; in particular, any deviation from sequential
+ * flow must be marked at both its source and its destination. Some
+ * fine points:
+ *
+ * - OPLUS_ and O_PLUS are *inside* the loop they create.
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create.
+ * - OCH_ and O_CH are *outside* the multi-way branch they create, while
+ * OOR1 and OOR2 are respectively the end and the beginning of one of
+ * the branches. Note that there is an implicit OOR2 following OCH_
+ * and an implicit OOR1 preceding O_CH.
+ *
+ * In state representations, an operator's bit is on to signify a state
+ * immediately *preceding* "execution" of that operator.
+ */
+typedef unsigned long sop; /* strip operator */
+typedef long sopno;
+#define OPRMASK 0xf8000000LU
+#define OPDMASK 0x07ffffffLU
+#define OPSHIFT ((unsigned)27)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND (1LU<<OPSHIFT) /* endmarker - */
+#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
+#define OBOL (3LU<<OPSHIFT) /* left anchor - */
+#define OEOL (4LU<<OPSHIFT) /* right anchor - */
+#define OANY (5LU<<OPSHIFT) /* . - */
+#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
+#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
+#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
+#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
+#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
+#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
+#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
+#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
+#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
+#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
+#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19LU<<OPSHIFT) /* begin word - */
+#define OEOW (20LU<<OPSHIFT) /* end word - */
+
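The strip encoding above packs an operator and its operand into a single unsigned long: five operator bits sit above a 27-bit operand field that carries a character, a set number, or a forward/backward offset. A small standalone sketch, reusing only the macro definitions shown above, demonstrates the round trip.

    #include <stdio.h>

    typedef unsigned long sop;            /* as in regex2.h */
    #define OPRMASK 0xf8000000LU
    #define OPDMASK 0x07ffffffLU
    #define OPSHIFT ((unsigned)27)
    #define OP(n)   ((n)&OPRMASK)
    #define OPND(n) ((n)&OPDMASK)
    #define SOP(op, opnd) ((op)|(opnd))
    #define OCHAR  (2LU<<OPSHIFT)
    #define OANYOF (6LU<<OPSHIFT)

    int main(void) {
        sop s1 = SOP(OCHAR, 'a');   /* literal character 'a' */
        sop s2 = SOP(OANYOF, 3);    /* bracket expression using cset number 3 */

        printf("s1: op=%#lx opnd=%lu ('%c')\n", OP(s1), OPND(s1), (char)OPND(s1));
        printf("s2: is OANYOF? %d, set number %lu\n", OP(s2) == OANYOF, OPND(s2));
        return 0;
    }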
+/*
+ * Structure for [] character-set representation. Character sets are
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
+ * The individual set therefore has both a pointer to the byte vector
+ * and a mask to pick out the relevant bit of each byte. A hash code
+ * simplifies testing whether two sets could be identical.
+ *
+ * This will get trickier for multicharacter collating elements. As
+ * preliminary hooks for dealing with such things, we also carry along
+ * a string of multi-character elements, and decide the size of the
+ * vectors at run time.
+ */
+typedef struct {
+ uch *ptr; /* -> uch [csetsize] */
+ uch mask; /* bit within array */
+ uch hash; /* hash code */
+ size_t smultis;
+ char *multis; /* -> char[smultis] ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
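In the real compiler the cset byte vectors and bit masks are carved out of re_guts.setbits by the set-allocation code in regcomp.c; purely for illustration, the sketch below builds one 256-entry column by hand and exercises CHadd/CHIN. As the comment above warns, CHIN yields the raw masked bit, not 0 or 1.

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    typedef unsigned char uch;                     /* as in regutils.h */
    typedef struct {                               /* as in regex2.h; multis unused here */
        uch *ptr; uch mask; uch hash;
        size_t smultis; char *multis;
    } cset;
    #define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
    #define CHIN(cs, c)  ((cs)->ptr[(uch)(c)] & (cs)->mask)

    int main(void) {
        uch bytes[256];                 /* one byte vector, csetsize of 256 assumed */
        cset cs;

        memset(bytes, 0, sizeof bytes);
        cs.ptr = bytes;
        cs.mask = 01;                   /* first bit column of the vector */
        cs.hash = 0;
        cs.smultis = 0;
        cs.multis = NULL;

        CHadd(&cs, 'a');
        CHadd(&cs, 'b');

        printf("'a' in set: %d\n", CHIN(&cs, 'a') != 0);   /* 1 */
        printf("'z' in set: %d\n", CHIN(&cs, 'z') != 0);   /* 0 */
        return 0;
    }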
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+ int magic;
+# define MAGIC2 ((('R'^0200)<<8)|'E')
+ sop *strip; /* malloced area for strip */
+ int csetsize; /* number of bits in a cset vector */
+ int ncsets; /* number of csets in use */
+ cset *sets; /* -> cset [ncsets] */
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
+ int cflags; /* copy of llvm_regcomp() cflags argument */
+ sopno nstates; /* = number of sops */
+ sopno firststate; /* the initial OEND (normally 0) */
+ sopno laststate; /* the final OEND */
+ int iflags; /* internal flags */
+# define USEBOL 01 /* used ^ */
+# define USEEOL 02 /* used $ */
+# define REGEX_BAD 04 /* something wrong */
+ int nbol; /* number of ^ used */
+ int neol; /* number of $ used */
+ int ncategories; /* how many character categories */
+ cat_t *categories; /* ->catspace[-CHAR_MIN] */
+ char *must; /* match must contain this string */
+ int mlen; /* length of must */
+ size_t nsub; /* copy of re_nsub */
+ int backrefs; /* does it use back references? */
+ sopno nplus; /* how deep does it nest +s? */
+ /* catspace must be last */
+ cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1) /* a non-character value */
+#define ISWORD(c) (isalnum((c)&0xff) || (c) == '_')
diff --git a/contrib/llvm/lib/Support/regex_impl.h b/contrib/llvm/lib/Support/regex_impl.h
new file mode 100644
index 000000000000..f8296c9ff75e
--- /dev/null
+++ b/contrib/llvm/lib/Support/regex_impl.h
@@ -0,0 +1,108 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992 Henry Spencer.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer of the University of Toronto.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+#include <sys/types.h>
+typedef off_t llvm_regoff_t;
+typedef struct {
+ llvm_regoff_t rm_so; /* start of match */
+ llvm_regoff_t rm_eo; /* end of match */
+} llvm_regmatch_t;
+
+typedef struct llvm_regex {
+ int re_magic;
+ size_t re_nsub; /* number of parenthesized subexpressions */
+ const char *re_endp; /* end pointer for REG_PEND */
+ struct re_guts *re_g; /* none of your business :-) */
+} llvm_regex_t;
+
+/* llvm_regcomp() flags */
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
+
+/* llvm_regerror() flags */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255 /* convert name to number (!) */
+#define REG_ITOA 0400 /* convert number to name (!) */
+
+/* llvm_regexec() flags */
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400 /* tracing of execution */
+#define REG_LARGE 01000 /* force large representation */
+#define REG_BACKR 02000 /* force use of backref code */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int llvm_regcomp(llvm_regex_t *, const char *, int);
+size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+ llvm_regmatch_t [], int);
+void llvm_regfree(llvm_regex_t *);
+size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_REGEX_H_ */
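Within LLVM these entry points are normally reached through the Regex wrapper class rather than called directly, but a minimal caller of the C API declared above could look like the hypothetical sketch below; the pattern and subject string are purely illustrative.

    #include <stdio.h>
    #include "regex_impl.h"

    int main(void) {
        llvm_regex_t re;
        llvm_regmatch_t m[2];
        char err[128];
        int rc;

        rc = llvm_regcomp(&re, "he(l+)o", REG_EXTENDED);
        if (rc != 0) {
            llvm_regerror(rc, &re, err, sizeof err);
            fprintf(stderr, "regcomp: %s\n", err);
            return 1;
        }

        rc = llvm_regexec(&re, "why hello there", 2, m, 0);
        if (rc == 0)
            printf("match at [%ld,%ld), group 1 at [%ld,%ld)\n",
                   (long)m[0].rm_so, (long)m[0].rm_eo,
                   (long)m[1].rm_so, (long)m[1].rm_eo);
        else if (rc == REG_NOMATCH)
            printf("no match\n");

        llvm_regfree(&re);
        return 0;
    }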
diff --git a/contrib/llvm/lib/Support/regexec.c b/contrib/llvm/lib/Support/regexec.c
new file mode 100644
index 000000000000..007861675ba1
--- /dev/null
+++ b/contrib/llvm/lib/Support/regexec.c
@@ -0,0 +1,162 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * the outer shell of llvm_regexec()
+ *
+ * This file includes regengine.inc *twice*, after muchos fiddling with the
+ * macros that code uses. This lets the same code operate on two different
+ * representations for state sets.
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/* macros for manipulating states, small version */
+/* FIXME: the small version assumes 'states' fits in a 'long'. */
+#define states1 long /* for later use in llvm_regexec() decision */
+#define states states1
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
+#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS long dummy /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate long
+#define INIT(o, n) ((o) = (unsigned long)1 << (n))
+#define INC(o) ((o) <<= 1)
+#define ISSTATEIN(v, o) (((v) & (o)) != 0)
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
+#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
+#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
+/* function names */
+#define SNAMES /* regengine.inc looks after details */
+
+#include "regengine.inc"
+
+/* now undo things */
+#undef states
+#undef CLEAR
+#undef SET0
+#undef SET1
+#undef ISSET
+#undef ASSIGN
+#undef EQ
+#undef STATEVARS
+#undef STATESETUP
+#undef STATETEARDOWN
+#undef SETUP
+#undef onestate
+#undef INIT
+#undef INC
+#undef ISSTATEIN
+#undef FWD
+#undef BACK
+#undef ISSETBACK
+#undef SNAMES
+
+/* macros for manipulating states, large version */
+#define states char *
+#define CLEAR(v) memset(v, 0, m->g->nstates)
+#define SET0(v, n) ((v)[n] = 0)
+#define SET1(v, n) ((v)[n] = 1)
+#define ISSET(v, n) ((v)[n])
+#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
+#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
+#define STATEVARS long vn; char *space
+#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
+ if ((m)->space == NULL) return(REG_ESPACE); \
+ (m)->vn = 0; }
+#define STATETEARDOWN(m) { free((m)->space); }
+#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
+#define onestate long
+#define INIT(o, n) ((o) = (n))
+#define INC(o) ((o)++)
+#define ISSTATEIN(v, o) ((v)[o])
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
+#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
+#define ISSETBACK(v, n) ((v)[here - (n)])
+/* function names */
+#define LNAMES /* flag */
+
+#include "regengine.inc"
+
+/*
+ - llvm_regexec - interface for matching
+ *
+ * We put this here so we can exploit knowledge of the state representation
+ * when choosing which matcher to call. Also, by this point the matchers
+ * have been prototyped.
+ */
+int /* 0 success, REG_NOMATCH failure */
+llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[], int eflags)
+{
+ struct re_guts *g = preg->re_g;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
+#endif
+
+ if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
+ return(REG_BADPAT);
+ assert(!(g->iflags&REGEX_BAD));
+ if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
+ return(REG_BADPAT);
+ eflags = GOODFLAGS(eflags);
+
+ if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
+ return(smatcher(g, string, nmatch, pmatch, eflags));
+ else
+ return(lmatcher(g, string, nmatch, pmatch, eflags));
+}
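The small/large decision above hinges on whether every NFA position fits into a single long used as a bitmask, i.e. whether nstates is at most CHAR_BIT * sizeof(long). A trivial standalone check of that threshold, with an illustrative state count:

    #include <limits.h>
    #include <stdio.h>

    int main(void) {
        long nstates = 12;     /* e.g. what re_guts.nstates might hold */
        long small_limit = (long)(CHAR_BIT * sizeof(long));

        printf("small (bitmask-in-a-long) matcher handles up to %ld states\n",
               small_limit);
        printf("nstates = %ld -> %s\n", nstates,
               nstates <= small_limit ? "smatcher" : "lmatcher");
        return 0;
    }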
diff --git a/contrib/llvm/lib/Support/regfree.c b/contrib/llvm/lib/Support/regfree.c
new file mode 100644
index 000000000000..dc2b4af90fa7
--- /dev/null
+++ b/contrib/llvm/lib/Support/regfree.c
@@ -0,0 +1,72 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/*
+ - llvm_regfree - free everything
+ */
+void
+llvm_regfree(llvm_regex_t *preg)
+{
+ struct re_guts *g;
+
+ if (preg->re_magic != MAGIC1) /* oops */
+ return; /* nice to complain, but hard */
+
+ g = preg->re_g;
+ if (g == NULL || g->magic != MAGIC2) /* oops again */
+ return;
+ preg->re_magic = 0; /* mark it invalid */
+ g->magic = 0; /* mark it invalid */
+
+ if (g->strip != NULL)
+ free((char *)g->strip);
+ if (g->sets != NULL)
+ free((char *)g->sets);
+ if (g->setbits != NULL)
+ free((char *)g->setbits);
+ if (g->must != NULL)
+ free(g->must);
+ free((char *)g);
+}
diff --git a/contrib/llvm/lib/Support/regstrlcpy.c b/contrib/llvm/lib/Support/regstrlcpy.c
new file mode 100644
index 000000000000..8b68afdf75f1
--- /dev/null
+++ b/contrib/llvm/lib/Support/regstrlcpy.c
@@ -0,0 +1,52 @@
+/*
+ * This code is derived from OpenBSD's libc, original license follows:
+ *
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "regex_impl.h"
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+llvm_strlcpy(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
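Given the return-value contract spelled out above (returns strlen(src); retval >= siz means truncation), a minimal caller-side check looks like the following. Illustrative only; it assumes the llvm_strlcpy prototype comes from regex_impl.h, as in this file.

#include <stddef.h>
#include <stdio.h>
#include "regex_impl.h"    /* assumed home of the llvm_strlcpy prototype */

static void copy_and_report(const char *src) {
  char buf[8];
  size_t needed = llvm_strlcpy(buf, src, sizeof(buf));
  if (needed >= sizeof(buf))           /* retval >= siz: truncation occurred */
    printf("truncated: source needed %zu bytes\n", needed + 1);
  else
    printf("copied \"%s\"\n", buf);
}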
diff --git a/contrib/llvm/lib/Support/regutils.h b/contrib/llvm/lib/Support/regutils.h
new file mode 100644
index 000000000000..d0ee100a382b
--- /dev/null
+++ b/contrib/llvm/lib/Support/regutils.h
@@ -0,0 +1,53 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* utility definitions */
+#define NC (CHAR_MAX - CHAR_MIN + 1)
+typedef unsigned char uch;
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif
+#endif
+#include <assert.h>
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c)
+#endif
diff --git a/contrib/llvm/lib/Support/system_error.cpp b/contrib/llvm/lib/Support/system_error.cpp
new file mode 100644
index 000000000000..56898de31520
--- /dev/null
+++ b/contrib/llvm/lib/Support/system_error.cpp
@@ -0,0 +1,130 @@
+//===---------------------- system_error.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Errno.h"
+#include <string>
+#include <cstring>
+
+namespace llvm {
+
+// class error_category
+
+error_category::error_category() {
+}
+
+error_category::~error_category() {
+}
+
+error_condition
+error_category::default_error_condition(int ev) const {
+ return error_condition(ev, *this);
+}
+
+bool
+error_category::equivalent(int code, const error_condition& condition) const {
+ return default_error_condition(code) == condition;
+}
+
+bool
+error_category::equivalent(const error_code& code, int condition) const {
+ return *this == code.category() && code.value() == condition;
+}
+
+std::string
+_do_message::message(int ev) const {
+ return std::string(sys::StrError(ev));
+}
+
+class _generic_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+};
+
+const char*
+_generic_error_category::name() const {
+ return "generic";
+}
+
+std::string
+_generic_error_category::message(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return std::string("unspecified generic_category error");
+#endif // ELAST
+ return _do_message::message(ev);
+}
+
+const error_category&
+generic_category() {
+ static _generic_error_category s;
+ return s;
+}
+
+class _system_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+
+const char*
+_system_error_category::name() const {
+ return "system";
+}
+
+// std::string _system_error_category::message(int ev) const {
+// Is in Platform/system_error.inc
+
+// error_condition _system_error_category::default_error_condition(int ev) const
+// Is in Platform/system_error.inc
+
+const error_category&
+system_category() {
+ static _system_error_category s;
+ return s;
+}
+
+const error_category&
+posix_category() {
+#ifdef LLVM_ON_WIN32
+ return generic_category();
+#else
+ return system_category();
+#endif
+}
+
+// error_condition
+
+std::string
+error_condition::message() const {
+ return _cat_->message(_val_);
+}
+
+// error_code
+
+std::string
+error_code::message() const {
+ return _cat_->message(_val_);
+}
+
+} // end namespace llvm
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/system_error.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/system_error.inc"
+#endif
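As a rough usage sketch of the categories defined above (assuming the libc++-derived interface declared by llvm/Support/system_error.h in this revision), a caller wraps an errno value and asks the category for its text:

#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdio>

static void report(int err) {
  // Construct against the generic (errno-style) category defined above.
  llvm::error_code ec(err, llvm::generic_category());
  // message() ends up in _do_message::message(), i.e. sys::StrError(err).
  std::printf("%s error %d: %s\n",
              ec.category().name(), ec.value(), ec.message().c_str());
}
// report(ENOENT) would print something like:
//   generic error 2: No such file or directory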
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
new file mode 100644
index 000000000000..08dc340f8541
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -0,0 +1,72 @@
+//===-- ARM.h - Top-level interface for ARM representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMAsmPrinter;
+class ARMBaseTargetMachine;
+class FunctionPass;
+class JITCodeEmitter;
+class MachineInstr;
+class MCCodeEmitter;
+class MCInst;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class TargetAsmBackend;
+class formatted_raw_ostream;
+
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
new file mode 100644
index 000000000000..cf333ccd49ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -0,0 +1,251 @@
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+ "Thumb mode">;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision floating point">;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict VFP3 to 16 double registers">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
+
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
+// to just not use them.
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+/// Some instructions update CPSR partially, which can add a false dependency
+/// for out-of-order implementations such as Cortex-A9, unless each individual
+/// bit is mapped to a separate physical register. Avoid partial CPSR updates
+/// for these processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+ "AvoidCPSRPartialUpdate", "true",
+ "Avoid CPSR partial update for OOO execution">;
+
+/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
+def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
+ "Supports v7 DSP instructions in Thumb2.">;
+
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
+
+// ARM ISAs.
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
+ "Support ARM v4T instructions">;
+def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
+ "Support ARM v5T instructions",
+ [HasV4TOps]>;
+def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+ "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ [HasV5TOps]>;
+def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
+ "Support ARM v6 instructions",
+ [HasV5TEOps]>;
+def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+ "Support ARM v6t2 instructions",
+ [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>;
+def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
+ "Support ARM v7 instructions",
+ [HasV6T2Ops]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+include "ARMSchedule.td"
+
+// ARM processor families.
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
+
+// V4 Processors.
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
+
+// V4T Processors.
+def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
+def : ProcNoItin<"arm710t", [HasV4TOps]>;
+def : ProcNoItin<"arm720t", [HasV4TOps]>;
+def : ProcNoItin<"arm9", [HasV4TOps]>;
+def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm920", [HasV4TOps]>;
+def : ProcNoItin<"arm920t", [HasV4TOps]>;
+def : ProcNoItin<"arm922t", [HasV4TOps]>;
+def : ProcNoItin<"arm940t", [HasV4TOps]>;
+def : ProcNoItin<"ep9312", [HasV4TOps]>;
+
+// V5T Processors.
+def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
+def : ProcNoItin<"arm1020t", [HasV5TOps]>;
+
+// V5TE Processors.
+def : ProcNoItin<"arm9e", [HasV5TEOps]>;
+def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm10e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
+def : ProcNoItin<"xscale", [HasV5TEOps]>;
+def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
+
+// V6 Processors.
+def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+ FeatureDB]>;
+
+// V6T2 Processors.
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V7a Processors.
+def : Processor<"cortex-a8", CortexA8Itineraries,
+ [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
+def : Processor<"cortex-a9", CortexA9Itineraries,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
+def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
+
+// V7EM Processors.
+def : ProcNoItin<"cortex-m4", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureVFP2,
+ FeatureVFPOnlySP]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo;
+
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+
+ let AssemblyWriters = [ARMAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 000000000000..595708fa7881
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,595 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDValue N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored as three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
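A couple of worked values make the rotate-based encoding above concrete. This is an illustrative check only (it assumes this header is on the include path), not part of the original file.

#include "ARMAddressingModes.h"
#include <cassert>

static void checkSOImmExamples() {
  using namespace llvm::ARM_AM;
  // 0xFF000000 is 0xFF rotated right by 8, so it fits one shifter_op.
  assert(getSOImmValRotate(0xFF000000u) == 8);
  assert(getSOImmVal(0xFF000000u) == 0x4FF);   // imm8 = 0xFF, rot field = 8/2
  // 0x00FF00FF needs two 8-bit chunks, so the single-op encoding fails.
  assert(getSOImmVal(0x00FF00FFu) == -1);
}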
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting an 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with a shifted 8-bit immediate, the mask leaves zero.
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with a shifted 16-bit immediate, the mask leaves zero.
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shifted value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+
+ /// getT2SOImmValSplat - Return the 12-bit encoded representation
+ /// if the specified value can be obtained by splatting the low 8 bits
+ /// into every other byte or every byte of a 32-bit value. i.e.,
+ /// 00000000 00000000 00000000 abcdefgh control = 0
+ /// 00000000 abcdefgh 00000000 abcdefgh control = 1
+ /// abcdefgh 00000000 abcdefgh 00000000 control = 2
+ /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
+ /// Return -1 if none of the above apply.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValSplatVal(unsigned V) {
+ unsigned u, Vs, Imm;
+ // control = 0
+ if ((V & 0xffffff00) == 0)
+ return V;
+
+ // If the value is zeroes in the first byte, just shift those off
+ Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+ // Any passing value only has 8 bits of payload, splatted across the word
+ Imm = Vs & 0xff;
+ // Likewise, any passing values have the payload splatted into the 3rd byte
+ u = Imm | (Imm << 16);
+
+ // control = 1 or 2
+ if (Vs == u)
+ return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+ // control = 3
+ if (Vs == (u | (u << 8)))
+ return (3 << 8) | Imm;
+
+ return -1;
+ }
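Worked values for the four control cases above, traced by hand through this routine; an illustrative comment only.

// getT2SOImmValSplatVal(0x000000AB) == 0x0AB   (control = 0)
// getT2SOImmValSplatVal(0x00AB00AB) == 0x1AB   (control = 1)
// getT2SOImmValSplatVal(0xAB00AB00) == 0x2AB   (control = 2)
// getT2SOImmValSplatVal(0xABABABAB) == 0x3AB   (control = 3)
// getT2SOImmValSplatVal(0x12345678) == -1      (no splat pattern applies)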
+
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
+ /// specified value is a rotated 8-bit value. Return -1 if no rotation
+ /// encoding is possible.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValRotateVal(unsigned V) {
+ unsigned RotAmt = CountLeadingZeros_32(V);
+ if (RotAmt >= 24)
+ return -1;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ if ((rotr32(0xff000000U, RotAmt) & V) == V)
+ return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+ return -1;
+ }
+
+ /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// encoding for it. If not, return -1.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmVal(unsigned Arg) {
+ // If 'Arg' is an 8-bit splat, then get the encoded value.
+ int Splat = getT2SOImmValSplatVal(Arg);
+ if (Splat != -1)
+ return Splat;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ int Rot = getT2SOImmValRotateVal(Arg);
+ if (Rot != -1)
+ return Rot;
+
+ return -1;
+ }
+
+ static inline unsigned getT2SOImmValRotate(unsigned V) {
+ if ((V & ~255U) == 0) return 0;
+ // Use CTZ to compute the rotate amount.
+ unsigned RotAmt = CountTrailingZeros_32(V);
+ return (32 - RotAmt) & 31;
+ }
+
+ static inline bool isT2SOImmTwoPartVal (unsigned Imm) {
+ unsigned V = Imm;
+ // Passing values can be any combination of splat values and shifter
+ // values. If this can be handled with a single shifter or splat, bail
+ // out. Those should be handled directly, not with a two-part val.
+ if (getT2SOImmValSplatVal(V) != -1)
+ return false;
+ V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Likewise, try masking out a splat value first.
+ V = Imm;
+ if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
+ V &= ~0xff00ff00U;
+ else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
+ V &= ~0x00ff00ffU;
+ // If what's left can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Otherwise, do not accept.
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immediate cannot be encoded as a two-part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+ // fourth operand 16-17 encodes the index mode.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+ unsigned IdxMode = 0) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)((AM2Opc >> 13) & 7);
+ }
+ static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+ return (AM2Opc >> 16);
+ }
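To see the packing in action, a short illustrative round trip through the helpers above (again assuming this header is on the include path):

#include "ARMAddressingModes.h"
#include <cassert>

static void checkAM2Packing() {
  using namespace llvm::ARM_AM;
  // Encode "subtract, offset 12, lsl shift, offset (non-indexed) mode".
  unsigned Opc = getAM2Opc(sub, /*Imm12=*/12, lsl);
  assert(Opc == 0x500Cu);                // 12 | (1 << 12) | (lsl << 13)
  assert(getAM2Offset(Opc) == 12);
  assert(getAM2Op(Opc) == sub);
  assert(getAM2ShiftOpc(Opc) == lsl);
  assert(getAM2IdxMode(Opc) == 0);
}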
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
+ // index mode.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+ unsigned IdxMode = 0) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset | (IdxMode << 9);
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+ static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+ return (AM3Opc >> 9);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
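One illustrative round trip through the encode/decode pair above, with values traced by hand:

//   createNEONModImm(/*OpCmode=*/0x2, /*Val=*/0xAB) == 0x2AB
//   decodeNEONModImm(0x2AB, EltBits)                == 0xAB00, EltBits == 32
// i.e. OpCmode 0b00010 selects a 32-bit element with only byte 1 set.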
+
+ AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
new file mode 100644
index 000000000000..5e438a976732
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
@@ -0,0 +1,516 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public TargetAsmBackend {
+ bool isThumbMode; // Currently emitting Thumb code.
+public:
+ ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+
+ unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: a 16-bit immediate, but scattered into two chunks: bits 0-11 and 16-19.
+{ "fixup_arm_movt_hi16", 0, 20, 0 },
+{ "fixup_arm_movw_lo16", 0, 20, 0 },
+{ "fixup_t2_movt_hi16", 0, 20, 0 },
+{ "fixup_t2_movw_lo16", 0, 20, 0 },
+{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+ }
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME: Thumb targets, different move constant targets..
+ return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+ return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if (isThumb()) {
+ // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
+ // use 0x46c0 (which is a 'mov r8, r8' insn).
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write16(0xbf00);
+ if (Count & 1)
+ OW->Write8(0);
+ return true;
+ }
+ // ARM mode
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xe1a00000);
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ }
+
+ return true;
+}
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) &&
+ "Out of range pc-relative fixup value!");
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like
+ // the other hi16 fixup?
+ case ARM::fixup_t2_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ // The value comes in as the whole thing, not just the portion required
+ // for this fixup, so we need to mask off the bits not handled by this
+ // portion (lo vs. hi).
+ Value &= 0xffff;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ assert(ARM_AM::getSOImmVal(Value) != -1 &&
+ "Out of range pc-relative fixup value!");
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 15;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ uint32_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0x3fffff & ((Value - 4) >> 1);
+ Binary = (Value & 0x7ff) << 16; // Low imm11 value.
+ Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
+ // positions in the destination opcode. x = unchanged, I = immediate value
+ // bit, S = sign extension bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0xfffff & ((Value - 2) >> 2);
+ Binary = (Value & 0x3ff) << 17; // Low imm10L value.
+ Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_cp:
+ // Offset by 4, and don't encode the low two bits. Two bytes of that
+ // 'off by 4' is implicitly handled by the half-word ordering of the
+ // Thumb encoding, so we only need to adjust by 2 here.
+ return ((Value - 2) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10) {
+ uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ }
+}
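Since adjustFixupValue is file-static, here is a standalone sketch of just the MOVW bit-scatter it performs for fixup_arm_movw_lo16, with a hand-checked example; illustrative only, not part of the original file.

#include <cassert>

// Re-derives (Hi4 << 16) | Lo12 from the case above; assumes Value fits 16 bits.
static unsigned scatterMovwImm16(unsigned Value) {
  unsigned Hi4  = (Value & 0xF000) >> 12;   // lands in inst{19-16}
  unsigned Lo12 =  Value & 0x0FFF;          // lands in inst{11-0}
  return (Hi4 << 16) | Lo12;
}

static void checkMovwScatter() {
  assert(scatterMovwImm16(0x1234) == 0x10234);
}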
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
+ : ARMAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ true);
+ }
+};
+
+// FIXME: Raise this to share code between Darwin and ELF.
+void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = 4; // FIXME: 2 for Thumb
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T), Subtype(st) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ return 4;
+ }
+}
+
+void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ if (TheTriple.getArchName() == "armv4t" ||
+ TheTriple.getArchName() == "thumbv4t")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T);
+ else if (TheTriple.getArchName() == "armv5e" ||
+ TheTriple.getArchName() == "thumbv5e")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ);
+ else if (TheTriple.getArchName() == "armv6" ||
+ TheTriple.getArchName() == "thumbv6")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
+ }
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on ARM");
+
+ return new ELFARMAsmBackend(T, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 000000000000..dbc3ee41f3da
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1834 @@
+//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMAddressingModes.h"
+#include "ARMBuildAttrs.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ // Per section and per symbol attributes are not supported.
+ // To implement them we would need the ability to delay this emission
+ // until the assembly file is fully parsed/generated as only then do we
+ // know the symbol and section numbers.
+ class AttributeEmitter {
+ public:
+ virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+ virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+ virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+ virtual void Finish() = 0;
+ virtual ~AttributeEmitter() {}
+ };
+
+ class AsmAttributeEmitter : public AttributeEmitter {
+ MCStreamer &Streamer;
+
+ public:
+ AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+ void MaybeSwitchVendor(StringRef Vendor) { }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ Streamer.EmitRawText("\t.eabi_attribute " +
+ Twine(Attribute) + ", " + Twine(Value));
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ switch (Attribute) {
+ case ARMBuildAttrs::CPU_name:
+ Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ break;
+ /* GAS requires .fpu to be emitted regardless of EABI attribute */
+ case ARMBuildAttrs::Advanced_SIMD_arch:
+ case ARMBuildAttrs::VFP_arch:
+ Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ break;
+ default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
+ }
+ }
+ void Finish() { }
+ };
+
+ class ObjectAttributeEmitter : public AttributeEmitter {
+ MCObjectStreamer &Streamer;
+ StringRef CurrentVendor;
+ SmallString<64> Contents;
+
+ public:
+ ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+ Streamer(Streamer_), CurrentVendor("") { }
+
+ void MaybeSwitchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor.empty())
+ CurrentVendor = Vendor;
+ else if (CurrentVendor == Vendor)
+ return;
+ else
+ Finish();
+
+ CurrentVendor = Vendor;
+
+ assert(Contents.size() == 0);
+ }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ // FIXME: should be ULEB
+ Contents += Attribute;
+ Contents += Value;
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ Contents += Attribute;
+ Contents += UppercaseString(String);
+ Contents += 0;
+ }
+
+ void Finish() {
+ const size_t ContentsSize = Contents.size();
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ Streamer.EmitBytes(Contents, 0);
+
+ Contents.clear();
+ }
+ };
+
+} // end of anonymous namespace
+
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+ AsmPrinter::EmitDwarfRegOp(MLoc);
+ else {
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
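+      // For example, S5 has SReg = 5 (odd), so it is described as
+      // DW_OP_regx(258) DW_OP_bit_piece(32, 32), i.e. the upper half of D2.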
+
+ unsigned SReg = Reg - ARM::S0;
+ bool odd = SReg & 0x1;
+ unsigned Rx = 256 + (SReg >> 1);
+
+ OutStreamer.AddComment("DW_OP_regx for S register");
+ EmitInt8(dwarf::DW_OP_regx);
+
+ OutStreamer.AddComment(Twine(SReg));
+ EmitULEB128(Rx);
+
+ if (odd) {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(32);
+ } else {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(0);
+ }
+ } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
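+      // For example, Q3 (QReg = 3) is described as DW_OP_regx(262) DW_OP_piece(8)
+      // DW_OP_regx(263) DW_OP_piece(8), i.e. D6 followed by D7.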
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D1);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D2);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+ }
+ }
+}
+
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ if (AFI->isThumbFunction()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer.EmitThumbFunc(CurrentFnSym);
+ }
+
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// runOnMachineFunction - This uses the EmitInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
+ switch (MO.getType()) {
+ default:
+ assert(0 && "<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ O << ARMInstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF == ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF == ARMII::MO_HI16))
+ O << ":upper16:";
+ O << Imm;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
+ O << *Mang->getSymbol(GV);
+
+ printOffset(MO.getOffset(), O);
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ }
+}
+
+//===--------------------------------------------------------------------===//
+
+MCSymbol *ARMAsmPrinter::
+GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *ARMAsmPrinter::
+GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid << '_' << uid2;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+ << getFunctionNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
+ printOperand(MI, OpNum, O);
+ return false;
+ case 'y': // Print a VFP single precision register as indexed double.
+      // This uses the ordering of the alias table to get the first 'd' register
+      // that overlaps the 's' register. Note that ARM::S0 happens to have an odd
+      // enum value, hence the odd modulus check below.
+ if (MI->getOperand(OpNum).isReg()) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ O << ARMInstPrinter::getRegisterName(TRI->getAliasSet(Reg)[0]) <<
+ (((Reg % 2) == 1) ? "[0]" : "[1]");
+ return false;
+ }
+ return true;
+ case 'B': // Bitwise inverse of integer or symbol without a preceding #.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << ~(MI->getOperand(OpNum).getImm());
+ return false;
+ case 'L': // The low 16 bits of an immediate constant.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << (MI->getOperand(OpNum).getImm() & 0xffff);
+ return false;
+ case 'M': { // A register range suitable for LDM/STM.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned RegBegin = MO.getReg();
+      // This takes advantage of the two-operand form of ldm/stm and of the fact
+      // that the registers we need are already operands of the inline asm
+      // statement.
+
+ O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
+
+      // FIXME: The register allocator may not have given us the registers in
+      // sequence, and they may not even be in ascending order. Fixing this
+      // will require changes in the register allocator that will need to be
+      // propagated down here if the operands change.
+ unsigned RegOps = OpNum + 1;
+ while (MI->getOperand(RegOps).isReg()) {
+ O << ", "
+ << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
+ RegOps++;
+ }
+
+ O << "}";
+
+ return false;
+ }
+ // These modifiers are not yet supported.
+ case 'p': // The high single-precision register of a VFP double-precision
+ // register.
+ case 'e': // The low doubleword register of a NEON quad register.
+ case 'f': // The high doubleword register of a NEON quad register.
+ case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
+ case 'Q': // The least significant register of a pair.
+ case 'R': // The most significant register of a pair.
+ case 'H': // The highest-numbered register of a pair.
+ return true;
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ case 'A': // A memory operand for a VLD1/VST1 instruction.
+ default: return true; // Unknown modifier.
+ case 'm': // The base register of a memory operand.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ O << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg());
+ return false;
+ }
+ }
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
+ return false;
+}
+
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
+ // Declare all the text sections up front (before the DWARF sections
+ // emitted by AsmPrinter::doInitialization) so the assembler will keep
+ // them together at the beginning of the object file. This helps
+      // avoid out-of-range branches that are due to a fundamental limitation of
+ // the way symbol offsets are encoded with the current Darwin ARM
+ // relocations.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(
+ getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextSection());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+ if (RelocM == Reloc::DynamicNoPIC) {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ } else {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ }
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
+ }
+ }
+
+ // Use unified assembler syntax.
+ OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
+
+  // Emit ARM Build Attributes
+  if (Subtarget->isTargetELF())
+    emitAttributes();
+}
+
+void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output non-lazy-pointers for external and common global variables.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+ emitARMAttributeSection();
+
+  /* GAS expects .fpu to be emitted, regardless of VFP build attribute */
+ bool emitFPU = false;
+ AttributeEmitter *AttrEmitter;
+ if (OutStreamer.hasRawTextSupport()) {
+ AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+ emitFPU = true;
+ } else {
+ MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+ AttrEmitter = new ObjectAttributeEmitter(O);
+ }
+
+ AttrEmitter->MaybeSwitchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString == "cortex-a8" ||
+ Subtarget->isCortexA8()) {
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+    // FIXME: figure out when this is emitted.
+    //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+    //                           ARMBuildAttrs::AllowWMMXv1);
+
+    // Add additional else-cases here.
+ } else if (CPUString == "xscale") {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (CPUString == "generic") {
+ // FIXME: Why these defaults?
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (Subtarget->hasNEON() && emitFPU) {
+    /* NEON is not exactly a VFP architecture, but GAS emits one of
+     * neon/vfpv3/vfpv2 for the .fpu parameter */
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ /* If emitted for NEON, omit from VFP below, since you can have both
+ * NEON and VFP in build attributes but only one .fpu */
+ emitFPU = false;
+ }
+
+ /* VFPv3 + .fpu */
+ if (Subtarget->hasVFP3()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
+
+ /* VFPv2 + .fpu */
+ } else if (Subtarget->hasVFP2()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ }
+
+ /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
+ * since NEON can have 1 (allowed) or 2 (MAC operations) */
+ if (Subtarget->hasNEON()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::Allowed);
+ }
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (NoInfsFPMath && NoNaNsFPMath)
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ // FIXME: add more flags to ARMBuildAttrs.h
+  // 8-byte alignment stuff.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+ }
+ // FIXME: Should we signal R9 usage?
+
+ if (Subtarget->hasDivide())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+ AttrEmitter->Finish();
+ delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
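+  //
+  // For the "aeabi" vendor subsection built by ObjectAttributeEmitter above,
+  // the section ends up as: 0x41, <section-length:4>, "aeabi", 0x00,
+  // ARMBuildAttrs::File (one byte), <size:4>, <attribute bytes>.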
+
+ if (OutStreamer.hasRawTextSupport())
+ return;
+
+ const ARMElfTargetObjectFile &TLOFELF =
+ static_cast<const ARMElfTargetObjectFile &>
+ (getObjFileLowering());
+
+ OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+  // Format version: 'A' (0x41).
+ OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+ case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
+ case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
+ }
+ return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ return Mang->getSymbol(GV);
+
+  // FIXME: Remove this when Darwin transitions to @GOT-like syntax.
+ MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+ MMIMachO.getGVStubEntry(MCSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+ MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ } else if (ACPV->isBlockAddress()) {
+ MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = ACPV->getGV();
+ MCSym = GetARMGVSymbol(GV);
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ ACPV->getLabelId(),
+ OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::CreateAdd(PCRelExpr,
+ MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+      // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+      // label, so just emit a local label and reference that instead.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = 1;
+ if (Opcode == ARM::BR_JTadd)
+ OpNum = 2;
+ else if (Opcode == ARM::BR_JTm)
+ OpNum = 3;
+
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, 4);
+ }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ unsigned OffsetWidth = 4;
+ if (MI->getOpcode() == ARM::t2TBB_JT)
+ OffsetWidth = 1;
+ else if (MI->getOpcode() == ARM::t2TBH_JT)
+ OffsetWidth = 2;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ if (OffsetWidth == 4) {
+ MCInst BrInst;
+ BrInst.setOpcode(ARM::t2B);
+ BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ OutStreamer.EmitInstruction(BrInst);
+ continue;
+ }
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - LJTI_0_0) / 2
+ // .byte (LBB1 - LJTI_0_0) / 2
+ const MCExpr *Expr =
+ MCBinaryExpr::CreateSub(MBBSymbolExpr,
+ MCSymbolRefExpr::Create(JTISymbol, OutContext),
+ OutContext);
+ Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, OffsetWidth);
+ }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+ const MCSymbol *Label,
+ unsigned pred, unsigned ccreg,
+ MCContext &Ctx) {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(Dest));
+ Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ // Add predicate operands.
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+ unsigned Opcode) {
+ MCInst TmpInst;
+
+ // Emit the instruction as usual, just patch the opcode.
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(Opcode);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+  assert(MI->getFlag(MachineInstr::FrameSetup) &&
+         "Only instructions involved in frame setup code are allowed");
+
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Opc = MI->getOpcode();
+ unsigned SrcReg, DstReg;
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+ // Two special cases:
+ // 1) tPUSH does not have src/dst regs.
+    // 2) for Thumb1 code we sometimes materialize the constant via a constpool
+    // load. Yes, this is pretty fragile, but for now I don't see a better
+    // way... :(
+ SrcReg = DstReg = ARM::SP;
+ } else {
+ SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI->getOperand(0).getReg();
+ }
+
+  // Try to figure out the unwinding opcode from the src / dst regs.
+ if (MI->getDesc().mayStore()) {
+ // Register saves.
+ assert(DstReg == ARM::SP &&
+ "Only stack pointer as a destination reg is supported");
+
+ SmallVector<unsigned, 4> RegList;
+ // Skip src & dst reg, and pred ops.
+ unsigned StartOp = 2 + 2;
+ // Use all the operands.
+ unsigned NumOffset = 0;
+
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::tPUSH:
+ // Special case here: no src & dst reg, but two extra imp ops.
+ StartOp = 2; NumOffset = 2;
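+      // Fall through to gather the register list like the store-multiple
+      // cases below.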
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ assert(SrcReg == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+ i != NumOps; ++i)
+ RegList.push_back(MI->getOperand(i).getReg());
+ break;
+ case ARM::STR_PRE:
+ assert(MI->getOperand(2).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ RegList.push_back(SrcReg);
+ break;
+ }
+ OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ } else {
+ // Changes of stack / frame pointer.
+ if (SrcReg == ARM::SP) {
+ int64_t Offset = 0;
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::MOVr:
+ Offset = 0;
+ break;
+ case ARM::ADDri:
+ Offset = -MI->getOperand(2).getImm();
+ break;
+ case ARM::SUBri:
+ Offset = MI->getOperand(2).getImm();
+ break;
+ case ARM::tSUBspi:
+ Offset = MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tADDspi:
+ case ARM::tADDrSPi:
+ Offset = -MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tLDRpci: {
+      // Grab the constpool index and check whether it corresponds to the
+      // original or a cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI.getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+      // FIXME: Check the user; it should be an "add" instruction!
+ Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ break;
+ }
+ }
+
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ OutStreamer.EmitPad(Offset);
+ } else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ } else if (DstReg == ARM::SP) {
+ // FIXME: .movsp goes here
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+    } else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ }
+}
+
+extern cl::opt<bool> EnableARMEHABI;
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ // Check for manual lowerings.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetCPISymbol(MI->getOperand(1).getIndex()),
+ MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm()),
+ MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ // Darwin call instructions are just normal call instructions with different
+ // clobber semantics (they clobber R9).
+ case ARM::BXr9_CALL:
+ case ARM::BX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tBXr9_CALL:
+ case ARM::tBX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::BMOVPCRXr9_CALL:
+ case ARM::BMOVPCRX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
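+      // The PC is read as the instruction address plus 8 in ARM mode and plus
+      // 4 in Thumb2 mode, so fold that adjustment into the pc-relative
+      // expression.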
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::tADDhirr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::ADDrr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ // Add 's' bit operand (always reg0 for this)
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+    // The LPC0 label is referenced by a constant pool entry in order to get
+ // a PC-relative address at the ldr instruction.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the load
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ MCInst LdStInst;
+ LdStInst.setOpcode(Opcode);
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ LdStInst.addOperand(MCOperand::CreateImm(0));
+ // Add predicate operands.
+ LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ OutStreamer.EmitInstruction(LdStInst);
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
+ /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+ /// in the function. The first operand is the ID# for this instruction, the
+ /// second is the index into the MachineConstantPool that this is, the third
+ /// is the size in bytes of this constant pool entry.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ EmitAlignment(2);
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+
+ return;
+ }
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::t2TBB_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBB);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+ return;
+ }
+ case ARM::t2TBH_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBH);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Make sure the Thumb jump table is 4-byte aligned.
+ if (Opc == ARM::tMOVr)
+ EmitAlignment(2);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDrr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b 1f
+ // movs r0, #1
+ // 1:
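+    //
+    // The PC reads four bytes past the mov, so "adds $val, #7" leaves $val
+    // pointing at the final "movs r0, #1" with the Thumb bit set, which is
+    // where a longjmp through this buffer resumes.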
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = GetARMSJLJEHLabel();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDi3);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateImm(7));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tSTRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tB);
+ TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ OutStreamer.EmitLabel(Label);
+ return;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
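+    //
+    // With the PC reading eight bytes ahead, $val holds the address of the
+    // trailing "mov r0, #1", and the "add pc, pc, #0" skips over it on the
+    // direct return path, so setjmp itself returns 0.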
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(2));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
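+      // The offset immediate is #4; as above, the operand value is scaled by
+      // 4 for the tLDR instruction, hence the encoded value 1.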
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 000000000000..7741fc4b34e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,127 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+  virtual void EmitConstantPool() {} // We emit constant pools ourselves.
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ /// EmitDwarfRegOp - Emit dwarf register operation.
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel(void) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 000000000000..458f7dd1f784
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,294 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only define enum types, and
+// so are safe to include here.
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+  enum CondCodes {  // Meaning (integer)          Meaning (floating-point)
+    EQ,             // Equal                      Equal
+    NE,             // Not equal                  Not equal, or unordered
+    HS,             // Carry set                  >, ==, or unordered
+    LO,             // Carry clear                Less than
+    MI,             // Minus, negative            Less than
+    PL,             // Plus, positive or zero     >, ==, or unordered
+    VS,             // Overflow                   Unordered
+    VC,             // No overflow                Not unordered
+    HI,             // Unsigned higher            Greater than, or unordered
+    LS,             // Unsigned lower or same     Less than or equal
+    GE,             // Greater than or equal      Greater than or equal
+    LT,             // Less than                  Less than, or unordered
+    GT,             // Greater than               Greater than
+    LE,             // Less than or equal         <, ==, or unordered
+    AL              // Always (unconditional)     Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ SY = 15,
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case S0: case D0: case Q0: return 0;
+ case R1: case S1: case D1: case Q1: return 1;
+ case R2: case S2: case D2: case Q2: return 2;
+ case R3: case S3: case D3: case Q3: return 3;
+ case R4: case S4: case D4: case Q4: return 4;
+ case R5: case S5: case D5: case Q5: return 5;
+ case R6: case S6: case D6: case Q6: return 6;
+ case R7: case S7: case D7: case Q7: return 7;
+ case R8: case S8: case D8: case Q8: return 8;
+ case R9: case S9: case D9: case Q9: return 9;
+ case R10: case S10: case D10: case Q10: return 10;
+ case R11: case S11: case D11: case Q11: return 11;
+ case R12: case S12: case D12: case Q12: return 12;
+ case SP: case S13: case D13: case Q13: return 13;
+ case LR: case S14: case D14: case Q14: return 14;
+ case PC: case S15: case D15: case Q15: return 15;
+
+ case S16: case D16: return 16;
+ case S17: case D17: return 17;
+ case S18: case D18: return 18;
+ case S19: case D19: return 19;
+ case S20: case D20: return 20;
+ case S21: case D21: return 21;
+ case S22: case D22: return 22;
+ case S23: case D23: return 23;
+ case S24: case D24: return 24;
+ case S25: case D25: return 25;
+ case S26: case D26: return 26;
+ case S27: case D27: return 27;
+ case S28: case D28: return 28;
+ case S29: case D29: return 29;
+ case S30: case D30: return 30;
+ case S31: case D31: return 31;
+ }
+}
+
+namespace ARMII {
+
+ /// ARM Index Modes
+ enum IndexMode {
+ IndexModeNone = 0,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3
+ };
+
+ /// ARM Addressing Modes
+ enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16
+ };
+
+ inline static const char *AddrModeToString(AddrMode addrmode) {
+ switch (addrmode) {
+ default: llvm_unreachable("Unknown memory operation");
+ case AddrModeNone: return "AddrModeNone";
+ case AddrMode1: return "AddrMode1";
+ case AddrMode2: return "AddrMode2";
+ case AddrMode3: return "AddrMode3";
+ case AddrMode4: return "AddrMode4";
+ case AddrMode5: return "AddrMode5";
+ case AddrMode6: return "AddrMode6";
+ case AddrModeT1_1: return "AddrModeT1_1";
+ case AddrModeT1_2: return "AddrModeT1_2";
+ case AddrModeT1_4: return "AddrModeT1_4";
+ case AddrModeT1_s: return "AddrModeT1_s";
+ case AddrModeT2_i12: return "AddrModeT2_i12";
+ case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_so: return "AddrModeT2_so";
+ case AddrModeT2_pc: return "AddrModeT2_pc";
+ case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+ case AddrMode_i12: return "AddrMode_i12";
+ }
+ }
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+  /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+  /// relocation containing the higher 16 bits of the non-lazy-ptr indirect
+  /// symbol, i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+  /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+  /// relocation containing the higher 16 bits of the PC relative address of
+  /// the non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+  /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 000000000000..649bd7d5ce3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,2584 @@
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "ARMGenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ unsigned MLxOpc; // MLA / MLS opcode
+ unsigned MulOpc; // Expanded multiplication opcode
+ unsigned AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
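+// The ARMBaseInstrInfo constructor below indexes this table by MLx opcode and
+// collects the expanded multiply and add/sub opcodes in MLxHazardOpcodes for
+// use by the ARM hazard recognizer.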
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfoImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
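+  // For example, a post-indexed "ldr r0, [r1], #4" becomes "ldr r0, [r1]"
+  // followed by "add r1, r1, #4"; for pre-indexed forms the add/sub is
+  // emitted before the memory access.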
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ bool isLoad = !MCID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
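+// Note: following the TargetInstrInfo convention, returning false below means
+// the branch structure of the block was successfully analyzed, while
+// returning true means the block could not be handled.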
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((isJumpTableBranchOpcode(SecondLastOpc) ||
+ isIndirectBranchOpcode(SecondLastOpc)) &&
+ isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
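+  // Otherwise CC1 subsumes CC2 only when every processor state satisfying CC2
+  // also satisfies CC1, e.g. HS (carry set) covers HI (carry set and zero
+  // clear).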
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // FIXME: This confuses implicit_def with optional CPSR def.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return false;
+
+ if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+LLVM_ATTRIBUTE_NOINLINE
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI);
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ assert(JTI < JT.size());
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
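+  // The eh_sjlj pseudo sizes below correspond to the fixed instruction
+  // sequences emitted by the ARM asm printer, e.g. 4 ARM instructions * 4
+  // bytes == 16 for Int_eh_sjlj_longjmp and 5 Thumb instructions * 2 bytes
+  // == 10 for tInt_eh_sjlj_longjmp.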
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+    // jumptable. The size is 4 + 4 * the number of entries. For TBB, each
+    // entry is one byte; for TBH, each entry is two bytes.
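+    // For example, a t2TBB_JT table with 5 targets is padded to 6 one-byte
+    // entries (see below) and reported as 6 * 1 + 4 == 10 bytes.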
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+    // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ return 0; // Not reached
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
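+  // VORRq implements a Q-register move as "vorr qd, qm, qm", so the source
+  // register is added a second time below.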
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
+}
+
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSTRspi:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64Pseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tLDRspi:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
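+// Each pc-relative use of a machine constant pool value needs its own PIC
+// label id, so rematerialization and duplication create a fresh constant pool
+// entry rather than reusing the original one.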
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
+  // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+}
+
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ if (Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+ bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+ if (isARMCP0 && isARMCP1) {
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (!isARMCP0 && !isARMCP1) {
+ return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+ }
+ return false;
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+      // Check if the loaded values, e.g. a constantpool or a global address,
+      // are the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
+ }
+
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled together. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
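+
+// Illustration (example offsets only): for two LDRi12 loads off the same
+// base with Offset1 = 0 and Offset2 = 16, (16 - 0) / 8 = 2 is within the
+// limit of 64, the opcodes match, and with NumLoads = 1 fewer than three
+// loads have been scheduled after Load1, so the hook returns true and the
+// two loads are clustered.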
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // here because of the special treatment of IT instructions below; otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. The boundary should be the
+ // actual instruction preceding the dbg_value instruction(s), just as it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ if (!NumCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned UnpredCost = Probability.getNumerator() * NumCycles;
+ UnpredCost /= Probability.getDenominator();
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (NumCycles + ExtraPredCycles) <= UnpredCost;
+}
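+
+// Worked example (all numbers assumed for illustration): with a branch
+// probability of 1/2, NumCycles = 4, ExtraPredCycles = 1, and a
+// misprediction penalty of 10 cycles, UnpredCost = 1 * 4 / 2 + 1 + 10 / 10
+// = 4 (integer division throughout), while the predicated cost is
+// 4 + 1 = 5, so if-conversion is rejected.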
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ const BranchProbability &Probability) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned TUnpredCost = Probability.getNumerator() * TCycles;
+ TUnpredCost /= Probability.getDenominator();
+
+ uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
+ unsigned FUnpredCost = Comp * FCycles;
+ FUnpredCost /= Probability.getDenominator();
+
+ unsigned UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+int llvm::getMatchingCondBranchOpcode(int Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ else if (Opc == ARM::tB)
+ return ARM::tBcc;
+ else if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+ return 0;
+}
+
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from the offset; clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ BaseReg = DestReg;
+ }
+}
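+
+// Illustration (example constant only): NumBytes = 0x1001 is not a valid
+// single SO-immediate, so the loop above peels off one rotated 8-bit chunk
+// per iteration and emits two adds, one of 0x1 and one of 0x1000, with
+// BaseReg rewired to DestReg after the first one.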
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immediate into this ADDri/SUBri
+ // as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from the offset; clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold the address computation if the opcode has offset bits.
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
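+
+// Illustration (example offset only): for a load using AddrMode_i12
+// (NumBits = 12, Scale = 1), an offset of 4100 does not fit in the 12-bit
+// field, so 4100 & 4095 = 4 is folded into the immediate operand, 4096 is
+// left in Offset for the caller to materialize, and the function returns
+// false.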
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::t2CMPri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
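+
+// Illustration (example operands only): "CMPri r0, 0" is reported as
+// SrcReg = r0, CmpMask = ~0, CmpValue = 0, while "TSTri r1, 255" is
+// reported as SrcReg = r1, CmpMask = 255, CmpValue = 0; any other compare
+// form is left unanalyzed and the hook returns false.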
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
+bool ARMBaseInstrInfo::
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+ if (CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
+ B = MI->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg()) continue;
+
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri: {
+ // Scan forward for the use of CPSR. If its condition code requires
+ // checking the V bit, then this transformation is not safe. If we can't
+ // find the CPSR use (i.e. it is used in another block), it's also not
+ // safe to perform the optimization.
+ bool isSafe = false;
+ I = CmpInstr;
+ E = MI->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // The condition code operand immediately precedes the CPSR operand.
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ switch (CC) {
+ default:
+ isSafe = true;
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
+ }
+ }
+
+ if (!isSafe)
+ return false;
+
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
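+
+// Illustration (schematic, example registers only): a "SUBri r0, r1, 1"
+// followed by "CMPri r0, 0" in the same block is rewritten, provided the
+// checks above pass (no intervening CPSR access and no flag user that needs
+// the V bit, i.e. VS, VC, GE, LT, GT or LE), by flipping the SUBri's
+// optional cc_out operand to a CPSR def so it becomes a flag-setting
+// subtract, and the CMPri is erased.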
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ *UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
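+
+// Illustration (example constant only): with r1 = MOVi32imm 0xF000F and a
+// single use r2 = ADDrr r0, r1, the constant splits into the two
+// SO-immediates 0xF and 0xF0000, so the pair is rewritten as two ADDri
+// additions of those parts and the MOVi32imm is erased.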
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ break;
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+
+ // The number of uOps for a load / store multiple is determined by the number
+ // of registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then the AGU takes an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++UOps;
+ return UOps;
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+ // If there is an odd number of registers or the address is not 64-bit
+ // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
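+
+// Illustration (example count only): a load multiple of 5 registers, e.g.
+// an LDMIA restoring 5 GPRs, is modeled on Cortex-A8 as 5 / 2 = 2 uops
+// plus 1 for the odd register, i.e. 3 (issued 2, 2, 1); on Cortex-A9
+// likewise as 3; and on other cores as the worst case of 5.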
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+ // If there is an odd number of 'S' registers or the address is not 64-bit
+ // aligned, then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
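+
+// Illustration (example register number only): for RegNo = 3, Cortex-A8
+// yields 3 / 2 + 1 = 2 plus 1 for the odd register, i.e. a def cycle of 3;
+// Cortex-A9 starts from 3 and adds 1 only for an odd number of 'S'
+// registers or a DefAlign below 8; other cores assume the worst case,
+// 3 + 2 = 5.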
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+ // If there is an odd number of registers or the address is not 64-bit
+ // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseMCID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+ // If there is an odd number of 'S' registers or the address is not 64-bit
+ // aligned, then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+ // If there is an odd number of registers or the address is not 64-bit
+ // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
+
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction; the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefMCID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def; assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseMCID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction, so we can't use DefIdx here. Just use
+ // the first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
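+
+// Illustration (example cycles only): if the def operand becomes available
+// in cycle 4 (DefCycle = 4) and the use reads its operand in cycle 2
+// (UseCycle = 2), the operand latency is 4 - 2 + 1 = 3, reduced to 2 when
+// the itinerary records a pipeline forwarding path between the two
+// instructions.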
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ if (DefMO.getReg() == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMCID.isBranch())
+ return 0;
+ }
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8_UPD:
+ case ARM::VLD1DUPq16_UPD:
+ case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8_UPD:
+ case ARM::VLD2DUPd16_UPD:
+ case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increase by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefMCID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
+ if (Subtarget.isCortexA9())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increase by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Class = MCID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR-updating
+ // instructions; this apparently increases their latencies.
+ *PredCost = 1;
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // Cortex-A8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 000000000000..507e8974bf7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,503 @@
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINSTRUCTIONINFO_H
+#define ARMBASEINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This five-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+ // IndexMode - Unindexed, pre-indexed, or post-indexed are valid for load
+ // and store ops only. The generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscellaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have an Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set fields while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+}
+
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
+ const ARMSubtarget &Subtarget;
+
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
+public:
+ // Return the non-pre/post-incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+ }
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// difference between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+ /// should be scheduled together. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const {
+ return NumCycles == 1;
+ }
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpMask, int &CmpValue) const;
+
+ /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction; try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+private:
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+ /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+ /// addition / subtraction opcode to expand to. It sets 'HasLane' to true for
+ /// MLx instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+ /// will cause stalls when scheduled after (within a 4-cycle window) a fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
+};
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = 0);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 000000000000..ba422952ac1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,1287 @@
+//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(), TII(tii), STI(sti),
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
+}
+
+const unsigned*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ // The Darwin ABI deviates from the standard ARM ABI. R9 is not a callee-saved
+ // register.
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
+ if (TFI->hasFP(MF))
+ Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
+ return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+ unsigned Reg) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && TFI->hasFP(MF))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case ARM::ssub_0:
+ case ARM::ssub_1:
+ case ARM::ssub_2:
+ case ARM::ssub_3: {
+ // S sub-registers.
+ if (A->getSize() == 8) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::DPR_8RegClass;
+ assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
+ if (A == &ARM::DPR_8RegClass)
+ return A;
+ return &ARM::DPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 16) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
+ return &ARM::QPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::SPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return &ARM::QQPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::dsub_0:
+ case ARM::dsub_1:
+ case ARM::dsub_2:
+ case ARM::dsub_3: {
+ // D sub-registers.
+ if (A->getSize() == 16) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B != &ARM::DPRRegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+ case ARM::dsub_4:
+ case ARM::dsub_5:
+ case ARM::dsub_6:
+ case ARM::dsub_7: {
+ // D sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+
+ case ARM::qsub_0:
+ case ARM::qsub_1: {
+ // Q sub-registers.
+ if (A->getSize() == 32) {
+ if (B == &ARM::QPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::QPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::qsub_2:
+ case ARM::qsub_3: {
+ // Q sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ }
+ return 0;
+}
+
+bool
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const {
+
+ unsigned Size = RC->getSize() * 8;
+ if (Size < 6)
+ return false;
+
+ NewSubIdx = 0; // Whole register.
+ unsigned NumRegs = SubIndices.size();
+ if (NumRegs == 8) {
+ // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[0] == ARM::dsub_0 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3 &&
+ SubIndices[4] == ARM::dsub_4 &&
+ SubIndices[5] == ARM::dsub_5 &&
+ SubIndices[6] == ARM::dsub_6 &&
+ SubIndices[7] == ARM::dsub_7);
+ } else if (NumRegs == 4) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 4 Q registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[1] == ARM::qsub_1 &&
+ SubIndices[2] == ARM::qsub_2 &&
+ SubIndices[3] == ARM::qsub_3);
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 4 D registers -> 1 QQ register.
+ if (Size >= 256 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 4 D registers -> 1 QQ register (2nd).
+ if (Size == 512 &&
+ SubIndices[1] == ARM::dsub_5 &&
+ SubIndices[2] == ARM::dsub_6 &&
+ SubIndices[3] == ARM::dsub_7) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 4 S registers -> 1 Q register.
+ if (Size >= 128 &&
+ SubIndices[1] == ARM::ssub_1 &&
+ SubIndices[2] == ARM::ssub_2 &&
+ SubIndices[3] == ARM::ssub_3) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ }
+ } else if (NumRegs == 2) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 2 Q registers -> 1 QQ register.
+ if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::qsub_2) {
+ // 2 Q registers -> 1 QQ register (2nd).
+ if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 2 D registers -> 1 Q register.
+ if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_2) {
+ // 2 D registers -> 1 Q register (2nd).
+ if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+ NewSubIdx = ARM::qsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+ NewSubIdx = ARM::qsub_2;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_6) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+ NewSubIdx = ARM::qsub_3;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 2 S registers -> 1 D register.
+ if (SubIndices[1] == ARM::ssub_1) {
+ if (Size >= 128)
+ NewSubIdx = ARM::dsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_2) {
+ // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+ NewSubIdx = ARM::dsub_1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
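+
+// Worked example of the logic above (values follow directly from the code):
+// combining {dsub_0, dsub_1} on a 16-byte register class (Size == 128, e.g.
+// QPR) returns true with NewSubIdx == 0, i.e. the D pair covers the whole
+// register, while the same pair on a 32-byte class (Size == 256, e.g. QQPR)
+// returns true with NewSubIdx == ARM::qsub_0.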
+
+const TargetRegisterClass*
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case ARM::GPRRegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::DPRRegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QQPRRegClassID:
+ case ARM::QQQQPRRegClassID:
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::GPRRegisterClass;
+}
+
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
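+
+// Worked example: for the GPR class in a function that has a frame pointer
+// and a reserved R9, the formula above yields 10 - 1 - 1 == 8 registers;
+// without either restriction the limit is 10.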
+
+/// getRawAllocationOrder - Returns the register allocation order for a
+/// specified register class with a target-dependent hint.
+ArrayRef<unsigned>
+ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+ // We only support even/odd hints for GPR and rGPR.
+ if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+ return RC->getRawAllocationOrder(MF);
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return RC->getRawAllocationOrder(MF);
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPREven1);
+ else
+ return ArrayRef<unsigned>(GPREven4);
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPREven2);
+ else
+ return ArrayRef<unsigned>(GPREven5);
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPREven3);
+ else
+ return ArrayRef<unsigned>(GPREven6);
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return RC->getRawAllocationOrder(MF);
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPROdd1);
+ else
+ return ArrayRef<unsigned>(GPROdd4);
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPROdd2);
+ else
+ return ArrayRef<unsigned>(GPROdd5);
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPROdd3);
+ else
+ return ArrayRef<unsigned>(GPROdd6);
+ }
+ }
+ return RC->getRawAllocationOrder(MF);
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register on success, 0 otherwise.
+unsigned
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
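+
+// Example (see getRegisterPairOdd / getRegisterPairEven below): for a given
+// MachineFunction MF, ResolveRegAllocHint(ARMRI::RegPairOdd, ARM::R0, MF)
+// yields ARM::R1, while a hint whose partner register is reserved (e.g. R8
+// when R9 is reserved) resolves to 0, i.e. no usable hint.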
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of an even / odd register pair and it has now been changed
+ // (e.g. coalesced) into a different register, the allocation hint of the
+ // other register of the pair must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ // CortexA9 has a Write-after-write hazard for NEON registers.
+ if (!STI.isCortexA9())
+ return false;
+
+ switch (RC->getID()) {
+ case ARM::DPRRegClassID:
+ case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::SPR_8RegClassID:
+ // Avoid reusing S, D, and Q registers.
+ // Don't increase register pressure for QQ and QQQQ.
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and Thumb1 allows positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach the locals. If a function has a
+ // smallish frame, it's less likely to have lots of spill and callee-saved
+ // space, so everything is more likely to be within range of the frame
+ // pointer. If the estimate is wrong, the scavenger will still make the
+ // access work; it just won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
+}
+
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
+}
+
+unsigned ARMBaseRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return FramePtr;
+ return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int ARMBaseRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return ARMGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, 0);
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1:
+ return ARM::R0;
+ case ARM::R3:
+ return ARM::R2;
+ case ARM::R5:
+ return ARM::R4;
+ case ARM::R7:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
+ case ARM::R9:
+ return isReservedReg(MF, ARM::R9) ? 0 : ARM::R8;
+ case ARM::R11:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1:
+ return ARM::S0;
+ case ARM::S3:
+ return ARM::S2;
+ case ARM::S5:
+ return ARM::S4;
+ case ARM::S7:
+ return ARM::S6;
+ case ARM::S9:
+ return ARM::S8;
+ case ARM::S11:
+ return ARM::S10;
+ case ARM::S13:
+ return ARM::S12;
+ case ARM::S15:
+ return ARM::S14;
+ case ARM::S17:
+ return ARM::S16;
+ case ARM::S19:
+ return ARM::S18;
+ case ARM::S21:
+ return ARM::S20;
+ case ARM::S23:
+ return ARM::S22;
+ case ARM::S25:
+ return ARM::S24;
+ case ARM::S27:
+ return ARM::S26;
+ case ARM::S29:
+ return ARM::S28;
+ case ARM::S31:
+ return ARM::S30;
+
+ case ARM::D1:
+ return ARM::D0;
+ case ARM::D3:
+ return ARM::D2;
+ case ARM::D5:
+ return ARM::D4;
+ case ARM::D7:
+ return ARM::D6;
+ case ARM::D9:
+ return ARM::D8;
+ case ARM::D11:
+ return ARM::D10;
+ case ARM::D13:
+ return ARM::D12;
+ case ARM::D15:
+ return ARM::D14;
+ case ARM::D17:
+ return ARM::D16;
+ case ARM::D19:
+ return ARM::D18;
+ case ARM::D21:
+ return ARM::D20;
+ case ARM::D23:
+ return ARM::D22;
+ case ARM::D25:
+ return ARM::D24;
+ case ARM::D27:
+ return ARM::D26;
+ case ARM::D29:
+ return ARM::D28;
+ case ARM::D31:
+ return ARM::D30;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0:
+ return ARM::R1;
+ case ARM::R2:
+ return ARM::R3;
+ case ARM::R4:
+ return ARM::R5;
+ case ARM::R6:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
+ case ARM::R8:
+ return isReservedReg(MF, ARM::R9) ? 0 : ARM::R9;
+ case ARM::R10:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0:
+ return ARM::S1;
+ case ARM::S2:
+ return ARM::S3;
+ case ARM::S4:
+ return ARM::S5;
+ case ARM::S6:
+ return ARM::S7;
+ case ARM::S8:
+ return ARM::S9;
+ case ARM::S10:
+ return ARM::S11;
+ case ARM::S12:
+ return ARM::S13;
+ case ARM::S14:
+ return ARM::S15;
+ case ARM::S16:
+ return ARM::S17;
+ case ARM::S18:
+ return ARM::S19;
+ case ARM::S20:
+ return ARM::S21;
+ case ARM::S22:
+ return ARM::S23;
+ case ARM::S24:
+ return ARM::S25;
+ case ARM::S26:
+ return ARM::S27;
+ case ARM::S28:
+ return ARM::S29;
+ case ARM::S30:
+ return ARM::S31;
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D14:
+ return ARM::D15;
+ case ARM::D16:
+ return ARM::D17;
+ case ARM::D18:
+ return ARM::D19;
+ case ARM::D20:
+ return ARM::D21;
+ case ARM::D22:
+ return ARM::D23;
+ case ARM::D24:
+ return ARM::D25;
+ case ARM::D26:
+ return ARM::D27;
+ case ARM::D28:
+ return ARM::D29;
+ case ARM::D30:
+ return ARM::D31;
+ }
+
+ return 0;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
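+
+// Worked example of the round-up above: with an 8-byte stack alignment and a
+// 20-byte outgoing argument area, Amount becomes (20 + 8 - 1) / 8 * 8 == 24.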
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer.
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
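+
+// Worked numeric example of the estimate above: for an ARM function with an
+// incoming SP-relative offset of -16 and a 64-byte local frame, the
+// FP-relative guess is -16 - 8 - 80 == -104 and the SP-relative guess is
+// 16 + 64 + 128 == 208. If either guess is legal for the instruction (and the
+// corresponding base register is usable), no virtual base register is
+// requested; otherwise the function returns true.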
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert(Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative offsets and i12 only positive ones, so pick
+ // the appropriate encoding based on the sign of Offset.
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
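+
+// Worked example: a Thumb1 SP-relative access (AddrModeT1_s: NumBits == 5,
+// Scale == 4, unsigned) is legal for total offsets that are multiples of 4
+// from 0 up to 31 * 4 == 124, whereas a Thumb2 i8-mode access with a negative
+// offset is legal down to -255.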
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMFrameLowering *TFI =
+ static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned FrameReg;
+
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+ "This code isn't needed if offset already handled!");
+
+ unsigned ScratchReg = 0;
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4/6.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ // Update the original instruction to use the scratch register.
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 000000000000..b4b4059e7361
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,216 @@
+//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEREGISTERINFO_H
+#define ARMBASEREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11:
+ // For Darwin we want r7 and lr to be next to each other.
+ return !isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11:
+ // Darwin has this second area.
+ return isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
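+
+// Summary of the three predicates above: on Darwin, area 1 is r0-r7 plus
+// lr/sp/pc, area 2 is r8-r11, and area 3 is d8-d15; on non-Darwin targets
+// r8-r11 fall into area 1 and area 2 is empty.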
+
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ const ARMBaseInstrInfo &TII;
+ const ARMSubtarget &STI;
+
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+ // Can only be subclassed.
+ explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &STI);
+
+ // Return the opcode that implements 'Op', or 0 if there is no such opcode.
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getBaseRegister() const { return BasePtr; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags)const;
+
+ /// Code Generation virtual methods...
+ virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+private:
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 000000000000..69eddf03ec94
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,131 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in the ARM ABI addenda document (ABI release 2.08).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum SpecialAttr {
+ // This is for the .cpu asm attr. It translates into one or more
+ // AttrType (below) entries in the .ARM.attributes section in the ELF.
+ SEL_CPU
+ };
+
+ enum AttrType {
+ // Rest correspond to ELF/.ARM.attributes
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ MPextension_use = 42, // was 70, 2.08 ABI
+ DIV_use = 44,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use_old = 70
+ };
+
+ // Magic numbers for .ARM.attributes
+ enum AttrMagic {
+ Format_Version = 0x41
+ };
+
+ // Legal Values for CPU_arch, (=6), uleb128
+ enum CPUArch {
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2F_S
+ v6K = 9, // e.g. ARM1136J_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13 // v7_M with DSP extensions
+ };
+
+ enum CPUArchProfile { // (=7), uleb128
+ Not_Applicable = 0, // pre v7, or cross-profile code
+ ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+ RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+ MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+ SystemProfile = (0x53) // 'S' Application or real-time profile
+ };
+
+ // The following have a lot of common use cases
+ enum {
+ // ARM_ISA_use (=8), uleb128 and THUMB_ISA_use (=9), uleb128
+ Not_Allowed = 0,
+ Allowed = 1,
+
+ // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+ AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+ // Tag_THUMB_ISA_use, (=9), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_ABI_FP_denormal, (=20), uleb128
+ PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+ // Tag_ABI_FP_number_model, (=23), uleb128
+ AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
+ AllowIEE754 = 3 // this code may use all the IEEE 754-defined FP encodings
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 000000000000..ff7db1ff62ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64 (CanFail is false), do not fail; spill it instead.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
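+
+// Example of the APCS assignment above: for a call whose arguments are three
+// f64 values, the first is split across R0/R1 and the second across R2/R3;
+// the third finds no registers left, this routine returns false (CanFail is
+// true), and the CC_ARM_APCS table below assigns it to the stack via
+// CCAssignToStack<8, 4>.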
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64 (CanFail is false), do not fail; spill it instead.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
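+
+// Example of the AAPCS assignment above: an f64 lands in an aligned pair,
+// R0/R1 or R2/R3. If R0 was already taken by an earlier argument, the value
+// goes to R2/R3 and R1 is shadowed (left unused), matching the AAPCS
+// register-alignment rule; when no pair is left and spilling is required here
+// (the 2nd half of a v2f64), the value takes an 8-byte, 8-byte-aligned slot.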
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 000000000000..d2981c0af8ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,164 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match if the original alignment of the argument is Align.
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 8>>
+]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
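
To make the difference between the conventions concrete, here is a small illustration (not part of this patch) of where the parameters of one sample prototype land, assuming a soft-float call so the f64 travels in core registers, and assuming the APCS custom handler packs it into the next two consecutive GPRs:

    // Illustration only; placements follow the rules defined above.
    double g(int a,    // APCS: r0       AAPCS: r0
             double d, // APCS: r1:r2    AAPCS: r2:r3 (even pair, r1 is padding)
             int b);   // APCS: r3       AAPCS: stack, first 4-byte slot
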
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 000000000000..d6fca6277501
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1898 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter : public MachineFunctionPass {
+ ARMJITInfo *JTI;
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ const ARMSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+ bool IsThumb;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+ public:
+ ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const ARMInstrInfo *)tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+ void emitDWordLE(uint64_t Binary);
+ void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+ void addPCLabel(unsigned LabelID);
+ void emitPseudoInstruction(const MachineInstr &MI);
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitSaturateInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the
+ // TableGen'erated getBinaryCodeForInstr() function to encode any
+ // operand values, instead querying getMachineOpValue() directly for
+ // each operand it needs to encode. Thus, any of the new encoder
+ // helper functions can simply return 0, as the values they return
+ // are already handled elsewhere. They are placeholders to allow this
+ // encoder to continue to function until the MC encoder is sufficiently
+ // far along that this one can be eliminated entirely.
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+ const { return 0; }
+ unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
+ unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getMsbOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getSsatBitPosValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const {return 0; }
+ uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+
+ unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+ uint32_t Binary;
+ Binary = Imm12 & 0xfff;
+ if (Imm12 >= 0)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+
+ unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+ return 0;
+ }
+
+ uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
+ }
+
+ uint32_t Binary = Imm12 & 0xfff;
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+ unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(unsigned Imm) const;
+
+ /// Routines that handle operands which add machine relocations to be
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0) const;
+ };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new ARMCodeEmitter(TM, JCE);
+}
+
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+ MF.getTarget().getRelocationModel() == Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+ JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
+ II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
+ TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+ JTI->Initialize(MF, IsPIC);
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return getARMRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV) const {
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ ACPV, MayNeedFarStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), ACPV,
+ MayNeedFarStub);
+ MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc,
+ intptr_t JTBase) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+void ARMCodeEmitter::emitWordLE(unsigned Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitDWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ llvm_unreachable("Unhandled instruction encoding format!");
+ break;
+ }
+ case ARMII::MiscFrm:
+ if (MI.getOpcode() == ARM::LEApcrelJT) {
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ }
+ llvm_unreachable("Unhandled instruction encoding!");
+ break;
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+
+ assert(ACPV->isGlobalValue() && "unsupported constant pool value");
+ const GlobalValue *GV = ACPV->getGV();
+ if (GV) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ isa<Function>(GV),
+ Subtarget->GVIsIndirectSymbol(GV, RelocM),
+ (intptr_t)ACPV);
+ } else {
+ emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ const Constant *CV = MCPE.Val.ConstVal;
+
+ DEBUG({
+ errs() << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ errs() << F->getName();
+ else
+ errs() << *CV;
+ errs() << '\n';
+ });
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = uint32_t(*CI->getValue().getRawData());
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType()->isFloatTy())
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType()->isDoubleTy())
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ } else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ }
+}
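
For the plain float/double branches above, the emitted word is just the IEEE-754 bit pattern of the constant. A small standalone check (not part of this patch) of what bitcastToAPInt() yields for two simple values:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float F = 1.0f;
      double D = 2.5;
      uint32_t FW;
      uint64_t DW;
      std::memcpy(&FW, &F, sizeof(FW)); // IEEE-754 single: 0x3f800000
      std::memcpy(&DW, &D, sizeof(DW)); // IEEE-754 double: 0x4004000000000000
      std::printf("float 1.0  -> emitWordLE(0x%08x)\n", FW);
      std::printf("double 2.5 -> emitDWordLE(0x%016llx)\n",
                  (unsigned long long)DW);
    }
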
+
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
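
A worked example (standalone, not part of this patch): materializing 0x12345678 into r0 becomes a movw of the low half and a movt of the high half, each encoding its 16-bit immediate as imm4:imm12. The helpers below hard-code the usual ARM field positions (condition at bit 28, Rd at bit 12) instead of the ARMII shift constants.

    #include <cstdint>
    #include <cstdio>

    uint32_t encodeMovW(unsigned Rd, uint16_t Imm16, uint32_t Cond = 0xE) {
      uint32_t Binary = 0x30u << 20;         // movw: Insts{27-20} = 0b00110000
      Binary |= Cond << 28;                  // condition field (AL = 0b1110)
      Binary |= Rd << 12;                    // Rd
      Binary |= Imm16 & 0xFFF;               // Insts{11-0}  = imm12
      Binary |= ((Imm16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
      return Binary;
    }

    uint32_t encodeMovT(unsigned Rd, uint16_t Imm16, uint32_t Cond = 0xE) {
      uint32_t Binary = 0x34u << 20;         // movt: Insts{27-20} = 0b00110100
      Binary |= Cond << 28;
      Binary |= Rd << 12;
      Binary |= Imm16 & 0xFFF;
      Binary |= ((Imm16 >> 12) & 0xF) << 16;
      return Binary;
    }

    int main() {
      uint32_t Value = 0x12345678;
      std::printf("movw r0, #0x%04x -> 0x%08x\n", Value & 0xFFFF,
                  encodeMovW(0, Value & 0xFFFF)); // 0xe3005678
      std::printf("movt r0, #0x%04x -> 0x%08x\n", Value >> 16,
                  encodeMovT(0, Value >> 16));    // 0xe3401234
    }
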
+
+void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+ "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V1);
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V2);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::RRX:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
+ DEBUG(errs() << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n');
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::BXr9_CALL:
+ case ARM::BMOVPCRXr9_CALL: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+ // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // PICADD is just an add instruction that implicitly reads pc.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+ // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+ // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+
+ case ARM::MOVi32imm:
+ // Two instructions to materialize a constant.
+ if (Subtarget->hasV6T2Ops())
+ emitMOVi32immInstruction(MI);
+ else
+ emitMOVi2piecesInstruction(MI);
+ break;
+
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::RRX:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
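
As a concrete instance of the bit patterns listed above (standalone sketch, not part of this patch): the register-controlled shifter operand `r2, lsl r3` of an instruction such as `add r0, r1, r2, lsl r3` puts Rm in bits 3-0, 0b0001 in bits 7-4 for LSL-by-register, and Rs in bits 11-8.

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned Rm = 2, Rs = 3;        // r2 shifted by r3
      uint32_t Shifter = 0;
      Shifter |= Rm;                  // Rm in bits 3-0
      Shifter |= 0x1u << 4;           // LSL by register: bits 7-4 = 0001
      Shifter |= Rs << 8;             // Rs in bits 11-8
      std::printf("shifter operand = 0x%03x\n", Shifter); // 0x312
    }
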
+
+unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+}
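
The rotate_imm:immed_8 split can be checked with a small standalone sketch (not part of this patch; encodeSOImm is a simplified stand-in for ARM_AM::getSOImmVal): 0x3FC is 0xFF rotated right by 30 bits, so it encodes as rotate_imm = 15 and immed_8 = 0xFF, while a value such as 0x101 is not representable at all.

    #include <cstdint>
    #include <cstdio>

    // Find (rotate_imm, immed_8) such that rotating immed_8 right by
    // 2*rotate_imm yields V; return the packed 12-bit field, or -1.
    int encodeSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotating V left by Rot undoes a rotate-right by Rot.
        uint32_t Imm8 = (V << Rot) | (Rot ? (V >> (32 - Rot)) : 0);
        if (Imm8 <= 0xFF)
          return ((Rot / 2) << 8) | Imm8;   // rotate_imm:immed_8
      }
      return -1;
    }

    int main() {
      std::printf("so_imm(0x3FC) = 0x%03x\n", encodeSOImm(0x3FC)); // 0xfff
      std::printf("so_imm(0x101) = %d\n", encodeSOImm(0x101));     // -1
    }
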
+
+unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const {
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i) {
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = MCID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+
+ if (MCID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(MCID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
+ // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
+
+ emitWordLE(Binary);
+}
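
For the BFC/BFI path above, a short standalone check (not part of this patch) of how lsb and msb fall out of the inverted-mask operand; the GCC/Clang builtins stand in for LLVM's CountTrailingZeros_32 and CountLeadingZeros_32, and the sample operand corresponds to a hypothetical `bfc r0, #8, #4`.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t InvMask = 0xFFFFF0FF;   // operand value: ~(mask of bits 11..8)
      uint32_t v = ~InvMask;           // 0x00000F00, the contiguous bitfield
      int lsb = __builtin_ctz(v);      // 8
      int msb = 31 - __builtin_clz(v); // 11
      std::printf("lsb = %d, msb = %d\n", lsb, msb);
    }
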
+
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+ if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+ MI.getOpcode() == ARM::STRi12) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDR_PRE.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set bit I(25), because this is not in immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+
+ // If this instr uses the scaled register offset/index encoding, set the
+ // shift_immed (bit[11:7]) and shift (bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Skip LDRD and STRD's second operand.
+ if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
+ ++OpIdx;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDRH_POST.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+ // If this instr is in register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+ // This instr is in immediate offset/index encoding, set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
+
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
+ for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
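
A concrete instance of the sub-mode and register-list encoding above (standalone sketch, not part of this patch): for `ldmia sp!, {r4, r5, lr}`, IA sets U and clears P, the writeback sets W, and each listed register contributes one bit of the low halfword. The literal bit positions 23/24/21 are assumptions matching the comments rather than the ARMII constants, and the base register and opcode bits are omitted.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Binary = 0;
      Binary |= 1u << 23;                  // U = 1 (increment); P (bit 24) = 0
      Binary |= 1u << 21;                  // W = 1 (base register writeback)
      const unsigned Regs[] = {4, 5, 14};  // r4, r5, lr
      for (unsigned R : Regs)
        Binary |= 1u << R;                 // register list in bits 15-0
      std::printf("mode + reglist bits = 0x%08x\n", Binary); // 0x00a04030
    }
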
+
+void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (MCID.getNumDefs() == 2)
+ Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+ // Many multiply instructions (e.g. MLA) have three src operands. Encode
+ // the third one as Rn (for multiply, that's in the same position as RdLo).
+ if (MCID.getNumOperands() > OpIdx &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+ if (MI.getOperand(OpIdx).isImm() &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // PKH instructions are finished at this point
+ if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (MCID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ MCID.Opcode != ARM::SSAT16 &&
+ MCID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (MCID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (MCID.Opcode == ARM::TPsoft) {
+ llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+ }
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+ << '\n');
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Handle jump tables.
+ if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex =
+ (MCID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (MCID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
+ // The return register is LR.
+ Binary |= getARMRegisterNumbering(ARM::LR);
+ else
+ // otherwise, set the return register
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+ RegD = getARMRegisterNumbering(RegD);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+ RegN = getARMRegisterNumbering(RegN);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+ RegM = getARMRegisterNumbering(RegM);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
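+
+// Illustrative sketch (kept out of the build): the single-precision register
+// split used by encodeVFPRd/Rn/Rm above, worked through for a hypothetical
+// S13 operand. An S-register number contributes its upper four bits to the
+// main register field and its low bit to the D/N/M extension bit.
+#if 0
+static void exampleSPRSplit() {
+ unsigned RegNum = 13;                      // S13
+ unsigned MainField = (RegNum & 0x1E) >> 1; // 6
+ unsigned ExtraBit = RegNum & 0x01;         // 1, goes to the D/N/M bit
+ (void)MainField; (void)ExtraBit;
+}
+#endif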
+
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+ assert((Binary & ARMII::D_BitShift) == 0 &&
+ (Binary & ARMII::N_BitShift) == 0 &&
+ (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, OpIdx+2);
+
+ // Count the number of registers.
+ unsigned NumRegs = 1;
+ for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
+ // Bit 8 will be set if <list> is a list of consecutive 64-bit registers
+ // (e.g., D0-D3); otherwise it will be 0, for a list of 32-bit registers.
+ if (Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
+
+ emitWordLE(Binary);
+}
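+
+// Illustrative sketch (kept out of the build): the register-count field
+// computed above, assuming a hypothetical VLDM of d0-d3. Bit 8 of the encoding
+// marks a 64-bit register list, whose count is expressed in 32-bit words.
+#if 0
+static unsigned exampleVFPRegCountField(bool Is64BitList) {
+ unsigned NumRegs = 4;             // d0, d1, d2, d3
+ return Is64BitList ? NumRegs * 2  // 8: the count is given in 32-bit words
+                    : NumRegs;     // 4 for an s-register list
+}
+#endif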
+
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = getARMRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = getARMRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = getARMRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
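+
+// Illustrative sketch (kept out of the build): the double-precision register
+// split used by encodeNEONRd/Rn/Rm above, worked through for a hypothetical
+// D17 operand. The low four bits go into the main register field and the
+// fifth bit into the D/N/M extension bit.
+#if 0
+static void exampleDPRSplit() {
+ unsigned RegNum = 17;                  // D17
+ unsigned MainField = RegNum & 0xf;     // 1
+ unsigned ExtraBit = (RegNum >> 4) & 1; // 1, goes to the D/N/M bit
+ (void)MainField; (void)ExtraBit;
+}
+#endif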
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
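+
+// Illustrative sketch (kept out of the build): the ARM-to-Thumb rewrite above
+// applied to a hypothetical encoding value. The U bit moves from bit 24 to
+// bit 28, and the fixed 0xf2/0xf3 prefix becomes 0xef/0xff.
+#if 0
+static unsigned exampleNEONToThumb() {
+ unsigned ARMBinary = 0xf3001000;       // hypothetical ARM form, U = 1
+ unsigned UBit = (ARMBinary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (ARMBinary & 0xffffff); // 0xff001000
+}
+#endif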
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
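+
+// Illustrative sketch (kept out of the build): the lane-field computation
+// above for a hypothetical 16-bit element at lane 1. The element size is
+// already encoded by TableGen, so only the remaining index bits are folded
+// into the opc1/opc2 fields.
+#if 0
+static void exampleNEONLaneFields() {
+ unsigned LaneShift = 1;         // 16-bit elements
+ unsigned Lane = 1 << LaneShift; // lane 1 -> 2
+ unsigned Opc1 = Lane >> 2;      // 0, placed at bit 21
+ unsigned Opc2 = Lane & 3;       // 2, placed at bits 6:5
+ (void)Opc1; (void)Opc2;
+}
+#endif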
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
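+
+// Illustrative sketch (kept out of the build): how the packed modified
+// immediate above is scattered into the instruction word, worked through for
+// a hypothetical packed value of 0x0FF (Op = 0, Cmode = 0, abcdefgh = 0xFF).
+#if 0
+static unsigned exampleNEONModImmFields() {
+ unsigned Imm = 0x0FF;
+ unsigned Op = (Imm >> 12) & 1;     // 0 -> bit 5
+ unsigned Cmode = (Imm >> 8) & 0xf; // 0 -> bits 11:8
+ unsigned I = (Imm >> 7) & 1;       // 1 -> bit 24
+ unsigned Imm3 = (Imm >> 4) & 0x7;  // 7 -> bits 18:16
+ unsigned Imm4 = Imm & 0xf;         // 0xF -> bits 3:0
+ return (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; // 0x0107000f
+}
+#endif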
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 000000000000..f45ebdc53500
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1906 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ unsigned MaxDisp;
+ bool NegOk;
+ bool IsSoImm;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
+ const ARMInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
+ void InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+ bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+ bool OptimizeThumb2Instructions(MachineFunction &MF);
+ bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &MF);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &MF) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (!isThumb)
+ return;
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned MBBId = MBB->getNumber();
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
+ }
+ }
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp && "Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
+ << " size " << BBSizes[J] << "\n");
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
+ MachineConstantPool &MCP = *MF.getConstantPool();
+
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+ HasInlineAsm = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF.RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
+
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(MF, CPEMIs);
+ if (isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(MF, CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+
+ /// Remove dead constant pool entries.
+ MadeChange |= RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= HandleConstantPoolUser(MF, i);
+ if (CPChange && ++NoCPIters > 30)
+ llvm_unreachable("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ llvm_unreachable("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= OptimizeThumb2Instructions(MF);
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(MF);
+
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= UndoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(errs() << '\n'; dumpBBs());
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
+ MF.push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ MF.getConstantPool()->getConstants();
+
+ const TargetData &TD = *MF.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
+ << "\n");
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures.
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case ARM::tBR_JTr:
+ // A Thumb1 table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
+ MF.EnsureAlignment(2U);
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
+ // FIXME: Add a pseudo ALIGN instruction instead.
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ break;
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is an 8-bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction is
+ // 4 bytes wide, this is always correct. We'll check for other
+ // displacements that fit in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+ // In Thumb mode, if this block is a constpool island, we may need padding
+ // so it's aligned on a 4-byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ ((Offset%4) != 0 || HasInlineAsm))
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4 != 0 || HasInlineAsm))
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || !Succ->begin()->isPHI())
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+
+ int delta = isThumb1 ? 2 : 4;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
+ // All BBOffsets following these blocks must be modified.
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ unsigned TotalAdj = 0;
+ if (isThumb && UserOffset%4 != 0) {
+ UserOffset -= 2;
+ TotalAdj = 2;
+ }
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0) {
+ TrialOffset += 2;
+ TotalAdj += 2;
+ }
+
+ // In Thumb2 mode, later branch adjustments can shift instructions up and
+ // cause alignment change. In the worst case scenario this can cause the
+ // user's effective address to be subtracted by 2 and the CPE's address to
+ // be plus 2.
+ if (isThumb2 && TotalAdj != 4)
+ MaxDisp -= (4 - TotalAdj);
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use of the full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use of the full range of soimm values.
+ }
+ return false;
+}
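+
+// Illustrative sketch (kept out of the build): the forward-displacement case
+// of the check above, worked through with hypothetical Thumb1 numbers. A user
+// at offset 102 (== 2 mod 4) is treated as if it were at 100, and a CPE trial
+// offset of 300 then fits easily within a 1020-byte tLDRpci range.
+#if 0
+static bool exampleOffsetIsInRange() {
+ unsigned UserOffset = 102, TrialOffset = 300, MaxDisp = 1020;
+ if (UserOffset % 4 != 0)
+   UserOffset -= 2;  // the hardware rounds the pc-relative base down
+ if (TrialOffset % 4 != 0)
+   TrialOffset += 2; // CPEs get rounded up to a multiple of 4
+ return TrialOffset - UserOffset <= MaxDisp; // 200 <= 1020 -> true
+}
+#endif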
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U) {
+ unsigned MaxDisp = U.MaxDisp;
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction.
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+}
+
+/// CPEIsInRange - Returns true if the distance between the specified MI and
+/// the specified constant pool entry instruction fits in MI's displacement
+/// field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+
+ if (DoDump) {
+ DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
+ for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
+ i < e; ++i) {
+ BBOffsets[i] += delta;
+ // If some existing blocks have padding, adjust the padding as needed, a
+ // bit tricky. delta can be negative so don't use % on that.
+ if (!isThumb)
+ continue;
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() && !HasInlineAsm) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned OldOffset = BBOffsets[i] - delta;
+ if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ }
+ }
+ // Thumb1 jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
+ MachineInstr *ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned OldMIOffset = NewMIOffset - delta;
+ if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = llvm::next(MBBI);
+ }
+}
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// LookForExistingCPEntry - see if the currently referenced CPE is in range;
+/// if not, see if a previously created clone of the CPE is in range, and if
+/// so, change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
+ DEBUG(errs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
+ DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
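+
+// Illustrative sketch (kept out of the build): the arithmetic behind the
+// table above. Each branch has a signed immediate of B bits scaled by S, so
+// the largest forward displacement is ((1 << (B - 1)) - 1) * S bytes.
+#if 0
+static void exampleUncondBrDisp() {
+ unsigned tBMax = ((1 << 10) - 1) * 2;  // 11-bit imm, 2-byte units ->     2046
+ unsigned t2BMax = ((1 << 23) - 1) * 2; // 24-bit imm, 2-byte units -> 16777214
+ unsigned BMax = ((1 << 23) - 1) * 4;   // 24-bit imm, 4-byte units -> 33554428
+ (void)tBMax; (void)t2BMax; (void)BMax;
+}
+#endif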
+
+/// LookForWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ bool FoundWaterThatWouldPad = false;
+ water_iterator IPThatWouldPad;
+ for (water_iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB))) {
+ unsigned WBBId = WaterBB->getNumber();
+ if (isThumb &&
+ (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!FoundWaterThatWouldPad) {
+ FoundWaterThatWouldPad = true;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ WaterIter = IP;
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ if (FoundWaterThatWouldPad) {
+ WaterIter = IPThatWouldPad;
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock == BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is accounted for
+ // inside OffsetIsInRange.)
+ if (BBHasFallthrough(UserMBB) &&
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ DEBUG(errs() << "Split at end of block\n");
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb1 ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb1 since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb1). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ BaseInsertOffset -= (isThumb1 ? 2 : 4);
+ EndInsertOffset -= (isThumb1 ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ DEBUG(errs() << "Split in middle of big block\n");
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC.
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (LookForWater(U, UserOffset, IP)) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(errs() << "No water found\n");
+ CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << '\t' << *UserMI);
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ // In Thumb1 mode, the size of the island may have been padded by two bytes
+ // to satisfy the alignment requirement, so the recorded size can still be 2
+ // even though the block is now empty; fold that remainder into Size as well.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
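+
+ // A small worked example of the check above: for an ARM branch at offset
+ // 0x1000 targeting a block at offset 0x1800, BrOffset = 0x1000 + 8 = 0x1008
+ // (the PC reads two instructions ahead), so the forward displacement is
+ // 0x1800 - 0x1008 = 0x7F8 = 2040 bytes and the branch is in range whenever
+ // MaxDisp >= 2040.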
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(MF, Br);
+ return FixUpConditionalBr(MF, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump;
+/// otherwise an extra intermediate branch instruction would be needed.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(errs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
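+
+ // Rough ranges involved in the change above: a Thumb1 tB reaches only about
+ // +/-2 KB, while tBfar, implemented with BL, gets
+ // MaxDisp = (1 << 21) * 2 = 0x400000 bytes (about 4 MB) at the cost of the
+ // instruction growing from 2 to 4 bytes, which is why BBSizes is bumped by 2.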
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
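+
+ // Size accounting sketch for the transformation above: one conditional
+ // branch is removed and a conditional plus an unconditional branch are
+ // added, so the net growth is the size of one unconditional branch, e.g.
+ // roughly +2 bytes with Thumb1 tB or +4 bytes with ARM B / Thumb2 t2B.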
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
+/// LR / restore LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates.
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
+ // Create the new insn and copy the predicate from the old.
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
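+
+ // In assembly terms the rewrite above is approximately
+ //   pop {pc}        ; tPOP_RET whose only explicit register is PC
+ // becoming
+ //   bx  lr          ; tBX_RET, with the predicate operands carried over
+ // which is only safe because LR was not clobbered by a tBfar far jump.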
+
+bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= OptimizeThumb2Branches(MF);
+ MadeChange |= OptimizeThumb2JumpTables(MF);
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
+ ImmBranch &Br = ImmBranches[i];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc: {
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
+ continue;
+
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ // Check if the distance is within 126 bytes. Subtract 2 from the starting
+ // offset because the cmp will be eliminated.
+ unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI;
+ if (CmpMI != Br.MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
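+
+ // Approximate ranges used above: tB covers ((1<<10)-1)*2 = 2046 bytes,
+ // tBcc covers ((1<<7)-1)*2 = 254 bytes, and CBZ/CBNZ only reach forward,
+ // up to 126 bytes (measured after subtracting 2 for the cmp that gets
+ // folded away).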
+
+/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible.
+bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid of some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = GetOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ // Negative offset is not ok. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (ByteOk || HalfWordOk) {
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ bool BaseRegKill = MI->getOperand(0).isKill();
+ if (!BaseRegKill)
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
+ MachineBasicBlock::iterator PrevI = MI;
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
+ continue;
+
+ MachineInstr *AddrMI = PrevI;
+ bool OptOk = true;
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg) {
+ OptOk = false;
+ break;
+ }
+ if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
+ OptOk = false;
+ break;
+ }
+ }
+ if (!OptOk)
+ continue;
+
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != BaseReg)
+ OptOk = false;
+
+ if (!OptOk)
+ continue;
+
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
+ // is 2-byte aligned. For now, asm printer will fix it up.
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
+ OrigSize += TII->GetInstSizeInBytes(LeaMI);
+ OrigSize += TII->GetInstSizeInBytes(MI);
+
+ AddrMI->eraseFromParent();
+ LeaMI->eraseFromParent();
+ MI->eraseFromParent();
+
+ int delta = OrigSize - NewSize;
+ BBSizes[MBB->getNumber()] -= delta;
+ AdjustBBOffsetsAfter(MBB, -delta);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
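+
+ // Sketch of the transformation above: the jump-table address materialization
+ // (tLEApcrelJT / t2LEApcrelJT), the add that forms the entry address, and the
+ // indirect branch are all deleted and replaced by a single t2TBB_JT or
+ // t2TBH_JT. Byte tables reach forward up to ((1<<8)-1)*2 = 510 bytes and
+ // halfword tables up to ((1<<16)-1)*2 = 131070 bytes, which is what the
+ // ByteOk / HalfWordOk scan checks.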
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer that the target blocks for the jump table come after the jump
+ // instruction so that we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them so that this holds.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
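+
+ // Layout sketch for the adjustment above, since TB[BH] offsets are unsigned
+ // and therefore forward-only:
+ //   before:  [target BB] ... [BB with the jump-table branch]   (negative offset)
+ //   after:   ... [BB with the jump-table branch] [target BB or "b target" trampoline]
+ // where the trampoline block is created by AdjustJTTargetBlockForward below
+ // when the target block itself cannot be moved.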
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block.
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 000000000000..165a1d849ad5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,130 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
+ ARMCP::ARMCPKind K,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)cval->getType()),
+ CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
+ const char *s, unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
+ CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
+ PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
+ ARMCP::ARMCPModifier Modif)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
+ CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
+ Modifier(Modif), AddCurrentAddress(false) {}
+
+const GlobalValue *ARMConstantPoolValue::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->CVal == CVal &&
+ CPV->LabelId == LabelId &&
+ CPV->PCAdjust == PCAdjust &&
+ CPV_streq(CPV->S, S) &&
+ CPV->Modifier == Modifier)
+ return i;
+ }
+ }
+
+ return -1;
+}
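+
+ // Example of the alignment test above: with a required Alignment of 4,
+ // AlignMask is 3, so an existing entry aligned to 8 is acceptable
+ // (8 & 3 == 0) while one aligned to 2 is rejected (2 & 3 == 2).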
+
+ARMConstantPoolValue::~ARMConstantPoolValue() {
+ free((void*)S);
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+}
+
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->CVal == CVal &&
+ ACPV->PCAdjust == PCAdjust &&
+ CPV_streq(ACPV->S, S) &&
+ ACPV->Modifier == Modifier) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
+void ARMConstantPoolValue::dump() const {
+ errs() << " " << *this;
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (CVal)
+ O << CVal->getName();
+ else
+ O << S;
+ if (Modifier) O << "(" << getModifierText() << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 000000000000..d008811c40e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,122 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class Constant;
+class BlockAddress;
+class GlobalValue;
+class LLVMContext;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPLSDA
+ };
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT,
+ GOTOFF,
+ GOTTPOFF,
+ TPOFF
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent the PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ // 8 for ARM, 4 for Thumb.
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
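+
+ // Illustrative use of PCAdjust (approximate ARM PIC sequence): the pool
+ // entry holds GV-(LPC0+8) and the generated code does
+ //   ldr r0, LCPI0_0
+ // LPC0:
+ //   add r0, pc, r0
+ // where reading pc at LPC0 yields LPC0+8 on ARM (LPC0+4 on Thumb), so the
+ // addition reconstructs the address of GV.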
+
+public:
+ ARMConstantPoolValue(const Constant *cval, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
+ ARMConstantPoolValue();
+ ~ARMConstantPoolValue();
+
+ const GlobalValue *getGV() const;
+ const char *getSymbol() const { return S; }
+ const BlockAddress *getBlockAddress() const;
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+ }
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+
+ virtual unsigned getRelocationInfo() const { return 2; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value
+ /// can share the same constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
+ void dump() const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
new file mode 100644
index 000000000000..51e68b4553ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMELFWriterInfo.h"
+#include "ARMRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the ARMELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+ARMELFWriterInfo::~ARMELFWriterInfo() {}
+
+unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case ARM::reloc_arm_absolute:
+ case ARM::reloc_arm_relative:
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_jt_base:
+ case ARM::reloc_arm_pic_jt:
+ assert(0 && "unsupported ARM relocation type"); break;
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ default:
+ llvm_unreachable("unknown ARM relocation type"); break;
+ }
+ return 0;
+}
+
+long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
+ return 0;
+}
+
+unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
+ return 0;
+}
+
+bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
+ return 1;
+}
+
+unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
+
+long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 &&
+ "ARMELFWriterInfo::computeRelocation() not implemented");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
new file mode 100644
index 000000000000..1c4e5329ac61
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_WRITER_INFO_H
+#define ARM_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class ARMELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ ARMELFWriterInfo(TargetMachine &TM);
+ virtual ~ARMELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // ARM_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..94b72fdb9a7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,1326 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+using namespace llvm;
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const MCInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addOperand(MO);
+ else
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructions, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ unsigned PseudoOpc;
+ unsigned RealOpc;
+ bool IsLoad;
+ bool HasWriteBack;
+ NEONRegSpacing RegSpacing;
+ unsigned char NumRegs; // D registers loaded or stored
+ unsigned char RegElts; // elements per D register; used for lane ops
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
+{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
+{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
+
+{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
+{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
+{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
+{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
+{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
+
+{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
+{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
+{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
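+  // The table is sorted by PseudoOpc, so a binary search finds the matching
+  // pseudo opcode.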
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
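+/// SingleSpc returns consecutive D subregisters; EvenDblSpc and OddDblSpc
+/// return every other D subregister, starting at dsub_0 or dsub_1.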
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+
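+  // Expand the Q, QQ, or QQQQ destination into the individual D-register defs
+  // expected by the real VLD instruction.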
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
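+  // Expand the Q, QQ, or QQQQ source into the individual D-register operands
+  // expected by the real VST instruction.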
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
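+  // The table operand is a Q or QQ super-register; pass its D subregisters to
+  // the real VTBL/VTBX instruction.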
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
+
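+  // Pre-v6T2 ARM has no movw/movt, so an ARM-mode i32 immediate is split into
+  // two shifter-operand immediates and expanded to MOVi + ORRri.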
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
+
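+  // Otherwise emit a movw/movt pair (or the Thumb2 equivalents), either with
+  // the low and high halves of the immediate or with a global address carrying
+  // LO16/HI16 target flags.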
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::VMOVScc:
+ case ARM::VMOVDcc: {
+ unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCr:
+ case ARM::MOVCCr: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCs: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addReg(MI.getOperand(3).getReg(),
+ getKillRegState(MI.getOperand(3).isKill()))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .addReg(MI.getOperand(6).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCi16: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCi:
+ case ARM::MOVCCi: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MVNCCi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
+ } else {
+ llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+    // These are just fancy MOVs instructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addReg(0)
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+ ARM_AM::lsr : ARM_AM::asr),
+ 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+    // This encodes as "MOVs Rd, Rm, rrx".
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tTPsoft:
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
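+    // Expand into a constant-pool load followed by a tPICADD that applies the
+    // PIC label.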
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOV_ga_dyn:
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_dyn:
+ case ARM::t2MOV_ga_pcrel: {
+    // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
+ bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = isPIC
+ ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+ unsigned HI16TF = isPIC
+ ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+ if (!isPIC) {
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::VMOVQQ: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+ unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+ unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
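+    // Copy each Q half with a VORRq (vorr with identical source operands acts
+    // as a register move).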
+ MachineInstrBuilder Even =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(EvenDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ MachineInstrBuilder Odd =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(OddDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ TransferImpOps(MI, Even, Odd);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLDMQIA: {
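+    // A Q-register load-multiple is expanded to a D-register VLDMDIA that
+    // defines both D subregisters (plus an implicit def of the Q register).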
+ unsigned NewOpc = ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA: {
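+    // Likewise, a Q-register store-multiple becomes a D-register VSTMDIA over
+    // both D subregisters.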
+ unsigned NewOpc = ARM::VSTMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0).addReg(D1);
+
+ if (SrcIsKill) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VDUPfqf:
+  case ARM::VDUPfdf: {
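+    // VDUPfdf/VDUPfqf duplicate an S register: find its containing D register
+    // and lane, then emit VDUPLN32d/VDUPLN32q from that lane.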
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
+ ARM::VDUPLN32d;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
+ &ARM::DPR_VFP2RegClass);
+ // The lane is [0,1] for the containing DReg superregister.
+ // Copy the dst/src register operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addReg(DReg);
+ ++OpIdx;
+ // Add the lane select operand.
+ MIB.addImm(Lane);
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
+
+ case ARM::VST1q8Pseudo:
+ case ARM::VST1q16Pseudo:
+ case ARM::VST1q32Pseudo:
+ case ARM::VST1q64Pseudo:
+ case ARM::VST1q8Pseudo_UPD:
+ case ARM::VST1q16Pseudo_UPD:
+ case ARM::VST1q32Pseudo_UPD:
+ case ARM::VST1q64Pseudo_UPD:
+ case ARM::VST2d8Pseudo:
+ case ARM::VST2d16Pseudo:
+ case ARM::VST2d32Pseudo:
+ case ARM::VST2q8Pseudo:
+ case ARM::VST2q16Pseudo:
+ case ARM::VST2q32Pseudo:
+ case ARM::VST2d8Pseudo_UPD:
+ case ARM::VST2d16Pseudo_UPD:
+ case ARM::VST2d32Pseudo_UPD:
+ case ARM::VST2q8Pseudo_UPD:
+ case ARM::VST2q16Pseudo_UPD:
+ case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST3d8Pseudo:
+ case ARM::VST3d16Pseudo:
+ case ARM::VST3d32Pseudo:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST3d8Pseudo_UPD:
+ case ARM::VST3d16Pseudo_UPD:
+ case ARM::VST3d32Pseudo_UPD:
+ case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST3q8Pseudo_UPD:
+ case ARM::VST3q16Pseudo_UPD:
+ case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
+ case ARM::VST3q8oddPseudo_UPD:
+ case ARM::VST3q16oddPseudo_UPD:
+ case ARM::VST3q32oddPseudo_UPD:
+ case ARM::VST4d8Pseudo:
+ case ARM::VST4d16Pseudo:
+ case ARM::VST4d32Pseudo:
+ case ARM::VST1d64QPseudo:
+ case ARM::VST4d8Pseudo_UPD:
+ case ARM::VST4d16Pseudo_UPD:
+ case ARM::VST4d32Pseudo_UPD:
+ case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST4q8Pseudo_UPD:
+ case ARM::VST4q16Pseudo_UPD:
+ case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
+ case ARM::VST4q8oddPseudo_UPD:
+ case ARM::VST4q16oddPseudo_UPD:
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ }
+
+ return false;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 000000000000..f469d7efe11a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,2099 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
+
+namespace {
+
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb;
+ LLVMContext *Context;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectIntCast(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, EVT VT);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+ unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
+
+ // Call handling routines.
+ private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool isARMNEONPred(const MachineInstr *MI);
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets *CPSR to true when the optional def is CPSR
+// rather than CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+
+  // If this is a Thumb2 function or the instruction is not in the NEON
+  // domain, it was already handled via isPredicable.
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return false;
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
+// If the machine instruction is predicable, go ahead and add the predicate
+// operands; if it needs default CC operands, add those as well.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for a description of why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+  // Do we use a predicate? Or is this a NEON instruction in ARM mode that has
+  // a predicate operand even though it is not marked predicable? If so, add
+  // the default predicate anyway.
+ if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ AddDefaultPred(MIB);
+
+  // Do we optionally set a predicate? The optional def is CPSR when the
+  // instruction sets the flags; all other optional defs in ARM are the CCR
+  // register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
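+  // If the instruction description has no explicit defs, the result appears
+  // in an implicit-def physical register; emit the instruction and then copy
+  // that register into the virtual ResultReg. The same pattern repeats in the
+  // other FastEmitInst_* helpers below.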
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm1).addImm(Imm2));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// Materialize a floating-point constant, either directly with a VFP3 FCONST
+// instruction or by loading it from the constant pool.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+  // Check whether we can use VFP3 instructions to materialize the constant;
+  // otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addFPImm(CFP));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+ // For now 32-bit only.
+ if (VT != MVT::i32) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
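+  // A value that fits in 16 bits can be materialized with a single movw
+  // (MOVi16 / t2MOVi16) on v6T2 and later.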
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+ unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), DestReg)
+ .addImm(CI->getSExtValue()));
+ return DestReg;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
+ else
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+
+ // TODO: Need more magic for ARM PIC.
+ if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
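+  // When PIC relocation is used, record a PC adjustment in the constant-pool
+  // value: 4 bytes in Thumb mode, 8 in ARM mode (the apparent PC offset).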
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+ ARMCP::CPValue, PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb) {
+ unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ else
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Addr.Base.Reg = Tmp;
+ return true;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If the address is based on a frame index and the offset needs to be
+ // simplified, put the frame address into a register, set the base type
+ // back to register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
+ Addr.Offset = 0;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags) {
+ // addrmode5 output depends on the SelectionDAG addressing code dividing the
+ // offset by 4 (it is multiplied back later). Do the same division here.
+ if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ }
+ AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::f32:
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ case MVT::f64:
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass);
+ unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg, getKillRegState(true));
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ // TODO: Factor this out.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ MVT SourceVT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())
+ && isTypeLegal(Ty, SourceVT)) {
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ switch (SourceVT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ break;
+ }
+
+ // Get the compare predicate.
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We mustn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+ .addReg(CmpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ unsigned CondReg;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ CondReg = ARM::CPSR;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero
+ = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(CondReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ // FIXME: Handle sign-extension where necessary.
+ if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+ else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ else return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ const Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+ else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ else return false;
+
+ // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+ unsigned ResultReg = createResultReg(RC);
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss, go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SDIV_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SREM_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT VT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ const Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ // Ignore fastcc. Silence compiler warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+ if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ }
+ }
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ EVT DestVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
+ EVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
+// A quick function that will emit a call for the named libcall Call, with the
+// arguments taken from the Instruction I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ const Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLr9 for darwin, BL otherwise.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if (isThumb)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addExternalSymbol(TLI.getLibcallName(Call));
+ else
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call)));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm or worry about intrinsics yet.
+ if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+ // Only handle global variable Callees.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgRegs.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLr9 for darwin, BL otherwise.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc = ARMSelectCallOp(GV);
+ if (isThumb)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addGlobalAddress(GV, 0, 0);
+ else
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectIntCast(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers. The high bits for a type smaller than
+ // the register size are assumed to be undefined.
+ const Type *DestTy = I->getType();
+ Value *Op = I->getOperand(0);
+ const Type *SrcTy = Op->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ if (isa<TruncInst>(I)) {
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+ }
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned Opc;
+ bool isZext = isa<ZExtInst>(I);
+ bool isBoolZext = false;
+ if (!SrcVT.isSimple())
+ return false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i16:
+ if (isZext)
+ Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr;
+ else
+ Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr;
+ break;
+ case MVT::i8:
+ if (isZext)
+ Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr;
+ else
+ Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr;
+ break;
+ case MVT::i1:
+ if (isZext) {
+ Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ isBoolZext = true;
+ break;
+ }
+ return false;
+ }
+
+ // FIXME: We could save an instruction in many cases by special-casing
+ // load instructions.
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+ if (isBoolZext)
+ MIB.addImm(1);
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectSIToFP(I);
+ case Instruction::FPToSI:
+ return SelectFPToSI(I);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectSDiv(I);
+ case Instruction::SRem:
+ return SelectSRem(I);
+ case Instruction::Call:
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntCast(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-darwin.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+
+ // Darwin only for now; Thumb1-only targets are not supported.
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ !DisableARMFastISel)
+ return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
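For context, a hedged editorial sketch (not part of this patch) of how the factory above is assumed to be reached: the generic instruction selector asks the target for a FastISel instance through a virtual createFastISel hook on TargetLowering, whose ARM override (in ARMISelLowering.cpp) simply forwards to ARM::createFastISel.

    // Illustrative sketch only -- assumed override in ARMISelLowering.cpp.
    // Returning 0 here makes the selector fall back to the SelectionDAG path.
    FastISel *
    ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
      return ARM::createFastISel(funcInfo);
    }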
diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
new file mode 100644
index 000000000000..350c92decdce
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
@@ -0,0 +1,97 @@
+//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for unconditional
+ // branch instructions.
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // conditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The following fixups are for the movt/movw pair;
+ // the 16-bit imm field is split into imm{15-12} and imm{11-0}.
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // It is possible to create an "immediate" that happens to be pcrel.
+ // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8)
+ // result in different reloc tags than the above two.
+ // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC
+ fixup_arm_movt_hi16_pcrel, // :upper16:
+ fixup_arm_movw_lo16_pcrel, // :lower16:
+ fixup_t2_movt_hi16_pcrel, // :upper16:
+ fixup_t2_movw_lo16_pcrel, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
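For orientation, a hedged sketch (not part of this patch) of how these fixup kinds are typically consumed: the ARM asm backend is assumed to describe each value, indexed by Kind - FirstTargetFixupKind, with an MCFixupKindInfo entry giving the fixup's name, bit offset, bit width, and PC-relative flag. The offsets and widths below are placeholders, not the real encodings.

    #include "ARMFixupKinds.h"
    #include "llvm/MC/MCFixupKindInfo.h"
    using namespace llvm;

    // Hypothetical excerpt of a fixup-info table; names mirror ARM::Fixups,
    // but the offset/size columns are illustrative only.
    static const MCFixupKindInfo ARMFixupInfos[] = {
      // name                      offset  bits  flags
      { "fixup_arm_ldst_pcrel_12",      0,   32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_t2_ldst_pcrel_12",       0,   32, MCFixupKindInfo::FKF_IsPCRel },
      // ... one entry per ARM::Fixups value, in declaration order ...
    };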
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 000000000000..381b404519e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1086 @@
+//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ if (STI.isTargetDarwin())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has a small immediate offset range for
+ // addressing the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const unsigned *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (MI->getOpcode() == ARM::LDMIA_RET ||
+ MI->getOpcode() == ARM::t2LDMIA_RET ||
+ MI->getOpcode() == ARM::LDMIA_UPD ||
+ MI->getOpcode() == ARM::t2LDMIA_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ ARMCC::AL, 0, TII, MIFlags);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ ARMCC::AL, 0, TII, MIFlags);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ // Move past area 1.
+ if (GPRCS1Size > 0) MBBI++;
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For Darwin, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) MBBI++;
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 3.
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since the vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ if (RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It would be better just to find a spare register here.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill));
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill));
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ // FIXME: Clarify FrameSetup flags here.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ RegInfo->getBaseRegister())
+ .addReg(ARM::SP));
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
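A quick worked example of the spill-area bookkeeping above, with illustrative numbers only: if the frame is NumBytes = 40 with GPRCS1Size = 16, GPRCS2Size = 8 and DPRCSSize = 8, then DPRCSOffset = 40 - (16 + 8 + 8) = 8, GPRCS2Offset = 8 + 8 = 16 and GPRCS1Offset = 16 + 8 = 24; NumBytes is then reset to DPRCSOffset, so the final emitSPUpdate allocates the remaining 8 bytes of locals below the callee-saved areas.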
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // the frame pointer stack slot, or the target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For Darwin, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad: if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since a vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
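+ // Transfer any remaining operands from the TCRETURN pseudo to the newly
+ // created tail-jump instruction.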
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
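+ // Default to SP-relative addressing; the cases below may switch to the
+ // frame pointer or the base pointer instead.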
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use the frame pointer to reference fixed objects. Also use it for locals
+ // if there are VLAs but no base pointer (the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() &&
+ !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ if (AFI->isThumb2Function()) {
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ } else if (AFI->isThumb2Function()) {
+ // Use add <rd>, sp, #<imm8>
+ // ldr <rd>, [sp, #<imm8>]
+ // if at all possible to save space.
+ if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+ return Offset;
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out-of-range references: ldr <rt>, [<rn>, #-<imm8>]
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
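+/// emitPushInst - Emit a store-multiple (or a single pre-indexed store) to
+/// spill the callee-saved registers accepted by the Func predicate.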
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
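+ // Walk the callee-saved registers from the end of the CSI list, grouping
+ // the ones accepted by Func so they can be pushed with one instruction.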
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP).setMIFlags(MIFlags));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP).setMIFlags(MIFlags);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (StrOpc == ARM::STR_PRE) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
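+/// emitPopInst - Emit a load-multiple (or a single post-indexed load) to
+/// reload the callee-saved registers accepted by the Func predicate, folding
+/// the return into the LDM when it is safe to pop directly into PC.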
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+ RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri ||
+ RetOpcode == ARM::TCRETURNriND);
+
+ SmallVector<unsigned, 4> Regs;
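+ // As in emitPushInst, walk the CSI list backwards and group consecutive
+ // registers so they can be popped with a single instruction.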
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ Reg = ARM::PC;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ // Fold the return instruction into the LDM.
+ DeleteRet = true;
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet)
+ MI->eraseFromParent();
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
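+ // Spill GPR callee-saved area 1, then area 2, then the DPR (VFP) area.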
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ MachineInstr::FrameSetup);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
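+ // Restore in reverse order: the DPR (VFP) area first, then GPR area 2,
+ // then GPR area 1.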
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register);
+
+ return true;
+}
+
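+/// GetFunctionSizeInBytes - Return the code size of the function by summing
+/// the encoded size of each machine instruction.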
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+/// FIXME: Make generic?
+static unsigned estimateStackSize(MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Addressing modes 4 & 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage of the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if a Thumb2 function requires stack realignment - it will be used
+ // as a scratch register. Also spill R4 if the function has variable-sized objects,
+ // since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It would be better just to find a spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getVarArgsRegSaveSize() > 0)
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+ // for sure what the stack size will be, but for this, an estimate is good
+ // enough. If anything changes it, it'll be a spill, which implies
+ // we've used all the registers and so R4 is already used, so not marking
+ // it here will be OK.
+ // FIXME: It would be better just to find a spare register here.
+ unsigned StackSize = estimateStackSize(MF);
+ if (MFI->hasVarSizedObjects() || StackSize > 508)
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ }
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases =
+ RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is >= 2048. This enables
+ // the use of BL to implement a far jump. If it turns out that it's not needed
+ // then the branch fix-up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF, this)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, or R7 is spilled,
+ // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If stack and double are 8-byte aligned and we are spilling an odd number
+ // of GPRs, spill one extra callee save GPR so we won't have to pad between
+ // the integer and double callee save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill a high register if the function is Thumb1
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 000000000000..61bb8afa40f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,76 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM-specific implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg, int SPAdj) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags = 0) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 000000000000..8d77b2d8383e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,219 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals that were merged into the single big one can be addressed using
+// offsets from the same base pointer (there is no need for a separate base
+// pointer for each global). Such a transformation can significantly reduce
+// the register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of the 3 arrays must be kept in registers, so this
+// code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+namespace {
+ class ARMGlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals");
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = I->getAlignment();
+ const Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+ if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
+ // FIXME: This currently breaks the EH processing due to the way the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 000000000000..787f6a279187
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,120 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause a 4-cycle stall.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const MCInstrDesc &LastMCID = LastMI->getDesc();
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastMCID.isBarrier() &&
+ // On A9, AGU and NEON/FPU are muxed.
+ !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = llvm::prior(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TRI))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = 0;
+ FpMLxStalls = 0;
+ ITBlockSize = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = 0;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 000000000000..2bc218d8566b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMSubtarget &STI;
+
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ARMBaseInstrInfo &tii,
+ const ARMBaseRegisterInfo &tri,
+ const ARMSubtarget &sti,
+ const ScheduleDAG *DAG) :
+ ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+ TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2c9481b86c55
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,3011 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMAddressingModes.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(true));
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMBaseTargetMachine &TM;
+ const ARMBaseInstrInfo *TII;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ SDNode *Select(SDNode *N);
+
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C,
+ bool CheckProfitability = true);
+ bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C) {
+ // Don't apply the profitability check
+ return SelectShifterOperandReg(N, A, B, C, false);
+ }
+
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
+ bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+ unsigned Scale);
+ bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2ShifterOperandReg(SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm);
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm);
+
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+ /// ARM.
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *Opcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
+ SDNode *SelectConcatVector(SDNode *N);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the value is a 32-bit
+// constant operand. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and has an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where N is in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
+
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA /
+/// MLS node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards
+/// (at least on current ARM implementations) which should be avoided.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+ if (MCID.mayStore())
+ return true;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isCortexA9())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
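+
+// Illustrative mapping for SelectAddrModeImm12 above: (add R1, #42) yields
+// Base = R1 and OffImm = 42, which the LDRi12-style load/store patterns
+// consume directly. Offsets outside [0, 4096) fall through to the
+// "base only" case, so the add is materialized separately and the access
+// uses a zero immediate.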
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse())
+ // Compute R +/- (R << N) and reuse it.
+ return false;
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
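+
+// Illustrative mappings for SelectLdStSOReg above: (add R1, (shl R2, 2))
+// becomes Base = R1, Offset = R2 with an "add, lsl #2" AM2 opcode, e.g. the
+// register-offset form "ldr r0, [r1, r2, lsl #2]". The ISD::MUL special
+// case turns (mul X, 5) into Base = Offset = X with an "add, lsl #2"
+// opcode, exploiting X * 5 == X + (X << 2).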
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return AM2_SHOP;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalized to X + -C; no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
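+
+// Illustrative mapping for SelectAddrMode5 above: addressing mode 5 encodes
+// the offset in words, so (add R1, #1020) produces Base = R1 and an AM5
+// opcode of (add, 255), the form used by VFP loads/stores such as
+// "vldr d0, [r1, #1020]". Offsets that are not multiples of 4 fail
+// isScaledConstantInRange and fall back to the base-only form.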
+
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
+ Addr = N;
+
+ unsigned Alignment = 0;
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned LSNAlign = LSN->getAlignment();
+ unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+ if (LSNAlign > MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
+ SDValue &Base, SDValue &Offset){
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || !NC->isNullValue())
+ return false;
+
+ Base = Offset = N;
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+ SDValue &Offset, unsigned Scale) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP))
+ return false;
+
+ // FIXME: Why do we explicitly check for a match here and then return false?
+ // Presumably to allow something else to match, but shouldn't this be
+ // documented?
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+ return false;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else {
+ Base = N;
+ }
+
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+ unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+ // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+ if (LHSC != 0 || RHSC != 0) return false;
+
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
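+
+// Illustrative mapping for SelectThumbAddrModeSP above: for an SP-relative
+// access such as (add SP, #16), the Scale = 4 division leaves OffImm = 4
+// (the offset in words), feeding the tLDRspi / tSTRspi forms mentioned
+// above. If the base is neither SP nor a frame index, or the offset is not
+// a word multiple the encoding can hold, the matcher returns false.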
+
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
+ SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match the base-register-only case. That is matched to a separate,
+ // lower-complexity pattern with an explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm){
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
+ SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm) {
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R + (R << [1,2,3]) and reuse it.
+ Base = N;
+ return false;
+ }
+
+ // Look for (R + R) or (R + (R << [1,2,3])).
+ unsigned ShAmt = 0;
+ Base = N.getOperand(0);
+ OffReg = N.getOperand(1);
+
+ // Swap if it is ((R << c) + R).
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg);
+ if (ShOpcVal != ARM_AM::lsl) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
+ if (ShOpcVal == ARM_AM::lsl)
+ std::swap(Base, OffReg);
+ }
+
+ if (ShOpcVal == ARM_AM::lsl) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+
+ return true;
+}
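+
+// Illustrative mapping for SelectT2AddrModeSoReg above: (add R1, (shl R2, 2))
+// becomes Base = R1, OffReg = R2, ShImm = 2, e.g. the Thumb-2
+// register-offset form "ldr.w r0, [r1, r2, lsl #2]". Only lsl amounts of
+// 0-3 can be encoded, so larger or non-lsl shifts are left inside OffReg
+// and ShImm stays 0.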
+
+//===--------------------------------------------------------------------===//
+
+/// getAL - Returns an ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
+ }
+
+ return NULL;
+}
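+
+// Illustrative case for SelectARMIndexedLoad above: a post-increment i32
+// load whose offset is the constant 4 matches the immediate path of
+// SelectAddrMode2Offset and is emitted as LDR_POST; the machine node
+// produces the loaded value, the updated base register, and the chain,
+// matching the three result types passed to getMachineNode.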
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+ break;
+ case MVT::i16:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+ break;
+ case MVT::i8:
+ case MVT::i1:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+ break;
+ default:
+ return NULL;
+ }
+ Match = true;
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 5);
+ }
+
+ return NULL;
+}
+
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+ bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, MVT::i32);
+}
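+
+// Worked example (illustrative) for GetVLDSTAlign above: a VLD2 of 128-bit
+// vectors covers four D registers (NumRegs = 4), so an incoming alignment
+// of 64 bytes is clamped to 32, while anything below 8 bytes is dropped to
+// 0 and no alignment is encoded.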
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+ EVT AddrTy = MemAddr.getValueType();
+
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
+
+ // Load the odd subregs.
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
+ if (NumVecs == 1) {
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VSt =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ // Transfer memoperands.
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
+ cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ return VStB;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(SuperReg);
+ Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs, unsigned *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
+ bool isSigned) {
+ if (!Subtarget->hasV6T2Ops())
+ return NULL;
+
+ unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ unsigned Width = CountTrailingOnes_32(And_imm);
+ unsigned LSB = Srl_imm;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ unsigned Width = 32 - Srl_imm;
+ int LSB = Srl_imm - Shl_imm;
+ if (LSB < 0)
+ return NULL;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+}
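+
+// Worked examples (illustrative) for SelectV6T2BitfieldExtractOp above:
+// (and (srl X, 8), 0xff) extracts bits [15:8] and becomes UBFX X, #8, #8
+// (LSB = 8, Width = 8), while (sra (shl X, 16), 24) sign-extends bits
+// [15:8] and becomes SBFX X, #8, #8. A mask that is not a run of low bits,
+// or a negative LSB, bails out so the generic patterns can handle the node.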
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ Opc = ARM::t2MOVCCi;
+ } else if (TrueImm <= 0xffff) {
+ Opc = ARM::t2MOVCCi16;
+ } else if (is_t2_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::t2MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+ // Large immediate.
+ Opc = ARM::t2MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = is_so_imm(TrueImm);
+ if (isSoImm) {
+ Opc = ARM::MOVCCi;
+ } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+ Opc = ARM::MOVCCi16;
+ } else if (is_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() &&
+ (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+ // Large immediate.
+ Opc = ARM::MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ SDValue CCR = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also VMOVScc and VMOVDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
+}
+
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
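+ // For example, concatenating two v2f32 values into a v4f32: PairDRegs
+ // builds a REG_SEQUENCE that places the two D registers into one Q register.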
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+}
+
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->hasThumb2())
+ // Thumb2-aware targets have the MOVT instruction, so all immediates can
+ // be done with MOV + MOVT, at worst.
+ UseCP = false;
+ else {
+ if (Subtarget->isThumb()) {
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ } else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ }
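+ // Illustrative values: on pre-v6T2 ARM, 0x00ab0000 is a rotated 8-bit
+ // immediate and needs no pool, while 0x12345678 has four non-zero bytes
+ // and is loaded from the constant pool below. On Thumb1, 0x1fe00
+ // (0xff << 9) is a shifted 8-bit value and also avoids the pool.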
+
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb1Only()) {
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
+ Ops, 4);
+ } else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 5);
+ }
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb1Only()) {
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ } else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+ break;
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
+ return I;
+ break;
+ case ISD::MUL:
+ if (Subtarget->isThumb1Only())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
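+ // Strength-reduce multiplication by 2^n+1 or 2^n-1 into a shifted add/rsb.
+ // Illustrative examples: x*9 = x + (x << 3) -> add r, x, x, lsl #3 and
+ // x*7 = (x << 3) - x -> rsb r, x, x, lsl #3.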
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
+ // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
+ // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the
+ // top 16 bits of the result come entirely from c2 and the lower 16 bits
+ // entirely from x. That is equal to (or (and x, 0xffff), (and c2, 0xffff0000)).
+ // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
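+ // Illustrative example: c1 = 0xabcdffff, c2 = 0xabcd0000 gives
+ // (x | 0xabcd0000) & 0xabcdffff == (x & 0xffff) | 0xabcd0000,
+ // which is exactly "movt x, #0xabcd".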
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ break;
+ unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ? ARM::t2MOVTi16
+ : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
+ if (!Opc)
+ break;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ break;
+ if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
+ SDValue N2 = N0.getOperand(1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C)
+ break;
+ unsigned N1CVal = N1C->getZExtValue();
+ unsigned N2CVal = N2C->getZExtValue();
+ if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
+ (N1CVal & 0xffffU) == 0xffffU &&
+ (N2CVal & 0xffffU) == 0x0U) {
+ SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
+ MVT::i32);
+ SDValue Ops[] = { N0.getOperand(0), Imm16,
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ }
+ }
+ break;
+ }
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::LOAD: {
+ SDNode *ResNode = 0;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ResNode = SelectT2IndexedLoad(N);
+ else
+ ResNode = SelectARMIndexedLoad(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ?
+ ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InFlag);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(N);
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
+
+ case ARMISD::VLD2DUP: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
+ ARM::VLD2DUPd32Pseudo };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+ ARM::VLD2DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+ ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+ ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+ ARM::VLD2q32Pseudo_UPD };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+ ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+ ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+ ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+ ARM::VST2q32Pseudo_UPD };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_ldrexd: {
+ SDValue MemAddr = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+
+ unsigned NewOpc = ARM::LDREXD;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ NewOpc = ARM::t2LDREXD;
+
+ // arm_ldrexd returns an i64 value in {i32, i32}.
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ // place arguments in the right order
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+
+ // Until there's support for specifying explicit register constraints
+ // (such as an even/odd register pair), hardcode ldrexd to always use
+ // the pair [R0, R1] to hold the load result.
+ Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
+ SDValue(Ld, 0), SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
+ SDValue(Ld, 1), Chain.getValue(1));
+
+ // Remap uses.
+ SDValue Glue = Chain.getValue(1);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R0, MVT::i32, Glue);
+ Glue = Result.getValue(2);
+ ReplaceUses(SDValue(N, 0), Result);
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R1, MVT::i32, Glue);
+ Glue = Result.getValue(2);
+ ReplaceUses(SDValue(N, 1), Result);
+ }
+
+ ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ return NULL;
+ }
+
+ case Intrinsic::arm_strexd: {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Val0 = N->getOperand(2);
+ SDValue Val1 = N->getOperand(3);
+ SDValue MemAddr = N->getOperand(4);
+
+ // Until there's support for specifying explicit register constraints
+ // (such as an even/odd register pair), hardcode strexd to always use
+ // the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
+ Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
+ SDValue(0, 0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
+
+ SDValue Glue = Chain.getValue(1);
+ Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R2, MVT::i32, Glue);
+ Glue = Val0.getValue(1);
+ Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R3, MVT::i32, Glue);
+
+ // Store-exclusive double returns an i32 value which is the status of
+ // the issued store.
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ // place arguments in the right order
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+
+ unsigned NewOpc = ARM::STREXD;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ NewOpc = ARM::t2STREXD;
+
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
+ }
+
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld2: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld3: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2lane: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ }
+ break;
+ }
+
+ case ARMISD::VTBL1: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ }
+ case ARMISD::VTBL2: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation: the two VTBL table
+ // entries must end up in consecutive D registers.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ Ops.data(), Ops.size());
+ }
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
+ }
+
+ return SelectCode(N);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 000000000000..cf8c5baa8e7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,7978 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(false));
+
+cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+namespace llvm {
+ class ARMCCState : public CCState {
+ public:
+ ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C, ParmContext PC)
+ : CCState(CC, isVarArg, MF, TM, locs, C) {
+ assert(((PC == Call) || (PC == Prologue)) &&
+ "ARMCCState users must specify whether their context is call"
+ "or prologue generation.");
+ CallOrPrologue = PC;
+ }
+ };
+}
+
+// The APCS parameter registers.
+static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
+void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
+ EVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+
+ EVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ if (ElemTy != MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction(VT.getSimpleVT(),
+ (MVT::SimpleValueType)InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::AND, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::OR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::DPRRegisterClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::QPRRegisterClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new ARMElfTargetObjectFile();
+}
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
+
+ if (Subtarget->isTargetDarwin()) {
+ // Use VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isAAPCS_ABI()) {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
+ setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
+ setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ }
+
+ if (Subtarget->isThumb1Only())
+ addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ else
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP supports any arithmetic operations on it.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+ }
+
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ if (!Subtarget->isThumb1Only()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+ }
+
+ // i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+ || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
+ // v7M has a hardware divider
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
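+ // So on AAPCS targets without a hardware divider, an i32 sdiv should end
+ // up as a call to __aeabi_idiv (registered above); other targets fall back
+ // to the default __divsi3 libcall.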
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now)
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+    // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
+    // iff the target supports VFP2.
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+ }
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Various VFP goodness
+ if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (Subtarget->hasVFP2()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+ // Special handling for half-precision FP.
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ }
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
+
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::OR);
+ if (Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::AND);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ //// temporary - rewrite interface to use type
+ maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ benefitFromCodePlacementOpt = true;
+
+ setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+}
+
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of its super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
+  // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPZ: return "ARMISD::CMPZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+
+ case ARMISD::RBIT: return "ARMISD::RBIT";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
+
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
+ case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
+ case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
+ case ARMISD::VSHRN: return "ARMISD::VSHRN";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ }
+}
+
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return ARM::QQPRRegisterClass;
+ else if (VT == MVT::v8i64)
+ return ARM::QQQQPRRegisterClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
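+// Pick a per-node scheduling preference: favor latency scheduling for nodes
+// that produce floating-point or vector values or whose first result takes
+// more than two cycles; otherwise schedule to reduce register pressure.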
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+  // Loads are scheduled for latency even if the instruction itinerary
+  // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+
+ if (MCID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
+ return Sched::Latency;
+
+ return Sched::RegPressure;
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
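+// ARMGenCallingConv.inc is generated by TableGen from ARMCallingConv.td and
+// defines the CC_ARM_* and RetCC_ARM_* assignment functions referenced below.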
+#include "ARMGenCallingConv.inc"
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
+ // Use target triple & subtarget features to do actual dispatch.
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ }
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 or half of a v2f64.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
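+/// PassF64ArgInRegs - Split an f64 argument into two i32 halves with
+/// ARMISD::VMOVRRD and pass them in their assigned locations; the second half
+/// is stored to the stack when its location is a memory slot.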
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const {
+
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ dl, DAG, NextVA,
+ Flags));
+ }
+}
+
+/// LowerCall - Lower a call into a callseq_start <-
+/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue
+ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Temporarily disable tail calls so things don't break.
+ if (!EnableARMTailCalls)
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ RegsToPassVector RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ if (CCInfo.isFirstByValRegValid()) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned int i, j;
+ for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+ offset = ARM::R4 - CCInfo.getFirstByValReg();
+ CCInfo.clearFirstByValReg();
+ }
+
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile=*/false,
+ /*AlwaysInline=*/false,
+ MachinePointerInfo(0),
+ MachinePointerInfo(0)));
+
+ } else if (!IsSibCall) {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+  // Tail call byval lowering might overwrite argument registers, so in the
+  // case of tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (EnableARMLongCalls) {
+ assert (getTargetMachine().getRelocationModel() == Reloc::Static
+ && "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+}
+
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to ensure
+/// this.
+void
+llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+ if ((!State->isFirstByValRegValid()) &&
+ (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
+ }
+ // Confiscate any remaining parameter registers to preclude their
+ // assignment to subsequent parameters.
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+  // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
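+/// LowerReturn - Lower outgoing return values into the physical registers
+/// required by the calling convention, splitting f64 and v2f64 values into
+/// i32 pairs via ARMISD::VMOVRRD where needed.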
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(1), Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+    // Guarantee that all emitted copies are stuck together by chaining them
+    // through the flag value, so nothing else is scheduled between them.
+ Flag = Chain.getValue(1);
+ }
+
+ SDValue result;
+ if (Flag.getNode())
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else // Return Void
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+ return result;
+}
+
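+// Return true if the value produced by N is consumed only by a return: either
+// directly through CopyToReg nodes feeding ARMISD::RET_FLAG, via VMOVRRD for
+// an f64 split into a GPR pair, or via a bitcast for an f32 returned in a GPR.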
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ unsigned NumCopies = 0;
+ SDNode* Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
+
+ bool HasRet = false;
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+ }
+
+ return HasRet;
+}
+
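+// A call marked 'tail' may be emitted as a tail call only when ARM tail calls
+// are enabled and the target is not Thumb1.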
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls)
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form an addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
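+// ARM emits its jump tables inline in the function body rather than in a
+// separate jump-table section.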
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
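+// Lower a BlockAddress node by materializing its address from the constant
+// pool, adding a PIC base when the relocation model is not static.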
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress,
+ PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = GA->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (GV->isDeclaration()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ // Initial exec model.
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+ // otherwise use the "Local Exec" TLS Model
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
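+// Lower a global address for ELF targets. For PIC code, load the address via
+// a GOT or GOTOFF constant-pool entry; otherwise use a movw/movt pair when
+// the subtarget supports it, or fall back to a constant-pool load.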
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+}
+
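+// Lower a global address for Darwin targets: prefer a movw/movt pair when the
+// subtarget supports it (currently only for non-static codegen, see the FIXME
+// below); otherwise go through a constant-pool entry, adding a PIC base and a
+// non-lazy-pointer load when required.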
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // FIXME: Enable this for static codegen when tool issues are fixed.
+ if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ if (RelocM == Reloc::Static)
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+ unsigned Wrapper = (RelocM == Reloc::PIC_)
+ ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ unsigned ARMPCLabelIndex = 0;
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ } else {
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+ const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+    // Some ARMv6 CPUs can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue Op5 = Op.getOperand(5);
+ bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+ unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
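+  // Pick the DMB option: SY/ST order accesses across the full system, while
+  // ISH/ISHST are limited to the inner shareable domain; the *ST variants
+  // order stores only.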
+ ARM_MB::MemBOpt DMBOpt;
+ if (isDeviceBarrier)
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+ else
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+  // ARM pre v5TE and Thumb1 do not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
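+  // The prefetch node's read/write operand is 1 for a write, so invert it to
+  // get an "is read" flag for ARMISD::PRELOAD.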
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ isData = ~isData & 1;
+ }
+
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+ DAG.getConstant(isData, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
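+  // Combine the two i32 halves into a single f64 value with VMOVDRR.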
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize)
+ const {
+ unsigned NumGPRs;
+ if (CCInfo.isFirstByValRegValid())
+ NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+ else {
+    unsigned firstUnalloced =
+      CCInfo.getFirstUnallocated(GPRArgRegs,
+                                 sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+ }
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ VARegSize = NumGPRs * 4;
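+  // Round the register save area up to the stack alignment; Align is a power
+  // of two, so the usual add-and-mask rounding applies.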
+ VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usually
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned firstRegToSaveIndex;
+ if (CCInfo.isFirstByValRegValid())
+ firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+ else {
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ }
+
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize
+ - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ SmallVector<SDValue, 16> ArgValues;
+ int lastInsIndex = -1;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+
+ } else {
+ TargetRegisterClass *RC;
+
+ if (RegVT == MVT::f32)
+ RC = ARM::SPRRegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = ARM::DPRRegisterClass;
+ else if (RegVT == MVT::v2f64)
+ RC = ARM::QPRRegisterClass;
+ else if (RegVT == MVT::i32)
+ RC = (AFI->isThumb1OnlyFunction() ?
+ ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ int index = ArgLocs[i].getValNo();
+
+ // Some Ins[] entries become multiple ArgLoc[] entries.
+ // Process them only once.
+ if (index != lastInsIndex)
+ {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+          // In case of tail call optimization, mark all arguments mutable,
+          // since they could be overwritten by lowering of arguments in case
+          // of a tail call.
+ if (Flags.isByVal()) {
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
+ unsigned Bytes = Flags.getByValSize() - VARegSize;
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes,
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+
+ // varargs
+ if (isVarArg)
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
+
+ return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+/// Returns the appropriate ARM CMP (cmp) and the corresponding condition code
+/// for the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
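+      // E.g. in ARM mode 0x101 is not a legal immediate but 0x100 is, so
+      // (x < 0x101) is rewritten below as (x <= 0x100).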
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ // Uses only Z Flag
+ CompareType = ARMISD::CMPZ;
+ break;
+ }
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ DebugLoc DL = Cmp.getDebugLoc();
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Op.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMcc, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+      // If one of the operands is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI, UId);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(),
+ false, false, 0);
+ Chain = Addr.getValue(1);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ } else {
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), false, false, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ }
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+    llvm_unreachable("Invalid opcode!");
+ case ISD::FP_TO_SINT:
+ Opc = ARMISD::FTOSI;
+ break;
+ case ISD::FP_TO_UINT:
+ Opc = ARMISD::FTOUI;
+ break;
+ }
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT OperandVT = Op.getOperand(0).getValueType();
+ assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default:
+    llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+    llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ Opc = ARMISD::SITOF;
+ break;
+ case ISD::UINT_TO_FP:
+ Opc = ARMISD::UITOF;
+ break;
+ }
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
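+    // createNEONModImm(0x6, 0x80) is the modified-immediate encoding of the
+    // i32 splat 0x80000000, i.e. the sign bit of each f32 lane (for f64 it
+    // is shifted into the high word below).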
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (VT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// ExpandBITCAST - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBITCAST called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
+ }
+
+ // Turn f64->i64 into VMOVRRD.
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
+
+ return SDValue();
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
+/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
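+  // For ShAmt < VTBits the low result is
+  // (ShOpLo >> ShAmt) | (ShOpHi << (VTBits - ShAmt)); for ShAmt >= VTBits it
+  // is ShOpHi shifted by ShAmt - VTBits. The CMOV below selects between them.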
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
+  // so that the shift and the AND get folded into a bitfield extract.
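+  // For example, RMode = 0b01 (round towards plus infinity) contributes
+  // 1 << 22, and ((1 << 22) + (1 << 22)) >> 22 = 2, FLT_ROUNDS' "to +inf".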
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
+ SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Lower vector shifts on NEON to use VSHL.
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb1Only()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+  // captures the shifted-out bit in the carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue TmpOp0, TmpOp1;
+ bool Invert = false;
+ bool Swap = false;
+ unsigned Opc = 0;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal FP comparison"); break;
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETOLT:
+ case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETOLE:
+ case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OLT | OGT).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ break;
+ case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OLT | OGE).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ break;
+ }
+ } else {
+ // Integer comparisons.
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal integer comparison"); break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETLE: Swap = true;
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ }
+
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+ if (Opc == ARMISD::VCEQ) {
+
+ SDValue AndOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ AndOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+ AndOp = Op1;
+
+ // Ignore bitconvert.
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+ AndOp = AndOp.getOperand(0);
+
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+ Opc = ARMISD::VTST;
+ Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Invert = !Invert;
+ }
+ }
+ }
+
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return the encoded value.
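+/// For example, a 32-bit splat of 0x0000ab00 is encoded as Cmode=001x with
+/// Imm=0xab (the "Value = 0x0000nn00" case below).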
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ EVT &VT, bool is128Bits, NEONModImmType type) {
+ unsigned OpCmode, Imm;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+  // immediate instructions other than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ switch (SplatBitSize) {
+ case 8:
+ if (type != VMOVModImm)
+ return SDValue();
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
+ assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ break;
+
+ case 16:
+ // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
+
+ case 32:
+ // NEON's 32-bit VMOV supports splat values where:
+ // * only one byte is nonzero, or
+ // * the least significant byte is 0xff and the second byte is nonzero, or
+ // * the least significant 2 bytes are 0xff and the third is nonzero.
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
+
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
+ if ((SplatBits & ~0xffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
+
+ if ((SplatBits & ~0xffffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
+
+ // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+ // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+ // VMOV.I32. A (very) minor optimization would be to replicate the value
+ // and fall through here to test for a valid 64-bit splat. But, then the
+ // caller would also need to check and handle the change in size.
+ return SDValue();
+
+ case 64: {
+ if (type != VMOVModImm)
+ return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ uint64_t BitMask = 0xff;
+ uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
+ for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+ Val |= BitMask;
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
+ return SDValue();
+ }
+ BitMask <<= 8;
+ ImmMask <<= 1;
+ }
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+ break;
+ }
+
+ default:
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
+ }
+
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+}
+
+static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
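+  // E.g. for v8i8, the mask <1, 2, 3, 4, 5, 6, 7, 8> takes elements 1-7 of
+  // the first source and element 0 of the second, i.e. VEXT with Imm = 1.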
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
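+/// For example, a v4i16 shuffle with mask <3, 2, 1, 0> is VREV64.16: one
+/// 64-bit block with its 16-bit elements reversed.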
+static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
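+// For example, for v4i32 VTRN.32 gives result 0 = <0, 4, 2, 6> and
+// result 1 = <1, 5, 3, 7> in shuffle-mask terms.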
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
+ return false;
+ }
+ return true;
+}
+
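+// For example, for v4i32 VUZP.32 gives result 0 = <0, 2, 4, 6> and
+// result 1 = <1, 3, 5, 7> in shuffle-mask terms.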
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
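+/// isVZIPMask - Check if a vector shuffle corresponds to one result of a
+/// VZIP (vector interleave); e.g., for v4i16 the two results use the masks
+/// <0, 4, 1, 5> and <2, 6, 3, 7>.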
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ // Check if an immediate VMOV works.
+ EVT VmovVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+ ((1LL << SplatBitSize) - 1));
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+ }
+ }
+
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ return SDValue();
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ }
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently we only do something sane when at most two source vectors are
+ // involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = {0, 0};
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // It probably isn't worth padding out a smaller vector just to
+ // break it down again in a shuffle.
+ return SDValue();
+ }
+
+ // Since only 64-bit and 128-bit vectors are legal on ARM and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // The span is too large for a VEXT to cope with.
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+ .getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ }
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
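+ // Each mask entry is a base-9 digit (0-7 for a lane of the two inputs, 8
+ // for undef); e.g., the mask <1, 0, 3, 2> maps to 1*729 + 0*81 + 3*9 + 2.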
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm, WhichResult;
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
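+ // A perfect-shuffle entry packs: bits [31:30] the cost (already checked by
+ // the caller), [29:26] the operation, [25:13] LHSID and [12:0] RHSID, which
+ // are expanded recursively below.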
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> VREV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> VREV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
+}
+
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
+
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SmallVector<int, 8> ShuffleMask;
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+ // of the same size so that they get CSEd properly.
+ SVN->getMask(ShuffleMask);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is an undef splat, treat it as a splat of lane 0 so it can be
+ // generated with a plain VDUP, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
+ }
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
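+/// For example, a v4i16 BUILD_VECTOR of <0, 1, 2, 255> counts as
+/// zero-extended from i8, and <0, -1, 2, -128> as sign-extended from i8.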
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ int64_t SExtVal = C->getSExtValue();
+ if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ return false;
+ } else {
+ if ((C->getZExtValue() >> HalfSize) != 0)
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+}
+
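+/// isAddSubSExt - Check whether N is an ADD or SUB of two sign-extended,
+/// single-use values; LowerMUL uses this to distribute a widening multiply
+/// over the addition or subtraction (see the VMULL handling below).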
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = ARMISD::VMULLs;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0;
+ SDValue Op1 = SkipExtension(N1, DAG);
+ if (!isMLA) {
+ Op0 = SkipExtension(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during isel
+ // lowering to take advantage of no-stall back-to-back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any Newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single Newton step. This requires that we use a weird bias
+ // of 0x89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0x89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(0x89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 udiv: convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ break;
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
+ break;
+ case ISD::SRL:
+ case ISD::SRA:
+ Res = Expand64BitShift(N, DAG, Subtarget);
+ break;
+ }
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ unsigned scratch =
+ MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass);
+
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
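+ // For a swap the incoming value is stored back directly, so no ALU
+ // instruction is emitted inside the ldrex/strex loop.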
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc, extendOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ extendOpc = 0;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // (sign extend dest, if required)
+ // cmp dest, incr
+ // cmov.cond scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+ // Sign extend the value, if necessary.
+ if (signExtend && extendOpc) {
+ oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+ }
+
+ // Build compare and cmov instructions.
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(oldval).addReg(incr));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+ .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
+// FIXME: This opcode table should obviously be expressed in the target
+// description. We probably just need a "machine opcode" value in the pseudo
+// instruction. But the ideal solution may be to simply remove the "S" version
+// of the opcode altogether.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADCSri, ARM::ADCri},
+ {ARM::ADCSrr, ARM::ADCrr},
+ {ARM::ADCSrs, ARM::ADCrs},
+ {ARM::SBCSri, ARM::SBCri},
+ {ARM::SBCSrr, ARM::SBCrr},
+ {ARM::SBCSrs, ARM::SBCrs},
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrs, ARM::RSBrs},
+ {ARM::RSCSri, ARM::RSCri},
+ {ARM::RSCSrs, ARM::RSCrs},
+ {ARM::t2ADCSri, ARM::t2ADCri},
+ {ARM::t2ADCSrr, ARM::t2ADCrr},
+ {ARM::t2ADCSrs, ARM::t2ADCrs},
+ {ARM::t2SBCSri, ARM::t2SBCri},
+ {ARM::t2SBCSrr, ARM::t2SBCrr},
+ {ARM::t2SBCSrs, ARM::t2SBCrs},
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+// Convert an Add or Subtract with Carry and Flags to a generic opcode with
+// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
+//
+// FIXME: Somewhere we should assert that CPSR<def> is in the correct
+// position to be recognized by the target description as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned OldOpc = MI->getOpcode();
+ unsigned NewOpc = 0;
+
+ // This is only called for instructions that need remapping, so iterating over
+ // the tiny opcode table is not costly.
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
+ if (OldOpc == Pair->PseudoOpc) {
+ NewOpc = Pair->MachineOpc;
+ break;
+ }
+ }
+ if (!NewOpc)
+ return false;
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ AddDefaultPred(MIB);
+ MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+ MI->eraseFromParent();
+ return true;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ switch (MI->getOpcode()) {
+ default: {
+ if (RemapAddSubWithFlags(MI, BB))
+ return BB;
+
+ MI->dump();
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_LOAD_MIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+ case ARM::ATOMIC_LOAD_MAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+ case ARM::ATOMIC_LOAD_UMIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+ case ARM::ATOMIC_LOAD_UMAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+ case ARM::tMOVCCr_pseudo: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+ .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ EVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+  // Only perform the optimization after legalization and if NEON is
+  // available. We also expect both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+  // N0 and N1 are BUILD_VECTOR nodes, each with N EXTRACT_VECTOR_ELT
+  // operands, where N is the number of elements in the formed vector.
+  // Each EXTRACT_VECTOR_ELT should use the same input vector, with even
+  // indices in N0 and odd indices in N1, forming a pair-wise add pattern.
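+  //
+  // Illustrative example (assumed v4i16 input vector V):
+  //   N0 = BUILD_VECTOR(extract_elt(V, 0), extract_elt(V, 2))
+  //   N1 = BUILD_VECTOR(extract_elt(V, 1), extract_elt(V, 3))
+  // so (add N0, N1) sums adjacent element pairs of V and can be formed with a
+  // widening vpaddl of V followed by the truncate emitted below.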
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+  // For each operand of the two BUILD_VECTORs, check that it is an
+  // EXTRACT_VECTOR_ELT of the same vector with the appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+      // The first operand is the vector; verify it is the same one.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+      // The second operand is the constant index; verify it is correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+      // The constant indices must form consecutive even/odd pairs.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ assert(0 && "Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
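+  // e.g. (add (select cc, 0, 7), x) becomes (select cc, x, (add x, 7)), so
+  // the add only happens on the path where the selected constant is nonzero.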
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
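+  // Worked example: MulAmt == 40 (0b101000) gives ShiftAmt == 3 and a reduced
+  // MulAmt of 5 == 2^2 + 1, so (mul x, 40) -> (shl (add x, (shl x, 2)), 3).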
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ // Attempt to use immediate-form VBIC
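+  // e.g. (and x, <0xffffff00 splat>) can become a single VBIC with the
+  // inverted splat (0xff) encoded as a NEON modified immediate.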
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ APInt SplatBits0;
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ APInt SplatBits1;
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1), N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && mask == ~mask2
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && ~mask == mask2
+ // (i.e., copy a bitfield value into another bitfield of the same width)
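+  //
+  // Illustrative example of (1), with assumed constants:
+  //   or (and A, 0xffff00ff), 0x2300
+  // The mask clears bits [15:8] and the value fits entirely in that field, so
+  // this becomes ARMbfi A, 0x23, 0xffff00ff (insert 0x23 into A[15:8]).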
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
+ return SDValue();
+ unsigned Mask = MaskC->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return SDValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+    // Case (2): or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask2 = N11C->getZExtValue();
+
+    // Mask and ~Mask2 (or vice versa) must be equivalent for the BFI pattern
+    // to match as-is.
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned amt = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(amt, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ }
+
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
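+/// For example, (bfi A, (and B, 0xff), 0xffffff00) only inserts the low 8
+/// bits of B, so the AND with 0xff is redundant and can be dropped.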
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & (~Mask2)) == 0)
+ return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ }
+ return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue StVal = St->getValue();
+ if (!ISD::isNormalStore(St) || St->isVolatile())
+ return SDValue();
+
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = St->getDebugLoc();
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(0), BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() != MVT::i64 ||
+ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = StVal.getDebugLoc();
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = N->getDebugLoc();
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI){
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ DebugLoc dl = N->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 load inserted into a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
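+  // For example, with two v2i32 inputs widened to v4i32, an original mask of
+  // <0, 1, 4, 5> (element 4 being the start of the second concat) is rewritten
+  // to <0, 1, 2, 3> over the single concat(v1, v2).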
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
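+/// For example, a vld1 whose address is also incremented by the access size
+/// (add r0, r0, #8 for a 64-bit vld1) can instead use the post-incrementing
+/// form vld1.32 {d16}, [r0]! (registers here are illustrative only).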
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: assert(0 && "unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: assert(0 && "unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+      // separate instructions, which makes it harder to use a non-constant
+      // update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ Ops, 2, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op = N->getOperand(0);
+
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ EVT VT = N->getValueType(0);
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+}
+
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+ integerPart cN;
+ integerPart c0 = 0;
+ for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+ I != E; I++) {
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+ if (!C)
+ return false;
+
+ bool isExact;
+ APFloat APF = C->getValueAPF();
+ if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK || !isExact)
+ return false;
+
+ c0 = (I == 0) ? cN : c0;
+ if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+ return false;
+ }
+ C = c0;
+ return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+ Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ uint64_t C;
+ SDValue N0 = Op->getOperand(0);
+ SDValue ConstVec = Op->getOperand(1);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ N->getValueType(0),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d17, d16
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+
+ if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ uint64_t C;
+ SDValue ConstVec = N->getOperand(1);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value is negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (isIntrinsic)
+ Cnt = -Cnt;
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ // Vector shifts: check for immediate versions and lower them.
+ // Note: This is done during DAG combining instead of DAG legalizing because
+ // the build_vectors for 64-bit vector element shift counts are generally
+ // not legal, and it is hard to see their values after they get legalized to
+ // loads from a constant pool.
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ case Intrinsic::arm_neon_vqshiftsu:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+ VShiftOpc = ARMISD::VSHL;
+ break;
+ }
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ break;
+ }
+ return SDValue();
+
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vshll intrinsic");
+
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshiftsu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ // Narrowing shifts require an immediate right shift.
+ if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
+
+ default:
+ llvm_unreachable("unhandled vector shift");
+ }
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ // Opcode already set above.
+ break;
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (Cnt == VT.getVectorElementType().getSizeInBits())
+ VShiftOpc = ARMISD::VSHLLi;
+ else
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
+ ARMISD::VSHLLs : ARMISD::VSHLLu);
+ break;
+ case Intrinsic::arm_neon_vshiftn:
+ VShiftOpc = ARMISD::VSHRN; break;
+ case Intrinsic::arm_neon_vrshifts:
+ VShiftOpc = ARMISD::VRSHRs; break;
+ case Intrinsic::arm_neon_vrshiftu:
+ VShiftOpc = ARMISD::VRSHRu; break;
+ case Intrinsic::arm_neon_vrshiftn:
+ VShiftOpc = ARMISD::VRSHRN; break;
+ case Intrinsic::arm_neon_vqshifts:
+ VShiftOpc = ARMISD::VQSHLs; break;
+ case Intrinsic::arm_neon_vqshiftu:
+ VShiftOpc = ARMISD::VQSHLu; break;
+ case Intrinsic::arm_neon_vqshiftsu:
+ VShiftOpc = ARMISD::VQSHLsu; break;
+ case Intrinsic::arm_neon_vqshiftns:
+ VShiftOpc = ARMISD::VQSHRNs; break;
+ case Intrinsic::arm_neon_vqshiftnu:
+ VShiftOpc = ARMISD::VQSHRNu; break;
+ case Intrinsic::arm_neon_vqshiftnsu:
+ VShiftOpc = ARMISD::VQSHRNsu; break;
+ case Intrinsic::arm_neon_vqrshiftns:
+ VShiftOpc = ARMISD::VQRSHRNs; break;
+ case Intrinsic::arm_neon_vqrshiftnu:
+ VShiftOpc = ARMISD::VQRSHRNu; break;
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ VShiftOpc = ARMISD::VQRSHRNsu; break;
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vshiftins: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+ VShiftOpc = ARMISD::VSLI;
+ else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+ VShiftOpc = ARMISD::VSRI;
+ else {
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vqrshifts:
+ case Intrinsic::arm_neon_vqrshiftu:
+ // No immediate versions of these to check for.
+ break;
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them. As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+ }
+ return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue N0 = N->getOperand(0);
+
+ // Check for sign- and zero-extensions of vector extract operations of 8-
+ // and 16-bit vector elements. NEON supports these directly. They are
+ // handled during DAG combining because type legalization will promote them
+ // to 32-bit types and it is messy to recognize the operations after that.
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Lane = N0.getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (VT == MVT::i32 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ISD::SIGN_EXTEND:
+ Opc = ARMISD::VGETLANEs;
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Opc = ARMISD::VGETLANEu;
+ break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at EQ and NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // Simplify
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, y
+ // moveq r0, x
+ // to
+ // cmp r0, x
+ // movne r0, y
+ //
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, x
+ // movne r0, y
+ // to
+ // cmp r0, x
+ // movne r0, y
+ // FIXME: Turn this into a target-neutral optimization?
+ SDValue Res;
+ if (CC == ARMCC::NE && FalseVal == RHS) {
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
+ } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+ SDValue ARMcc;
+ SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
+ }
+
+ if (Res.getNode()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ // Capture demanded bits information that would be otherwise lost.
+ if (KnownZero == 0xfffffffe)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i1));
+ else if (KnownZero == 0xffffff00)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i8));
+ else if (KnownZero == 0xffff0000)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i16));
+ }
+
+ return Res;
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+ case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
+ }
+ return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ if (!Subtarget->allowsUnalignedMem())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return true;
+ // FIXME: VLD1 etc with standard alignment is legal.
+ }
+}
+
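+// isLegalT1AddressImmediate - Thumb1 load / store offsets are unsigned,
+// scaled by the access size, and limited to 5 bits; e.g. i32 accesses accept
+// multiples of 4 up to 124. (Descriptive note summarizing the checks below.)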
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
+
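+// isLegalT2AddressImmediate - Thumb2 integer loads / stores accept +imm12 or
+// -imm8 offsets, while VFP loads / stores take word-aligned 8-bit offsets
+// (up to +-1020). (Descriptive note summarizing the cases below.)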
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
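+/// For example, in ARM mode an i32 offset of 4092 fits the +- imm12 case
+/// below, while 4096 does not (illustrative values derived from the checks).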
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+
+ // ARM mode.
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ EVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb1Only())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
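+/// For example, 256 is legal for ARM and Thumb2 (a rotated / shifted 8-bit
+/// immediate) but not for Thumb1, which only accepts 0-255 (illustrative).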
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
+/// isLegalAddImmediate - Return true if the specified immediate is legal
+/// add immediate, that is the target has add instructions which can add
+/// a register with the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+}
+
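+// getARMIndexedAddressParts - Match an ADD / SUB pointer for pre/post-indexed
+// forms. AddrMode3 (i16 and sign-extending i8/i1) has an 8-bit immediate
+// offset and AddrMode2 a 12-bit one, hence the -256 / -0x1000 bounds below.
+// (Descriptive note added for clarity.)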
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
+ return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true, and sets the base pointer,
+/// offset, and addressing mode by reference, if the node's address can be
+/// legally represented as a pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+/// getPostIndexedAddressParts - returns true, and sets the base pointer,
+/// offset, and addressing mode by reference, if this node can be combined
+/// with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
+ }
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All 'U+' constraints are addresses.
+ case 'U': return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l': // Low regs or general regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::tGPRRegisterClass);
+ else
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::hGPRRegisterClass);
+ break;
+ case 'r':
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPRRegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPRRegisterClass);
+ break;
+ case 'x':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPR_8RegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPR_8RegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPR_8RegisterClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
+ break;
+ }
+ }
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'j':
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
+ case 'I':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb1Only()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal <= 7)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
+
+int ARM::getVFPf32Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
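+ // For example, 1.0f (0x3f800000) has Sign = 0, Exp = 0 and a zero mantissa;
+ // it passes both checks below and encodes as 0x70. (Worked example added
+ // for illustration.)
+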
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+int ARM::getVFPf64Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffLL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return false;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
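+ // (e.g. 0xff0000ff qualifies, while 0x00ffff00 does not; illustrative
+ // values added for clarity)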
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return false;
+ }
+ return true;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM::getVFPf32Imm(Imm) != -1;
+ if (VT == MVT::f64)
+ return ARM::getVFPf64Imm(Imm) != -1;
+ return false;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
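+ // (For instance, a vld3 returning three <4 x i32> vectors covers 48 bytes
+ // and is modeled as v6i64 here; illustrative note.)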
+ uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_strexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldrexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 000000000000..980fb404887e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,509 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in
+ // DYN mode.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPZ, // ARM compare that sets only Z flag.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+
+ BCC_i64,
+
+ RBIT, // ARM bitreverse instruction
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
+
+ TC_RETURN, // Tail call return pseudo.
+
+ THREAD_POINTER,
+
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
+ MEMBARRIER, // Memory barrier (DMB)
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
+
+ VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
+ VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
+ VCGEU, // Vector compare unsigned greater than or equal.
+ VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
+ VCGTU, // Vector compare unsigned greater than.
+ VTST, // Vector test bits.
+
+ // Vector shift by immediate:
+ VSHL, // ...left
+ VSHRs, // ...right (signed)
+ VSHRu, // ...right (unsigned)
+ VSHLLs, // ...left long (signed)
+ VSHLLu, // ...left long (unsigned)
+ VSHLLi, // ...left long (with maximum shift count)
+ VSHRN, // ...right narrow
+
+ // Vector rounding shift by immediate:
+ VRSHRs, // ...right (signed)
+ VRSHRu, // ...right (unsigned)
+ VRSHRN, // ...right narrow
+
+ // Vector saturating shift by immediate:
+ VQSHLs, // ...left (signed)
+ VQSHLu, // ...left (unsigned)
+ VQSHLsu, // ...left (signed to unsigned)
+ VQSHRNs, // ...right narrow (signed)
+ VQSHRNu, // ...right narrow (unsigned)
+ VQSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector saturating rounding shift by immediate:
+ VQRSHRNs, // ...right narrow (signed)
+ VQRSHRNu, // ...right narrow (unsigned)
+ VQRSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector shift and insert:
+ VSLI, // ...left
+ VSRI, // ...right
+
+ // Vector get lane (VMOV scalar to ARM core register)
+ // (These are used for 8- and 16-bit element types only.)
+ VGETLANEu, // zero-extend vector extract element
+ VGETLANEs, // sign-extend vector extract element
+
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
+
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN,
+
+ // Bit-field insert
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector bitwise select
+ VBSL,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace ARM {
+ /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
+ /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
+ /// instruction, returns its 8-bit integer representation. Otherwise,
+ /// returns -1.
+ int getVFPf32Imm(const APFloat &FPImm);
+ int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding(void) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses of the specified type.
+ /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
+ /// getPreIndexedAddressParts - returns true, and sets the base pointer,
+ /// offset, and addressing mode by reference, if the node's address can be
+ /// legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - returns true, and sets the base pointer,
+ /// offset, and addressing mode by reference, if this node can be combined
+ /// with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ const ARMSubtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ const TargetRegisterInfo *RegInfo;
+
+ const InstrItineraryData *Itins;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
+ void addDRTypeForNEON(EVT VT);
+ void addQRTypeForNEON(EVT VT);
+
+ typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+ void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+ const;
+
+ void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ virtual void HandleByVal(CCState *, unsigned &) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+ MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const;
+
+ bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 000000000000..3ccf22f80b7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,1930 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def LdStExFrm : Format<11>;
+
+def ArithMiscFrm : Format<12>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
+
+// Misc flags.
+
+// By default, an instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have an Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<3> val> {
+ bits<3> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
+def CCOutOperand : AsmOperandClass {
+ let Name = "CCOut";
+ let SuperClasses = [];
+}
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemBarrierOptOperand";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseProcIFlagsOperand";
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMSRMaskOperand";
+}
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+}
+
+// ARM special operands for disassembly only.
+//
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+}
+
+def msr_mask : Operand<i32> {
+ let PrintMethod = "printMSRMaskOperand";
+ let ParserMatchClass = MSRMaskOperand;
+}
+
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+// Offset Encoding
+// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+// 64 64 - <imm> is encoded in imm6<5:0>
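+// For example, with 8-bit elements a right shift by 3 is encoded as
+// imm6 = 0b001101 (8 - 3 = 5 in imm6<2:0>). (Illustrative reading of the
+// table above.)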
+def shr_imm8 : Operand<i32> {
+ let EncoderMethod = "getShiftRight8Imm";
+}
+def shr_imm16 : Operand<i32> {
+ let EncoderMethod = "getShiftRight16Imm";
+}
+def shr_imm32 : Operand<i32> {
+ let EncoderMethod = "getShiftRight32Imm";
+}
+def shr_imm64 : Operand<i32> {
+ let EncoderMethod = "getShiftRight64Imm";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+class InstTemplate<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : Instruction {
+ let Namespace = "ARM";
+
+ AddrMode AM = am;
+ int Size = sz;
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+ Format F = f;
+ bits<6> Form = F.Value;
+ Domain D = d;
+ bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{6-5} = IndexModeBits;
+ let TSFlags{12-7} = Form;
+ let TSFlags{13} = isUnaryDataProc;
+ let TSFlags{14} = canXformTo16Bit;
+ let TSFlags{17-15} = D.Value;
+
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+class Encoding {
+ field bits<32> Inst;
+}
+
+class InstARM<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits later
+// on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>;
+
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+ GenericDomain, "", itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : ARMPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : tPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : t2PseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
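+
+// For illustration: "sub r0, r1, r2" leaves the optional cc_out operand as the
+// zero register (s = 0, flags untouched), while "subs r0, r1, r2" binds it to
+// CPSR, setting s (Inst{20}) so the instruction updates the condition flags.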
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
+// Control-flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
+
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b11111001;
+ let Inst{3-0} = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+}
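+// For illustration: a post-indexed store using AI2stridx above, such as
+// "str r0, [r1], #4", takes the immediate form of am2offset, so
+// offset{13} = 0 (imm12 form, Inst{25} = 0), offset{12} = 1 (add,
+// Inst{23} = 1) and offset{11-0} = 4.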
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
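+// For illustration: an addrmode3 load like "ldrsh r0, [r1, #6]" takes the
+// imm8 form, so addr{13} = 1 (Inst{22} = 1), addr{8} = 1 for add
+// (Inst{23} = 1), addr{12-9} = 1 (r1), and the offset is split across
+// Inst{11-8} = 0 and Inst{3-0} = 6.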
+
+class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDRSHT.
+class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
+}
+
+class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM3 store w/ two operands: (GPR, am3offset)
+ bits<14> offset;
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-25} = 0b000;
+ let Inst{23} = offset{8};
+ let Inst{22} = offset{9};
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+
+// Pre-indexed stores
+class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+
+// Post-indexed stores
+class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{15-12} = Rt; // Rt
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{23} = addr{8}; // U bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0; // S bit
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiply w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
+ let Inst{27-20} = opcod;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<8> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5T];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
+ pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
+ Encoding {
+ let Inst{31-27} = opcod1;
+ let Inst{15-14} = opcod2;
+ let Inst{12} = opcod3;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin,
+ asm, cstr, pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
+
+class Encoding16 : Encoding {
+ let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+ let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+ let Inst{15-14} = 0b00;
+ let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010000;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+ let Inst{15-12} = opA;
+ let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+class T1BranchCond<bits<4> opcode> : Encoding16 {
+ let Inst{15-12} = opcode;
+}
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
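+// For illustration: a Thumb1 word load such as "ldr r2, [r1, #8]" uses
+// opA = 0b0110 (immediate, 4 bytes); r2 lands in Inst{2-0}, r1 in Inst{5-3},
+// and the byte offset is scaled by the access size, so #8 is encoded as
+// imm5 = 2 in Inst{10-6}.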
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+ let Inst{15-12} = 0b1011;
+ let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "",
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = addr{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+ let Inst{31-28} = opc;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
+
+// T2Iidxldst - Thumb2 indexed load / store instructions.
+class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
+
+ bits<9> addr;
+ let Inst{7-0} = addr{7-0};
+ let Inst{9} = addr{8}; // Sign bit
+
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{19-16} = Rn{3-0};
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode.
+class T2v6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasV6T2];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
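+
+// For illustration: single-precision register numbers are split across the
+// encoding, e.g. Sd = S7 (0b00111) is emitted with Sd{4-1} = 0b0011 in
+// Inst{15-12} and Sd{0} = 1 in Inst{22}, whereas ADI5 above places Dd{3-0}
+// in Inst{15-12} and Dd{4} in Inst{22}.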
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Load / store multiple
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+}
+
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// Double precision, unary
+class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Double precision, binary
+class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision, unary
+class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
+class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+// Single precision, binary
+class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
+class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = opcod4;
+ let Inst{6} = 1;
+ let Inst{4} = 0;
+}
+
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+}
+
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM NEON Instruction templates.
+//
+
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+}
+
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+}
+
+// NEON "one register and a modified immediate" format.
+class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
+ bit op5, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{23} = op23;
+ let Inst{21-19} = op21_19;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
+}
+
+// NEON 2 vector register format.
+class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON 2 vector register with immediate.
+class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
+}
+
+// NEON 3 vector register format.
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bit lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bits<2> lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{2-0} = Vm{2-0};
+ let Inst{5} = lane{1};
+ let Inst{3} = lane{0};
+}
+
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON VMOVs between scalar and core registers.
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
+ "", itin> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
+ // A8.6.303, A8.6.328, A8.6.329
+ let Inst{3-0} = 0b0000;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
+}
+class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
+ opc, dt, asm, pattern>;
+
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<4> lane;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 000000000000..adcbf1806fe3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,60 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDRi12;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRBi12;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STRi12;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRBi12;
+ }
+
+ return 0;
+}
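+
+// Illustrative (hypothetical) caller: strip the auto-increment form before
+// rewriting a load/store, bailing out when no unindexed equivalent exists.
+// Here TII and MI stand for an ARMInstrInfo pointer and a MachineInstr in
+// some surrounding pass:
+//
+//   unsigned NewOpc = TII->getUnindexedOpcode(MI->getOpcode());
+//   if (NewOpc == 0)
+//     return false;   // no plain load/store counterpart for this opcode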
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 000000000000..f2c7bdc31be9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,44 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+ ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 000000000000..a42dd1a54ec7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,4076 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
+
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+ SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
+
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops">;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
+ AssemblerPredicate<"FeatureVFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
+ AssemblerPredicate<"FeatureVFP3">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">,
+ AssemblerPredicate<"FeatureHWDiv">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate<"FeatureT2XtPk">;
+def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
+ AssemblerPredicate<"FeatureDSPThumb2">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate<"FeatureDB">;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate<"FeatureMP">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all low 16-bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
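+// For example, the constant 0xABCD0000 has all low 16 bits clear, so it
+// matches lo16AllZero, and the hi16 transform then yields the 16-bit value
+// 0xABCD.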
+
+/// imm0_65535 - An immediate in the range [0,65535].
+def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+/// adde and sube predicates - True based on whether the carry flag output
+/// will be needed or not.
+def adde_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def sube_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def adde_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+def sube_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Branch target for ARM. Handles conditional/unconditional branches.
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target for ARM. Handles conditional/unconditional calls.
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+
+// A list of registers separated by commas. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let SuperClasses = [];
+}
+
+def DPRRegListAsmOperand : AsmOperandClass {
+ let Name = "DPRRegList";
+ let SuperClasses = [];
+}
+
+def SPRRegListAsmOperand : AsmOperandClass {
+ let Name = "SPRRegList";
+ let SuperClasses = [];
+}
+
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, ImmLeaf<i32, [{
+ int32_t v = (int32_t)Imm;
+ return v == 8 || v == 16 || v == 24; }]> {
+ let EncoderMethod = "getRotImmOpValue";
+}
+
+def ShifterAsmOperand : AsmOperandClass {
+ let Name = "Shifter";
+ let SuperClasses = [];
+}
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterAsmOperand;
+}
+
+def ShiftedRegAsmOperand : AsmOperandClass {
+ let Name = "ShiftedReg";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPR, GPR, shift_imm);
+}
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, shift_imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits.
+def so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }]> {
+ let EncoderMethod = "getSOImmOpValue";
+}
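+// For example, 0x000000FF, 0x0000FF00 and 0xFF000000 are all valid so_imm
+// values (0xFF rotated right by 0, 24 and 8 bits respectively), whereas
+// 0x000001FE is not, because it would need an odd rotation amount.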
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// arm_i32imm - True for +V6T2, otherwise true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->hasV6T2Ops())
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
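+// For example, even without V6T2 the constant 0x00FF00FF is accepted here,
+// since it splits into the two so_imm pieces 0x000000FF and 0x00FF0000.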
+
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]>;
+
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let EncoderMethod = "getImmMinusOneOpValue";
+}
+
+// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
+// The imm is split into imm{15-12}, imm{11-0}
+//
+def i32imm_hilo16 : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+}
+
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
+
+/// lsb_pos_imm - position of the least significant bit, used by BFI4p and t2BFI4p
+def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{
+ return isInt<5>(Imm);
+}]>;
+
+/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
+def width_imm : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
+}] > {
+ let EncoderMethod = "getMsbOpValue";
+}
+
+def ssat_imm : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
+}]> {
+ let EncoderMethod = "getSsatBitPosValue";
+}
+
+// Define ARM specific addressing modes.
+
+def MemMode2AsmOperand : AsmOperandClass {
+ let Name = "MemMode2";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemMode2Operand";
+}
+
+def MemMode3AsmOperand : AsmOperandClass {
+ let Name = "MemMode3";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemMode3Operand";
+}
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
+//
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
+ let PrintMethod = "printAddrMode2Operand";
+ let ParserMatchClass = MemMode2AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
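+// In assembly this covers forms such as [r1, #-8] and [r1, r2, lsl #2].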
+
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
+ let PrintMethod = "printAddrMode3Operand";
+ let ParserMatchClass = MemMode3AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ldstm_mode := {ia, ib, da, db}
+//
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+def MemMode5AsmOperand : AsmOperandClass {
+ let Name = "MemMode5";
+ let SuperClasses = [];
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+ let ParserMatchClass = MemMode5AsmOperand;
+ let EncoderMethod = "getAddrMode5OpValue";
+}
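+// The offset is encoded as an 8-bit word count, so the reachable byte offsets
+// are multiples of 4 in the range [-1020, +1020] (e.g. [r0, #-1020]).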
+
+// addrmode6 := reg with optional alignment
+//
+def addrmode6 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+}
+
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
+// (single element from one lane) for size 32.
+def addrmode6oneL32 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+def MemMode7AsmOperand : AsmOperandClass {
+ let Name = "MemMode7";
+ let SuperClasses = [];
+}
+
+// addrmode7 := reg
+// Used by load/store exclusive instructions. Needed to enable correct assembly
+// parsing and printing. Not used for any codegen matching.
+//
+def addrmode7 : Operand<i32> {
+ let PrintMethod = "printAddrMode7Operand";
+ let MIOperandInfo = (ops GPR);
+ let ParserMatchClass = MemMode7AsmOperand;
+}
+
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
+}
+
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocRegOperand";
+}
+
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+ }
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+}
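+// A typical instantiation later in this file looks roughly like:
+//   defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+//                           BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+// which expands to ANDri, ANDrr and ANDrs plus the destination-optional
+// assembly aliases defined above.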
+
+/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+ }
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = shift;
+ }
+}
+}
+
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{3-0} = Rm;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
+ }
+}
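+// The extension instructions (e.g. sxtb/uxtb) are built from this multiclass;
+// the rotated form corresponds to assembly such as "sxtb r0, r1, ror #16".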
+
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = 0b00;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
+ }
+}
+
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode GPR:$Rn,
+ (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+}
+
+// For disassembly only.
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+ }
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc, bit Commutable = 0> {
+ let Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+}
+
+// Carry setting variants
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ 4, IIC_iALUr,
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
+ def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ 4, IIC_iALUsr,
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
+}
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that it corresponds to, and the
+/// third is the size in bytes of this constant pool entry.
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000001;
+}
+
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000010;
+}
+
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000011;
+}
+
+def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
+ "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+}
+
+def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000100;
+}
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010010;
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser currently has no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16} = 0;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+
+// Preload signals the memory system about possible future data/instruction accesses.
+// These are for disassembly only.
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
+
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ bits<17> shift;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
+
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
+}
+
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ []>, Requires<[IsARM, HasV7]> {
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
+}
+
+// A5.4 Permanently UNDEFINED instructions.
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM]> {
+ let Inst = 0xe7ffdefe;
+}
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ 4, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
+
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+ bits<4> Rd;
+ bits<12> label;
+ let Inst{27-25} = 0b001;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label;
+}
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]> {
+ let Inst{27-0} = 0b0001001011111111111100011110;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{27-0} = 0b0001101000001111000000001110;
+ }
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+
+ def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+ "bx", "\t$dst", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ // FIXME: Do we really need a non-predicated version? If so, it should
+ // at least be a pseudo instruction expanding to the predicated version
+ // at MC lowering time.
+ Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx", "\t$func",
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsNotDarwin]>;
+
+ // ARMv4
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def BLr9_pred : ARMPseudoExpand<(outs),
+ (ins bl_target:$func, pred:$p, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred tglobaladdr:$func)],
+ (BL_pred bl_target:$func, pred:$p)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ // ARMv5T and above
+ def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall GPR:$func)],
+ (BLX GPR:$func)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred GPR:$func)],
+ (BLX_pred GPR:$func, pred:$p)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsDarwin]>;
+
+ // ARMv4
+ def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ }
+
+ let isBarrier = 1 in {
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
+ let isPredicable = 1 in
+ // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+ // should be sufficient.
+ // FIXME: Is B really a Barrier? That doesn't seem right.
+ def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+}
+
+// BLX (immediate) -- for disassembly only
+def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV5T]> {
+ let Inst{31-25} = 0b1111101;
+ bits<25> target;
+ let Inst{23-0} = target{24-1};
+ let Inst{24} = target{0};
+}
+
+// Branch and Exchange Jazelle
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* pattern left blank */]> {
+ bits<4> func;
+ let Inst{23-20} = 0b0010;
+ let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = func;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsNotDarwin]>;
+ }
+}
+
+
+
+
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
+}
+
+// Supervisor Call (Software Interrupt) -- for disassembly only
+let isCall = 1, Uses = [SP] in {
+def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
+}
+
+// Store Return State is a system instruction -- for disassembly only
+let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b110; // W = 1
+ let Inst{19-8} = 0xd05;
+ let Inst{7-5} = 0b000;
+}
+
+def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b100; // W = 0
+ let Inst{19-8} = 0xd05;
+ let Inst{7-5} = 0b000;
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b011; // W = 1
+ let Inst{15-0} = 0x0a00;
+}
+
+def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b001; // W = 0
+ let Inst{15-0} = 0x0a00;
+}
+} // isCodeGenOnly = 1
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
+
+// Loads with zero extension
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+ (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr",
+ []>, Requires<[IsARM, HasV5TE]>;
+}
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+ }
+ def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am2offset:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ }
+ def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$Rn], $offset",
+ "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+}
+} // hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+}
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+}
+def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+}
+
+// Store
+
+// Stores with truncate
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r,
+ "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>;
+
+// Indexed stores
+def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+
+def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePre, StMiscFrm, IIC_iStore_ru,
+ "strh", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+
+def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ GPR:$Rn, am3offset:$offset))]>;
+
+// For disassembly only
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$src1, $src2, [$base, $offset]!",
+ "$base = $base_wb", []>;
+
+// For disassembly only
+def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$src1, $src2, [$base], $offset",
+ "$base = $base_wb", []>;
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// STRT, STRBT, and STRHT are for disassembly only.
+
+def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+}
+
+def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+}
+
+def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strht", "\t$Rt, $addr", "$addr.base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode3";
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. The suffix-less form is the canonical spelling.
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// FIXME: remove when we have a way of marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
+ DPSoRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = src;
+ let Inst{25} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
+ Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set GPR:$Rd,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
+def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
+ Requires<[IsARM, HasV6T2]>;
+
+let Uses = [CPSR] in
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm SXTAB : AI_exta_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_exta_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// For disassembly only
+defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+
+// For disassembly only
+defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
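+// As a concrete illustration of the FIXME above (values chosen here purely
+// for exposition): with $Src = 0xAA000000, (UXTB16r_rot GPR:$Src, 24) rotates
+// the top byte around into the low byte and yields 0x000000AA, whereas
+// (and (shl GPR:$Src, (i32 8)), 0xFF00FF) discards that byte and yields 0.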
+def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general: the add is two 16-bit units, not a 32-bit add.
+// For disassembly only
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+
+
+def SBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+
+// ADD and SUB with 's' bit set.
+defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>,
+ "ADC", 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>,
+ "SBC">;
+
+// ADC and SBC with 's' bit set.
+let usesCustomInserter = 1 in {
+defm ADCS : AI1_adde_sube_s_irs<
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+}
+
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+// RSB with 's' bit set.
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
+def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ 4, IIC_iALUr,
+ [/* For disassembly only; pattern left blank */]>;
+def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ 4, IIC_iALUsr,
+ [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
+}
+
+let Uses = [CPSR] in {
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SBCrr.
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+}
+
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1, Uses = [CPSR] in {
+def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
+def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ 4, IIC_iALUsr,
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
+}
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
+ (SBCSri GPR:$src, so_imm_not:$imm)>;
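+// As a worked instance of the above (illustrative values): (add GPR:$src, -2)
+// is selected as (SUBri GPR:$src, 2), and (adde GPR:$src, 0xFFFFFFFD) as
+// (SBCri GPR:$src, 2), since ~2 = 0xFFFFFFFD and SBC computes
+// Rn + NOT(imm) + carry.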
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that an xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
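+// For instance (n = 3): (mul X, 9) becomes add Rd, X, X, lsl #3, i.e.
+// X + (X << 3), and (mul X, 7) becomes rsb Rd, X, X, lsl #3, i.e. (X << 3) - X.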
+
+// ARM Arithmetic Instruction -- for disassembly only
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = op27_20;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Signed/Unsigned saturate -- for disassembly only
+
+def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{3-0} = Rn;
+}
+
+def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101010;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110111;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101110;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+// GNU as only supports this form of bfi (w/ 4 arguments)
+let isAsmParserOnly = 1 in
+def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
+ lsb_pos_imm:$lsb, width_imm:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
+ []>, Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{20-16} = width; // Custom encoder => lsb+width-1
+ let Inst{3-0} = Rn;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+ IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instruction description.
+let isCommutable = 1 in {
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b0000;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
+ (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
+ (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+}
+
+// Multiply + accumulate
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdLo;
+ let Inst{15-12} = RdHi;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, IIC_iMAC64, [],
+ (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+} // neverHasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld -- for disassembly only
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 1;
+ let Inst{5} = swap;
+ let Inst{6} = sub;
+ let Inst{7} = 0;
+ let Inst{21-20} = 0b00;
+ let Inst{22} = long;
+ let Inst{27-23} = 0b01110;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+let AddedComplexity = 5 in
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>;
+
+let AddedComplexity = 5 in
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
+ (REVSH GPR:$Rm)>;
+
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : ImmLeaf<i32, [{
+ return Imm > 0 && Imm < 32;
+}], lsl_shift_imm>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+ (and (shl GPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+ (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+ (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def asr_amt : ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
+}], asr_shift_imm>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+ (and (sra GPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+ (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+ (CMPrs GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction for comparisons
+// with 0. One would expect these two pieces of code to give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short, the
+// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
+// value of r0 and the carry bit (because the "carry bit" parameter to
+// AddWithCarry is defined as 1 in this case, the carry flag will always be set
+// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
+// never a "carry" when this AddWithCarry is performed (because the "carry bit"
+// parameter to AddWithCarry is defined as 0).
+//
+// When x is 0 and unsigned:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
+//defm CMN : AI1_cmp_irs<0b1011, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+// (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNzri GPR:$src, so_imm_neg:$imm)>;
+
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
+ 4, IIC_iMOVi,
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+
+let isMoveImm = 1 in
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operation intrinsics
+//
+
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+}
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff05;
+ let Inst{3-0} = opt;
+}
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option
+def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "isb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff06;
+ let Inst{3-0} = opt;
+}
+
+let usesCustomInserter = 1 in {
+ let Uses = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
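+
+// The pseudos above are expanded by a custom inserter. As a rough sketch
+// (register choice and barrier placement are illustrative, not the exact
+// emitted code), ATOMIC_LOAD_ADD_I32 becomes an ldrex/strex retry loop
+// bracketed by memory barriers:
+//       dmb
+//     1:
+//       ldrex   r0, [r1]        @ load-exclusive the current value
+//       add     r2, r0, r3      @ apply the operation ($incr in r3)
+//       strex   r4, r2, [r1]    @ attempt the store-exclusive
+//       cmp     r4, #0
+//       bne     1b              @ retry if the exclusive store failed
+//       dmb
+// The ldrex/strex instruction definitions follow below.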
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrexb", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+ def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>;
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
+}
+
+let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>;
+
+// Clear-Exclusive is for disassembly only.
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+// SWP/SWPB are deprecated in V6/V7 and are for disassembly only.
+let mayLoad = 1 in {
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
+ [/* For disassembly only; pattern left blank */]>;
+def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
+ [/* For disassembly only; pattern left blank */]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions.
+//
+
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{31-28} = 0b1111;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-25} = 0b110;
+}
+
+multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{
+
+ def _OFFSET : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _PRE : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _POST : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _OPTION : ACI<(outs),
+ !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option),
+ ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def L_OFFSET : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_PRE : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!",
+ IndexModePre> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_POST : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr",
+ IndexModePost> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_OPTION : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option),
+ ops),
+ !strconcat(!strconcat(opc, "l"), cond),
+ "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+}
+
+defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">;
+defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">;
+defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">;
+defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class MovRCopro<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABI<0b1110, oops, iops, NoItinerary, opc,
+ "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> {
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2), []>;
+
+def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRCopro2<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABXI<0b1110, oops, iops, NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2), []>;
+
+def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
+ imm:$CRm, imm:$opc2),
+ (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRRCopro<string opc, bit direction,
+ list<dag> pattern = [/* For disassembly only */]>
+ : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction,
+ list<dag> pattern = [/* For disassembly only */]>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions; the encodings are the
+// same, and the assembly parser has no way to distinguish between them. The
+// mask operand holds the special-register (R) bit in bit 4, and bits 3-0
+// contain the mask of the fields to be accessed in the special register.
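+// For example (assuming the usual f/s/x/c field ordering in mask bits 3-0),
+// "msr CPSR_fc, r0" would use mask = 0b01001 (R = 0, f and c set), while
+// "msr SPSR_fsxc, r0" would use mask = 0b11111.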
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
+}
+
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
+ "msr", "\t$mask, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<12> a;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = a;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+}
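+
+// As a sketch of how this is used (the exact sequence is ABI- and
+// relocation-dependent), a __thread variable access lowers to a call to
+// __aeabi_read_tp followed by an add of the variable's offset from the
+// returned thread pointer:
+//     bl      __aeabi_read_tp   @ r0 = thread pointer
+//     add     r0, r0, #offset   @ "#offset" stands in for the TLS offset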
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+ [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+ Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+ def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+ 4, IIC_Br, [(brind GPR:$dst)],
+ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+ Requires<[IsARM, NoV4T]>;
+
+// Large immediate handling.
+
+// A 32-bit immediate is built from two so_imm pieces or from movw + movt.
+// This is a single pseudo instruction; the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
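+
+// For example, materializing 0x00AB00CD can be done either as two so_imm
+// pieces:
+//     mov     r0, #0x00AB0000
+//     orr     r0, r0, #0x000000CD
+// or, when movt is available, as a movw/movt pair:
+//     movw    r0, #0x00CD
+//     movt    r0, #0x00AB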
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
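+// As a rough sketch (the label "LPC0" and symbol "sym" are placeholders; the
+// exact relocations are chosen when the pseudo is expanded), the PIC form
+// becomes a movw/movt of a pc-relative offset followed by an add of pc:
+//     movw    r0, :lower16:(sym - (LPC0 + 8))
+//     movt    r0, :upper16:(sym - (LPC0 + 8))
+//   LPC0:
+//     add     r0, pc, r0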
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+ Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
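+// In the patterns below, (sra (shl GPR:$x, 16), 16) is the sign-extended
+// bottom halfword of $x and (sra GPR:$x, 16) is its top halfword. SMULxy and
+// SMLAxy multiply the selected halves (B = bottom, T = top); SMULWB/SMLAWB
+// keep the most significant 32 bits of the 48-bit 32x16 product, matching the
+// (sra (mul ...), 16) patterns.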
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+ Requires<[IsARM, HasV6]>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
new file mode 100644
index 000000000000..0df62f456343
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -0,0 +1,4937 @@
+//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+
+// Types for vector shift by immediates. The "SHX" version is for long and
+// narrow operations where the source and destination vectors have different
+// types. The "SHINS" version is for shift and insert operations.
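+// For example, NEONvshlls (vshll, e.g. v8i8 -> v8i16) widens and so uses the
+// "SHX" profile, while NEONvsli (vsli, shift left and insert) uses "SHINS".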
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
+def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
+def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
+def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
+def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
+def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
+def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+
+def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
+def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
+def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+
+def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
+def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
+def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
+def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
+def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
+def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+
+def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
+def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
+def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+
+def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
+def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
+def NEONvbsl : SDNode<"ARMISD::VBSL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
+
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
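+// For example, "vdup.32 q0, d1[0]" produces a v4i32 result from a v2i32
+// source.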
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON operand definitions
+//===----------------------------------------------------------------------===//
+
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
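+
+// For example, after register allocation a VLDMQIA of Q0 from the address in
+// r0 is rewritten as roughly "vldmia r0, {d0, d1}", since Q0 overlaps D0/D1.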
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1,
+ "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
+
+// ...with address register writeback:
+class VLD1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
+ "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
+def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
+def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
+def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
+
+def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
+def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
+def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
+def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VLD1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+
+def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
+def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
+def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
+def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
+ []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+
+def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
+def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD2,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD2x2,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
+def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
+def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
+def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
+def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
+// ...with address register writeback:
+class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
+// VLD1LN : Vector Load (single element to one lane)
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+}
+class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6oneL32:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []>;
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
+ []>;
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+ [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+ let Pattern = [(set QPR:$dst,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32Pseudo addrmode6:$addr)>;
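+// As an illustration, a splatted scalar load such as
+//   (v2f32 (NEONvdup (f32 (load addrmode6dup:$addr))))
+// is selected to VLD1DUPd32 by the pattern above and prints as
+// "vld1.32 {d0[]}, [r0]" (register numbers illustrative).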
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+ IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+ "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8 : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
+
+def VST1q8 : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+ "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VST1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
+
+def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VST1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VST1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+
+def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VST1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+ "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+
+def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+ IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VST2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
+def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
+def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8 : VST2Q<{0,0,?,?}, "8">;
+def VST2q16 : VST2Q<{0,1,?,?}, "16">;
+def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+class VST2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+ "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+ let Rm = 0b1111;
+}
+class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
+ let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+
+def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
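+// As a sketch, storing a single extracted f32 lane, e.g.
+//   (store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr)
+// selects to VST1LNd32 and prints as "vst1.32 {d0[1]}, [r0]" (register and
+// lane numbers illustrative).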
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
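+// post_truncsti8, post_truncsti16 and post_store are post-indexed store
+// fragments: the updated base address is returned in GPR:$wb, which is why
+// these patterns set $wb and carry the "$Rn.addr = $wb" constraint.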
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+}]>;
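+// Worked example: lane 10 of a v16i8 Q register lives in D sub-register
+// dsub_(10/8) = dsub_1 at lane (10 & 7) = 2, which is exactly what
+// DSubReg_i8_reg and SubReg_i8_lane compute above.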
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
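+// The by-lane (scalar) forms restrict the $Vm operand: with 16-bit elements
+// the scalar may only live in D0-D7 (DPR_8), and with 32-bit elements only in
+// D0-D15 (DPR_VFP2), because part of the Vm field is reused to encode the
+// lane index.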
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let isCommutable = 0;
+}
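+// The *IntSh variants print "$Vd, $Vm, $Vn" instead of "$Vd, $Vn, $Vm"; they
+// are intended for the register-shift intrinsics, where the assembly syntax
+// names the shifted vector before the shift-amount vector.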
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
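+// The "$src1 = $Vd" constraint ties the accumulator input to the destination,
+// so these instructions read and write the same vector register.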
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
+// Neon Long 3-argument intrinsic. The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+ Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
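+// e.g. "vaddl.s16 q0, d1, d2" (illustrative registers) widens each 16-bit
+// lane of both D operands to 32 bits and produces a Q-register result, which
+// is what the (TyQ (ExtOp (TyD ...))) operands in the pattern above express.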
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
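+// e.g. "vaddw.s16 q0, q1, d2" (illustrative registers): only the second,
+// D-sized operand is extended before the operation, as in the pattern above.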
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
+
+// Shift by immediate,
+// both double- and quad-register.
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+                ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+                ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
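+//
+// For reference: a "defm" instantiation composes its own name with the name
+// of each "def" inside the multiclass, so e.g. "defm VADD : N3V_QHSD<...>"
+// below yields records such as VADDv8i8, VADDv4i16, VADDv2i32 and VADDv1i64
+// for the D-register forms and VADDv16i8, VADDv8i16, VADDv4i32 and VADDv2i64
+// for the Q-register forms; the explicit patterns later in this file
+// (VMULslv8i16, VQDMULHslv4i32, etc.) refer to records named this way.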
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
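+// Note: the f32 variants above reuse the 32-bit integer encoding with the F
+// bit (Inst{10}) forced to 1, and, like all NEON compares, they produce
+// all-ones/all-zeros integer masks, which is why their results are typed as
+// v2i32/v4i32 rather than as floating-point vectors.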
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+ v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+ v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size of 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, Dt, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp, Commutable>;
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp, Commutable>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp, Commutable>;
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp, Commutable>;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp, Commutable>;
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp, Commutable>;
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp, Commutable>;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
+ IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
+
+// Neon Wide 3-register vector operations,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+
+// Neon Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
+}
+
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
+}
+
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
+// Neon 3-argument intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+}
+
+
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
+// Neon Long 3-argument intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
+}
+
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
+
+// Neon Pairwise long 2-register intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon Pairwise long 2-register accumulate intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon 2-register vector shift by immediate,
+// with f of N2RegVShLFrm (left shifts) or N2RegVShRFrm (right shifts)
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Accumulate vector operations,
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Insert vector operations,
+// with f of N2RegVShLFrm (VSLI) or N2RegVShRFrm (VSRI)
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift Long operations,
+// element sizes of 8, 16, 32 bits:
+multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+// Neon Shift Narrow operations,
+// element sizes of 16, 32, 64 bits:
+multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, shr_imm8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, shr_imm16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, shr_imm32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
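+
+// A note on the shift-by-immediate multiclasses above: the upper bits of the
+// imm6 field select the element size (imm6 = 001xxx for 8-bit, 01xxxx for
+// 16-bit and 1xxxxx for 32-bit elements; the 64-bit forms instead set op7,
+// the L bit, and use all six bits), while the remaining bits, filled in from
+// the immediate operand ($SIMM), encode the shift amount.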
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+//===----------------------------------------------------------------------===//
+
+// Vector Add Operations.
+
+// VADD : Vector Add (integer and floating-point)
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
+// VADDL : Vector Add Long (Q = D + D)
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
+// VADDW : Vector Add Wide (Q = Q + D)
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+// VHADD : Vector Halving Add
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
+// VRHADD : Vector Rounding Halving Add
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+// VQADD : Vector Saturating Add
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
+// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
+// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
+
+// Vector Multiply Operations.
+
+// VMUL : Vector Multiply (integer, polynomial and floating-point)
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
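+
+// The three patterns above (and the analogous ones for VQDMULH, VQRDMULH,
+// VMLA and VMLS below) fold a multiply by a duplicated Q-register lane into
+// the D-register lane form of the instruction: EXTRACT_SUBREG pulls out the
+// D subregister that contains the requested lane, and the DSubReg_* and
+// SubReg_*_lane transforms compute that subregister index and the lane's
+// position within it from the original lane number.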
+
+// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+
+// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
+
+// Vector Multiply-Accumulate and Multiply-Subtract Operations.
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
+
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
+
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturating Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
+
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
+
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$Vd, $Vm, #0", NEONvclez>;
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
+
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$Vd, $Vm, #0", NEONvcltz>;
+
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+// VTST : Vector Test Bits
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+
+// Vector Bitwise Operations.
+
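+// vnotd/vnotq express a bitwise NOT as an XOR with an all-ones vector, so
+// that the generic and/or/xor DAG nodes can feed the VBIC, VORN, VMVN, and
+// VBSL patterns below.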
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+
+
+// VAND : Vector Bitwise AND
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
+
+// VEOR : Vector Bitwise Exclusive OR
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
+
+// VORR : Vector Bitwise OR
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
+
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
+// VBIC : Vector Bitwise Bit Clear (AND NOT)
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+// VORN : Vector Bitwise OR NOT
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+}
+
+// VMVN : Vector Bitwise NOT
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+
+// VBIF : Vector Bitwise Insert if False
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT : Vector Bitwise Insert if True
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
+// for equivalent operations with different register constraints; it just
+// inserts copies.
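+// As a rough sketch of the select semantics (per the ARM architecture
+// definitions, with Vd also acting as an input):
+//   vbsl Vd, Vn, Vm : Vd = (Vn & Vd) | (Vm & ~Vd)   -- Vd is the mask
+//   vbit Vd, Vn, Vm : Vd = (Vn & Vm) | (Vd & ~Vm)   -- insert Vn where Vm is set
+//   vbif Vd, Vn, Vm : Vd = (Vn & ~Vm) | (Vd & Vm)   -- insert Vn where Vm is clear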
+
+// Vector Absolute Differences.
+
+// VABD : Vector Absolute Difference
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "s", int_arm_neon_vabds, 1>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 1>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+
+// VABDL : Vector Absolute Difference Long (Q = | D - D |)
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+
+// VABA : Vector Absolute Difference and Accumulate
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
+
+// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
+
+// Vector Maximum and Minimum.
+
+// VMAX : Vector Maximum
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+
+// VMIN : Vector Minimum
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
+
+// Vector Pairwise Operations.
+
+// VPADD : Vector Pairwise Add
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
+
+// VPADDL : Vector Pairwise Add Long
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
+ int_arm_neon_vpaddls>;
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
+ int_arm_neon_vpaddlu>;
+
+// VPADAL : Vector Pairwise Add and Accumulate Long
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
+ int_arm_neon_vpadals>;
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
+ int_arm_neon_vpadalu>;
+
+// VPMAX : Vector Pairwise Maximum
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+
+// VPMIN : Vector Pairwise Minimum
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+
+// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
+
+// VRECPE : Vector Reciprocal Estimate
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe", "u32",
+ v2i32, v2i32, int_arm_neon_vrecpe>;
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe", "u32",
+ v4i32, v4i32, int_arm_neon_vrecpe>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
+
+// VRECPS : Vector Reciprocal Step
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
+
+// VRSQRTE : Vector Reciprocal Square Root Estimate
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte", "u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte", "u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
+
+// VRSQRTS : Vector Reciprocal Square Root Step
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
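+// Note: VRECPE/VRSQRTE only produce an initial estimate. VRECPS computes
+// (2 - Vn*Vm) and VRSQRTS computes (3 - Vn*Vm)/2, the correction factors for
+// the usual Newton-Raphson refinement of 1/x and 1/sqrt(x).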
+
+// Vector Shifts.
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu>;
+
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+ ValueType OpTy, SDNode OpNode>
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, OpNode> {
+ let Inst{21-16} = op21_16;
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+ v8i16, v8i8, NEONvshlli>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+ v4i32, v4i16, NEONvshlli>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+ v2i64, v2i32, NEONvshlli>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+ NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+ NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+ NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+ NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
+
+// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
+ NEONvqrshrns>;
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
+ NEONvqrshrnu>;
+
+// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
+ NEONvqrshrnsu>;
+
+// VSRA : Vector Shift Right and Accumulate
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+// VRSRA : Vector Rounding Shift Right and Accumulate
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+
+// VSLI : Vector Shift Left and Insert
+defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
+// VSRI : Vector Shift Right and Insert
+defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
+
+// Vector Absolute and Saturating Absolute.
+
+// VABS : Vector Absolute Value
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
+ int_arm_neon_vabs>;
+def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAD, "vabs", "f32",
+ v2f32, v2f32, int_arm_neon_vabs>;
+def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAQ, "vabs", "f32",
+ v4f32, v4f32, int_arm_neon_vabs>;
+
+// VQABS : Vector Saturating Absolute Value
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
+ int_arm_neon_vqabs>;
+
+// Vector Negate.
+
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+ int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+ int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+ int_arm_neon_vclz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt", "8",
+ v8i8, v8i8, int_arm_neon_vcnt>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt", "8",
+ v16i8, v16i8, int_arm_neon_vcnt>;
+
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+let neverHasSideEffects = 1 in {
+// Pseudo vector move instructions for QQ and QQQQ registers. These should
+// be expanded after register allocation is completed.
+def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
+ NoItinerary, []>;
+
+def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
+ NoItinerary, []>;
+} // neverHasSideEffects
+
+// VMOV : Vector Move (Immediate)
+
+let isReMaterializable = 1 in {
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+} // isReMaterializable
+
+// VMOV : Vector Get Lane (move scalar to ARM core register)
+
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
+def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+ (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+ (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+ (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane))>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+
+
+// VMOV : Vector Set Lane (move ARM core register to scalar)
+
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+}
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+
+//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+
+// VDUP : Vector Duplicate (from ARM core register to all elements)
+
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+
+// VDUP : Vector Duplicate Lane (from scalar to all elements)
+
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ imm:$lane)))]>;
+
+// Inst{19-16} is partially specified depending on the element size.
+
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+ let Inst{19} = lane{0};
+}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32q DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+// VMOVN : Vector Narrowing Move
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
+// VQMOVN : Vector Saturating Narrowing Move
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
+// VMOVL : Vector Lengthening Move
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+
+// Vector Conversions.
+
+// VCVT : Vector Convert Between Floating-Point and Integers
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v4i32, v4f32, fp_to_sint>;
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v4i32, v4f32, fp_to_uint>;
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v4f32, v4i32, sint_to_fp>;
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v4f32, v4i32, uint_to_fp>;
+
+// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+ (v2f32 DPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+ (v4f32 QPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ NVTBLFrm, IIC_VTB4,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ NVTBLFrm, IIC_VTBX3,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "$orig = $Vd", []>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+ IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
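+// These patterns place the scalar f32 operands into lane 0 (ssub_0) of
+// otherwise undefined D registers, run the corresponding NEON D-register
+// instruction, and extract lane 0 of the result, so scalar FP math can be
+// done on the NEON unit when that is preferred.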
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 000000000000..bfe83eceb13f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1397 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = imm0_255_asmoperand;
+}
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break immediates up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
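+// For example, 0x00AB0000 is 0xAB shifted left by 16, so it is matched by
+// thumb_immshifted; thumb_immshifted_val then yields 0xAB and
+// thumb_immshifted_shamt yields 16 (assuming the ARM_AM helpers behave as
+// their names suggest).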
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
+// Immediate scaled by 4.
+def t_imm_s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Define Thumb specific addressing modes.
+
+let OperandType = "OPERAND_PCREL" in {
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+}
+}
+
+def MemModeRegThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeRegThumb";
+ let SuperClasses = [];
+}
+
+def MemModeImmThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeImmThumb";
+ let SuperClasses = [];
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg (with 1-, 2-, and 4-byte access variants)
+//
+def t_addrmode_rrs1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
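+// Illustrative assembly forms for the addressing modes above (not from the
+// original source):
+//   t_addrmode_rr/rrs4 : ldr  r0, [r1, r2]
+//   t_addrmode_is4     : ldr  r0, [r1, #28]   (imm5 * 4)
+//   t_addrmode_is2     : ldrh r0, [r1, #6]    (imm5 * 2)
+//   t_addrmode_is1     : ldrb r0, [r1, #3]
+//   t_addrmode_sp      : ldr  r0, [sp, #16]   (imm8 * 4)
+//   t_addrmode_pc      : ldr  r0, label       (pc-relative, imm8 * 4)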
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def tADJCALLSTACKUP :
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+def tADJCALLSTACKDOWN :
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+// T1Disassembly - A simple class to make encoding some disassembly patterns
+// easier and less verbose.
+class T1Disassembly<bits<2> op1, bits<8> op2>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = op1;
+ let Inst{7-0} = op2;
+}
+
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x00>; // A8.6.110
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x10>; // A8.6.410
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x20>; // A8.6.408
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x30>; // A8.6.409
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x40>; // A8.6.157
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
+
+def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = 1; // Big-Endian
+ let Inst{2-0} = 0b000;
+}
+
+def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = 0; // Little-Endian
+ let Inst{2-0} = 0b000;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
+}
+
+// For both thumb1 and thumb2.
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
+}
+
+// PC relative add (ADR).
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, pc, $rhs", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ // A6.2 & A8.6.10
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
+
+// ADD <Rd>, sp, #<imm8>
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $sp, $rhs", []>,
+ T1Encoding<{1,0,1,0,1,?}> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
+
+// ADD sp, sp, #<imm7>
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $rhs", []>,
+ T1Misc<{0,0,0,0,0,?,?}> {
+ // A6.2.5 & A8.6.8
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
+
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "sub\t$dst, $rhs", []>,
+ T1Misc<{0,0,0,0,1,?,?}> {
+ // A6.2.5 & A8.6.214
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
+
+// ADD <Rm>, sp
+def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add\t$dst, $rhs", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T1
+ bits<4> dst;
+ let Inst{7} = dst{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = dst{2-0};
+}
+
+// ADD sp, <Rm>
+def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add\t$dst, $rhs", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T2
+ bits<4> dst;
+ let Inst{7} = 1;
+ let Inst{6-3} = dst;
+ let Inst{2-0} = 0b101;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ }
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+ [(ARMretflag)], (tBX LR, pred:$p)>;
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+ 2, IIC_Br, [],
+ (tBX GPR:$Rm, pred:$p)>;
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [SP] in {
+ // Also used for Thumb2
+ def tBL : TIx2<0b11110, 0b11, 1,
+ (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
+ "bl\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<0b11110, 0b11, 0,
+ (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ "blx\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ // Also used for Thumb2
+ def tBLr9 : TIx2<0b11110, 0b11, 1,
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ IIC_Br, "bl${p}\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ IIC_Br, "blx${p}\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>,
+ T1Special<{1,1,1,?}> {
+ // A6.2.3 & A8.6.24
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ }
+
+ // Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
+ let Defs = [LR] in
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target),
+ 4, IIC_Br, [], (tBL t_bltarget:$target)>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ T1BranchCond<{1,1,0,1}> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+  def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+                  "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+}
+
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
+ // on Darwin), so it's in ARMInstrThumb2.td.
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsDarwin]>;
+ }
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tB t_brtarget:$dst)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ }
+}
+
+
+// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16 {
+ bits<8> imm;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
+}
+
+// The assembler uses 0xDEFE for a trap instruction.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ "trap", [(trap)]>, Encoding16 {
+ let Inst = 0xdefe;
+}
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+// Loads: reg/reg and reg/imm5
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
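+// For illustration: each defm below expands to a reg/reg and a reg/imm5
+// variant, e.g. tLDR becomes tLDRr and tLDRi, and tLDRB becomes tLDRBr and
+// tLDRBi; the non-instruction patterns at the end of this file refer to those
+// expanded names directly.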
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+
+let AddedComplexity = 10 in
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
+ "ldrsb", "\t$dst, $addr",
+ [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+let AddedComplexity = 10 in
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
+ "ldrsh", "\t$dst, $addr",
+ [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Load tconstpool
+// FIXME: Use ldr.n to work around a Darwin assembler bug.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", ".n\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// FIXME: Remove this entry when the above ldr.n workaround is fixed.
+// For disassembly use only.
+def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [/* disassembly only */]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bits<6> T1Enc,
+ bit L_bit> {
+ def IA :
+ T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+ def IA_UPD :
+ T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+}
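+// For illustration: the defms below expand to an increment-after form and its
+// writeback twin, e.g. tLDM becomes tLDMIA and tLDMIA_UPD.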
+
+// These require the base address either to be written back or to be one of
+// the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
+ {1,1,0,0,1,?}, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
+ {1,1,0,0,0,?}, 0>;
+
+} // neverHasSideEffects
+
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
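+// Note (general TableGen behaviour, not specific to this file): the bit-field
+// names declared in these helper classes (Rd, Rn, Rm, Rdn, imm8, ...) are
+// matched by name against the operands in the (outs)/(ins) dags of each
+// instruction, e.g. the bits<3> Rm field of T1sIGenEncode is populated from
+// the $Rm operand of each instruction that uses the class.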
+
+// Add with carry register
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+
+// Add immediate
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+
+// Add register
+let isCommutable = 1 in
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+
+let neverHasSideEffects = 1 in
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+// AND register
+let isCommutable = 1 in
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+
+// ASR immediate
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// ASR register
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+
+// BIC register
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+
+// CMN register
+let isCompare = 1, Defs = [CPSR] in {
+// FIXME: CMN is disabled because the condition codes come out backwards from
+//        compare expectations. Compare-to-zero still works out, just not the
+//        relational forms.
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
+
+// CMP immediate
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
+}
+
+// CMP register
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
+}
+} // isCompare = 1, Defs = [CPSR]
+
+
+// XOR register
+let isCommutable = 1 in
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSL immediate
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSL register
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSR immediate
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSR register
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
+
+// A7-73: MOV(2) - mov setting flag.
+
+let neverHasSideEffects = 1 in {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+ 2, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", "", []>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16 {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+} // neverHasSideEffects
+
+// Multiply register
+let isCommutable = 1 in
+def tMUL : // A8.6.105 T1
+ T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMUL32,
+ "mul", "\t$Rdn, $Rm, $Rdn",
+ [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
+let isCommutable = 1 in
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+
+// Subtract with carry register
+let Uses = [CPSR] in
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+
+// Subtract immediate
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+
+// TODO: A7-96: STMIA - store multiple.
+
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
+ def tMOVCCr_pseudo :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ NoItinerary,
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 2, IIC_iALUi, []>;
+
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 2, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved registers, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, 0, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+
+// ConstantPool, GlobalAddress
+def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// Indirect calls to ARM routines
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+ (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
+
+// extload -> zextload
+def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+
+// If it's impossible to use the [r,r] address mode for a sextload, select a
+// zero-extending ldr{b|h} followed by sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : T1Pat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : T1Pat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
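+// Illustrative expansions of the two patterns above (register name rN chosen
+// arbitrarily):
+//   0x2100     ->  movs rN, #0x84 ; lsls rN, rN, #6
+//   0xffffff00 ->  movs rN, #0xff ; mvns rN, rN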
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary,
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way of marking an MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ 2, IIC_iPop_Br, [],
+ (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+ 2, IIC_Br, [(brind GPR:$Rm)],
+ (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 000000000000..c2c6cbcac0f5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,3467 @@
+//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// IT block predicate field
+def it_pred : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+}
+
+// IT block condition mask
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+}
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
+ let PrintMethod = "printT2SOOperand";
+ let MIOperandInfo = (ops rGPR, i32imm);
+}
+
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }]> {
+ let ParserMatchClass = t2_so_imm_asmoperand;
+ let EncoderMethod = "getT2SOImmOpValue";
+}
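+// Illustrative values (not from the original source): 0x000000ab, 0x00ab00ab,
+// 0xab00ab00, 0xabababab and 0xff000000 are all valid t2_so_imm encodings,
+// while 0x00fff000 is not (its significant bits do not fit in a single
+// rotated or splatted 8-bit value).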
+
+// t2_so_imm_not - Match an immediate that is a complement
+// of a t2_so_imm.
+def t2_so_imm_not : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM>;
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_neg_XFORM>;
+
+/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
+def imm1_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 32;
+}]>;
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095 : Operand<i32>,
+ ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 4096;
+}]>;
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM>;
+
+def imm0_255_neg : PatLeaf<(i32 imm), [{
+  return (uint32_t)(-N->getZExtValue()) < 256;
+}], imm_neg_XFORM>;
+
+def imm0_255_not : PatLeaf<(i32 imm), [{
+  return (uint32_t)(~N->getZExtValue()) < 256;
+}], imm_comp_XFORM>;
+
+def lo5AllOne : PatLeaf<(i32 imm), [{
+ // Returns true if all low 5-bits are 1.
+ return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+ let PrintMethod = "printAddrModeImm12Operand";
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2ldrlabel := imm12
+def t2ldrlabel : Operand<i32> {
+ let EncoderMethod = "getAddrModeImm12OpValue";
+}
+
+
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+}
+
+
+// t2addrmode_imm8 := reg +/- imm8
+def t2addrmode_imm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+def t2am_imm8_offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def t2addrmode_imm8s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4Operand";
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+def t2am_imm8s4_offset : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+}
+
+// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+ let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2addrmode_reg := reg
+// Used by load/store exclusive instructions. It exists to enable correct
+// assembly parsing and printing; it is not used for any codegen matching.
+//
+def t2addrmode_reg : Operand<i32> {
+ let PrintMethod = "printAddrMode7Operand";
+ let MIOperandInfo = (ops GPR);
+ let ParserMatchClass = MemMode7AsmOperand;
+}
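+// Illustrative assembly forms for the Thumb2 addressing modes above (not from
+// the original source):
+//   t2addrmode_imm12  : ldr.w r0, [r1, #4092]
+//   t2addrmode_imm8   : ldr   r0, [r1, #-8]
+//   t2addrmode_imm8s4 : ldrd  r0, r1, [r2, #16]
+//   t2addrmode_so_reg : ldr.w r0, [r1, r2, lsl #2]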
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0,
+ string wide = ""> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+ def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+ def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+}
+
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+/// the ".w" suffix to indicate that they are wide.
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">;
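+// Illustrative instantiation (hypothetical; opcode, itineraries and name are
+// chosen for illustration only, not taken from this section):
+//   defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+//                              BinOpFrag<(and node:$LHS, node:$RHS)>,
+//                              "t2AND", 1>;
+// would expand to t2ANDri, t2ANDrr and t2ANDrs, plus the destination-optional
+// assembly aliases defined in T2I_bin_irs above.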
+
+/// T2I_rbin_irs - Same as T2I_bin_irs except the order of the operands is
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
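+
+// For illustration: because the patterns above swap the operands, an
+// instantiation such as
+//
+//   defm t2RSB : T2I_rbin_irs<0b1110, "rsb",
+//                             BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+//
+// (see the Arithmetic Instructions section below) selects "rsb Rd, Rn, #imm"
+// for (sub imm, Rn), i.e. the register is the value being subtracted.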
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // shifted imm
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2ThreeReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
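+
+// For illustration: this multiclass backs the flag-setting forms used for
+// carry generation, e.g. from the Arithmetic Instructions section below:
+//
+//   defm t2ADDS : T2I_bin_s_irs<0b1000, "add",
+//                               IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+//                               BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+//
+// Being isCodeGenOnly, the resulting t2ADDSri/rr/rs records are never parsed
+// from assembly; they exist so isel can model the CPSR def.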
+
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+ }
+ }
+ // 12-bit imm
+ def ri12 : T2I<
+ (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ }
+}
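+
+// For illustration: t2ADD and t2SUB below instantiate this multiclass, e.g.
+//
+//   defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+//                               BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+//
+// which produces t2ADDri (modified-immediate "add.w"), t2ADDri12 (the 12-bit
+// "addw" encoding), t2ADDrr and t2ADDrs.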
+
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and uses the carry
+/// bit. It's not predicable.
+let Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+}
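+
+// For illustration: adc/sbc consume the carry flag via the Uses = [CPSR]
+// above, e.g. from the Arithmetic Instructions section below:
+//
+//   defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+//                   BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;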
+
+// Carry setting variants
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
+ // register
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ 4, IIC_iALUr,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
+ // shifted register
+ def rs : t2PseudoInst<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except it sets the 's' bit, and the
+/// register version is not needed since this is only for codegen.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // shifted register
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
+
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+/// rotate operation that produces a value.
+multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
+ // 5-bit imm
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-21} = 0b010010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-21} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0000;
+ }
+}
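+
+// For illustration: the shift/rotate instructions are all stamped out from
+// this multiclass, e.g.
+//
+//   defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+//
+// giving t2LSLri (shift by imm1_31) and t2LSLrr (shift by register).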
+
+/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
+ // shifted imm
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
+ opc, ".w\t$lhs, $rhs",
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+}
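+
+// For illustration (a sketch; the concrete defm lines fall outside this hunk):
+// the compare/test instructions are expected to instantiate this multiclass
+// roughly as
+//
+//   defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+//                            IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+//                            BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+//
+// The (outs) list is empty and Rd is hard-wired to 0b1111, matching the
+// "flags only, no explicit result" behaviour described above.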
+
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 1;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+
+ // FIXME: Is the pci variant actually needed?
+ def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = ?; // add = (U == '1')
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = 0b1111; // Rn
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-0} = addr{11-0};
+ }
+}
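+
+// For illustration: the loads below (t2LDR, t2LDRB, t2LDRH, t2LDRSB, t2LDRSH)
+// are all instances of this multiclass, e.g.
+//
+//   defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+//                        UnOpFrag<(zextloadi8 node:$Src)>>;
+//
+// so each gets i12, i8, s and pci records; that is why the zextload/extload
+// patterns further down refer to t2LDRBi12, t2LDRBi8, t2LDRBs and t2LDRBpci.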
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0001;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+}
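+
+// For illustration: the stores below mirror T2I_ld, e.g.
+//
+//   defm t2STRB : T2I_st<0b00, "strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+//                        BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+//
+// producing t2STRBi12, t2STRBi8 and t2STRBs (there is no pc-relative store).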
+
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
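+
+// For illustration: the sign/zero extenders below use this multiclass, e.g.
+//
+//   defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+//                              UnOpFrag<(sext_inreg node:$Src, i8)>>;
+//
+// giving t2SXTBr and t2SXTBr_rot (the ", ror #8/16/24" form).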
+
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
+ [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier; no pattern
+// is supported yet.
+multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm, ror $rot", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn,
+ (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
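+
+// For illustration: the extend-and-add instructions below use this, e.g.
+//
+//   defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+//                   BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+//
+// giving t2SXTABrr and t2SXTABrr_rot.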
+
+// DO variant - disassembly only, no pattern
+
+multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b10;
+ // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
+ let Inst{22} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi,
+ []>;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi8 node:$Src)>>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_imm8:$addr),
+ (t2LDRHi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+ (t2LDRHpci tconstpool:$addr)>;
+
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
+// Indexed loads
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+
+def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
+ []>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
+// for disassembly only.
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+
+// Store
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
+
+// Indexed stores
+def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
+
+// ldrd / strd pre / post variants
+// For disassembly only.
+
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
+
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
+ (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+
+def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
+ (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
+
+// T2Ipl (Preload Data/Instruction) signals the memory system about possible
+// future data/instruction accesses. These are for disassembly only.
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
+
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
+ }
+
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0b000000;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
+ }
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+}
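+
+// For illustration: each instantiation below produces four records, named by
+// appending the suffixes above to the defm name; t2LDM expands to t2LDMIA,
+// t2LDMIA_UPD, t2LDMDB and t2LDMDB_UPD (and likewise for t2STM).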
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsThumb2]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0110;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+ UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
+ UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+
+defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
+
+// TODO: SXT(A){B|H}16 - done for disassembly only
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+ UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
+ UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS,
+ node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS,
+ node:$RHS)>>;
+
+// RSB
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity prefers the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
+ (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
+ (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
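+
+// Worked example of the patterns above (illustration only): the DAG node
+// (add GPR:$src, -5) is the canonical form of "sub $src, #5", so it matches
+// the imm0_255_neg pattern and is selected as (t2SUBri GPR:$src, 5). The
+// *_not patterns work the same way, except the operand transform hands the
+// bitwise complement of the immediate to the sbc instruction.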
+
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// and miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Signed/Unsigned saturate -- for disassembly only
+
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm{4-0};
+ let Inst{21} = sh{6};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn),
+ NoItinerary,
+ "usat16", "\t$dst, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate Instructions.
+//
+
+defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
+let Uses = [CPSR] in {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0011;
+}
+}
+
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b01; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b10; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+
+defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>,
+ "t2BIC">;
+
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+}
+
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10100;
+ let Inst{15} = 0;
+}
+
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b11100;
+ let Inst{15} = 0;
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding T1)
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+
+ // GNU as only supports this form of bfi (w/ 4 arguments)
+ let isAsmParserOnly = 1 in
+ def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
+ width_imm:$width),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
+ []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<5> lsbit;
+ bits<5> width;
+ let msb{4-0} = width; // Custom encoder => lsb+width-1
+ let lsb{4-0} = lsbit;
+ }
+}
+
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>,
+ "t2ORN", 0, "">;
+
+// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1>;
+
+
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// FIXME: Disable this pattern on Darwin to work around an assembler bug.
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+let isCommutable = 1 in
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0001; // Multiply and Subtract
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$Rd, rGPR:$Ra),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // isCommutable
+
+// Multiply + accumulate
+def t2SMLAL : T2MulLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMLAL : T2MulLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+} // neverHasSideEffects
+
+// Rounding variants of the below are included for disassembly only
+
+// Most significant word multiply
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// These are for disassembly only.
+
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-22} = 0b01010;
+ let Inst{21-20} = op1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
+}
+
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
+
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
+
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (t2REVSH rGPR:$Rm)>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 0; // BT form
+ let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 1; // TB form
+ let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here; use PKHBT instead.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
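+
+// Illustrative semantics of the pack instructions above (register numbers and
+// shift amounts are arbitrary examples chosen for illustration):
+//   pkhbt r0, r1, r2, lsl #8   ; r0 = (r1 & 0xffff) | ((r2 << 8) & 0xffff0000)
+//   pkhtb r0, r1, r2, asr #16  ; r0 = (r1 & 0xffff0000) | ((r2 >> 16) & 0xffff)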
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPR:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs),
+ (t2CMPrr GPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>;
+
+// FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
+
+defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// FIXME: Pseudo-ize these. For now, just mark codegen only.
+let isCodeGenOnly = 1 in {
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+ IIC_iCMOVi,
+ "movw", "\t$Rd, $imm", []>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0011;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod; // Shift type.
+}
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// Memory barriers protect the atomic sequences.
+let hasSideEffects = 1 in {
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
+}
+}
+
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dsb", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only the full-system option -- for disassembly only
+def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-4} = 0xf3bf8f6;
+ let Inst{3-0} = 0b1111;
+}
+
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+ let Inst{3-0} = 0b1111;
+
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{3-0} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexb", "\t$Rt, $addr", "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexh", "\t$Rt, $addr", "", []>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrex", "\t$Rt, $addr", "", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strex", "\t$Rd, $Rt, $addr", "",
+ []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+}
+
+let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+
+// Clear-Exclusive is for disassembly only.
+def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
+}
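+
+// A sketch of how these pieces are typically combined into an atomic
+// read-modify-write sequence (shown for illustration only; register numbers
+// and the barrier option are arbitrary):
+//   1: ldrex   r1, [r0]        ; load current value and set exclusive monitor
+//      add     r1, r1, #1      ; compute the updated value
+//      strex   r2, r1, [r0]    ; try to store; r2 = 0 on success, 1 on failure
+//      cmp     r2, #0
+//      bne     1b              ; retry if the exclusive access was lost
+//      dmb     sy              ; order the update against later memory accesses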
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+// FIXME: remove when we have a way to mark an MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b.w\t$target",
+ [(br bb:$target)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 1;
+
+ bits<20> target;
+ let Inst{26} = target{19};
+ let Inst{11} = target{18};
+ let Inst{13} = target{17};
+ let Inst{21-16} = target{16-11};
+ let Inst{10-0} = target{10-0};
+}
+
+let isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+
+// FIXME: Add a non-pc based case that can be predicated.
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbb", "\t[$Rn, $Rm]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+}
+
+def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
+}
+} // isNotDuplicable, isIndirectBranch
+
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
+}
+
+// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin version.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in
+ def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (t2B uncondbrtarget:$dst)>,
+ Requires<[IsThumb2, IsDarwin]>;
+}
+
+// IT block
+let Defs = [ITSTATE] in
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, 2, IIC_iALUx,
+ "it$mask\t$cc", "", []> {
+ // 16-bit instruction.
+ let Inst{31-16} = 0x0000;
+ let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> func;
+ let Inst{19-16} = func;
+}
+
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser currently has no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op),
+ [/* For disassembly only; pattern left blank */]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
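+
+// Illustrative mapping of assembly forms to the three definitions above
+// (flag and mode values are arbitrary examples):
+//   cpsie.w if, #16  ->  t2CPS3p  (interrupt flags plus a mode change)
+//   cpsid.w i        ->  t2CPS2p  (interrupt flags only)
+//   cps #16          ->  t2CPS1p  (mode change only)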
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+// Helper class for disassembly only.
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-4} = 0b1111;
+
+ bits<4> opt;
+ let Inst{3-0} = opt;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+
+ bits<4> opt;
+ let Inst{19-16} = opt;
+}
+
+class T2SRS<bits<12> op31_20,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<5> mode;
+ let Inst{4-0} = mode{4-0};
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2SRS<0b111010000010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSDB : T2SRS<0b111010000000,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIAW : T2SRS<0b111010011010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIA : T2SRS<0b111010011000,
+ (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Return From Exception is a system instruction -- for disassembly only
+
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xc000;
+}
+
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
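+
+// As an illustration (constant chosen arbitrarily), materializing 0x12345678
+// is expected to expand to a movw/movt pair along the lines of:
+//   movw rD, #0x5678   ; write the low halfword, clearing the high half
+//   movt rD, #0x1234   ; write the high halfword, keeping the low half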
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+}
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
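+
+// Illustrative shape of the late expansion (label names and the exact
+// pc-relative bias are target details, sketched here only for orientation):
+//   ldr   rD, LCPIx       ; literal-pool load of "addr - (LPCx + bias)"
+//   LPCx: add rD, pc      ; add the pc back in to form the final address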
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+ let Inst{15-14} = op15_14{1-0};
+ let Inst{12} = op12{0};
+}
+
+class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = 0b1111;
+}
+
+def t2MRS : T2MRS<0b111100111110, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]>;
+def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions: the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (the R bit) in bit 4, and bits 3-0
+// contain the mask of the fields to be accessed in the special register.
+def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
+ 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{13} = 0b0;
+ let Inst{11-8} = mask{3-0};
+}
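+
+// Illustrative mask encodings (operands chosen as examples):
+//   msr CPSR_fc, r0    ->  mask{4} = 0 (CPSR), mask{3-0} = 0b1001 (f and c)
+//   msr SPSR_fsxc, r0  ->  mask{4} = 1 (SPSR), mask{3-0} = 0b1111 (all fields)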
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register
+//
+
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : T2Cop<Op, oops, iops,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ pattern> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+ list<dag> pattern = []>
+ : T2Cop<Op, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+/* from ARM core register to coprocessor */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
+ (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+
+/* from coprocessor to ARM core register */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ []>;
+
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2), []>;
+
+def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+ GPR:$Rt2, imm:$CRm)]>;
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.
+//
+
+def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 000000000000..f1f3cb9c2ecd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,1128 @@
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf32ImmOperand";
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM::getVFPf64Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf64ImmOperand";
+}
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ [(store SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+} // neverHasSideEffects
+
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
+
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
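+
+// Rationale, as a worked case: VNMUL negates the product after it has been
+// rounded, while (fneg a) * b rounds the already-negated product. Under a
+// directed rounding mode (for example round toward +infinity) the two results
+// can differ by one ulp, so the fold is only applied when rounding is not
+// sign dependent.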
+
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR]
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let Defs = [FPSCR] in {
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+// Special case encoding: bits 11-8 are 0b1011.
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-6} = 0b11;
+ let Inst{4} = 0;
+}
+
+// Conversions between half precision and single precision. For disassembly only.
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in {
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
+ [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVRRS : AVConv3I<0b11000101, 0b1010,
+ (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // neverHasSideEffects
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in
+def VMOVSRR : AVConv5I<0b11000100, 0b1010,
+ (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX: SPR system reg -> GPR
+// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
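All of the *_Encode classes here split a VFP register number the same way. As a minimal illustrative sketch (the helper names below are not from this patch), the split works out to:

// Single precision Sn: the low bit becomes the extra D/N/M bit and the upper
// four bits fill the 4-bit register field (e.g. Inst{3-0} = Sm{4-1},
// Inst{5} = Sm{0} above).
static inline void splitSReg(unsigned SRegNum, unsigned &Field, unsigned &Bit) {
  Field = (SRegNum >> 1) & 0xF;
  Bit   = SRegNum & 1;
}

// Double precision Dn: the roles are swapped; the low four bits fill the
// field and bit 4 becomes the extra bit (Inst{3-0} = Dm{3-0}, Inst{5} = Dm{4}).
static inline void splitDReg(unsigned DRegNum, unsigned &Field, unsigned &Bit) {
  Field = DRegNum & 0xF;
  Bit   = (DRegNum >> 4) & 1;
}

For example, S7 (0b00111) yields Field = 0b0011 with Bit = 1, while D7 yields Field = 0b0111 with Bit = 0.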
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ let Inst{7} = 1; // s32
+}
+
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ let Inst{7} = 0; // u32
+}
+
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+}
+
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
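As a rough sketch of what the $fbits operand in the conversions below means (illustrative helpers, not code from this patch; rounding and saturation are left to the hardware and not modeled here):

#include <cstdint>

// A fixed-point value with #fbits fractional bits is the real value scaled by
// 2^fbits; converting back divides by the same factor.
static int32_t floatToFixed(float V, unsigned FBits) {
  return (int32_t)(V * (float)(1u << FBits));  // e.g. vcvt.s32.f32 ..., #8 ~ V * 256
}
static float fixedToFloat(int32_t V, unsigned FBits) {
  return (float)V / (float)(1u << FBits);      // e.g. vcvt.f32.s32 ..., #8 ~ V / 256
}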
+// FIXME: Marking these as codegen only seems wrong. They are real
+// instructions(?)
+let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+
+// FP to Fixed-Point:
+
+def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+
+//===----------------------------------------------------------------------===//
+// FP Multiply-Accumulate Operations.
+//
+
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+let neverHasSideEffects = 1 in {
+def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
+ 4, IIC_fpUNA64,
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
+ 4, IIC_fpUNA32,
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Move from VFP System Register to ARM core register.
+//
+
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
+ let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
+}
+
+// APSR is the application level alias of CPSR. This instruction moves the
+// FPSCR N, Z, C, V flags to APSR.
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
+ let Inst{27-20} = 0b11101110;
+ let Inst{19-16} = opc19_16;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
+
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_fpUNA64,
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+ let Inst{19} = imm{31};
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision.
+ let Inst{7-4} = 0b0000;
+}
+
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{19} = imm{31}; // The immediate is handled as a double.
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision.
+ let Inst{7-4} = 0b0000;
+}
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 000000000000..45b7e48d0cfb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,336 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
+// to preserve more context and manipulate the stack directly). Instead,
+ // we write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback();
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save the caller-saved registers, since they may hold arguments for the
+ // real target function. We have to act as if this whole compilation
+ // callback doesn't exist as far as the caller is concerned, so we can't
+ // just preserve the callee-saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry. Pass it as
+ // the argument to the C part of the callback.
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+ // 6-20 | D0..D7 | Saved VFP registers
+ // +--------+
+ //
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ // Restore VFP caller-saved registers.
+ "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, with two
+ // updating load instructions.
+ "ldmia sp!, {r0, r1, r2, r3, lr}\n"
+ "ldr pc, [sp], #4\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
+ void *PtrAddr = JCE.allocIndirectGV(
+ GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ void *Addr;
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16 bytes in size and 4-byte aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ DEBUG(if (F)
+ errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
+ << "] for GV '" << F->getName() << "'\n";
+ else
+ errs() << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n");
+ }
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ } else {
+ // The stub is 8 bytes in size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16 bytes in size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+
+ return Addr;
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the calculated immed value.
+ // The VFP immediate offset is encoded in words, i.e. scaled by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC.
+ *((intptr_t*)RelocPos) |=
+ getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+ // It is necessary to calculate the correct value of the signed_immed_24
+ // field. We subtract the base addr from the target addr to form a
+ // byte offset, which must be within the range -33554432 to +33554428.
+ // Then, we set the signed_immed_24 field of the instruction to bits
+ // [25:2] of the byte offset. More details in the ARM ARM, p. A4-11.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+ assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ }
+ }
+}
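The constant-pool cases above reduce to a small amount of bit twiddling on the instruction word. A standalone sketch of that computation (the function and parameter names are illustrative, and int64_t stands in for intptr_t):

#include <cstdint>

// Mirrors the reloc_arm_cp_entry / reloc_arm_vfp_cp_entry / reloc_arm_relative
// handling above: form a byte offset from PC+8, fold its sign into the U bit
// (bit 23), scale VFP offsets down to words, and OR the result into the
// instruction word. Setting Rn to PC is done separately in the code above.
static uint32_t applyPCRelFixup(uint32_t Insn, int64_t RelocPos,
                                int64_t Target, bool IsVFP) {
  int64_t Offset = Target - (RelocPos + 8);  // The PC reads as the instruction + 8.
  if (Offset >= 0) {
    Insn |= 1u << 23;                        // U = 1: add the offset.
  } else {
    Insn &= ~(1u << 23);                     // U = 0: subtract the offset.
    Offset = -Offset;
  }
  if (IsVFP)
    Offset >>= 2;                            // VFP immediates count words, not bytes.
  return Insn | (uint32_t)Offset;
}

The reloc_arm_branch case uses the same PC+8 rule but keeps bits [25:2] of the offset as the signed 24-bit branch immediate instead.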
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 000000000000..2f9792813d32
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,183 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ /// Initialize - Initialize internal state for the function being JITted.
+ /// Resize the constant pool id to CONSTPOOL_ENTRY address map and the
+ /// jump table id to jump table base map; remember whether the codegen
+ /// relocation model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+ /// getJumpTableBaseAddr - The ARM target inlines all jump tables within
+ /// the text section of the function. This returns the address of the base
+ /// of the jump table with the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+ /// getIndirectSymAddr - Retrieve the address of the indirect symbol for the
+ /// symbol located at Addr. Returns 0 if the indirect symbol
+ /// has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+ /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+ /// if it is not already resolved. Constant pool entries must be resolved
+ /// by the ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 000000000000..c6efea1d7806
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,1839 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+
+/// ARMLoadStoreOpt - A post-register-allocation pass that combines load / store
+/// instructions to form ldm / stm instructions.
+
+namespace {
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+ bool isThumb2;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Reg;
+ bool isKill;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ void MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &MemOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ break;
+ case ARM::STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ break;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ break;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ break;
+ case ARM::VLDRS:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
+ }
+ break;
+ case ARM::VSTRS:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
+ }
+ break;
+ case ARM::VLDRD:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
+ }
+ break;
+ case ARM::VSTRD:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
+ }
+ break;
+ }
+
+ return 0;
+}
+
+namespace llvm {
+ namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+
+ return ARM_AM::bad_am_submode;
+}
+
+ } // end namespace ARM_AM
+} // end namespace llvm
+
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STRi12 || isT2i32Store(Opc);
+}
+
+/// MergeOps - Create and insert an LDM or STM with Base as the base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill,
+ int Opcode, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned Scratch, DebugLoc dl,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
+ Mode = ARM_AM::ib;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ Mode = ARM_AM::da;
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // Check if this is a supported opcode before we insert instructions to
+ // calculate a new base register.
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+
+ // If the starting offset isn't zero, insert an instruction to materialize
+ // a new base. But only do so if it is cost effective, i.e. when merging
+ // more than two loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (isi32Load(Opcode))
+ // If it is a load, just use one of the destination registers as the
+ // new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Otherwise, use the scratch register as the new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+ if (Offset < 0) {
+ BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = isThumb2
+ ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ // FIXME: Try t2ADDri12 or t2SUBri12?
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // The new base is always killed right after its use.
+ }
+
+ bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VLDRD);
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ if (!Opcode) return false;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ return true;
+}
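Condensed into one place, the submode choice made at the top of MergeOps is the following (an illustrative helper, not part of the pass):

// For NumRegs 4-byte slots whose first element sits at 'Offset' from the base
// register, pick the load/store-multiple addressing submode as above.
static const char *pickSubMode(int Offset, int NumRegs,
                               bool haveIBAndDA, bool isNotVFP) {
  if (Offset == 4 && haveIBAndDA)
    return "ib";                 // base+4 .. base+4*NumRegs
  if (Offset == -4 * NumRegs + 4 && haveIBAndDA)
    return "da";                 // base-4*(NumRegs-1) .. base
  if (Offset == -4 * NumRegs && isNotVFP)
    return "db";                 // base-4*NumRegs .. base-4
  if (Offset == 0)
    return "ia";                 // base .. base+4*(NumRegs-1)
  return "ia, after materializing a new base register";
}

For example, four core-register stores starting at base-16 become a db-mode STMDB, while the same run starting at base becomes an ia-mode STMIA.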
+
+// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
+// success.
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ // First calculate which of the registers should be killed by the merged
+ // instruction.
+ const unsigned insertPos = memOps[insertAfter].Position;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+ if (i == memOpsBegin) {
+ i = memOpsEnd;
+ if (i == e)
+ break;
+ }
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ unsigned Reg = memOps[i].Reg;
+ // If we are inserting the merged operation after an operation that
+ // uses the same register, make sure to transfer any kill flag.
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ // Try to do the merge.
+ MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
+ ++Loc;
+ if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
+ Pred, PredReg, Scratch, dl, Regs))
+ return;
+
+ // Merge succeeded, update records.
+ Merges.push_back(prior(Loc));
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+ assert(Idx >= 0 && "Cannot find killing operand");
+ memOps[j].MBBI->getOperand(Idx).setIsKill(false);
+ memOps[j].isKill = false;
+ }
+ memOps[i].isKill = true;
+ }
+ MBB.erase(memOps[i].MBBI);
+ // Update this memop to refer to the merged instruction.
+ // We may need to move kill flags again.
+ memOps[i].Merged = true;
+ memOps[i].MBBI = Merges.back();
+ memOps[i].Position = insertPos;
+ }
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+void
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned insertAfter = SIndex;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ DebugLoc dl = Loc->getDebugLoc();
+ const MachineOperand &PMO = Loc->getOperand(0);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(PReg);
+ unsigned Count = 1;
+ unsigned Limit = ~0U;
+
+ // vldm / vstm limits are 32 for S variants, 16 for D variants.
+
+ switch (Opcode) {
+ default: break;
+ case ARM::VSTRS:
+ Limit = 32;
+ break;
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRS:
+ Limit = 32;
+ break;
+ }
+
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(Reg);
+ // Register numbers must be in ascending order. For VFP / NEON load and
+ // store multiples, the registers must also be consecutive and within the
+ // limit on the number of registers per instruction.
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
+ ((isNotVFP && RegNum > PRegNum) ||
+ ((Count < Limit) && RegNum == PRegNum+1))) {
+ Offset += Size;
+ PRegNum = RegNum;
+ ++Count;
+ } else {
+ // Can't merge this in. Try to merge the earlier ones first.
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
+ Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+ MemOps, Merges);
+ return;
+ }
+
+ if (MemOps[i].Position > MemOps[insertAfter].Position)
+ insertAfter = i;
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
+ Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ return;
+}
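The per-register test inside the loop above can be read as the following predicate (an equivalent reformulation with illustrative names, not code from the pass):

// A memop at NewOffset using register number RegNum extends the current run,
// whose last accepted element is at Offset with register number PRegNum and
// which already holds Count registers, only under these conditions.
static bool extendsRun(bool isSP, int NewOffset, int Offset, int Size,
                       unsigned RegNum, unsigned PRegNum,
                       unsigned Count, unsigned Limit, bool isNotVFP) {
  if (isSP || NewOffset != Offset + Size)
    return false;                                // Must be the next contiguous slot.
  if (isNotVFP)
    return RegNum > PRegNum;                     // Core registers: ascending order.
  return Count < Limit && RegNum == PRegNum + 1; // VFP: consecutive and capped.
}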
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::tSUBspi &&
+ MI->getOpcode() != ARM::SUBri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::tADDspi &&
+ MI->getOpcode() != ARM::ADDri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ break;
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ break;
+ case ARM::VLDMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ break;
+ case ARM::VLDMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ break;
+ case ARM::VSTMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ break;
+ case ARM::VSTMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ bool BaseKill = MI->getOperand(0).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_PRE;
+ case ARM::STRi12:
+ return ARM::STR_PRE;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_POST;
+ case ARM::STRi12:
+ return ARM::STR_POST;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
+/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
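+///
+/// For example (illustrative):
+/// ldr rd, [rn]
+/// rn := rn + 4
+/// =>
+/// ldr rd, [rn], #4      (post-indexed)
+///
+/// rn := rn + 4
+/// ldr rd, [rn]
+/// =>
+/// ldr rd, [rn, #4]!     (pre-indexed)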
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ // AM2 - 12 bits, thumb2 - 8 bits.
+ unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (!isAM5 &&
+ isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
+ MBB.erase(PrevMBBI);
+ }
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (!isAM5 &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned Offset = 0;
+ if (isAM2)
+ Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ else if (!isAM5)
+ Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+
+ if (isAM5) {
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2)
+ // LDR_PRE, LDR_POST,
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM2)
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if the instruction is a memory operation that this
+/// pass is capable of operating on.
+static bool isMemoryOp(const MachineInstr *MI) {
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return MI->getOperand(1).isReg();
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
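+/// getMemoryOpOffset - Return the signed byte offset encoded in MI's memory
+/// operand. Immediate-offset forms return the field directly; AM3/AM5 forms
+/// decode the add/sub bit, and AM5 offsets are additionally scaled by 4.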
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
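+/// InsertLDR_STR - Emit a single load or store with the given operands and
+/// register flags; used by FixInvalidRegPairOp when an LDRD/STRD has to be
+/// split into two single-register memory ops.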
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Offset, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ bool OffKill, bool OffUndef,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isT2) {
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+}
+
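+/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair does not
+/// satisfy the even/odd pairing constraint: with ascending registers and a
+/// zero offset it becomes an LDM/STM, otherwise it is split into two single
+/// loads/stores.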
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ return false;
+
+ MachineBasicBlock::iterator NewBBI = MBBI;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ NewBBI = llvm::prior(MBBI);
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ } else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
+ }
+ return false;
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+ SmallVector<MachineBasicBlock::iterator,4> Merges;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ int Offset = getMemoryOpOffset(MBBI);
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reached the end of the block; try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
+ ++Position;
+ ++MBBI;
+ if (MBBI == E)
+ // Reached the end of the block; try merging the memory instructions.
+ TryMerge = true;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register.
+ unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ Merges.clear();
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps, Merges);
+
+ // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+ if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
+ ++NumMerges;
+ NumMerges += Merges.size();
+
+ // Try folding preceding/trailing base inc/dec into those load/store
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ ++NumMerges;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ } else if (NumMemOps == 1) {
+ // Try folding preceding/trailing base inc/dec into the single
+ // load/store.
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ ++NumMerges;
+ RS->forward(prior(MBBI));
+ }
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If the iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
+/// ("bx lr" or "mov pc, lr") into the preceding stack restore so it
+/// directly restores the value of LR into PC.
+/// ldmfd sp!, {..., lr}
+/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
+/// =>
+/// ldmfd sp!, {..., pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
+ MachineInstr *PrevMI = prior(MBBI);
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+ isThumb2 = AFI->isThumb2Function();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves loads /
+/// stores from consecutive locations closer together to make it more likely
+/// they will be combined later.
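+///
+/// For example (illustrative): given
+///   r1 = ldr [r0]; <unrelated insts>; r2 = ldr [r0, #4]
+/// the second load is moved next to the first so that the pair can be turned
+/// into an ldrd here or an ldm by the post-allocation pass.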
+
+namespace {
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetData *TD;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ MachineRegisterInfo *MRI;
+ MachineFunction *MF;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getTargetData();
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ MRI = &Fn.getRegInfo();
+ MF = &Fn;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
+ while (++I != E) {
+ if (I->isDebugValue() || MemOps.count(&*I))
+ continue;
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ return false;
+ if (isLd && MCID.mayStore())
+ return false;
+ if (!isLd) {
+ if (MCID.mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (MCID.mayStore())
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
+ return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
+ }
+ }
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // OK if we are only moving a small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset, unsigned &PredReg,
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
+ unsigned Scale = 1;
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDRi12)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STRi12)
+ NewOpc = ARM::STRD;
+ else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else
+ return false;
+
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !(*Op0->memoperands_begin())->getValue() ||
+ (*Op0->memoperands_begin())->isVolatile())
+ return false;
+
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ const Function *Func = MF->getFunction();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 needs 8-byte alignment
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ if (isT2) {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+ return false;
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+ }
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ Pred = llvm::getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
+ return true;
+}
+
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
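+ // e.g. loads at offsets 0, 4 and 8 from the same base with no intervening
+ // redefinition of the base form one run; a gap (0, 4, 12) or a change of
+ // opcode ends the run.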
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastOpcode = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ unsigned Opcode = Op->getOpcode();
+ if (LastOpcode && Opcode != LastOpcode)
+ break;
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ LastOpcode = Opcode;
+ if (++NumMove == 8) // FIXME: Tune this limit.
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
+
+ // Be conservative: if the instructions are too far apart, don't move them;
+ // we want to limit the increase in register pressure.
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
+ if (DoMove)
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ ++InsertPos;
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
+ unsigned NewOpc = 0;
+ int Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg,
+ Offset, PredReg, Pred, isT2)) {
+ Ops.pop_back();
+ Ops.pop_back();
+
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ MRI->constrainRegClass(EvenReg, TRC);
+ MRI->constrainRegClass(OddReg, TRC);
+
+ // Form the pair instruction.
+ if (isLd) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always be reg0 since we're transforming LDRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumLDRDFormed;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg);
+ // FIXME: We're converting from STRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always be reg0 since we're transforming STRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isCall() || MCID.isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opc = MI->getOpcode();
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination seen earlier).
+ // Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
new file mode 100644
index 000000000000..39be3f0e39f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1314 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "ARMInstrInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+
+public:
+ ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {
+ }
+
+ ~ARMMCCodeEmitter() {}
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumb2() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+ }
+ bool isTargetDarwin() const {
+ Triple TT(STI.getTargetTriple());
+ Triple::OSType OS = TT.getOS();
+ return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ }
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups)const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
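+ /// (An so_imm is an 8-bit value rotated right by twice the 4-bit rotate
+ /// field; e.g. 0x3F0 encodes as immed_8 = 0x3F with rotate_imm = 14,
+ /// giving 0xE3F.)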
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ switch (MI.getOperand(Op).getImm()) {
+ default: assert (0 && "Not a valid rot_imm value!");
+ case 0: return 0;
+ case 8: return 1;
+ case 16: return 2;
+ case 24: return 3;
+ }
+ }
+
+ unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 1;
+ }
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getSsatBitPosValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ // NEON Thumb2 data-processing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+/// EncodeAddrModeOpValues - Encode the register and immediate of a
+/// 'reg +/- imm' operand; returns true if the offset is an add, false if it
+/// is a subtract.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = getARMRegisterNumbering(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN)
+ SImm = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+}
+
+/// Return true if this branch is predicated with a condition other than AL
+/// (always).
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (isThumb2())
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Val =
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
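+  // The Thumb2 B.W encoding stores J1 = ~(I1 ^ S) and J2 = ~(I2 ^ S), where S
+  // is the sign bit (bit 23 of the 24-bit offset here); rewrite bits 22 and 21
+  // of the value accordingly.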
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+}
+
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+}
+
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
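+  // For example (illustrative): [r1, r4] encodes as Rn = 1, Rm = 4,
+  // i.e. (4 << 3) | 1 = 0x21.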
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
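+  // For example (illustrative): [r3, #-20] encodes as Reg = 3, U = 0,
+  // imm12 = 20, i.e. (3 << 13) | 20 = 0x6014; [r3, #20] also sets the U bit,
+  // giving 0x7014.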
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+ isAdd = false; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+// FIXME: This routine assumes that a binary expression will always result in
+// a PCRel expression. In reality, that is only true if one or more of the
+// subexpressions is itself PCRel (i.e. "." in asm or some other PC-relative
+// construct), but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ default: assert(0 && "Unexpected expression type");
+ case MCExpr::SymbolRef: return false;
+ case MCExpr::Binary: return true;
+ }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
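+  // e.g. (illustrative)  movw r0, :lower16:sym  /  movt r0, :upper16:sym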
+ const MCExpr *E = MO.getExpr();
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ MCFixupKind Kind;
+ switch (ARM16Expr->getKind()) {
+ default: assert(0 && "Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16_pcrel
+ : ARM::fixup_arm_movt_hi16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+ Fixups.push_back(MCFixup::Create(0, E, Kind));
+ return 0;
+ }
+
+ llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
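+  // For example (illustrative): an immediate post-index offset of #+8 encodes
+  // as imm12 = 8 with the add bit set, i.e. 8 | (1 << 12) = 0x1008.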
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is either Rs, the amount to shift by, or reg0 in which
+ // case the imm contains the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 1 if reg shift, 0 if imm shift
+ // {6-5} = type
+ // If reg shift:
+ // {11-8} = Rs
+ // {7} = 0
+ // else (imm shift)
+ // {11-7} = imm
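+  // For example (illustrative): "r2, lsl #5" is an immediate shift, so
+  // Rm = 2, type = LSL (000), imm = 5, i.e. (5 << 7) | 2 = 0x282.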
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ Value <<= 4;
+ Value |= getARMRegisterNumbering(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
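+  // For example (illustrative): an offset of +5 yields 0x105 in the low nine
+  // bits (ADD bit set), while -5 yields 0x005.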
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // 10 bits. The lower 5 bits are the lsb of the mask, the upper 5 bits are
+ // the msb of the mask.
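+  // For example (illustrative): an inverted mask of 0xFFFFF00F (field at
+  // lsb = 4, msb = 11) encodes as 4 | (11 << 5) = 0x164.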
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = CountTrailingZeros_32(v);
+ uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // MSB - 5 bits.
+ uint32_t lsb = MI.getOperand(Op-1).getImm();
+ uint32_t width = MI.getOperand(Op).getImm();
+ uint32_t msb = lsb+width-1;
+ assert (width != 0 && msb < 32 && "Illegal bit width!");
+ return msb;
+}
+
+unsigned ARMMCCodeEmitter::
+getSsatBitPosValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // For SSAT instructions, the bit position is encoded as the operand value
+ // decremented by 1.
+ return MI.getOperand(Op).getImm()-1;
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
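+  // For example (illustrative): LDM with {r0, r4, r7} encodes the GPR bitfield
+  // 0x0091; VLDM with d8-d11 encodes Vd = 8 and a count of 4 * 2 = 8,
+  // i.e. (8 << 8) | 8 = 0x808.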
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = ARM::SPRRegClass.contains(Reg);
+ bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
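+  // An alignment operand of 16 bytes, for instance, encodes as Align = 0b10
+  // (illustrative; see the switch below).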
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6OneLane32AddressOpValue - Encode an addrmode6 register number
+/// along with the alignment operand for use in VST1 and VLD1 with size 32.
+unsigned ARMMCCodeEmitter::
+getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8:
+ case 16: Align = 0x00; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return MO.getReg();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Pseudo instructions don't get encoded.
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+
+ int Size;
+ if (Desc.getSize() == 2 || Desc.getSize() == 4)
+ Size = Desc.getSize();
+ else
+ llvm_unreachable("Unexpected instruction size!");
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
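+  // For example (illustrative): a 32-bit Thumb2 encoding 0xF000D000 is emitted
+  // as the halfword 0xF000 followed by 0xD000.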
+ if (isThumb() && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
new file mode 100644
index 000000000000..2727ba8c8aa5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ assert(0 && "Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
new file mode 100644
index 000000000000..0a2e883deb1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
@@ -0,0 +1,76 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const ARMMCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
new file mode 100644
index 000000000000..7411b599f0fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -0,0 +1,125 @@
+//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower ARM MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMMCExpr.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ switch (MO.getTargetFlags()) {
+ default: {
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on symbol operand");
+ case 0:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
+ break;
+ }
+ break;
+ }
+
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT,
+ OutContext);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+ return MCOperand::CreateExpr(Expr);
+
+}
+
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) {
+ switch (MO.getType()) {
+ default:
+ assert(0 && "unknown operand type");
+ return false;
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+ }
+ return true;
+}
+
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp
new file mode 100644
index 000000000000..a36e47da06d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(macho::RIT_Vanilla);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // Handle Thumb branches.
+ case ARM::fixup_arm_thumb_br:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(2);
+ return true;
+
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ }
+}
+
+void ARMMachObjectWriter::
+RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_ARM_Half;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_ARM_HalfDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+ // These two r_type relocations always have a pair following them, and the
+ // r_length bits are used differently. The encoding of r_length is as
+ // follows:
+ // low bit of r_length:
+ //  0 - :lower16: for movw instructions
+ //  1 - :upper16: for movt instructions
+ // high bit of r_length:
+ //  0 - arm instructions
+ //  1 - thumb instructions
+ // The other half of the relocated expression is in the following pair
+ // relocation entry, in the low 16 bits of the r_address field.
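+  // For example, a Thumb2 movt fixup below sets MovtBit = 1 and ThumbBit = 1,
+  // i.e. r_length = 0b11.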
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ MovtBit = 1;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ ThumbBit = 1;
+ break;
+ }
+
+
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((OtherHalf << 0) |
+ (macho::RIT_Pair << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_Difference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = macho::RIT_Vanilla;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+ report_fatal_error("unknown ARM fixup kind!");
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == macho::RIT_ARM_Half ||
+ RelocType == macho::RIT_ARM_HalfDifference)
+ return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation with
+ // an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 000000000000..138f0c262271
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,250 @@
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialize Align, so it must precede it.
+ bool isThumb;
+
+ /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+ /// to determine if function is compiled under Thumb mode, for that use
+ /// 'isThumb'.
+ bool hasThumb2;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
+ /// LRSpilledForFarJump - True if the LR register has been spilled to
+ /// enable a far jump.
+ bool LRSpilledForFarJump;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+ unsigned PICLabelUId;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
+ /// CPEClones - Track constant pool entry clones created by the Constant
+ /// Island pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ hasThumb2(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ explicit ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
+ bool isThumb2Function() const { return isThumb && hasThumb2; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
+ }
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ assert(0 && "Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 000000000000..18e162000602
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using NEON instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
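[Editor's note] The table that ends above is generated data: each 32-bit entry packs the lowering decision its comment spells out, that is an operation (copy, vrev, vdup0-3, vext1-3, vuzpl/r, vzipl/r, vtrnl/r), two sub-shuffle ids, and a cost figure. The short sketch below shows how one entry can be unpacked. The field widths, the operation numbering, and the helper names (decodePerfectShuffleEntry, OpNames) are assumptions inferred from the entries themselves, not something this patch defines; note also that the stored two-bit cost field appears to be one less than the "Cost N" printed in the comments for non-copy entries.

// Editor's sketch (not part of the patch): unpack one generated table entry.
// Assumed layout, inferred from the entries above: bits 31-30 cost field,
// bits 29-26 operation, bits 25-13 LHS sub-shuffle id, bits 12-0 RHS id.
#include <cstdint>
#include <cstdio>

static const char *const OpNames[] = {
  "copy",  "vrev",  "vdup0", "vdup1", "vdup2", "vdup3", "vext1", "vext2",
  "vext3", "vuzpl", "vuzpr", "vzipl", "vzipr", "vtrnl", "vtrnr"
};

static void decodePerfectShuffleEntry(uint32_t Entry) {
  unsigned CostField = Entry >> 30;            // top two bits
  unsigned OpNum     = (Entry >> 26) & 0xF;    // operation selector
  unsigned LHSID     = (Entry >> 13) & 0x1FFF; // left sub-shuffle id
  unsigned RHSID     = Entry & 0x1FFF;         // right sub-shuffle id
  std::printf("cost-field=%u op=%s lhs=%u rhs=%u\n",
              CostField, OpNum < 15 ? OpNames[OpNum] : "?", LHSID, RHSID);
}

int main() {
  decodePerfectShuffleEntry(363253046U); // table comment: <7,7,7,7>: Cost 1 vdup3 RHS
  decodePerfectShuffleEntry(835584U);    // table comment: <u,1,2,3>: Cost 0 copy LHS
  return 0;
}

The two 13-bit ids presumably identify the sub-shuffles of the two inputs that feed the operation; the decoder above does not interpret them further.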
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 000000000000..1cba1ba591ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,22 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 000000000000..8edfb9a2057f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,33 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+public:
+ ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+};
+
+} // end namespace llvm
+
+#endif
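[Editor's note] ARMRegisterInfo.h and ARMRegisterInfo.cpp above add only a thin shim: the class contributes no members of its own and simply forwards an ARMBaseInstrInfo reference and the subtarget to ARMBaseRegisterInfo, where the register-info logic shared between the ARM and Thumb variants lives. For orientation, a consumer elsewhere in the backend looks roughly like the sketch below; the ARMInstrInfo wiring, the RI member, and getRegisterInfo() are recalled from the surrounding LLVM sources of this era and are not part of this hunk, so treat them as an assumption rather than as the patch's API.

// Rough sketch of the usual consumer (assumed, not in this patch): the ARM
// instruction-info class owns the register-info shim and hands it the pair
// of arguments the constructor above expects.
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"

namespace llvm {
class ARMInstrInfo : public ARMBaseInstrInfo {
  ARMRegisterInfo RI;               // built from this instr-info + subtarget
public:
  explicit ARMInstrInfo(const ARMSubtarget &STI)
      : ARMBaseInstrInfo(STI), RI(*this, STI) {}
  // Callers reach the ARM-specific register info through the instr info.
  const ARMRegisterInfo &getRegisterInfo() const { return RI; }
};
} // end namespace llvm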
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 000000000000..76eb496bde42
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,332 @@
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<6> num, string n> : Register<n> {
+ field bits<6> Num;
+ let Namespace = "ARM";
+}
+
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex; // In a Q reg.
+def ssub_3 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+}
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the S* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
+def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
+}
+
+// VFP3 defines 16 additional double registers
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
+
+// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+ (ssub_3 dsub_1, ssub_1)] in {
+def Q0 : ARMReg< 0, "q0", [D0, D1]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q2 : ARMReg< 2, "q2", [D4, D5]>;
+def Q3 : ARMReg< 3, "q3", [D6, D7]>;
+def Q4 : ARMReg< 4, "q4", [D8, D9]>;
+def Q5 : ARMReg< 5, "q5", [D10, D11]>;
+def Q6 : ARMReg< 6, "q6", [D12, D13]>;
+def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q8 : ARMReg< 8, "q8", [D16, D17]>;
+def Q9 : ARMReg< 9, "q9", [D18, D19]>;
+def Q10 : ARMReg<10, "q10", [D20, D21]>;
+def Q11 : ARMReg<11, "q11", [D22, D23]>;
+def Q12 : ARMReg<12, "q12", [D24, D25]>;
+def Q13 : ARMReg<13, "q13", [D26, D27]>;
+def Q14 : ARMReg<14, "q14", [D28, D29]>;
+def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be a multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
+let SubRegIndices = [qsub_0, qsub_1],
+ CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
+def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1],
+ CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
+
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def FPEXC : ARMReg<8, "fpexc">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r9 == May be reserved as Thread Register
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+ SP, LR, PC)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviour when SP (R13)
+// or PC (R15) is used. The ARM ISA refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Thumb registers are normally R0-R7. Some instructions can still use
+// the general GPR register class above (e.g., MOV).
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note: getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!). This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Scalar single precision floating point register class.
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+
+// Scalar double precision floating point / generic 64-bit vector register
+// class.
+// ARM requires only word alignment for doubles, but performance is better
+// when they are double-word aligned.
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (sequence "D%u", 0, 31)> {
+ // Allocate non-VFP2 registers D16-D31 first.
+ let AltOrders = [(rotl DPR, 16)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of DPR that are accessible with VFP2 (and that therefore also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 16)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1)];
+}
+
+// Subset of DPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 8)> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
+}
+
+// Generic 128-bit vector register class.
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "Q%u", 0, 15)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1)];
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 8)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1)];
+}
+
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 4)> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR qsub_0, qsub_1)];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 4)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR_VFP2 qsub_0, qsub_1)];
+
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+ dsub_4, dsub_5, dsub_6, dsub_7),
+ (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let isAllocatable = 0;
+}
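
The nesting of S, D, and Q registers declared above can be summarized as: for n < 16, Dn aliases {S(2n), S(2n+1)}, and every Qn aliases {D(2n), D(2n+1)}, so only Q0-Q7 reach down to S subregisters. A minimal standalone C++ sketch of that arithmetic (illustration only, not part of the patch; the function name is made up):

#include <cstdio>

// Prints the D (and, where they exist, S) subregisters of each Q register,
// mirroring the SubRegIndices/CompositeIndices declarations above.
static void printQSubRegs(unsigned q) {
  unsigned dLo = 2 * q, dHi = 2 * q + 1;
  std::printf("q%u = {d%u, d%u}", q, dLo, dHi);
  if (dHi < 16) // ssub_* indices exist only for the VFP2 half (Q0-Q7).
    std::printf(" = {s%u, s%u, s%u, s%u}",
                4 * q, 4 * q + 1, 4 * q + 2, 4 * q + 3);
  std::printf("\n");
}

int main() {
  for (unsigned q = 0; q < 16; ++q)
    printQSubRegs(q);
  return 0;
}
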
diff --git a/contrib/llvm/lib/Target/ARM/ARMRelocations.h b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 000000000000..86e7206f2cc6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,62 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
+ };
+ }
+}
+
+#endif
+
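
The header above only declares the relocation kinds; how they are consumed lives elsewhere in the target code. As a hedged illustration of what the accompanying comments imply, here is a self-contained sketch with a local copy of the enum and a small classifier (this is not LLVM's actual relocation resolver):

#include <cstdio>

// Local copy of the kinds declared in ARMRelocations.h, for this sketch only.
enum RelocationType {
  reloc_arm_absolute, reloc_arm_relative, reloc_arm_cp_entry,
  reloc_arm_vfp_cp_entry, reloc_arm_machine_cp_entry, reloc_arm_jt_base,
  reloc_arm_pic_jt, reloc_arm_branch, reloc_arm_movt, reloc_arm_movw
};

// True for the kinds whose comments above describe them as PC-relative.
static bool isPCRelative(RelocationType RT) {
  switch (RT) {
  case reloc_arm_relative:
  case reloc_arm_cp_entry:
  case reloc_arm_vfp_cp_entry:
  case reloc_arm_jt_base:
    return true;
  default:
    return false;
  }
}

int main() {
  std::printf("reloc_arm_cp_entry PC-relative? %d\n",
              (int)isPCRelative(reloc_arm_cp_entry));
  return 0;
}
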
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 000000000000..958c5c647013
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,261 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
+
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 000000000000..8d86c01dc741
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,1034 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LSPipe : FuncUnit; // Load / store pipeline
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
+
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
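
A further property of the operand-cycle lists above is that late-read source operands shorten dependence chains. Under the simplified assumption that the latency between a producer and a consumer is the producer's def cycle minus the consumer's use cycle, clamped to at least one, the A8 multiply-accumulate entry IIC_iMAC16 [6, 1, 1, 4] lets MACs chained through the accumulator run much closer together than their nominal 6-cycle result latency. A small sketch of that arithmetic (not LLVM's scheduler code; the helper name is made up):

#include <algorithm>
#include <cstdio>

// Assumed simplified semantics: effective dependence latency is
// DefCycle - UseCycle, but never less than 1.
static int chainLatency(int DefCycle, int UseCycle) {
  return std::max(1, DefCycle - UseCycle);
}

int main() {
  // IIC_iMAC16 above: result in cycle 6, accumulator operand read in cycle 4,
  // so a MAC feeding the next MAC's accumulator sees ~2 cycles.
  std::printf("MAC -> MAC (accumulator): %d cycles\n", chainLatency(6, 4));
  // The same result feeding a multiply source operand read in cycle 1
  // (e.g. IIC_iMUL16 above) sees ~5 cycles.
  std::printf("MAC -> MUL (source):      %d cycles\n", chainLatency(6, 1));
  return 0;
}
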
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 000000000000..49fedf63f8bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,1827 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+ // This file defines the itinerary class data for the ARM Cortex-A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+ // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
+ // Technical Reference Manual".
+//
+// Functional units
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Bypasses
+def A9_LdBypass : Bypass;
+
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
+ // Two fully-pipelined integer ALU pipelines
+
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
+
+ // VFP and NEON share the same register file. This means that every VFP
+ // instruction must wait for full completion of any preceding NEON
+ // instruction and vice versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsN.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Load
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Load Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ // NEON
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
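+ // (i.e. with the 2-cycle NPipe stage below, the result is available
+ // 3 + (2 - 1) = 4 cycles after issue, hence the operand latency of 4)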
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
+ //
+ // Quad-register FP Unary
+  // Result written in N5, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
+ //
+ // Quad-register FP Binary
+  // Result written in N5, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
+ //
+  // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+  // Quad-register FP Multiply-Accumulate
+  // Result written in N9, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+  // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
+ //
+  // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+  // Result written in N2, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+  // Result written in N2, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
+]>;
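+
+// Informal reading of a representative entry above, IIC_VMOVD (Double-register
+// Permute Move): it occupies either issue slot and A9_MUX0 for one cycle,
+// requires A9_DRegsN, reserves A9_DRegsVFP for 7 cycles (the 6-cycle writeback
+// plus one, per the comment on that stage), and executes for one cycle in
+// A9_NPipe. The trailing [2, 1] list gives the per-operand cycles: the result
+// is available after 2 cycles and the source register is read in cycle 1.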
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 000000000000..c1880a72fff3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,294 @@
+//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Model based on ARM1176
+//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
+//
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [], [
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
+
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
+
+ // VFP
+  // Issued through the integer pipeline and executed in the NEON unit. We
+  // assume RunFast mode so that the NFP pipeline is used for single-precision
+  // operations when possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..ef0aaf2a59eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,198 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "ARMTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+}
+
+ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
+}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
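+  // For example, a constant 11-byte copy with 4-byte alignment gives
+  // NumMemOps == 2 and BytesLeft == 3: this loop emits two i32 loads, a
+  // TokenFactor, and two i32 stores, and the trailing-bytes code below then
+  // finishes with an i16 and an i8 load/store pair.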
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+// Adjust parameters for memset: the EABI convention is (ptr, size, value),
+// whereas the GNU library uses (ptr, value, size).
+// See RTABI section 4.3.4.
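+// For illustration, the two calling conventions are:
+//   GNU:   void *memset(void *dest, int c, size_t n);
+//   RTABI: void __aeabi_memset(void *dest, size_t n, int c);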
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const
+{
+  // Use the default lowering for non-AAPCS subtargets.
+ if (!Subtarget->isAAPCS_ABI())
+ return SDValue();
+
+ const ARMTargetLowering &TLI =
+ *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // First argument: data pointer
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ // Second argument: buffer size
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+ // Third argument: value to fill
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+
+  // Emit the __aeabi_memset call.
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain,
+ Type::getVoidTy(*DAG.getContext()), // return type
+ false, // return sign ext
+ false, // return zero ext
+ false, // is var arg
+ false, // is in regs
+ 0, // number of fixed arguments
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
+ false, // is tail call
+ false, // is return val used
+ DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()), // callee
+ Args, DAG, dl); // arg list, DAG and debug
+
+ return CallResult.second;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 000000000000..ec1bf5ca1941
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,51 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSELECTIONDAGINFO_H
+#define ARMSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ ~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+  // Adjust parameters for memset; see RTABI section 4.3.4.
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 000000000000..1cab9e44ce75
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,214 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ReserveR9("arm-reserve-r9", cl::Hidden,
+ cl::desc("Reserve R9, making it unavailable as GPR"));
+
+static cl::opt<bool>
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+ cl::desc("Disallow all unaligned memory accesses"));
+
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : ARMGenSubtargetInfo(TT, CPU, FS)
+ , ARMProcFamily(Others)
+ , HasV4TOps(false)
+ , HasV5TOps(false)
+ , HasV5TEOps(false)
+ , HasV6Ops(false)
+ , HasV6T2Ops(false)
+ , HasV7Ops(false)
+ , HasVFPv2(false)
+ , HasVFPv3(false)
+ , HasNEON(false)
+ , UseNEONForSinglePrecisionFP(false)
+ , SlowFPVMLx(false)
+ , HasVMLxForwarding(false)
+ , SlowFPBrcc(false)
+ , InThumbMode(false)
+ , HasThumb2(false)
+ , NoARM(false)
+ , PostRAScheduler(false)
+ , IsR9Reserved(ReserveR9)
+ , UseMovt(false)
+ , HasFP16(false)
+ , HasD16(false)
+ , HasHardwareDivide(false)
+ , HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , AvoidCPSRPartialUpdate(false)
+ , HasMPExtension(false)
+ , FPOnlySP(false)
+ , AllowsUnalignedMem(false)
+ , Thumb2DSP(false)
+ , stackAlignment(4)
+ , CPUString(CPU)
+ , TargetTriple(TT)
+ , TargetABI(ARM_ABI_APCS) {
+ // Determine default and user specified characteristics
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS;
+ else
+ ArchFS = FS;
+ }
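+  // For example (hypothetical values): ParseARMTriple could yield "+v7" for a
+  // v7 triple, and an explicit FS of "+neon" would then give an ArchFS of
+  // "+v7,+neon" for ParseSubtargetFeatures below.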
+ ParseSubtargetFeatures(CPUString, ArchFS);
+
+  // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify an
+  // ARM version or CPU and then remove this.
+ if (!HasV6T2Ops && hasThumb2())
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // After parsing Itineraries, set ItinData.IssueWidth.
+ computeIssueWidth();
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (!isTargetDarwin())
+ UseMovt = hasV6T2Ops();
+ else {
+ IsR9Reserved = ReserveR9 | !HasV6Ops;
+ UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ }
+
+ if (!isThumb() || hasThumb2())
+ PostRAScheduler = true;
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+ AllowsUnalignedMem = true;
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
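+  // Note that available_externally definitions are also treated as
+  // declarations: their body is not guaranteed to be emitted in this module.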
+
+ if (!isTargetDarwin()) {
+    // An extra load is needed for all externally visible globals.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ if (RelocM == Reloc::PIC_) {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+
+ return false;
+ } else {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ // If we have a reasonable estimate of the pipeline depth, then we can
+ // estimate the penalty of a misprediction based on that.
+ if (isCortexA8())
+ return 13;
+ else if (isCortexA9())
+ return 8;
+
+ // Otherwise, just return a sensible default.
+ return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+ unsigned allStage1Units = 0;
+ for (const InstrItinerary *itin = InstrItins.Itineraries;
+ itin->FirstStage != ~0U; ++itin) {
+ const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+ allStage1Units |= IS->getUnits();
+ }
+ InstrItins.IssueWidth = 0;
+ while (allStage1Units) {
+ ++InstrItins.IssueWidth;
+ // clear the lowest bit
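+    // (x & ~(x - 1)) isolates the lowest set bit, so XOR-ing it back out
+    // clears exactly that bit from allStage1Units.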
+ allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+ }
+ assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 000000000000..c6508723a576
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,252 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class ARMSubtarget : public ARMGenSubtargetInfo {
+protected:
+ enum ARMProcFamilyEnum {
+ Others, CortexA8, CortexA9
+ };
+
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
+ /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+  /// Specify whether the target supports specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+
+ /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
+ /// supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasNEON;
+
+  /// UseNEONForSinglePrecisionFP - True if the NEONFP attribute has been
+  /// specified. Use the method useNEONForSinglePrecisionFP() to
+  /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
+
+  /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+  /// forwarding that allows mul + mla to be issued back to back.
+ bool HasVMLxForwarding;
+
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
+
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
+
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
+  /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved;
+
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
+ /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
+ /// only so far)
+ bool HasFP16;
+
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+ /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+  /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+  /// that partially update CPSR and add a false dependency on the previous
+  /// CPSR-setting instruction.
+ bool AvoidCPSRPartialUpdate;
+
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
+ /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ bool AllowsUnalignedMem;
+
+ /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+ /// and such) instructions in Thumb2 code.
+ bool Thumb2DSP;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+  /// Selected instruction itineraries (one entry per itinerary class).
+ InstrItineraryData InstrItins;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
+ // Change this once Thumb1 ldmia / stmia support is added.
+ return isThumb1Only() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ void computeIssueWidth();
+
+ bool hasV4TOps() const { return HasV4TOps; }
+ bool hasV5TOps() const { return HasV5TOps; }
+ bool hasV5TEOps() const { return HasV5TEOps; }
+ bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6T2Ops() const { return HasV6T2Ops; }
+ bool hasV7Ops() const { return HasV7Ops; }
+
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
+ bool hasARMOps() const { return !NoARM; }
+
+ bool hasVFP2() const { return HasVFPv2; }
+ bool hasVFP3() const { return HasVFPv3; }
+ bool hasNEON() const { return HasNEON; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP; }
+
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool hasVMLxForwarding() const { return HasVMLxForwarding; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool hasMPExtension() const { return HasMPExtension; }
+ bool hasThumb2DSP() const { return Thumb2DSP; }
+
+ bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetELF() const { return !isTargetDarwin(); }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return InThumbMode; }
+ bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+ bool isThumb2() const { return InThumbMode && HasThumb2; }
+ bool hasThumb2() const { return HasThumb2; }
+
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+
+ bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ unsigned getMispredictionPenalty() const;
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 000000000000..f0b176ad6981
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,189 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMFrameLowering.h"
+#include "ARM.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
+ RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
+
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ JITInfo(),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ DefRelocModel = getRelocationModel();
+
+ // Default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
+}
+
+ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "v128:32:128-v64:32:64-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, CPU, FS),
+ InstrInfo(Subtarget.hasThumb2()
+ ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
+ : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget.hasThumb2()
+ ? new ARMFrameLowering(Subtarget)
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+}
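+
+// A rough legend for the datalayout strings above: "e" selects little-endian,
+// "p:32:32" gives 32-bit pointers with 32-bit alignment, entries such as
+// "f64:32:64" (APCS) vs. "f64:64:64" (AAPCS) capture the differing ABI and
+// preferred alignment of 64-bit types, and "n32" marks 32 bits as the native
+// integer width.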
+
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+
+ return false;
+}
+
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createARMISelDag(*this, OptLevel));
+ return false;
+}
+
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass(true));
+ if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ PM.add(createMLxExpansionPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass());
+ if (Subtarget.hasNEON())
+ PM.add(createNEONMoveFixPass());
+ }
+
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createIfConverterPass());
+ }
+ if (Subtarget.isThumb2())
+ PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 000000000000..bc3d46a50ea5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,143 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "ARMInstrInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
+ ARMSubtarget Subtarget;
+private:
+ ARMJITInfo JITInfo;
+ InstrItineraryData InstrItins;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+public:
+ ARMBaseTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &MCE);
+};
+
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+ ARMInstrInfo InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ ARMFrameLowering FrameLowering;
+ public:
+ ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+/// Thumb-1 and Thumb-2.
+///
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ OwningPtr<ARMFrameLowering> FrameLowering;
+public:
+ ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
+ virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ /// returns either Thumb1InstrInfo or Thumb2InstrInfo
+ virtual const ARMBaseInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+ /// returns either Thumb1FrameLowering or ARMFrameLowering
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
new file mode 100644
index 000000000000..19defa1b5196
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -0,0 +1,47 @@
+//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF Target
+//===----------------------------------------------------------------------===//
+
+void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ LSDASection = NULL;
+ }
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+}
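// The Initialize() override above only switches the static constructor and
// destructor sections when the subtarget uses the AAPCS ABI; otherwise the
// defaults set up by TargetLoweringObjectFileELF::Initialize() are kept
// (conventionally the .ctors/.dtors scheme). A minimal standalone sketch of
// that decision, in plain C++ with illustrative names (not LLVM APIs):
#include <string>

struct StaticInitSections {
  std::string Ctor; // section holding static constructor pointers
  std::string Dtor; // section holding static destructor pointers
};

inline StaticInitSections pickStaticInitSections(bool IsAAPCS_ABI) {
  StaticInitSections S;
  if (IsAAPCS_ABI) {
    // AAPCS targets use the newer .init_array/.fini_array scheme.
    S.Ctor = ".init_array";
    S.Dtor = ".fini_array";
  } else {
    // Assumed fallback: keep the traditional .ctors/.dtors sections.
    S.Ctor = ".ctors";
    S.Dtor = ".dtors";
  }
  return S;
}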
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 000000000000..c6a7261439d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,38 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class MCContext;
+class TargetMachine;
+
+class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
+public:
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(NULL)
+ {}
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *getAttributesSection() const {
+ return AttributesSection;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
new file mode 100644
index 000000000000..d9a5fa223b4b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -0,0 +1,154 @@
+//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+class ARMBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+protected:
+ typedef std::map<std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+public:
+ ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+ ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("arm-unknown-unknown");
+ std::string featureString;
+ std::string CPU;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+ ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("thumb-unknown-unknown");
+ std::string featureString;
+ std::string CPU;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+} // end anonymous namespace
+
+AsmToken ARMBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default: break;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ break;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+ // Check for register aliases.
+ // r13 -> sp
+ // r14 -> lr
+ // r15 -> pc
+ // ip -> r12
+ // FIXME: Some assemblers support lots of others. Do we want them all?
+ if (!regID) {
+ regID = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ }
+ }
+
+ return AsmToken(lexedToken);
+}
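// LexTokenUAL() above resolves register identifiers in two steps: first the
// tablegen-built RegisterMap, then the hard-coded aliases r13/r14/r15/ip. A
// minimal standalone sketch of that lookup order, in plain C++ (the enum
// values are placeholders, not the real ARM register numbers):
#include <map>
#include <string>

enum SketchReg { NoReg = 0, SketchSP = 101, SketchLR = 102,
                 SketchPC = 103, SketchR12 = 104 };

inline unsigned lookupRegister(const std::map<std::string, unsigned> &RegMap,
                               const std::string &LowerName) {
  // Step 1: exact architectural name, as filled in by InitRegisterMap().
  std::map<std::string, unsigned>::const_iterator I = RegMap.find(LowerName);
  if (I != RegMap.end())
    return I->second;
  // Step 2: the alias fallbacks handled by the StringSwitch above.
  if (LowerName == "r13") return SketchSP;
  if (LowerName == "r14") return SketchLR;
  if (LowerName == "r15") return SketchPC;
  if (LowerName == "ip")  return SketchR12;
  return NoReg; // not a register name
}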
+
+extern "C" void LLVMInitializeARMAsmLexer() {
+ RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 000000000000..a4741270c7a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,2390 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMOperand;
+
+class ARMAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ int TryParseRegister();
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
+ ARMII::AddrMode AddrMode);
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
+ const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
+
+
+ bool ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ARM_AM::ShiftOpc &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum,
+ SMLoc &E);
+ bool ParseShift(enum ARM_AM::ShiftOpc &St,
+ const MCExpr *&ShiftAmount, SMLoc &E);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveThumb(SMLoc L);
+ bool ParseDirectiveThumbFunc(SMLoc L);
+ bool ParseDirectiveCode(SMLoc L);
+ bool ParseDirectiveSyntax(SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+ void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumbOne() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ setAvailableFeatures(FB);
+ }
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
+
+ /// }
+
+ OperandMatchResultTy tryParseCoprocNumOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseCoprocRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseProcIFlagsOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMSRMaskOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemMode2Operand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemMode3Operand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+
+ // Asm Match Converter Methods
+ bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
+public:
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : TargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// instruction.
+class ARMOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ CondCode,
+ CCOut,
+ CoprocNum,
+ CoprocReg,
+ Immediate,
+ MemBarrierOpt,
+ Memory,
+ MSRMask,
+ ProcIFlags,
+ Register,
+ RegisterList,
+ DPRRegisterList,
+ SPRRegisterList,
+ ShiftedRegister,
+ Shifter,
+ Token
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ SmallVector<unsigned, 8> Registers;
+
+ union {
+ struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
+
+ struct {
+ unsigned Val;
+ } Cop;
+
+ struct {
+ ARM_PROC::IFlags Val;
+ } IFlags;
+
+ struct {
+ unsigned Val;
+ } MMask;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ /// Combined record for all forms of ARM address expressions.
+ struct {
+ ARMII::AddrMode AddrMode;
+ unsigned BaseRegNum;
+ union {
+ unsigned RegNum; ///< Offset register num, when OffsetIsReg.
+ const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
+ } Offset;
+ const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
+ enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true
+ unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
+ unsigned Preindexed : 1;
+ unsigned Postindexed : 1;
+ unsigned OffsetIsReg : 1;
+ unsigned Negative : 1; // only used when OffsetIsReg is true
+ unsigned Writeback : 1;
+ } Mem;
+
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned Imm;
+ } Shift;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ } ShiftedReg;
+ };
+
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case CCOut:
+ case Register:
+ Reg = o.Reg;
+ break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList:
+ Registers = o.Registers;
+ break;
+ case CoprocNum:
+ case CoprocReg:
+ Cop = o.Cop;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case MSRMask:
+ MMask = o.MMask;
+ break;
+ case ProcIFlags:
+ IFlags = o.IFlags;
+ break;
+ case Shifter:
+ Shift = o.Shift;
+ break;
+ case ShiftedRegister:
+ ShiftedReg = o.ShiftedReg;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
+ unsigned getCoproc() const {
+ assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == Register || Kind == CCOut) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == RegisterList || Kind == DPRRegisterList ||
+ Kind == SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ /// @name Memory Operand Accessors
+ /// @{
+ ARMII::AddrMode getMemAddrMode() const {
+ return Mem.AddrMode;
+ }
+ unsigned getMemBaseRegNum() const {
+ return Mem.BaseRegNum;
+ }
+ unsigned getMemOffsetRegNum() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.RegNum;
+ }
+ const MCExpr *getMemOffset() const {
+ assert(!Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.Value;
+ }
+ unsigned getMemOffsetRegShifted() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.OffsetRegShifted;
+ }
+ const MCExpr *getMemShiftAmount() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftAmount;
+ }
+ enum ARM_AM::ShiftOpc getMemShiftType() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftType;
+ }
+ bool getMemPreindexed() const { return Mem.Preindexed; }
+ bool getMemPostindexed() const { return Mem.Postindexed; }
+ bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
+ bool getMemNegative() const { return Mem.Negative; }
+ bool getMemWriteback() const { return Mem.Writeback; }
+
+ /// @}
+
+ bool isCoprocNum() const { return Kind == CoprocNum; }
+ bool isCoprocReg() const { return Kind == CoprocReg; }
+ bool isCondCode() const { return Kind == CondCode; }
+ bool isCCOut() const { return Kind == CCOut; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isImm0_255() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 256;
+ }
+ bool isImm0_7() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 8;
+ }
+ bool isImm0_15() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 16;
+ }
+ bool isImm0_65535() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isT2SOImm() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
+ }
+ bool isReg() const { return Kind == Register; }
+ bool isRegList() const { return Kind == RegisterList; }
+ bool isDPRRegList() const { return Kind == DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == SPRRegisterList; }
+ bool isToken() const { return Kind == Token; }
+ bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
+ bool isMemory() const { return Kind == Memory; }
+ bool isShifter() const { return Kind == Shifter; }
+ bool isShiftedReg() const { return Kind == ShiftedRegister; }
+ bool isMemMode2() const {
+ if (getMemAddrMode() != ARMII::AddrMode2)
+ return false;
+
+ if (getMemOffsetIsReg())
+ return true;
+
+ if (getMemNegative() &&
+ !(getMemPostindexed() || getMemPreindexed()))
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+
+ // The offset must fit in imm12: -4095 to 4095 (the sign is encoded separately).
+ if (Value > 4095 || Value < -4095)
+ return false;
+
+ return true;
+ }
+ bool isMemMode3() const {
+ if (getMemAddrMode() != ARMII::AddrMode3)
+ return false;
+
+ if (getMemOffsetIsReg()) {
+ if (getMemOffsetRegShifted())
+ return false; // No shift with offset reg allowed
+ return true;
+ }
+
+ if (getMemNegative() &&
+ !(getMemPostindexed() || getMemPreindexed()))
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+
+ // The offset must fit in imm8: -255 to 255 (the sign is encoded separately).
+ if (Value > 255 || Value < -255)
+ return false;
+
+ return true;
+ }
+ bool isMemMode5() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
+ getMemNegative())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range -1020 to 1020.
+ int64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
+ }
+ bool isMemMode7() const {
+ if (!isMemory() ||
+ getMemPreindexed() ||
+ getMemPostindexed() ||
+ getMemOffsetIsReg() ||
+ getMemNegative() ||
+ getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ if (CE->getValue())
+ return false;
+
+ return true;
+ }
+ bool isMemModeRegThumb() const {
+ if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+ return false;
+ return true;
+ }
+ bool isMemModeImmThumb() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range 0-124.
+ uint64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 124);
+ }
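// The isImm0_255/isImm0_7/isImm0_15/isImm0_65535/isMemMode5/isMemModeImmThumb
// predicates above all follow one pattern: the expression must fold to a
// constant, and that constant must lie in a fixed range, sometimes with an
// alignment requirement. A compact standalone restatement of the numeric part
// of that check (plain C++; fitsImmediate is an illustrative helper, not an
// LLVM API):
inline bool fitsImmediate(long long Value, long long Min, long long Max,
                          long long AlignTo = 1) {
  if (Value < Min || Value > Max)
    return false;                 // outside the encodable range
  return (Value % AlignTo) == 0;  // e.g. mode 5 offsets must be multiples of 4
}

// Usage mirroring the predicates above:
//   fitsImmediate(V, 0, 255)          -> isImm0_255
//   fitsImmediate(V, -1020, 1020, 4)  -> isMemMode5 offset
//   fitsImmediate(V, 0, 124, 4)       -> isMemModeImmThumb offset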
+ bool isMSRMask() const { return Kind == MSRMask; }
+ bool isProcIFlags() const { return Kind == ProcIFlags; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::CreateReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!");
+ assert((ShiftedReg.ShiftReg == 0 ||
+ ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) &&
+ "Invalid shifted register operand!");
+ Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm)));
+ }
+
+ void addShifterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(Shift.ShiftTy, 0)));
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::CreateReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addMemMode7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isMemMode7() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ (void)CE;
+ assert((CE && CE->getValue() == 0) &&
+ "No offset operand support in mode 7");
+ }
+
+ void addMemMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(isMemMode2() && "Invalid mode or number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+ if (getMemOffsetIsReg()) {
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+ ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift;
+ int64_t ShiftAmount = 0;
+
+ if (getMemOffsetRegShifted()) {
+ ShOpc = getMemShiftType();
+ const MCConstantExpr *CE =
+ dyn_cast<MCConstantExpr>(getMemShiftAmount());
+ ShiftAmount = CE->getValue();
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount,
+ ShOpc, IdxMode)));
+ return;
+ }
+
+ // Create an operand placeholder to always yield the same number of operands.
+ Inst.addOperand(MCOperand::CreateReg(0));
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 2 offset operand!");
+ int64_t Offset = CE->getValue();
+
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add,
+ Offset, ARM_AM::no_shift, IdxMode)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub,
+ -Offset, ARM_AM::no_shift, IdxMode)));
+ }
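// addMemMode2Operands() above never emits a negative immediate directly: a
// signed offset is split into an add/sub opcode plus a non-negative magnitude
// before ARM_AM::getAM2Opc() packs it. A standalone sketch of just that split
// (plain C++; the struct and function are illustrative, not LLVM APIs):
struct SplitOffset {
  bool IsSub;          // true -> subtract the magnitude from the base register
  long long Magnitude; // always >= 0, ready for the imm12 field
};

inline SplitOffset splitSignedOffset(long long Offset) {
  SplitOffset S;
  S.IsSub = (Offset < 0);
  S.Magnitude = S.IsSub ? -Offset : Offset;
  return S;
}
// splitSignedOffset(-8) -> { IsSub = true,  Magnitude = 8  }
// splitSignedOffset(12) -> { IsSub = false, Magnitude = 12 }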
+
+ void addMemMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(isMemMode3() && "Invalid mode or number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+ if (getMemOffsetIsReg()) {
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+ ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
+ IdxMode)));
+ return;
+ }
+
+ // Create an operand placeholder to always yield the same number of operands.
+ Inst.addOperand(MCOperand::CreateReg(0));
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 3 offset operand!");
+ int64_t Offset = CE->getValue();
+
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
+ Offset, IdxMode)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
+ -Offset, IdxMode)));
+ }
+
+ void addMemMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 5 offset operand!");
+
+ // The MCInst offset operand doesn't include the low two bits (like
+ // the instruction encoding).
+ int64_t Offset = CE->getValue() / 4;
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
+ Offset)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
+ -Offset)));
+ }
+
+ void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ }
+
+ void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode offset operand!");
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CondCode);
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(ShiftedRegister);
+ Op->ShiftedReg.ShiftTy = ShTy;
+ Op->ShiftedReg.SrcReg = SrcReg;
+ Op->ShiftedReg.ShiftReg = ShiftReg;
+ Op->ShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Shifter);
+ Op->Shift.ShiftTy = ShTy;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *
+ CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ KindTy Kind = RegisterList;
+
+ if (ARM::DPRRegClass.contains(Regs.front().first))
+ Kind = DPRRegisterList;
+ else if (ARM::SPRRegClass.contains(Regs.front().first))
+ Kind = SPRRegisterList;
+
+ ARMOperand *Op = new ARMOperand(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->first);
+ array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
+ bool OffsetIsReg, const MCExpr *Offset,
+ int OffsetRegNum, bool OffsetRegShifted,
+ enum ARM_AM::ShiftOpc ShiftType,
+ const MCExpr *ShiftAmount, bool Preindexed,
+ bool Postindexed, bool Negative, bool Writeback,
+ SMLoc S, SMLoc E) {
+ assert((OffsetRegNum == -1 || OffsetIsReg) &&
+ "OffsetRegNum must imply OffsetIsReg!");
+ assert((!OffsetRegShifted || OffsetIsReg) &&
+ "OffsetRegShifted must imply OffsetIsReg!");
+ assert((Offset || OffsetIsReg) &&
+ "Offset must exists unless register offset is used!");
+ assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
+ "Cannot have shift amount without shifted register offset!");
+ assert((!Offset || !OffsetIsReg) &&
+ "Cannot have expression offset and register offset!");
+
+ ARMOperand *Op = new ARMOperand(Memory);
+ Op->Mem.AddrMode = AddrMode;
+ Op->Mem.BaseRegNum = BaseRegNum;
+ Op->Mem.OffsetIsReg = OffsetIsReg;
+ if (OffsetIsReg)
+ Op->Mem.Offset.RegNum = OffsetRegNum;
+ else
+ Op->Mem.Offset.Value = Offset;
+ Op->Mem.OffsetRegShifted = OffsetRegShifted;
+ Op->Mem.ShiftType = ShiftType;
+ Op->Mem.ShiftAmount = ShiftAmount;
+ Op->Mem.Preindexed = Preindexed;
+ Op->Mem.Postindexed = Postindexed;
+ Op->Mem.Negative = Negative;
+ Op->Mem.Writeback = Writeback;
+
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void ARMOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ break;
+ case Memory:
+ OS << "<memory "
+ << "am:" << ARMII::AddrModeToString(getMemAddrMode())
+ << " base:" << getMemBaseRegNum();
+ if (getMemOffsetIsReg()) {
+ OS << " offset:<register " << getMemOffsetRegNum();
+ if (getMemOffsetRegShifted()) {
+ OS << " offset-shift-type:" << getMemShiftType();
+ OS << " offset-shift-amount:" << *getMemShiftAmount();
+ }
+ } else {
+ OS << " offset:" << *getMemOffset();
+ }
+ if (getMemOffsetIsReg())
+ OS << " (offset-is-reg)";
+ if (getMemPreindexed())
+ OS << " (pre-indexed)";
+ if (getMemPostindexed())
+ OS << " (post-indexed)";
+ if (getMemNegative())
+ OS << " (negative)";
+ if (getMemWriteback())
+ OS << " (writeback)";
+ OS << ">";
+ break;
+ case ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
+ break;
+ }
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case Shifter:
+ OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">";
+ break;
+ case ShiftedRegister:
+ OS << "<so_reg"
+ << ShiftedReg.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm))
+ << ", " << ShiftedReg.ShiftReg << ", "
+ << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm)
+ << ">";
+ break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = TryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::TryParseRegister() {
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+ if (!RegNum) return -1;
+
+ Parser.Lex(); // Eat identifier token.
+ return RegNum;
+}
+
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::TryParseShiftRegister(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("lsl", ARM_AM::lsl)
+ .Case("lsr", ARM_AM::lsr)
+ .Case("asr", ARM_AM::asr)
+ .Case("ror", ARM_AM::ror)
+ .Case("rrx", ARM_AM::rrx)
+ .Default(ARM_AM::no_shift);
+
+ if (ShiftTy == ARM_AM::no_shift)
+ return 1;
+
+ Parser.Lex(); // Eat the operator.
+
+ // The source register for the shift has already been added to the
+ // operand list, so we need to pop it off and combine it into the shifted
+ // register operand instead.
+ OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ if (!PrevOp->isReg())
+ return Error(PrevOp->getStartLoc(), "shift must be of a register");
+ int SrcReg = PrevOp->getReg();
+ int64_t Imm = 0;
+ int ShiftReg = 0;
+ if (ShiftTy == ARM_AM::rrx) {
+ // RRX Doesn't have an explicit shift amount. The encoder expects
+ // the shift register to be the same as the source register. Seems odd,
+ // but OK.
+ ShiftReg = SrcReg;
+ } else {
+ // Figure out if this is shifted by a constant or a register (for non-RRX).
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat hash.
+ SMLoc ImmLoc = Parser.getTok().getLoc();
+ const MCExpr *ShiftExpr = 0;
+ if (getParser().ParseExpression(ShiftExpr)) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // The expression must be evaluatable as an immediate.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+ if (!CE) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+ ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+ Error(ImmLoc, "immediate shift value out of range");
+ return -1;
+ }
+ } else if (Parser.getTok().is(AsmToken::Identifier)) {
+ ShiftReg = TryParseRegister();
+ SMLoc L = Parser.getTok().getLoc();
+ if (ShiftReg == -1) {
+ Error (L, "expected immediate or register in shift operand");
+ return -1;
+ }
+ } else {
+ Error (Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
+ return -1;
+ }
+ }
+
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, Parser.getTok().getLoc()));
+
+ return 0;
+}
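// The immediate range check inside TryParseShiftRegister() above is the ARM
// shifter rule: lsl and ror accept 0-31, lsr and asr accept 0-32 (with 32
// encoded as 0 in the instruction), and rrx takes no amount at all. A
// standalone restatement of that rule (plain C++; ShiftKind is a local
// placeholder for ARM_AM::ShiftOpc):
enum ShiftKind { SK_Lsl, SK_Lsr, SK_Asr, SK_Ror, SK_Rrx };

inline bool isValidShiftAmount(ShiftKind Kind, long long Imm) {
  switch (Kind) {
  case SK_Rrx:
    return Imm == 0;               // rrx never carries an explicit amount
  case SK_Lsl:
  case SK_Ror:
    return Imm >= 0 && Imm <= 31;
  case SK_Lsr:
  case SK_Asr:
    return Imm >= 0 && Imm <= 32;
  }
  return false; // unreachable, keeps compilers quiet
}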
+
+
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created; another AsmOperand is created
+/// if there is a "writeback" marker. Returns 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = TryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to match a symbolic coprocessor operand
+/// name such as "p1", "p7", "c3", "c5", ... Returns the coprocessor number,
+/// or -1 if the name does not match.
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ switch (Name.size()) {
+ default: break;
+ case 2:
+ if (Name[0] != CoprocOp)
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ break;
+ case 3:
+ if (Name[0] != CoprocOp || Name[1] != '1')
+ return -1;
+ switch (Name[2]) {
+ default: return -1;
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
+ }
+ break;
+ }
+
+ return -1;
+}
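// MatchCoprocessorOperandName() above hand-rolls the digit switch for speed.
// The same acceptance rule ("p0".."p15" or "c0".."c15") written with ordinary
// standard-library code, returning -1 on failure just like the function above
// (illustrative only, not an LLVM API):
#include <cctype>
#include <string>

inline int matchCoprocName(const std::string &Name, char Prefix) {
  if (Name.size() < 2 || Name.size() > 3 || Name[0] != Prefix)
    return -1;
  int Value = 0;
  for (std::string::size_type i = 1; i < Name.size(); ++i) {
    if (!std::isdigit(static_cast<unsigned char>(Name[i])))
      return -1;
    Value = Value * 10 + (Name[i] - '0');
  }
  // The original three-character case only accepts a leading '1', so forms
  // like "p07" are rejected here as well.
  if (Name.size() == 3 && Name[1] != '1')
    return -1;
  return (Value <= 15) ? Value : -1;
}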
+
+/// tryParseCoprocNumOperand - Try to parse a coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseCoprocRegOperand - Try to parse a coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// register, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
+
+/// Parse a register list. Returns false on success and true on error. The
+/// first token must be a '{' when called.
+bool ARMAsmParser::
+ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+
+ // Read the rest of the registers in the list.
+ unsigned PrevRegNum = 0;
+ SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
+
+ do {
+ bool IsRange = Parser.getTok().is(AsmToken::Minus);
+ Parser.Lex(); // Eat non-identifier token.
+
+ const AsmToken &RegTok = Parser.getTok();
+ SMLoc RegLoc = RegTok.getLoc();
+ if (RegTok.isNot(AsmToken::Identifier)) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
+
+ int RegNum = TryParseRegister();
+ if (RegNum == -1) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
+
+ if (IsRange) {
+ int Reg = PrevRegNum;
+ do {
+ ++Reg;
+ Registers.push_back(std::make_pair(Reg, RegLoc));
+ } while (Reg != RegNum);
+ } else {
+ Registers.push_back(std::make_pair(RegNum, RegLoc));
+ }
+
+ PrevRegNum = RegNum;
+ } while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus));
+
+ // Process the right curly brace of the list.
+ const AsmToken &RCurlyTok = Parser.getTok();
+ if (RCurlyTok.isNot(AsmToken::RCurly)) {
+ Error(RCurlyTok.getLoc(), "'}' expected");
+ return true;
+ }
+
+ SMLoc E = RCurlyTok.getLoc();
+ Parser.Lex(); // Eat right curly brace token.
+
+ // Verify the register list.
+ SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ RI = Registers.begin(), RE = Registers.end();
+
+ unsigned HighRegNum = getARMRegisterNumbering(RI->first);
+ bool EmittedWarning = false;
+
+ DenseMap<unsigned, bool> RegMap;
+ RegMap[HighRegNum] = true;
+
+ for (++RI; RI != RE; ++RI) {
+ const std::pair<unsigned, SMLoc> &RegInfo = *RI;
+ unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+
+ if (RegMap[Reg]) {
+ Error(RegInfo.second, "register duplicated in register list");
+ return true;
+ }
+
+ if (!EmittedWarning && Reg < HighRegNum)
+ Warning(RegInfo.second,
+ "register not in ascending order in register list");
+
+ RegMap[Reg] = true;
+ HighRegNum = std::max(Reg, HighRegNum);
+ }
+
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+ return false;
+}
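// After collecting the registers, ParseRegisterList() above rejects
// duplicates outright and only warns when the list is not in ascending
// order. A minimal standalone sketch of that validation pass over already
// resolved register numbers (plain C++; the names are illustrative):
#include <set>
#include <vector>

struct RegListCheck {
  bool HasDuplicate; // hard error in the parser above
  bool OutOfOrder;   // only a warning in the parser above
};

inline RegListCheck checkRegisterList(const std::vector<unsigned> &Regs) {
  RegListCheck Result = { false, false };
  std::set<unsigned> Seen;
  unsigned HighSoFar = 0;
  for (std::vector<unsigned>::const_iterator I = Regs.begin(),
         E = Regs.end(); I != E; ++I) {
    if (!Seen.insert(*I).second)
      Result.HasDuplicate = true; // same register listed twice
    if (*I < HighSoFar)
      Result.OutOfOrder = true;   // e.g. {r3, r1}
    else
      HighSoFar = *I;
  }
  return Result;
}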
+
+/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef OptStr = Tok.getString();
+
+ unsigned Opt = StringSwitch<unsigned>(OptStr)
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef IFlagsStr = Tok.getString();
+
+ unsigned IFlags = 0;
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, which is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef Mask = Tok.getString();
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ StringRef SpecReg = Mask.slice(Start, Next);
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_c
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 0; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ if (Flags == "all") // cpsr_all is an alias for cpsr_fc
+ Flags = "fc";
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, which is not acceptable.
+ if (Flag == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // A special register without flags is equivalent to "fc" flags.
+ if (!FlagsVal)
+ FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
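// tryParseMSRMaskOperand() above packs the mask as four field bits (c=1,
// x=2, s=4, f=8) plus bit 4 to select the SPSR. A standalone sketch of that
// packing for the cpsr/spsr spellings, returning ~0U on malformed input the
// way the parser falls back to NoMatch (plain C++; illustrative only):
#include <string>

inline unsigned encodeMSRMask(const std::string &SpecReg, std::string Flags) {
  if (SpecReg != "cpsr" && SpecReg != "spsr")
    return ~0U;
  if (Flags == "all") // cpsr_all is an alias for cpsr_fc
    Flags = "fc";
  unsigned Mask = 0;
  for (std::string::size_type i = 0; i < Flags.size(); ++i) {
    unsigned Bit;
    switch (Flags[i]) {
    case 'c': Bit = 1; break;
    case 'x': Bit = 2; break;
    case 's': Bit = 4; break;
    case 'f': Bit = 8; break;
    default:  return ~0U; // unknown flag letter
    }
    if (Mask & Bit)
      return ~0U;         // the same letter appeared twice
    Mask |= Bit;
  }
  if (Mask == 0)
    Mask = 0x9;           // bare cpsr/spsr means "fc"
  if (SpecReg == "spsr")
    Mask |= 16;           // bit 4: SPSR rather than CPSR/APSR
  return Mask;
}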
+
+/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+ if (ParseMemory(Operands, ARMII::AddrMode2))
+ return MatchOperand_NoMatch;
+
+ return MatchOperand_Success;
+}
+
+/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+ if (ParseMemory(Operands, ARMII::AddrMode3))
+ return MatchOperand_NoMatch;
+
+ return MatchOperand_Success;
+}
+
+/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// Parse an ARM memory expression. Returns false on success and true on
+/// error. The first token must be a '[' when called.
+///
+/// TODO: Only pre-indexed and post-indexed addressing are handled so far;
+/// unindexed with option, etc. are still to do.
+bool ARMAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
+ SMLoc S, E;
+ assert(Parser.getTok().is(AsmToken::LBrac) &&
+ "Token is not a Left Bracket");
+ S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = Parser.getTok();
+ if (BaseRegTok.isNot(AsmToken::Identifier)) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+ int BaseRegNum = TryParseRegister();
+ if (BaseRegNum == -1) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+
+ // The next token must either be a comma or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ return true;
+
+ bool Preindexed = false;
+ bool Postindexed = false;
+ bool OffsetIsReg = false;
+ bool Negative = false;
+ bool Writeback = false;
+ ARMOperand *WBOp = 0;
+ int OffsetRegNum = -1;
+ bool OffsetRegShifted = false;
+ enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
+ const MCExpr *ShiftAmount = 0;
+ const MCExpr *Offset = 0;
+
+ // First look for preindexed address forms, that is after the "[Rn" we now
+ // have to see if the next token is a comma.
+ if (Tok.is(AsmToken::Comma)) {
+ Preindexed = true;
+ Parser.Lex(); // Eat comma token.
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
+ Offset, OffsetIsReg, OffsetRegNum, E))
+ return true;
+ const AsmToken &RBracTok = Parser.getTok();
+ if (RBracTok.isNot(AsmToken::RBrac)) {
+ Error(RBracTok.getLoc(), "']' expected");
+ return true;
+ }
+ E = RBracTok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ // None of the addrmode3 instructions use "!"
+ if (AddrMode == ARMII::AddrMode3)
+ return true;
+
+ WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc());
+ Writeback = true;
+ Parser.Lex(); // Eat exclaim token
+ } else { // In addressing mode 2, pre-indexed mode always ends with "!"
+ if (AddrMode == ARMII::AddrMode2)
+ Preindexed = false;
+ }
+ } else {
+ // The "[Rn" we have so far was not followed by a comma.
+
+ // If there's anything other than the right brace, this is a post indexing
+ // addressing form.
+ E = Tok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ const AsmToken &NextTok = Parser.getTok();
+
+ if (NextTok.isNot(AsmToken::EndOfStatement)) {
+ Postindexed = true;
+ Writeback = true;
+
+ if (NextTok.isNot(AsmToken::Comma)) {
+ Error(NextTok.getLoc(), "',' expected");
+ return true;
+ }
+
+ Parser.Lex(); // Eat comma token.
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
+ ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
+ E))
+ return true;
+ }
+ }
+
+ // Force Offset to exist if used.
+ if (!OffsetIsReg) {
+ if (!Offset)
+ Offset = MCConstantExpr::Create(0, getContext());
+ } else {
+ if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
+ Error(E, "shift amount not supported");
+ return true;
+ }
+ }
+
+ Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
+ Offset, OffsetRegNum, OffsetRegShifted,
+ ShiftType, ShiftAmount, Preindexed,
+ Postindexed, Negative, Writeback, S, E));
+ if (WBOp)
+ Operands.push_back(WBOp);
+
+ return false;
+}
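For orientation, the following is a minimal illustration of the addressing forms this routine is meant to accept, derived from the comments above and standard GNU ARM syntax. The operand strings are examples only, not part of the patch:

    // Examples of memory operands handled by ParseMemory():
    //   "[r0]"                  base register only
    //   "[r1, #4]"              pre-indexed, immediate offset
    //   "[r1, #4]!"             pre-indexed with writeback
    //   "[r2, r3]"              pre-indexed, register offset
    //   "[r2, -r3, lsl #2]"     negative, shifted register offset
    //   "[r4], #8"              post-indexed, immediate offset
    //   "[r4], r5, lsr #1"      post-indexed, shifted register offset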
+
+/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],".
+/// We will parse the following (where +/- means that a plus or minus is
+/// optional):
+/// +/-Rm
+/// +/-Rm, shift
+/// #offset
+/// Returns false on success; otherwise reports an error and returns true.
+bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ARM_AM::ShiftOpc &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum,
+ SMLoc &E) {
+ Negative = false;
+ OffsetRegShifted = false;
+ OffsetIsReg = false;
+ OffsetRegNum = -1;
+ const AsmToken &NextTok = Parser.getTok();
+ E = NextTok.getLoc();
+ if (NextTok.is(AsmToken::Plus))
+ Parser.Lex(); // Eat plus token.
+ else if (NextTok.is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat minus token
+ }
+ // See if there is a register following the "[Rn," or "[Rn]," we have so far.
+ const AsmToken &OffsetRegTok = Parser.getTok();
+ if (OffsetRegTok.is(AsmToken::Identifier)) {
+ SMLoc CurLoc = OffsetRegTok.getLoc();
+ OffsetRegNum = TryParseRegister();
+ if (OffsetRegNum != -1) {
+ OffsetIsReg = true;
+ E = CurLoc;
+ }
+ }
+
+ // If we parsed a register as the offset then there can be a shift after that.
+ if (OffsetRegNum != -1) {
+ // Look for a comma then a shift
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat comma token.
+
+ const AsmToken &Tok = Parser.getTok();
+ if (ParseShift(ShiftType, ShiftAmount, E))
+ return Error(Tok.getLoc(), "shift expected");
+ OffsetRegShifted = true;
+ }
+ }
+ else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
+ // Look for #offset following the "[Rn," or "[Rn],"
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+
+ Parser.Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(Offset))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ }
+ return false;
+}
+
+/// ParseShift parses a shift of one of these two forms:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// Returns false if it successfully parses a shift, true otherwise.
+bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
+ const MCExpr *&ShiftAmount, SMLoc &E) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ St = ARM_AM::lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ St = ARM_AM::lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ St = ARM_AM::asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ St = ARM_AM::ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ St = ARM_AM::rrx;
+ else
+ return true;
+ Parser.Lex(); // Eat shift type token.
+
+ // Rrx stands alone.
+ if (St == ARM_AM::rrx)
+ return false;
+
+ // Otherwise, there must be a '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(ShiftAmount))
+ return true;
+
+ return false;
+}
+
+/// Parse an ARM instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
+bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+ SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Identifier: {
+ if (!TryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = TryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+
+ // Fall through for the Identifier case that is not a register or a
+ // special name.
+ }
+ case AsmToken::Integer: // things like 1f and 2b as branch targets
+ case AsmToken::Dot: { // . as a branch target
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(IdVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::LBrac:
+ return ParseMemory(Operands);
+ case AsmToken::LCurly:
+ return ParseRegisterList(Operands);
+ case AsmToken::Hash:
+ // #42 -> immediate.
+ // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ const MCExpr *ImmVal;
+ if (getParser().ParseExpression(ImmVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+ return false;
+ case AsmToken::Colon: {
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (ParsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().ParseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
+ }
+}
+
+// ParsePrefix - Parse an ARM 16-bit relocation expression prefix, i.e.
+// :lower16: or :upper16:.
+bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ RefKind = ARMMCExpr::VK_ARM_None;
+
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ if (IDVal == "lower16") {
+ RefKind = ARMMCExpr::VK_ARM_LO16;
+ } else if (IDVal == "upper16") {
+ RefKind = ARMMCExpr::VK_ARM_HI16;
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+ return false;
+}
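For context, a brief sketch of how these prefixes typically appear in ARM assembly; the symbol name below is hypothetical and the lines are illustrative only:

    //   movw r0, #:lower16:some_symbol   @ load the low 16 bits of the address
    //   movt r0, #:upper16:some_symbol   @ load the high 16 bits of the address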
+
+const MCExpr *
+ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // to the leftmost symbol.
+ if (Variant == MCSymbolRefExpr::VK_None)
+ return E;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle target expr yet");
+ case MCExpr::Constant:
+ llvm_unreachable("Can't handle lower16/upper16 of constant yet");
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None)
+ return 0;
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary:
+ llvm_unreachable("Can't handle unary expressions yet");
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = BE->getRHS();
+ if (!LHS)
+ return 0;
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+static StringRef SplitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
+ //
+ // FIXME: Would be nice to autogen this.
+ if (Mnemonic == "teq" || Mnemonic == "vceq" ||
+ Mnemonic == "movs" ||
+ Mnemonic == "svc" ||
+ (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls") ||
+ Mnemonic == "vacge" || Mnemonic == "vcge" ||
+ Mnemonic == "vclt" ||
+ Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" ||
+ (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
+ Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
+ Mnemonic == "vqdmlal" || Mnemonic == "bics"))
+ return Mnemonic;
+
+ // First, split out any predication code. Ignore mnemonics we know aren't
+ // predicated but do have a carry-set and so weren't caught above.
+ if (Mnemonic != "adcs") {
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
+ Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+ Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
+ Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+ // The "cps" instruction can have a interrupt mode operand which is glued into
+ // the mnemonic. Check if this is the case, split it and parse the imod op
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ return Mnemonic;
+}
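A few worked examples of how this splitting behaves, following the logic above (illustrative only, not asserted output of the routine):

    //   "movne"  -> Mnemonic "mov", PredicationCode ARMCC::NE, CarrySetting false
    //   "addseq" -> Mnemonic "add", PredicationCode ARMCC::EQ, CarrySetting true
    //   "cpsie"  -> Mnemonic "cps", ProcessorIMod ARM_PROC::IE
    //   "teq"    -> returned unmodified (listed as a non-predicated form above)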
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+ Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mvn" ||
+ Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+ Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
+ Mnemonic == "eor" || Mnemonic == "smlal" ||
+ (Mnemonic == "mov" && !isThumbOne())) {
+ CanAcceptCarrySet = true;
+ } else {
+ CanAcceptCarrySet = false;
+ }
+
+ if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+ Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+ Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+ Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+ Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
+ Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+ CanAcceptPredicationCode = false;
+ } else {
+ CanAcceptPredicationCode = true;
+ }
+
+ if (isThumb())
+ if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+ CanAcceptPredicationCode = false;
+}
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
+ ProcessorIMod);
+
+ Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // If we had a carry-set on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptCarrySet && CarrySetting) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Head +
+ "' can not set flags, but 's' suffix specified");
+ }
+
+ // Add the carry setting operand, if necessary.
+ //
+ // FIXME: It would be awesome if we could somehow invent a location such that
+ // match errors on this operand would print a nice diagnostic about how the
+ // 's' character in the mnemonic resulted in a CCOut operand.
+ if (CanAcceptCarrySet)
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ NameLoc));
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), NameLoc));
+ } else {
+ // This mnemonic can't ever accept a predication code, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ } else {
+ // This mnemonic can't ever accept an imod, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ StringRef ExtraToken = Name.slice(Start, Next);
+
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return TokError("unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
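To make the resulting operand-list shape concrete, here is roughly what the scheme above would produce for two source lines (a sketch based on the comments, not verified parser output):

    //   "addseq r0, r1, r2" -> Token("add"), CCOut(ARM::CPSR), CondCode(ARMCC::EQ),
    //                          Reg(r0), Reg(r1), Reg(r2)
    //   "add r0, r1, r2"    -> Token("add"), CCOut(0), CondCode(ARMCC::AL),
    //                          Reg(r0), Reg(r1), Reg(r2)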
+
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ unsigned ErrorInfo;
+ MatchResultTy MatchResult, MatchResult2;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult != Match_Success) {
+ // If we get a Match_InvalidOperand it might be some arithmetic instruction
+ // that does not update the condition codes. So try adding a CCOut operand
+ // with a value of reg0.
+ if (MatchResult == Match_InvalidOperand) {
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(0,
+ ((ARMOperand*)Operands[0])->getStartLoc()));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ // If we get a Match_MnemonicFail it might be some arithmetic instruction
+ // that updates the condition codes if it ends in 's'. So see if the
+ // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
+ // operand with a value of CPSR.
+ else if (MatchResult == Match_MnemonicFail) {
+ // Get the instruction mnemonic, which is the first token.
+ StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
+ if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
+ // Remove the 's' from the mnemonic for matching.
+ StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
+ SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(MnemonicNoS, NameLoc));
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(Mnemonic, NameLoc));
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ }
+ }
+ switch (MatchResult) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+/// ParseDirective parses the ARM-specific directives.
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".thumb")
+ return ParseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_func")
+ return ParseDirectiveThumbFunc(DirectiveID.getLoc());
+ else if (IDVal == ".code")
+ return ParseDirectiveCode(DirectiveID.getLoc());
+ else if (IDVal == ".syntax")
+ return ParseDirectiveSyntax(DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveThumb
+/// ::= .thumb
+bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO: set thumb mode
+ // TODO: tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// ParseDirectiveThumbFunc
+/// ::= .thumb_func symbol_name
+bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
+ const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
+ bool isMachO = MAI.hasSubsectionsViaSymbols();
+ StringRef Name;
+
+ // Darwin asm has the function name after the .thumb_func directive;
+ // ELF doesn't.
+ if (isMachO) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getString();
+ Parser.Lex(); // Consume the identifier token.
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // FIXME: assuming the function name will be on the line following .thumb_func
+ if (!isMachO) {
+ Name = Parser.getTok().getString();
+ }
+
+ // Mark symbol as a thumb symbol.
+ MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitThumbFunc(Func);
+ return false;
+}
+
+/// ParseDirectiveSyntax
+/// ::= .syntax unified | divided
+bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(L, "unexpected token in .syntax directive");
+ StringRef Mode = Tok.getString();
+ if (Mode == "unified" || Mode == "UNIFIED")
+ Parser.Lex();
+ else if (Mode == "divided" || Mode == "DIVIDED")
+ return Error(L, "'.syntax divided' arm asssembly not supported");
+ else
+ return Error(L, "unrecognized syntax mode in .syntax directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// ParseDirectiveCode
+/// ::= .code 16 | 32
+bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
+ int64_t Val = Parser.getTok().getIntVal();
+ if (Val == 16)
+ Parser.Lex();
+ else if (Val == 32)
+ Parser.Lex();
+ else
+ return Error(L, "invalid operand to .code directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ if (Val == 16) {
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ } else {
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+
+ return false;
+}
+
+extern "C" void LLVMInitializeARMAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ LLVMInitializeARMAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
new file mode 100644
index 000000000000..bdce2c4cf896
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -0,0 +1,589 @@
+//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code to implement the public interfaces of ARMDisassembler and
+// ThumbDisassembler, both of which are instances of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARMDisassembler.h"
+#include "ARMDisassemblerCore.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//#define DEBUG(X) do { X; } while (0)
+
+/// ARMGenDecoderTables.inc - ARMGenDecoderTables.inc is tblgen'ed by the
+/// ARMDecoderEmitter.cpp TableGen backend. It contains:
+///
+/// o Mappings from opcode to ARM/Thumb instruction format
+///
+/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
+/// for an ARM instruction.
+///
+/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
+/// function for a Thumb instruction.
+///
+#include "ARMGenDecoderTables.inc"
+
+#include "ARMGenEDInfo.inc"
+
+using namespace llvm;
+
+/// showBitVector - Use the raw_ostream to log a diagnostic message describing
+/// the individual bits of the instruction.
+///
+static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
+ // Split the bit position markers across more than one line to fit 80 columns.
+ os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11"
+ << " 10 9 8 7 6 5 4 3 2 1 0 \n";
+ os << "---------------------------------------------------------------"
+ << "----------------------------------\n";
+ os << '|';
+ for (unsigned i = 32; i != 0; --i) {
+ if (insn >> (i - 1) & 0x01)
+ os << " 1";
+ else
+ os << " 0";
+ os << (i%4 == 1 ? '|' : ':');
+ }
+ os << '\n';
+ // Split the bit position markers across more than one line to fit 80 columns.
+ os << "---------------------------------------------------------------"
+ << "----------------------------------\n";
+ os << '\n';
+}
+
+/// decodeARMInstruction is a decorator function which tries special cases of
+/// instruction matching before calling the auto-generated decoder function.
+static unsigned decodeARMInstruction(uint32_t &insn) {
+ if (slice(insn, 31, 28) == 15)
+ goto AutoGenedDecoder;
+
+ // Special case processing, if any, goes here....
+
+ // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
+ // The insufficient encoding information of the combined instruction confuses
+ // the decoder wrt BFC/BFI. Therefore, we try to recover here.
+ // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
+ // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
+ if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
+ if (slice(insn, 3, 0) == 15)
+ return ARM::BFC;
+ else
+ return ARM::BFI;
+ }
+
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+ // A1 & A2.
+ // As a result, the decoder fails to decode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+ // As a result, the decoder fails to decode UQADD16 properly.
+ if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
+ return ARM::UQADD16;
+
+ // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
+ // As a result, the decoder fails to decode UMULL properly.
+ if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
+ return ARM::UMULL;
+ }
+
+ // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
+ // As a result, the decoder fails to decode SBFX properly.
+ if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
+ return ARM::SBFX;
+
+ // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
+ // As a result, the decoder fails to decode UBFX properly.
+ if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
+ return ARM::UBFX;
+
+ // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
+ // As a result, the decoder fails to decode SSAT properly.
+ if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
+ return ARM::SSAT;
+
+ // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
+ // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
+ if (slice(insn, 27, 24) == 0) {
+ switch (slice(insn, 21, 20)) {
+ case 2:
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ return ARM::STRHT;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 3:
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ return ARM::LDRHT;
+ case 13:
+ return ARM::LDRSBT;
+ case 15:
+ return ARM::LDRSHT;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+ // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
+ // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
+ // properly.
+ if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
+ unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::STRH;
+ case 3: // Pre-indexed
+ return ARM::STRH_PRE;
+ case 0: // Post-indexed
+ return ARM::STRH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 13:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRD;
+ case 3: // Pre-indexed
+ return ARM::LDRD_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRD_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 15:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::STRD;
+ case 3: // Pre-indexed
+ return ARM::STRD_PRE;
+ case 0: // Post-indexed
+ return ARM::STRD_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+ // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
+ // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
+ // properly.
+ if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
+ unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRH;
+ case 3: // Pre-indexed
+ return ARM::LDRH_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 13:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRSB;
+ case 3: // Pre-indexed
+ return ARM::LDRSB_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRSB_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 15:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRSH;
+ case 3: // Pre-indexed
+ return ARM::LDRSH_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRSH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+AutoGenedDecoder:
+ // Calling the auto-generated decoder function.
+ return decodeInstruction(insn);
+}
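The slice() helper used throughout is declared elsewhere in the disassembler (ARMDisassemblerCore.h); assuming the usual bit-field semantics of slice(insn, hi, lo) extracting bits [hi:lo], a concrete walk-through of the BFC special case above would look like this (values illustrative only):

    //   insn = 0x07C0001F
    //   slice(insn, 31, 28) == 0x0   -> not 15, so no jump to AutoGenedDecoder
    //   slice(insn, 27, 21) == 0x3e  -> matches the BFC/BFI pattern
    //   slice(insn, 6, 4)   == 1
    //   slice(insn, 3, 0)   == 15    -> decodeARMInstruction() returns ARM::BFC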
+
+// Helper function for special case handling of LDR (literal) and friends.
+// See, for example, A6.3.7 Load word: Table A6-18 Load word.
+// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return Opcode; // Return unmorphed opcode.
+
+ case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2LDRs: case ARM::t2LDRT:
+ return ARM::t2LDRpci;
+
+ case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
+ case ARM::t2LDRBi12: case ARM::t2LDRBi8:
+ case ARM::t2LDRBs: case ARM::t2LDRBT:
+ return ARM::t2LDRBpci;
+
+ case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
+ case ARM::t2LDRHi12: case ARM::t2LDRHi8:
+ case ARM::t2LDRHs: case ARM::t2LDRHT:
+ return ARM::t2LDRHpci;
+
+ case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBs: case ARM::t2LDRSBT:
+ return ARM::t2LDRSBpci;
+
+ case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHs: case ARM::t2LDRSHT:
+ return ARM::t2LDRSHpci;
+ }
+}
+
+// Helper function for special case handling of PLD (literal) and friends.
+// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return Opcode; // Return unmorphed opcode.
+
+ case ARM::t2PLDi8: case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ return ARM::t2PLDi12;
+
+ case ARM::t2PLIi8: case ARM::t2PLIs:
+ return ARM::t2PLIi12;
+ }
+}
+
+/// decodeThumbSideEffect is a decorator function which can potentially twiddle
+/// the instruction or morph the returned opcode under Thumb2.
+///
+/// First it checks whether the insn is a NEON or VFP instr; if true, bit
+/// twiddling could be performed on insn to turn it into an ARM NEON/VFP
+/// equivalent instruction and decodeInstruction is called with the transformed
+/// insn.
+///
+/// Next, there is special handling for load byte/halfword/word instructions:
+/// we check whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
+/// Thumb2 instruction. See comments below for further details.
+///
+/// Finally, one last check is made to see whether the insn is a NEON/VFP
+/// instruction, in which case decodeInstruction(insn) is invoked on the
+/// original insn.
+///
+/// Otherwise, decodeThumbInstruction is called with the original insn.
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
+ if (IsThumb2) {
+ uint16_t op1 = slice(insn, 28, 27);
+ uint16_t op2 = slice(insn, 26, 20);
+
+ // A6.3 32-bit Thumb instruction encoding
+ // Table A6-9 32-bit Thumb instruction encoding
+
+ // The coprocessor instructions of interest are transformed to their ARM
+ // equivalents.
+
+ // --------- Transform Begin Marker ---------
+ if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
+ // A7.4 Advanced SIMD data-processing instructions
+ // U bit of Thumb corresponds to Inst{24} of ARM.
+ uint16_t U = slice(op1, 1, 1);
+
+ // Inst{28-24} of ARM = {1,0,0,1,U};
+ uint16_t bits28_24 = 9 << 1 | U;
+ DEBUG(showBitVector(errs(), insn));
+ setSlice(insn, 28, 24, bits28_24);
+ return decodeInstruction(insn);
+ }
+
+ if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
+ // A7.7 Advanced SIMD element or structure load/store instructions
+ // Inst{27-24} of Thumb = 0b1001
+ // Inst{27-24} of ARM = 0b0100
+ DEBUG(showBitVector(errs(), insn));
+ setSlice(insn, 27, 24, 4);
+ return decodeInstruction(insn);
+ }
+ // --------- Transform End Marker ---------
+
+ unsigned unmorphed = decodeThumbInstruction(insn);
+
+ // See, for example, A6.3.7 Load word: Table A6-18 Load word.
+ // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
+ // before returning it to our caller.
+ if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
+ && slice(insn, 19, 16) == 15) {
+ unsigned morphed = T2Morph2LoadLiteral(unmorphed);
+ if (morphed != unmorphed)
+ return morphed;
+ }
+
+ // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for
+ // why we morphed the opcode before returning it to our caller.
+ if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF
+ && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1
+ && slice(insn, 19, 16) == 15) {
+ unsigned morphed = T2Morph2PLDLiteral(unmorphed);
+ if (morphed != unmorphed)
+ return morphed;
+ }
+
+ // One last check for NEON/VFP instructions.
+ if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
+ return decodeInstruction(insn);
+
+ // Fall through.
+ }
+
+ return decodeThumbInstruction(insn);
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool ARMDisassembler::getInstruction(MCInst &MI,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a little-endian 32-bit word in the stream.
+ insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ unsigned Opcode = decodeARMInstruction(insn);
+ ARMFormat Format = ARMFormats[Opcode];
+ Size = 4;
+
+ DEBUG({
+ errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode)
+ << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
+ << ")\n";
+ showBitVector(errs(), insn);
+ });
+
+ OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
+ if (!Builder)
+ return false;
+
+ Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+ getDisInfoBlock(), getMCContext(),
+ Address);
+
+ if (!Builder->Build(MI, insn))
+ return false;
+
+ return true;
+}
+
+bool ThumbDisassembler::getInstruction(MCInst &MI,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os) const {
+ // The Thumb instruction stream is a sequence of halfwords.
+
+ // This represents the first halfword as well as the machine instruction
+ // passed to decodeThumbInstruction(). For a 16-bit Thumb instruction, the top
+ // halfword of insn is 0x0000; otherwise, the first halfword is moved to the
+ // top half, followed by the second halfword.
+ unsigned insn = 0;
+ // Possible second halfword.
+ uint16_t insn1 = 0;
+
+ // A6.1 Thumb instruction set encoding
+ //
+ // If bits [15:11] of the halfword being decoded take any of the following
+ // values, the halfword is the first halfword of a 32-bit instruction:
+ // o 0b11101
+ // o 0b11110
+ // o 0b11111.
+ //
+ // Otherwise, the halfword is a 16-bit instruction.
+
+ // Read 2 bytes of data first.
+ uint8_t bytes[2];
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a little-endian 16-bit halfword in the stream.
+ insn = (bytes[1] << 8) | bytes[0];
+ unsigned bits15_11 = slice(insn, 15, 11);
+ bool IsThumb2 = false;
+
+ // 32-bit instructions if the bits [15:11] of the halfword match
+ // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, 0b11111 /* 0x1F */ }.
+ if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
+ IsThumb2 = true;
+ if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1)
+ return false;
+ // Encoded as a little-endian 16-bit halfword in the stream.
+ insn1 = (bytes[1] << 8) | bytes[0];
+ insn = (insn << 16 | insn1);
+ }
+
+ // The insn could potentially be bit-twiddled in order to be decoded as an ARM
+ // NEON/VFP opcode. In such case, the modified insn is later disassembled as
+ // an ARM NEON/VFP instruction.
+ //
+ // This is a short-term solution for the lack of encoding bits specified for
+ // the Thumb2 NEON/VFP instructions. The long-term solution could be adding
+ // some infrastructure to have each instruction support more than one
+ // encoding. Which encoding is used would be based on which subtarget the
+ // compiler/disassembler is working with at the time. This would allow the
+ // sharing of the NEON patterns between ARM and Thumb2, as well as potentially
+ // greater sharing between the regular ARM instructions and the 32-bit wide
+ // Thumb2 instructions.
+ unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
+
+ ARMFormat Format = ARMFormats[Opcode];
+ Size = IsThumb2 ? 4 : 2;
+
+ DEBUG({
+ errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
+ << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
+ << ")\n";
+ showBitVector(errs(), insn);
+ });
+
+ OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
+ if (!Builder)
+ return false;
+
+ Builder->SetSession(const_cast<Session *>(&SO));
+
+ Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+ getDisInfoBlock(), getMCContext(),
+ Address);
+
+ if (!Builder->Build(MI, insn))
+ return false;
+
+ return true;
+}
+
+// A8.6.50
+// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition.
+static unsigned short CountITSize(unsigned ITMask) {
+ // First count the trailing zeros of the IT mask.
+ unsigned TZ = CountTrailingZeros_32(ITMask);
+ if (TZ > 3) {
+ DEBUG(errs() << "Encoding error: IT Mask '0000'");
+ return 0;
+ }
+ return (4 - TZ);
+}
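Concretely, a few example masks and the IT block sizes the routine above derives from them (per A8.6.50; only the position of the lowest set bit matters):

    //   ITMask 0b1000 -> 1 conditional instruction in the IT block
    //   ITMask 0b0100 -> 2
    //   ITMask 0b0010 -> 3
    //   ITMask 0b0001 -> 4
    //   ITMask 0b0000 -> 0 (encoding error)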
+
+/// Init ITState. Note that at least one bit is always 1 in the mask.
+bool Session::InitIT(unsigned short bits7_0) {
+ ITCounter = CountITSize(slice(bits7_0, 3, 0));
+ if (ITCounter == 0)
+ return false;
+
+ // A8.6.50 IT
+ unsigned short FirstCond = slice(bits7_0, 7, 4);
+ if (FirstCond == 0xF) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1111'");
+ return false;
+ }
+ if (FirstCond == 0xE && ITCounter != 1) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'");
+ return false;
+ }
+
+ ITState = bits7_0;
+
+ return true;
+}
+
+/// Update ITState if necessary.
+void Session::UpdateIT() {
+ assert(ITCounter);
+ --ITCounter;
+ if (ITCounter == 0)
+ ITState = 0;
+ else {
+ unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1;
+ setSlice(ITState, 4, 0, NewITState4_0);
+ }
+}
+
+static MCDisassembler *createARMDisassembler(const Target &T) {
+ return new ARMDisassembler;
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T) {
+ return new ThumbDisassembler;
+}
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
new file mode 100644
index 000000000000..0a74a3866eed
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
@@ -0,0 +1,99 @@
+//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains the header for ARMDisassembler and ThumbDisassembler, both are
+// subclasses of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMDISASSEMBLER_H
+#define ARMDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler() :
+ MCDisassembler() {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+};
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+/// Session - Keep track of the IT Block progression.
+class Session {
+ friend class ARMBasicMCBuilder;
+public:
+ Session() : ITCounter(0), ITState(0) {}
+ ~Session() {}
+ /// InitIT - Initializes ITCounter/ITState.
+ bool InitIT(unsigned short bits7_0);
+ /// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
+ void UpdateIT();
+
+private:
+ unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4.
+ unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially.
+};
+
+/// ThumbDisassembler - Thumb disassembler for all ARM platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler() :
+ MCDisassembler(), SO() {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ Session SO;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
new file mode 100644
index 000000000000..d89c80a9d457
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -0,0 +1,3818 @@
+//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code to represent the core concepts of Builder and DisassembleFP
+// to solve the problem of disassembling an ARM instr.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARMDisassemblerCore.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+//#define DEBUG(X) do { X; } while (0)
+
+/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
+/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the
+/// operand info for each ARMInsts[i].
+///
+/// Together with an instruction's encoding format, we can take advantage of the
+/// NumOperands and the OpInfo fields of the target instruction description in
+/// the quest to build out the MCOperand list for an MCInst.
+///
+/// The general guideline is that with a known format, the numbers of dst and
+/// src operands are well-known. The dst is built first, followed by the src
+/// operand(s). The operands not yet used at this point are for the Implicit
+/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
+/// defined with two components:
+///
+/// def pred { // Operand PredicateOperand
+/// ValueType Type = OtherVT;
+/// string PrintMethod = "printPredicateOperand";
+/// string AsmOperandLowerMethod = ?;
+/// dag MIOperandInfo = (ops i32imm, CCR);
+/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
+/// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
+/// }
+///
+/// which is manifested by the MCOperandInfo[] of:
+///
+/// { 0, 0|(1<<MCOI::Predicate), 0 },
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
+///
+/// So the first predicate MCOperand corresponds to the immediate part of the
+/// ARM condition field (Inst{31-28}), and the second predicate MCOperand
+/// corresponds to a register kind of ARM::CPSR.
+///
+/// For the Defs part, in the simple case of only cc_out:$s, we have:
+///
+/// def cc_out { // Operand OptionalDefOperand
+/// ValueType Type = OtherVT;
+/// string PrintMethod = "printSBitModifierOperand";
+/// string AsmOperandLowerMethod = ?;
+/// dag MIOperandInfo = (ops CCR);
+/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
+/// dag DefaultOps = (ops (i32 zero_reg));
+/// }
+///
+/// which is manifested by the one MCOperandInfo of:
+///
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
+///
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+
+using namespace llvm;
+
+const char *ARMUtils::OpcodeName(unsigned Opcode) {
+ return ARMInsts[Opcode].Name;
+}
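A rough sketch of how the per-opcode operand info described in the comment block above is typically consulted when building the MCOperand list. It assumes the tblgen'ed ARMInsts[] table and the standard MCInstrDesc/MCOperandInfo accessors; it is illustrative only and not part of the patch:

    static void walkOperandInfo(unsigned Opcode) {
      const MCInstrDesc &Desc = ARMInsts[Opcode];
      for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
        const MCOperandInfo &Info = Desc.OpInfo[i];
        if (Info.isPredicate()) {
          // pred:$p -> an immediate condition code plus a CCR-class register.
        } else if (Info.isOptionalDef()) {
          // cc_out:$s -> ARM::CPSR when the instruction sets flags, else reg0.
        }
      }
    }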
+
+// Return the register enum based on RegClass and the raw register number.
+// FIXME: Auto-gened?
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
+ if (RegClassID == ARM::rGPRRegClassID) {
+ // Register numbers 13 and 15 are not permitted for many Thumb register
+ // specifiers.
+ if (RawRegister == 13 || RawRegister == 15) {
+ B->SetErr(-1);
+ return 0;
+ }
+ // For this purpose, we can treat rGPR as if it were GPR.
+ RegClassID = ARM::GPRRegClassID;
+ }
+
+ // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
+ // A7.3 register encoding
+ // Qd -> bit[12] == 0
+ // Qn -> bit[16] == 0
+ // Qm -> bit[0] == 0
+ //
+ // If one of these bits is 1, the instruction is UNDEFINED.
+ if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
+ B->SetErr(-1);
+ return 0;
+ }
+ unsigned RegNum =
+ RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
+
+ switch (RegNum) {
+ default:
+ break;
+ case 0:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D0;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q0;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0;
+ }
+ break;
+ case 1:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D1;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q1;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1;
+ }
+ break;
+ case 2:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D2;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q2;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2;
+ }
+ break;
+ case 3:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D3;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q3;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3;
+ }
+ break;
+ case 4:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D4;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4;
+ }
+ break;
+ case 5:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D5;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5;
+ }
+ break;
+ case 6:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D6;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6;
+ }
+ break;
+ case 7:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D7;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7;
+ }
+ break;
+ case 8:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R8;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8;
+ case ARM::QPRRegClassID: return ARM::Q8;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8;
+ }
+ break;
+ case 9:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R9;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9;
+ case ARM::QPRRegClassID: return ARM::Q9;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9;
+ }
+ break;
+ case 10:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R10;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10;
+ case ARM::QPRRegClassID: return ARM::Q10;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10;
+ }
+ break;
+ case 11:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R11;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11;
+ case ARM::QPRRegClassID: return ARM::Q11;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11;
+ }
+ break;
+ case 12:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R12;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12;
+ case ARM::QPRRegClassID: return ARM::Q12;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12;
+ }
+ break;
+ case 13:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::SP;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13;
+ case ARM::QPRRegClassID: return ARM::Q13;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13;
+ }
+ break;
+ case 14:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::LR;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14;
+ case ARM::QPRRegClassID: return ARM::Q14;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14;
+ }
+ break;
+ case 15:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::PC;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15;
+ case ARM::QPRRegClassID: return ARM::Q15;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15;
+ }
+ break;
+ case 16:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D16;
+ case ARM::SPRRegClassID: return ARM::S16;
+ }
+ break;
+ case 17:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D17;
+ case ARM::SPRRegClassID: return ARM::S17;
+ }
+ break;
+ case 18:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D18;
+ case ARM::SPRRegClassID: return ARM::S18;
+ }
+ break;
+ case 19:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D19;
+ case ARM::SPRRegClassID: return ARM::S19;
+ }
+ break;
+ case 20:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D20;
+ case ARM::SPRRegClassID: return ARM::S20;
+ }
+ break;
+ case 21:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D21;
+ case ARM::SPRRegClassID: return ARM::S21;
+ }
+ break;
+ case 22:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D22;
+ case ARM::SPRRegClassID: return ARM::S22;
+ }
+ break;
+ case 23:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D23;
+ case ARM::SPRRegClassID: return ARM::S23;
+ }
+ break;
+ case 24:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D24;
+ case ARM::SPRRegClassID: return ARM::S24;
+ }
+ break;
+ case 25:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D25;
+ case ARM::SPRRegClassID: return ARM::S25;
+ }
+ break;
+ case 26:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D26;
+ case ARM::SPRRegClassID: return ARM::S26;
+ }
+ break;
+ case 27:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D27;
+ case ARM::SPRRegClassID: return ARM::S27;
+ }
+ break;
+ case 28:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D28;
+ case ARM::SPRRegClassID: return ARM::S28;
+ }
+ break;
+ case 29:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D29;
+ case ARM::SPRRegClassID: return ARM::S29;
+ }
+ break;
+ case 30:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D30;
+ case ARM::SPRRegClassID: return ARM::S30;
+ }
+ break;
+ case 31:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D31;
+ case ARM::SPRRegClassID: return ARM::S31;
+ }
+ break;
+ }
+ DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n");
+ // Encoding error. Mark the builder with error code != 0.
+ B->SetErr(-1);
+ return 0;
+}
+
+///////////////////////////////
+// //
+// Utility Functions //
+// //
+///////////////////////////////
+
+// Extract/Decode Rd: Inst{15-12}.
+static inline unsigned decodeRd(uint32_t insn) {
+ return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask;
+}
+
+// Extract/Decode Rn: Inst{19-16}.
+static inline unsigned decodeRn(uint32_t insn) {
+ return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask;
+}
+
+// Extract/Decode Rm: Inst{3-0}.
+static inline unsigned decodeRm(uint32_t insn) {
+ return (insn & ARMII::GPRRegMask);
+}
+
+// Extract/Decode Rs: Inst{11-8}.
+static inline unsigned decodeRs(uint32_t insn) {
+ return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask;
+}
+
+static inline unsigned getCondField(uint32_t insn) {
+ return (insn >> ARMII::CondShift);
+}
+
+static inline unsigned getIBit(uint32_t insn) {
+ return (insn >> ARMII::I_BitShift) & 1;
+}
+
+static inline unsigned getAM3IBit(uint32_t insn) {
+ return (insn >> ARMII::AM3_I_BitShift) & 1;
+}
+
+static inline unsigned getPBit(uint32_t insn) {
+ return (insn >> ARMII::P_BitShift) & 1;
+}
+
+static inline unsigned getUBit(uint32_t insn) {
+ return (insn >> ARMII::U_BitShift) & 1;
+}
+
+static inline unsigned getPUBits(uint32_t insn) {
+ return (insn >> ARMII::U_BitShift) & 3;
+}
+
+static inline unsigned getSBit(uint32_t insn) {
+ return (insn >> ARMII::S_BitShift) & 1;
+}
+
+static inline unsigned getWBit(uint32_t insn) {
+ return (insn >> ARMII::W_BitShift) & 1;
+}
+
+static inline unsigned getDBit(uint32_t insn) {
+ return (insn >> ARMII::D_BitShift) & 1;
+}
+
+static inline unsigned getNBit(uint32_t insn) {
+ return (insn >> ARMII::N_BitShift) & 1;
+}
+
+static inline unsigned getMBit(uint32_t insn) {
+ return (insn >> ARMII::M_BitShift) & 1;
+}
+
+// See A8.4 Shifts applied to a register.
+// A8.4.2 Register controlled shifts.
+//
+// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits
+// into llvm enums for shift opcode. API clients should pass in a two-bit
+// encoded value, so the assert remains to flag incorrect API usage.
+//
+// A8-12: DecodeRegShift()
+static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
+ switch (bits) {
+ default: assert(0 && "No such value"); return ARM_AM::no_shift;
+ case 0: return ARM_AM::lsl;
+ case 1: return ARM_AM::lsr;
+ case 2: return ARM_AM::asr;
+ case 3: return ARM_AM::ror;
+ }
+}
+
+// See A8.4 Shifts applied to a register.
+// A8.4.1 Constant shifts.
+//
+// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5
+// encodings into the intended ShiftOpc and shift amount.
+//
+// A8-11: DecodeImmShift()
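+// For example, a raw (lsr, 0) pair decodes to LSR #32, and a raw (ror, 0)
+// pair decodes to RRX.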
+static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
+ ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
+}
+
+// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
+// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. API
+// clients should pass in a two-bit encoded value, so the assert remains to
+// flag incorrect API usage.
+static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
+ switch (bits) {
+ default: assert(0 && "No such value"); return ARM_AM::bad_am_submode;
+ case 1: return ARM_AM::ia; // P=0 U=1
+ case 3: return ARM_AM::ib; // P=1 U=1
+ case 0: return ARM_AM::da; // P=0 U=0
+ case 2: return ARM_AM::db; // P=1 U=0
+ }
+}
+
+////////////////////////////////////////////
+// //
+// Disassemble function definitions //
+// //
+////////////////////////////////////////////
+
+/// There is a separate Disassemble*Frm function entry for disassembly of an ARM
+/// instr into a list of MCOperands in the appropriate order, with the possible
+/// dst followed by the possible src(s).
+///
+/// Processing of the predicate operands, and of the 'S' modifier bit when MI
+/// modifies the CPSR, is factored into ARMBasicMCBuilder's method named
+/// TryPredicateAndSBitModifier.
+
+static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ assert(0 && "Unexpected pseudo instruction!");
+ return false;
+}
+
+// A8.6.94 MLA
+// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
+//
+// A8.6.105 MUL
+// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+//
+// A8.6.246 UMULL
+// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+// if dHi == dLo then UNPREDICTABLE;
+static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
+ unsigned R19_16 = slice(insn, 19, 16);
+ unsigned R15_12 = slice(insn, 15, 12);
+ unsigned R11_8 = slice(insn, 11, 8);
+ unsigned R3_0 = slice(insn, 3, 0);
+ switch (Opcode) {
+ default:
+ // Did we miss an opcode?
+ DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!\n");
+ return false;
+ case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT:
+ case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT:
+ case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR:
+ case ARM::USADA8:
+ if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ return false;
+ case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR:
+ case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT:
+ case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX:
+ // A8.6.167 SMLAD & A8.6.172 SMLSD
+ case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX:
+ case ARM::USAD8:
+ if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ return false;
+ case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL:
+ case ARM::UMULL:
+ case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
+ case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX:
+ if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ if (R19_16 == R15_12)
+ return true;
+ return false;
+ }
+}
+
+// Multiply Instructions.
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
+// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
+// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
+// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
+// only for {d, n, m}.
+//
+// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
+// SMUADX, and USAD8 (for convenience):
+// Rd{19-16} Rn{3-0} Rm{11-8}
+//
+// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
+// SMLALD, SMLALDX, SMLSLD, SMLSLDX:
+// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
+//
+// The mapping of the multiply registers to the "regular" ARM registers, where
+// there are convenience decoder functions, is:
+//
+// Inst{15-12} => Rd
+// Inst{19-16} => Rn
+// Inst{3-0} => Rm
+// Inst{11-8} => Rs
+static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm");
+ assert(NumOps >= 3
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass == ARM::GPRRegClassID
+ && "Expect three register operands");
+
+ // Sanity check for the register encodings.
+ if (BadRegsMulFrm(Opcode, insn))
+ return false;
+
+ // Instructions with two destination registers have RdLo{15-12} first.
+ if (NumDefs == 2) {
+ assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
+ "Expect 4th register operand");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // The destination register: RdHi{19-16} or Rd{19-16}.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ // The two src registers: Rn{3-0}, then Rm{11-8}.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ OpIdx += 3;
+
+ // Many multiply instructions (e.g., MLA) have three src registers.
+ // The third register operand is Ra{15-12}.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// Helper routines for disassembly of coprocessor instructions.
+
+static bool LdStCopOpcode(unsigned Opcode) {
+ if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) ||
+ (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE))
+ return true;
+ return false;
+}
+static bool CoprocessorOpcode(unsigned Opcode) {
+ if (LdStCopOpcode(Opcode))
+ return true;
+
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::CDP: case ARM::CDP2:
+ case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2:
+ case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2:
+ return true;
+ }
+}
+static inline unsigned GetCoprocessor(uint32_t insn) {
+ return slice(insn, 11, 8);
+}
+static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) {
+ return CDP ? slice(insn, 23, 20) : slice(insn, 23, 21);
+}
+static inline unsigned GetCopOpc2(uint32_t insn) {
+ return slice(insn, 7, 5);
+}
+static inline unsigned GetCopOpc(uint32_t insn) {
+ return slice(insn, 7, 4);
+}
+// Most of the operands are in immediate forms, except Rd and Rn, which are ARM
+// core registers.
+//
+// CDP, CDP2: cop opc1 CRd CRn CRm opc2
+//
+// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2
+//
+// MCRR, MCRR2, MRRC, MRRC2: cop opc Rd Rn CRm
+//
+// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00
+// and friends
+// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00
+// and friends
+// <-- addrmode2 -->
+//
+// LDC_OPTION: cop CRd Rn imm8
+// and friends
+// STC_OPTION: cop CRd Rn imm8
+// and friends
+//
+static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");
+
+ unsigned &OpIdx = NumOpsAdded;
+ // A8.6.92
+ // if coproc == '101x' then SEE "Advanced SIMD and VFP"
+ // But since the special instructions have more explicit encoding bits
+ // specified, if coproc == 10 or 11, we should reject it as invalid.
+ unsigned coproc = GetCoprocessor(insn);
+ if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
+ Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
+ (coproc == 10 || coproc == 11)) {
+ DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
+ return false;
+ }
+
+ bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
+ Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
+
+ // CDP/CDP2 have no GPR operand; the opc1 operand is also wider (Inst{23-20}).
+ bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
+ bool LdStCop = LdStCopOpcode(Opcode);
+ bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);
+
+ OpIdx = 0;
+
+ if (RtOut) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+ MI.addOperand(MCOperand::CreateImm(coproc));
+ ++OpIdx;
+
+ if (LdStCop) {
+ // Unindexed form if P:W = 0b00 --> _OPTION variant
+ unsigned PW = getPBit(insn) << 1 | getWBit(insn);
+
+ MI.addOperand(MCOperand::CreateImm(decodeRd(insn)));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx += 2;
+
+ if (PW) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned IndexMode =
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
+ ARM_AM::no_shift, IndexMode);
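+ // The imm8 field is a word offset: e.g., imm8 = 3 encodes a byte offset of 12.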
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
+ ++OpIdx;
+ }
+ } else {
+ MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
+ : GetCopOpc1(insn, NoGPR)));
+ ++OpIdx;
+
+ if (!RtOut) {
+ MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
+ : MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn)))
+ : MCOperand::CreateImm(decodeRn(insn)));
+
+ MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
+
+ OpIdx += 2;
+
+ if (!OneCopOpc) {
+ MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
+ ++OpIdx;
+ }
+ }
+
+ return true;
+}
+
+// Branch Instructions.
+// BL: SignExtend(Imm24:'00', 32)
+// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
+// SMC: ZeroExtend(imm4, 32)
+// SVC: ZeroExtend(Imm24, 32)
+//
+// Various coprocessor instructions are assigned BrFrm arbitrarily.
+// Delegates to DisassembleCoprocessor() helper function.
+//
+// MRS/MRSsys: Rd
+// MSR/MSRsys: Rm mask=Inst{19-16}
+// BXJ: Rm
+// MSRi/MSRsysi: so_imm
+// SRSW/SRS: ldstm_mode:$amode mode_imm
+// RFEW/RFE: ldstm_mode:$amode Rn
+static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (CoprocessorOpcode(Opcode))
+ return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ // MRS and MRSsys take one GPR reg Rd.
+ if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // BXJ takes one GPR reg Rm.
+ if (Opcode == ARM::BXJ) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // MSR takes a mask, followed by one GPR reg Rm. The mask contains the R Bit
+ // in bit 4 and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSR) {
+ assert(NumOps >= 2 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 2;
+ return true;
+ }
+ // MSRi takes a mask, followed by one so_imm operand. The mask contains the
+ // R Bit in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSRi) {
+ // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
+ // The hints instructions have more specific encodings, so if mask == 0,
+ // we should reject this as an invalid instruction.
+ if (slice(insn, 19, 16) == 0)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
+ // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
+ // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
+ // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
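+ // For example, Rot = 2 and Imm = 0xFF decode to rotr32(0xFF, 4) = 0xF000000F.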
+ unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
+ unsigned Imm = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
+ Opcode == ARM::RFEW || Opcode == ARM::RFE) {
+ ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
+
+ if (Opcode == ARM::SRSW || Opcode == ARM::SRS)
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ else
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 3;
+ return true;
+ }
+
+ assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred
+ || Opcode == ARM::SMC || Opcode == ARM::SVC) &&
+ "Unexpected Opcode");
+
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
+
+ int Imm32 = 0;
+ if (Opcode == ARM::SMC) {
+ // ZeroExtend(imm4, 32) where imm4 = Inst{3-0}.
+ Imm32 = slice(insn, 3, 0);
+ } else if (Opcode == ARM::SVC) {
+ // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}.
+ Imm32 = slice(insn, 23, 0);
+ } else {
+ // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}.
+ unsigned Imm26 = slice(insn, 23, 0) << 2;
+ Imm32 = SignExtend32<26>(Imm26);
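+ // For example, imm24 = 0xFFFFFF gives Imm26 = 0x3FFFFFC, which sign-extends
+ // to -4.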
+ }
+
+ MI.addOperand(MCOperand::CreateImm(Imm32));
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// Misc. Branch Instructions.
+// BX_RET, MOVPCLR
+// BLX, BLX_pred, BX, BX_pred
+// BLXi
+static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // BX_RET and MOVPCLR have only two predicate operands; do an early return.
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
+ return true;
+
+ // BLX and BX take one GPR reg.
+ if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred ||
+ Opcode == ARM::BX || Opcode == ARM::BX_pred) {
+ assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ OpIdx = 1;
+ return true;
+ }
+
+ // BLXi takes imm32 (the PC offset).
+ if (Opcode == ARM::BLXi) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
+ // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}.
+ unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1;
+ int Imm32 = SignExtend32<26>(Imm26);
+ MI.addOperand(MCOperand::CreateImm(Imm32));
+ OpIdx = 1;
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
+ uint32_t lsb = slice(insn, 11, 7);
+ uint32_t msb = slice(insn, 20, 16);
+ uint32_t Val = 0;
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+
+ for (uint32_t i = lsb; i <= msb; ++i)
+ Val |= (1 << i);
+ mask = ~Val;
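+ // For example, lsb = 4 and msb = 7 give Val = 0xF0 and mask = 0xFFFFFF0F.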
+ return true;
+}
+
+// Standard data-processing instructions allow PC as a register specifier,
+// but other DPFrm instructions that use PC as a register should be rejected.
+static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
+ switch (Opcode) {
+ default:
+ // Did we miss an opcode?
+ if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) {
+ DEBUG(errs() << "DPFrm with bad reg specifier(s)\n");
+ return true;
+ }
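+ // Otherwise fall through and treat the register specifiers as acceptable.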
+ case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr:
+ case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr:
+ case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr:
+ case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr:
+ return false;
+ }
+}
+
+// A major complication is that some of the saturating add/subtract operations
+// have Rd Rm Rn instead of the "normal" Rd Rn Rm.
+// They are QADD, QDADD, QDSUB, and QSUB.
+static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ bool isUnary = isUnaryDP(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Disassemble register def if there is one.
+ if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // Now disassemble the src operands.
+ if (OpIdx >= NumOps)
+ return false;
+
+ // Special-case handling of BFC/BFI/SBFX/UBFX.
+ if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
+ // A8.6.17 BFC & A8.6.18 BFI
+ // Sanity check Rd.
+ if (decodeRd(insn) == 15)
+ return false;
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+ }
+ uint32_t mask = 0;
+ if (!getBFCInvMask(insn, mask))
+ return false;
+
+ MI.addOperand(MCOperand::CreateImm(mask));
+ OpIdx += 2;
+ return true;
+ }
+ if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
+ // Sanity check Rd and Rm.
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1));
+ OpIdx += 3;
+ return true;
+ }
+
+ bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD ||
+ Opcode == ARM::QDSUB || Opcode == ARM::QSUB);
+
+ // BinaryDP has an Rn operand.
+ if (!isUnary) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ RmRn ? decodeRm(insn) : decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
+ if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Now disassemble operand 2.
+ if (OpIdx >= NumOps)
+ return false;
+
+ if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ // We have a reg/reg form.
+ // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
+ // routed here as well.
+ // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
+ if (BadRegsDPFrm(Opcode, insn))
+ return false;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ RmRn? decodeRn(insn) : decodeRm(insn))));
+ ++OpIdx;
+ } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
+ // These two instructions don't allow d as 15.
+ if (decodeRd(insn) == 15)
+ return false;
+ // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
+ assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
+ unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
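+ // For example, imm4 = 0xA and imm12 = 0xBCD combine into Imm16 = 0xABCD.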
+ if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
+ MI.addOperand(MCOperand::CreateImm(Imm16));
+ ++OpIdx;
+ } else {
+ // We have a reg/imm form.
+ // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
+ // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
+ // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
+ assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
+ unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
+ unsigned Imm = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ bool isUnary = isUnaryDP(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Disassemble register def if there is one.
+ if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the src operands.
+ if (OpIdx >= NumOps)
+ return false;
+
+ // BinaryDP has an Rn operand.
+ if (!isUnary) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
+ if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Disassemble operand 2, which consists of three components.
+ if (OpIdx + 2 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+2].RegClass < 0) &&
+ "Expect 3 reg operands");
+
+ // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
+ unsigned Rs = slice(insn, 4, 4);
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ if (Rs) {
+ // If Inst{7} != 0, we should reject this insn as an invalid encoding.
+ if (slice(insn, 7, 7))
+ return false;
+
+ // A8.6.3 ADC (register-shifted register)
+ // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+ //
+ // This also accounts for shift instructions (register) where, fortunately,
+ // Inst{19-16} = 0b0000.
+ // A8.6.89 LSL (register)
+ // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRn(insn) == 15 ||
+ decodeRm(insn) == 15 || decodeRs(insn) == 15)
+ return false;
+
+ // Register-controlled shifts: [Rm, Rs, shift].
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0)));
+ } else {
+ // Constant shifts: [Rm, reg0, shift_imm].
+ MI.addOperand(MCOperand::CreateReg(0)); // NoRegister
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm)));
+ }
+ OpIdx += 3;
+
+ return true;
+}
+
+static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack,
+ bool Imm) {
+ const StringRef Name = ARMInsts[Opcode].Name;
+ unsigned Rt = decodeRd(insn);
+ unsigned Rn = decodeRn(insn);
+ unsigned Rm = decodeRm(insn);
+ unsigned P = getPBit(insn);
+ unsigned W = getWBit(insn);
+
+ if (Store) {
+ // Only STR (immediate, register) allows PC as the source.
+ if (Name.startswith("STRB") && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (WBack && (Rn == 15 || Rn == Rt)) {
+ DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+ return true;
+ }
+ if (!Imm && Rm == 15) {
+ DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ // Only LDR (immediate, register) allows PC as the destination.
+ if (Name.startswith("LDRB") && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (Imm) {
+ // Immediate
+ if (Rn == 15) {
+ // The literal form must be in offset mode; it's an encoding error
+ // otherwise.
+ if (!(P == 1 && W == 0)) {
+ DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n");
+ return true;
+ }
+ // LDRB (literal) does not allow PC as the destination.
+ if (Opcode != ARM::LDRi12 && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ // Write back while Rn == Rt does not make sense.
+ if (WBack && (Rn == Rt)) {
+ DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ } else {
+ // Register
+ if (Rm == 15) {
+ DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (WBack && (Rn == 15 || Rn == Rt)) {
+ DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(((!isStore && MCID.getNumDefs() > 0) ||
+ (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
+ && "Invalid arguments");
+
+ // Operand 0 of a pre- and post-indexed store is the address base writeback.
+ if (isPrePost && isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the dst/src operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // After dst of a pre- and post-indexed load is the address base writeback.
+ if (isPrePost && !isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the base operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
+ && "Index mode or tied_to operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ // For the reg/reg form, the base reg is followed by +/- reg, shift op, and imm.
+ // For the immediate form, it is followed by +/- imm12.
+ // See also ARMAddressingModes.h (Addressing Mode #2).
+ if (OpIdx + 1 >= NumOps)
+ return false;
+
+ if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0))
+ return false;
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned IndexMode =
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ if (getIBit(insn) == 0) {
+ // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
+ // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
+ // been populated.
+ if (isPrePost) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ OpIdx += 1;
+ }
+
+ unsigned Imm12 = slice(insn, 11, 0);
+ if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 ||
+ Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) {
+ // Disassemble the 12-bit immediate offset, which is the second operand in
+ // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm).
+ int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12;
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the 12-bit immediate offset, which is the second operand in
+ // $am2offset => (ops GPR, i32imm).
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift,
+ IndexMode);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ OpIdx += 1;
+ } else {
+ // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid.
+ if (slice(insn,4,4) == 1)
+ return false;
+
+ // Disassemble the offset reg (Rm), shift type, and immediate shift length.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode)));
+ OpIdx += 2;
+ }
+
+ return true;
+}
+
+static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B);
+}
+
+static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
+}
+
+static bool HasDualReg(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
+ case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
+ return true;
+ }
+}
+
+static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(((!isStore && MCID.getNumDefs() > 0) ||
+ (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
+ && "Invalid arguments");
+
+ // Operand 0 of a pre- and post-indexed store is the address base writeback.
+ if (isPrePost && isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the dst/src operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Fill in the second Rt operand for LDRD and STRD.
+ if (HasDualReg(Opcode)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn) + 1)));
+ ++OpIdx;
+ }
+
+ // After dst of a pre- and post-indexed load is the address base writeback.
+ if (isPrePost && !isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the base operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
+ && "Offset mode or tied_to operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ // For reg/reg form, base reg is followed by +/- reg.
+ // For immediate form, it is followed by +/- imm8.
+ // See also ARMAddressingModes.h (Addressing Mode #3).
+ if (OpIdx + 1 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned IndexMode =
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ if (getAM3IBit(insn) == 1) {
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Disassemble the 8-bit immediate offset.
+ unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
+ unsigned Imm4L = insn & 0xF;
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L,
+ IndexMode);
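+ // For example, Imm4H = 0x1 and Imm4L = 0x8 give an 8-bit offset of 0x18 (24).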
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the offset reg (Rm).
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ OpIdx += 2;
+
+ return true;
+}
+
+static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
+}
+
+static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
+}
+
+// The algorithm for disassembly of LdStMulFrm is different from others because
+// it explicitly populates the two predicate operands after the base register.
+// After that, we need to populate the reglist with each affected register
+// encoded as an MCOperand.
+static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4");
+ NumOpsAdded = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base, if necessary.
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
+ Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
+ Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+ }
+
+ // Add the base register operand.
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Handling the two predicate operands before the reglist.
+ int64_t CondVal = getCondField(insn);
+ if (CondVal == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondVal));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+
+ NumOpsAdded += 3;
+
+ // Fill the variadic part of reglist.
+ unsigned RegListBits = insn & ((1 << 16) - 1);
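+ // For example, RegListBits = 0x4003 selects R0, R1, and LR (bit 14).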
+ for (unsigned i = 0; i < 16; ++i) {
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+ }
+
+ return true;
+}
+
+// LDREX, LDREXB, LDREXH: Rd Rn
+// LDREXD: Rd Rd+1 Rn
+// STREX, STREXB, STREXH: Rd Rm Rn
+// STREXD: Rd Rm Rm+1 Rn
+//
+// SWP, SWPB: Rd Rm Rn
+static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool isStore = slice(insn, 20, 20) == 0;
+ bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD);
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Store register Exclusive needs a source operand.
+ if (isStore) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn)+1)));
+ ++OpIdx;
+ }
+ } else if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn)+1)));
+ ++OpIdx;
+ }
+
+ // Finally add the pointer operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// Misc. Arithmetic Instructions.
+// CLZ: Rd Rm
+// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5
+// RBIT, REV, REV16, REVSH: Rd Rm
+static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+ // Sanity check the registers, which should not be 15.
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+ if (ThreeReg && decodeRn(insn) == 15)
+ return false;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ assert(NumOps >= 4 && "Expect >= 4 operands");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ // If there is still an operand info left which is an immediate operand, add
+ // an additional imm5 LSL/ASR operand.
+ if (ThreeReg && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Extract the 5-bit immediate field Inst{11-7}.
+ unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+ if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::lsl;
+ else if (Opcode == ARM::PKHTB)
+ Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // A8.6.183 SSAT
+ // if d == 15 || n == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
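+ // For example, SSAT with Inst{20-16} = 0b01111 saturates to a 16-bit signed
+ // range.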
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
+// Extend instructions.
+// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
+// The 2nd operand register is Rn and the 3rd operand register is Rm for the
+// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
+static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // A8.6.220 SXTAB
+ // if d == 15 || m == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ // If there is still an operand info left which is an immediate operand, add
+ // an additional rotate immediate operand.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Extract the 2-bit rotate field Inst{11-10}.
+ unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
+ // Rotation by 8, 16, or 24 bits.
+ MI.addOperand(MCOperand::CreateImm(rot << 3));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+/////////////////////////////////////
+// //
+// Utility Functions For VFP //
+// //
+/////////////////////////////////////
+
+// Extract/Decode Dd/Sd:
+//
+// SP => d = UInt(Vd:D)
+// DP => d = UInt(D:Vd)
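+// For example, in SP form, Vd = 0b0101 with D = 1 gives d = 0b01011 = 11 (S11).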
+static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
+ : (decodeRd(insn) | getDBit(insn) << 4);
+}
+
+// Extract/Decode Dn/Sn:
+//
+// SP => n = UInt(Vn:N)
+// DP => n = UInt(N:Vn)
+static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
+ : (decodeRn(insn) | getNBit(insn) << 4);
+}
+
+// Extract/Decode Dm/Sm:
+//
+// SP => m = UInt(Vm:M)
+// DP => m = UInt(M:Vm)
+static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn))
+ : (decodeRm(insn) | getMBit(insn) << 4);
+}
+
+// A7.5.1
+static APInt VFPExpandImm(unsigned char byte, unsigned N) {
+ assert(N == 32 || N == 64);
+
+ uint64_t Result;
+ unsigned bit6 = slice(byte, 6, 6);
+ if (N == 32) {
+ Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19;
+ if (bit6)
+ Result |= 0x1f << 25;
+ else
+ Result |= 0x1 << 30;
+ } else {
+ Result = (uint64_t)slice(byte, 7, 7) << 63 |
+ (uint64_t)slice(byte, 5, 0) << 48;
+ if (bit6)
+ Result |= 0xffULL << 54;
+ else
+ Result |= 0x1ULL << 62;
+ }
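+ // For example, byte = 0x70 with N = 32 expands to 0x3F800000, i.e., 1.0f.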
+ return APInt(N, Result);
+}
+
+// VFP Unary Format Instructions:
+//
+// VCMP[E]ZD, VCMP[E]ZS: compare one floating-point register with zero
+// VCVTDS, VCVTSD: convert between double-precision and single-precision
+// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
+static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ bool isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
+ ++OpIdx;
+
+ // Early return for compare with zero instructions.
+ if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS
+ || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS)
+ return true;
+
+ RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
+ ++OpIdx;
+
+ return true;
+}
+
+// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs.
+// Some of them have operand constraints which tie the first operand in the
+// InOperandList to that of the dst. As far as asm printing is concerned, this
+// tied_to operand is simply skipped.
+static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ bool isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
+ ++OpIdx;
+
+ // Skip tied_to operand constraint.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ assert(NumOps >= 4 && "Expect >=4 operands");
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP))));
+ ++OpIdx;
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A8.6.295 vcvt (floating-point <-> integer)
+// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
+// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
+//
+// A8.6.297 vcvt (floating-point and fixed-point)
+// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
+static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
+ bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
+ unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ if (fixed_point) {
+ // A8.6.297
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+ int size = slice(insn, 7, 7) == 0 ? 16 : 32;
+ int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
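+ // For example, size = 32 with imm4:i = 0b10000 gives fbits = 32 - 16 = 16.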
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClassID,
+ decodeVFPRd(insn, SP))));
+
+ assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
+ "Tied to operand expected");
+ MI.addOperand(MI.getOperand(0));
+
+ assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef() && "Imm operand expected");
+ MI.addOperand(MCOperand::CreateImm(fbits));
+
+ NumOpsAdded = 3;
+ } else {
+ // A8.6.295
+ // The Rd (destination) and Rm (source) bits have different interpretations
+ // depending on whether the operand is single precision.
+ unsigned d, m;
+ if (slice(insn, 18, 18) == 1) { // to_integer operation
+ d = decodeVFPRd(insn, true /* Is Single Precision */);
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::SPRRegClassID, d)));
+ m = decodeVFPRm(insn, SP);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
+ } else {
+ d = decodeVFPRd(insn, SP);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
+ m = decodeVFPRm(insn, true /* Is Single Precision */);
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::SPRRegClassID, m)));
+ }
+ NumOpsAdded = 2;
+ }
+
+ return true;
+}
+
+// VMOVRS - A8.6.330
+// Rt => Rd; Sn => UInt(Vn:N)
+static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ decodeVFPRn(insn, true))));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// VMOVRRD - A8.6.332
+// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
+//
+// VMOVRRS - A8.6.331
+// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
+static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx = 2;
+
+ if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
+ unsigned Sm = decodeVFPRm(insn, true);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm+1)));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeVFPRm(insn, false))));
+ ++OpIdx;
+ }
+ return true;
+}
+
+// VMOVSR - A8.6.330
+// Rt => Rd; Sn => UInt(Vn:N)
+static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ decodeVFPRn(insn, true))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// VMOVDRR - A8.6.332
+// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
+//
+// VMOVSRR - A8.6.331
+// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
+static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
+ unsigned Sm = decodeVFPRm(insn, true);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm+1)));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeVFPRm(insn, false))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx += 2;
+ return true;
+}
+
+// VFP Load/Store Instructions.
+// VLDRD, VLDRS, VSTRD, VSTRS
+static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
+
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
+ unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ // Extract Dd/Sd for operand 0.
+ unsigned RegD = decodeVFPRd(insn, isSPVFP);
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD)));
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Next comes the AM5 Opcode.
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned char Imm8 = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8)));
+
+ NumOpsAdded = 3;
+
+ return true;
+}
+
+// VFP Load/Store Multiple Instructions.
+// We have an optional write back reg, the base, and two predicate operands.
+// It is then followed by a reglist of either DPR(s) or SPR(s).
+//
+// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
+static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base, if necessary.
+ if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
+ Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
+ Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Handling the two predicate operands before the reglist.
+ int64_t CondVal = getCondField(insn);
+ if (CondVal == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondVal));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+
+ OpIdx += 3;
+
+ bool isSPVFP = (Opcode == ARM::VLDMSIA ||
+ Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMSIA ||
+ Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
+ unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ // Extract Dd/Sd.
+ unsigned RegD = decodeVFPRd(insn, isSPVFP);
+
+ // Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
+ unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
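+ // For example, Imm8 = 8 encodes a list of 8 SPRs or 4 DPRs.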
+
+ // Apply some sanity checks before proceeding.
+ if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16))
+ return false;
+
+ for (unsigned i = 0; i < Regs; ++i) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
+ RegD + i)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// Misc. VFP Instructions.
+// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand)
+// FCONSTD (DPR and a VFPf64Imm operand)
+// FCONSTS (SPR and a VFPf32Imm operand)
+// VMRS/VMSR (GPR operand)
+static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ if (Opcode == ARM::FMSTAT)
+ return true;
+
+ assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands");
+
+ unsigned RegEnum = 0;
+ switch (OpInfo[0].RegClass) {
+ case ARM::DPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false));
+ break;
+ case ARM::SPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true));
+ break;
+ case ARM::GPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn));
+ break;
+ default:
+ assert(0 && "Invalid reg class id");
+ return false;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(RegEnum));
+ ++OpIdx;
+
+ // Extract/decode the f64/f32 immediate.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // The asm syntax specifies the floating point value, not the 8-bit literal.
+ APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
+ Opcode == ARM::FCONSTD ? 64 : 32);
+ APFloat immFP = APFloat(immRaw, true);
+ double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
+ immFP.convertToFloat();
+ MI.addOperand(MCOperand::CreateFPImm(imm));
+
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file.
+#include "ThumbDisassemblerCore.h"
+
+/////////////////////////////////////////////////////
+// //
+// Utility Functions For ARM Advanced SIMD //
+// //
+/////////////////////////////////////////////////////
+
+// The following NEON naming conventions are based on A8.6.266 VABA, VABAL.
+// Note that A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of
+// VABA/VABAL.
+
+// A7.3 Register encoding
+
+// Extract/Decode NEON D/Vd:
+//
+// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for
+// doubleword, Dd = UInt(D:Vd). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// D = Inst{22}, Vd = Inst{15-12}
+static unsigned decodeNEONRd(uint32_t insn) {
+ return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
+}
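+// As an illustration (arbitrary field values): D = 1 and Vd = 0b0101 give
+// D:Vd = 0b10101 = 21, i.e. D21 for a doubleword operand; per the note above,
+// getRegisterEnum() compensates for quadword operands, yielding Q10.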
+
+// Extract/Decode NEON N/Vn:
+//
+// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for
+// doubleword, Dn = UInt(N:Vn). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// N = Inst{7}, Vn = Inst{19-16}
+static unsigned decodeNEONRn(uint32_t insn) {
+ return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
+}
+
+// Extract/Decode NEON M/Vm:
+//
+// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for
+// doubleword, Dm = UInt(M:Vm). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// M = Inst{5}, Vm = Inst{3-0}
+static unsigned decodeNEONRm(uint32_t insn) {
+ return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
+}
+
+namespace {
+enum ElemSize {
+ ESizeNA = 0,
+ ESize8 = 8,
+ ESize16 = 16,
+ ESize32 = 32,
+ ESize64 = 64
+};
+} // End of unnamed namespace
+
+// size field -> Inst{11-10}
+// index_align field -> Inst{7-4}
+//
+// The Lane Index interpretation depends on the Data Size:
+// 8 (encoded as size = 0b00) -> Index = index_align[3:1]
+// 16 (encoded as size = 0b01) -> Index = index_align[3:2]
+// 32 (encoded as size = 0b10) -> Index = index_align[3]
+//
+// Ref: A8.6.317 VLD4 (single 4-element structure to one lane).
+static unsigned decodeLaneIndex(uint32_t insn) {
+ unsigned size = insn >> 10 & 3;
+ assert((size == 0 || size == 1 || size == 2) &&
+ "Encoding error: size should be either 0, 1, or 2");
+
+ unsigned index_align = insn >> 4 & 0xF;
+ return (index_align >> 1) >> size;
+}
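+// As an illustration (arbitrary encoding): size = 0b01 (16-bit elements) and
+// index_align = 0b0110 give Index = index_align[3:2] = 1, matching
+// (index_align >> 1) >> size = 0b011 >> 1 = 1.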
+
+// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4)
+// op = Inst{5}, cmode = Inst{11-8}
+// i = Inst{24} (ARM architecture)
+// imm3 = Inst{18-16}, imm4 = Inst{3-0}
+// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
+static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
+ unsigned char op = (insn >> 5) & 1;
+ unsigned char cmode = (insn >> 8) & 0xF;
+ unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
+ ((insn >> 16) & 7) << 4 |
+ (insn & 0xF);
+ return (op << 12) | (cmode << 8) | Imm8;
+}
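+// As an illustration (arbitrary field values): op = 1, cmode = 0b1110 and an
+// i:imm3:imm4 of 0x2A pack into (1 << 12) | (0xE << 8) | 0x2A = 0x1E2A,
+// i.e. the concatenation op:cmode:imm8.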
+
+// A8.6.339 VMUL, VMULL (by scalar)
+// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7
+// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15
+static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize16:
+ return insn & 7;
+ case ESize32:
+ return insn & 0xF;
+ default:
+ assert(0 && "Unreachable code!");
+ return 0;
+ }
+}
+
+// A8.6.339 VMUL, VMULL (by scalar)
+// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7
+// ESize32 => index = Inst{5} (M) D0-D15
+static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize16:
+ return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1);
+ case ESize32:
+ return (insn >> 5) & 1;
+ default:
+ assert(0 && "Unreachable code!");
+ return 0;
+ }
+}
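+// As an illustration (arbitrary field values): with ESize16, M = 1 and
+// Vm = 0b0101, decodeRestrictedDm() returns Vm<2:0> = 0b101 = 5 (D5) and
+// decodeRestrictedDmIndex() returns M:Vm<3> = 0b10 = 2.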
+
+// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD)
+// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}.
+static unsigned decodeVCVTFractionBits(uint32_t insn) {
+ return 64 - ((insn >> 16) & 0x3F);
+}
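+// E.g., an imm6 field of 0b110000 (48) encodes <fbits> = 64 - 48 = 16.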
+
+// A8.6.302 VDUP (scalar)
+// ESize8 => index = Inst{19-17}
+// ESize16 => index = Inst{19-18}
+// ESize32 => index = Inst{19}
+static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize8:
+ return (insn >> 17) & 7;
+ case ESize16:
+ return (insn >> 18) & 3;
+ case ESize32:
+ return (insn >> 19) & 1;
+ default:
+ assert(0 && "Unspecified element size!");
+ return 0;
+ }
+}
+
+// A8.6.328 VMOV (ARM core register to scalar)
+// A8.6.329 VMOV (scalar to ARM core register)
+// ESize8 => index = Inst{21:6-5}
+// ESize16 => index = Inst{21:6}
+// ESize32 => index = Inst{21}
+static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize8:
+ return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3);
+ case ESize16:
+ return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1);
+ case ESize32:
+ return ((insn >> 21) & 1);
+ default:
+ assert(0 && "Unspecified element size!");
+ return 0;
+ }
+}
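+// As an illustration (arbitrary field values): with ESize8, Inst{21} = 1 and
+// Inst{6-5} = 0b10, the lane index is (1 << 2) | 0b10 = 6.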
+
+// Imm6 = Inst{21-16}, L = Inst{7}
+//
+// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI):
+// case L:imm6 of
+// '0001xxx' => esize = 8; shift_amount = imm6 - 8
+// '001xxxx' => esize = 16; shift_amount = imm6 - 16
+// '01xxxxx' => esize = 32; shift_amount = imm6 - 32
+// '1xxxxxx' => esize = 64; shift_amount = imm6
+//
+// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN):
+// case L:imm6 of
+// '0001xxx' => esize = 8; shift_amount = 16 - imm6
+// '001xxxx' => esize = 16; shift_amount = 32 - imm6
+// '01xxxxx' => esize = 32; shift_amount = 64 - imm6
+// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6
+//
+static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) {
+ ElemSize esize = ESizeNA;
+ unsigned L = (insn >> 7) & 1;
+ unsigned imm6 = (insn >> 16) & 0x3F;
+ if (L == 0) {
+ if (imm6 >> 3 == 1)
+ esize = ESize8;
+ else if (imm6 >> 4 == 1)
+ esize = ESize16;
+ else if (imm6 >> 5 == 1)
+ esize = ESize32;
+ else
+ assert(0 && "Wrong encoding of Inst{7:21-16}!");
+ } else
+ esize = ESize64;
+
+ if (LeftShift)
+ return esize == ESize64 ? imm6 : (imm6 - esize);
+ else
+ return esize == ESize64 ? (esize - imm6) : (2*esize - imm6);
+}
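+// As an illustration (arbitrary encoding): L = 0 and imm6 = 0b001010 (10)
+// fall in the '0001xxx' row, so esize = 8; a left shift then decodes to
+// imm6 - 8 = 2, while a right shift decodes to 2*esize - imm6 = 6.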
+
+// A8.6.305 VEXT
+// Imm4 = Inst{11-8}
+static unsigned decodeN3VImm(uint32_t insn) {
+ return (insn >> 8) & 0xF;
+}
+
+// VLD*
+// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
+// VLD*LN*
+// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx)
+// VST*
+// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ...
+// VST*LN*
+// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)]
+//
+// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
+static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
+ unsigned alignment, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ // At least one DPR register plus addressing mode #6.
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // We have homogeneous NEON registers for Load/Store.
+ unsigned RegClass = 0;
+
+ // Double-spaced registers have increments of 2.
+ unsigned Inc = DblSpaced ? 2 : 1;
+
+ unsigned Rn = decodeRn(insn);
+ unsigned Rm = decodeRm(insn);
+ unsigned Rd = decodeNEONRd(insn);
+
+ // A7.7.1 Advanced SIMD addressing mode.
+ bool WB = Rm != 15;
+
+ // LLVM Addressing Mode #6.
+ unsigned RmEnum = 0;
+ if (WB && Rm != 13)
+ RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
+
+ if (Store) {
+ // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
+ // then possible lane index.
+ assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ ++OpIdx;
+ }
+
+ assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ // addrmode6 := (ops GPR:$addr, i32imm)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
+ OpIdx += 2;
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(RmEnum));
+ ++OpIdx;
+ }
+
+ assert(OpIdx < NumOps &&
+ (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+ OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+ "Reg operand expected");
+
+ RegClass = OpInfo[OpIdx].RegClass;
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd)));
+ Rd += Inc;
+ ++OpIdx;
+ }
+
+ // Handle possible lane index.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+ ++OpIdx;
+ }
+
+ } else {
+ // Consume the DPR/QPR's, possible WB, AddrMode6, possible increment reg,
+ // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
+ RegClass = OpInfo[0].RegClass;
+
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd)));
+ Rd += Inc;
+ ++OpIdx;
+ }
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ ++OpIdx;
+ }
+
+ assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ // addrmode6 := (ops GPR:$addr, i32imm)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
+ OpIdx += 2;
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(RmEnum));
+ ++OpIdx;
+ }
+
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
+ "Tied to operand expected");
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Handle possible lane index.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+ ++OpIdx;
+ }
+ }
+
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
+ return true;
+}
+
+// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
+static bool Align4OneLaneInst(unsigned elem, unsigned size,
+ unsigned index_align, unsigned & alignment) {
+ unsigned bits = 0;
+ switch (elem) {
+ default:
+ return false;
+ case 1:
+ // A8.6.308
+ if (size == 0)
+ return slice(index_align, 0, 0) == 0;
+ else if (size == 1) {
+ bits = slice(index_align, 1, 0);
+ if (bits != 0 && bits != 1)
+ return false;
+ if (bits == 1)
+ alignment = 16;
+ return true;
+ } else if (size == 2) {
+ bits = slice(index_align, 2, 0);
+ if (bits != 0 && bits != 3)
+ return false;
+ if (bits == 3)
+ alignment = 32;
+ return true;
+ }
+ return true;
+ case 2:
+ // A8.6.311
+ if (size == 0) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 16;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 32;
+ return true;
+ } else if (size == 2) {
+ if (slice(index_align, 1, 1) != 0)
+ return false;
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 64;
+ return true;
+ }
+ return true;
+ case 3:
+ // A8.6.314
+ if (size == 0) {
+ if (slice(index_align, 0, 0) != 0)
+ return false;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) != 0)
+ return false;
+ return true;
+ } else if (size == 2) {
+ if (slice(index_align, 1, 0) != 0)
+ return false;
+ return true;
+ }
+ return true;
+ case 4:
+ // A8.6.317
+ if (size == 0) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 32;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 64;
+ return true;
+ } else if (size == 2) {
+ bits = slice(index_align, 1, 0);
+ if (bits == 3)
+ return false;
+ if (bits == 1)
+ alignment = 64;
+ else if (bits == 2)
+ alignment = 128;
+ return true;
+ }
+ return true;
+ }
+}
+
+// A7.7
+// If L (Inst{21}) == 0, store instructions.
+// Find out about double-spaced-ness of the Opcode and pass it on to
+// DisassembleNLdSt0().
+static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const StringRef Name = ARMInsts[Opcode].Name;
+ bool DblSpaced = false;
+ // 0 represents standard alignment, i.e., unaligned data access.
+ unsigned alignment = 0;
+
+ unsigned elem = 0; // legal values: {1, 2, 3, 4}
+ if (Name.startswith("VST1") || Name.startswith("VLD1"))
+ elem = 1;
+
+ if (Name.startswith("VST2") || Name.startswith("VLD2"))
+ elem = 2;
+
+ if (Name.startswith("VST3") || Name.startswith("VLD3"))
+ elem = 3;
+
+ if (Name.startswith("VST4") || Name.startswith("VLD4"))
+ elem = 4;
+
+ if (Name.find("LN") != std::string::npos) {
+ // "To one lane" instructions.
+ // See, for example, A8.6.317 VLD4 (single 4-element structure to one lane).
+
+ // Utility function takes number of elements, size, and index_align.
+ if (!Align4OneLaneInst(elem,
+ slice(insn, 11, 10),
+ slice(insn, 7, 4),
+ alignment))
+ return false;
+
+ // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
+ if (Name.endswith("16") || Name.endswith("16_UPD"))
+ DblSpaced = slice(insn, 5, 5) == 1;
+
+ // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
+ if (Name.endswith("32") || Name.endswith("32_UPD"))
+ DblSpaced = slice(insn, 6, 6) == 1;
+ } else if (Name.find("DUP") != std::string::npos) {
+ // Single element (or structure) to all lanes.
+ // Inst{9-8} encodes the number of element(s) in the structure, with:
+ // 0b00 (VLD1DUP) (for this, the a bit makes sense only for data sizes 16 and 32)
+ // 0b01 (VLD2DUP)
+ // 0b10 (VLD3DUP) (for this, the a bit must be encoded as 0)
+ // 0b11 (VLD4DUP)
+ //
+ // Inst{7-6} encodes the data size, with:
+ // 0b00 => 8, 0b01 => 16, 0b10 => 32
+ //
+ // Inst{4} (the a bit) encodes the align action (0: standard alignment)
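+ //
+ // As an illustration (hypothetical encoding): a VLD2DUP with
+ // Inst{7-6} = 0b01 (16-bit data) and a = 1 yields
+ // alignment = 2 * 16 = 32 bits.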
+ unsigned elem = slice(insn, 9, 8) + 1;
+ unsigned a = slice(insn, 4, 4);
+ if (elem != 3) {
+ // 0b11 is not a valid encoding for Inst{7-6}.
+ if (slice(insn, 7, 6) == 3)
+ return false;
+ unsigned data_size = 8 << slice(insn, 7, 6);
+ // For VLD1DUP, the a bit makes sense only for data sizes of 16 and 32.
+ if (a && data_size == 8)
+ return false;
+
+ // Now we can calculate the alignment!
+ if (a)
+ alignment = elem * data_size;
+ } else {
+ if (a) {
+ // A8.6.315 VLD3 (single 3-element structure to all lanes)
+ // The a bit must be encoded as 0.
+ return false;
+ }
+ }
+ } else {
+ // Multiple n-element structures with type encoded as Inst{11-8}.
+ // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
+
+ // Inst{5-4} encodes alignment.
+ unsigned align = slice(insn, 5, 4);
+ switch (align) {
+ default:
+ break;
+ case 1:
+ alignment = 64; break;
+ case 2:
+ alignment = 128; break;
+ case 3:
+ alignment = 256; break;
+ }
+
+ unsigned type = slice(insn, 11, 8);
+ // Reject UNDEFINED instructions based on type and align.
+ // Plus set DblSpaced flag where appropriate.
+ switch (elem) {
+ default:
+ break;
+ case 1:
+ // n == 1
+ // A8.6.307 & A8.6.391
+ if ((type == 7 && slice(align, 1, 1) == 1) ||
+ (type == 10 && align == 3) ||
+ (type == 6 && slice(align, 1, 1) == 1))
+ return false;
+ break;
+ case 2:
+ // n == 2 && type == 0b1001 -> DblSpaced = true
+ // A8.6.310 & A8.6.393
+ if ((type == 8 || type == 9) && align == 3)
+ return false;
+ DblSpaced = (type == 9);
+ break;
+ case 3:
+ // n == 3 && type == 0b0101 -> DblSpaced = true
+ // A8.6.313 & A8.6.395
+ if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
+ return false;
+ DblSpaced = (type == 5);
+ break;
+ case 4:
+ // n == 4 && type == 0b0001 -> DblSpaced = true
+ // A8.6.316 & A8.6.397
+ if (slice(insn, 7, 6) == 3)
+ return false;
+ DblSpaced = (type == 1);
+ break;
+ }
+ }
+ return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
+ slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
+}
+
+// VMOV (immediate)
+// Qd/Dd imm
+// VBIC (immediate)
+// VORR (immediate)
+// Qd/Dd imm src(=Qd/Dd)
+static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+ decodeNEONRd(insn))));
+
+ ElemSize esize = ESizeNA;
+ switch (Opcode) {
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv16i8:
+ esize = ESize8;
+ break;
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv8i16:
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv8i16:
+ esize = ESize16;
+ break;
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
+ case ARM::VBICiv2i32:
+ case ARM::VBICiv4i32:
+ case ARM::VORRiv2i32:
+ case ARM::VORRiv4i32:
+ esize = ESize32;
+ break;
+ case ARM::VMOVv1i64:
+ case ARM::VMOVv2i64:
+ esize = ESize64;
+ break;
+ default:
+ assert(0 && "Unexpected opcode!");
+ return false;
+ }
+
+ // One register and a modified immediate value.
+ // Add the imm operand.
+ MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
+
+ NumOpsAdded = 2;
+
+ // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in.
+ if (NumOps >= 3 &&
+ (OpInfo[2].RegClass == ARM::DPRRegClassID ||
+ OpInfo[2].RegClass == ARM::QPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+ decodeNEONRd(insn))));
+ NumOpsAdded += 1;
+ }
+
+ return true;
+}
+
+namespace {
+enum N2VFlag {
+ N2V_None,
+ N2V_VectorDupLane,
+ N2V_VectorConvert_Between_Float_Fixed
+};
+} // End of unnamed namespace
+
+// Vector Convert [between floating-point and fixed-point]
+// Qd/Dd Qm/Dm [fbits]
+//
+// Vector Duplicate Lane (from scalar to all elements) Instructions.
+// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q:
+// Qd/Dd Dm index
+//
+// Vector Move Long:
+// Qd Dm
+//
+// Vector Move Narrow:
+// Dd Qm
+//
+// Others
+static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opc];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 2 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ ElemSize esize = ESizeNA;
+ if (Flag == N2V_VectorDupLane) {
+ // VDUPLN has its index embedded. Its size can be inferred from the Opcode.
+ assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q &&
+ "Unexpected Opcode");
+ esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8
+ : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16
+ : ESize32);
+ }
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // VPADAL...
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ // VZIP and others have two TIED_TO reg operands.
+ int Idx;
+ while (OpIdx < NumOps &&
+ (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // Add TIED_TO operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Add the imm operand, if required.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+
+ unsigned imm = 0xFFFFFFFF;
+
+ if (Flag == N2V_VectorDupLane)
+ imm = decodeNVLaneDupIndex(insn, esize);
+ if (Flag == N2V_VectorConvert_Between_Float_Fixed)
+ imm = decodeVCVTFractionBits(insn);
+
+ assert(imm != 0xFFFFFFFF && "Internal error");
+ MI.addOperand(MCOperand::CreateImm(imm));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_None, B);
+}
+static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_VectorConvert_Between_Float_Fixed, B);
+}
+static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_VectorDupLane, B);
+}
+
+// Vector Shift [Accumulate] Instructions.
+// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt
+//
+// Vector Shift Left Long (with maximum shift count) Instructions.
+// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size)
+//
+static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ assert(NumOps >= 3 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 3 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+ OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+ "Reg operand expected");
+
+ // Qm/Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
+
+ // Add the imm operand.
+
+ // VSHLL has maximum shift count as the imm, inferred from its size.
+ unsigned Imm;
+ switch (Opcode) {
+ default:
+ Imm = decodeNVSAmt(insn, LeftShift);
+ break;
+ case ARM::VSHLLi8:
+ Imm = 8;
+ break;
+ case ARM::VSHLLi16:
+ Imm = 16;
+ break;
+ case ARM::VSHLLi32:
+ Imm = 32;
+ break;
+ }
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+
+ return true;
+}
+
+// Left shift instructions.
+static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true,
+ B);
+}
+// Right shift instructions have different shift amount interpretation.
+static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
+}
+
+namespace {
+enum N3VFlag {
+ N3V_None,
+ N3V_VectorExtract,
+ N3V_VectorShift,
+ N3V_Multiply_By_Scalar
+};
+} // End of unnamed namespace
+
+// NEON Three Register Instructions with Optional Immediate Operand
+//
+// Vector Extract Instructions.
+// Qd/Dd Qn/Dn Qm/Dm imm4
+//
+// Vector Shift (Register) Instructions.
+// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n)
+//
+// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions.
+// Qd/Dd Qn/Dn RestrictedDm index
+//
+// Others
+static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
+ assert(NumOps >= 3 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 3 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ bool VdVnVm = (Flag != N3V_VectorShift);
+ bool IsImm4 = (Flag == N3V_VectorExtract);
+ bool IsDmRestricted = (Flag == N3V_Multiply_By_Scalar);
+ ElemSize esize = ESizeNA;
+ if (Flag == N3V_Multiply_By_Scalar) {
+ unsigned size = (insn >> 20) & 3;
+ if (size == 1) esize = ESize16;
+ if (size == 2) esize = ESize32;
+ assert (esize == ESize16 || esize == ESize32);
+ }
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // VABA, VABAL, VBSLd, VBSLq, ...
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Dn = Inst{7:19-16} => NEON Rn
+ // or
+ // Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ VdVnVm ? decodeNEONRn(insn)
+ : decodeNEONRm(insn))));
+ ++OpIdx;
+
+ // Dm = Inst{5:3-0} => NEON Rm
+ // or
+ // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
+ // or
+ // Dn = Inst{7:19-16} => NEON Rn
+ unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize)
+ : decodeNEONRm(insn))
+ : decodeNEONRn(insn);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Add the imm operand.
+ unsigned Imm = 0;
+ if (IsImm4)
+ Imm = decodeN3VImm(insn);
+ else if (IsDmRestricted)
+ Imm = decodeRestrictedDmIndex(insn, esize);
+ else {
+ assert(0 && "Internal error: unreachable code!");
+ return false;
+ }
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_None, B);
+}
+static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_VectorShift, B);
+}
+static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_VectorExtract, B);
+}
+static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_Multiply_By_Scalar, B);
+}
+
+// Vector Table Lookup
+//
+// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm
+// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm
+// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm
+// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm
+static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::DPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ OpInfo[2].RegClass == ARM::DPRRegClassID &&
+ "Expect >= 3 operands and first 3 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned Rn = decodeNEONRn(insn);
+
+ // {Dn} encoded as len = 0b00
+ // {Dn Dn+1} encoded as len = 0b01
+ // {Dn Dn+1 Dn+2 } encoded as len = 0b10
+ // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11
+ unsigned Len = slice(insn, 9, 8) + 1;
+
+ // Dd (the destination vector)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // Process tied_to operand constraint.
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Do the <list> now.
+ for (unsigned i = 0; i < Len; ++i) {
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ Rn + i)));
+ ++OpIdx;
+ }
+
+ // Dm (the index vector)
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+ "Reg operand (index vector) expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// Vector Get Lane (move scalar to ARM core register) Instructions.
+// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
+static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ OpInfo[2].RegClass < 0 &&
+ "Expect >= 3 operands with one dst operand");
+
+ ElemSize esize =
+ Opcode == ARM::VGETLNi32 ? ESize32
+ : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
+ : ESize8);
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ // Dn = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRn(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+ NumOpsAdded = 3;
+ return true;
+}
+
+// Vector Set Lane (move ARM core register to scalar) Instructions.
+// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
+static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(MCID.getNumDefs() == 1 && NumOps >= 4 &&
+ OpInfo[0].RegClass == ARM::DPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
+ OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[3].RegClass < 0 &&
+ "Expect >= 4 operands with one dst operand");
+
+ ElemSize esize =
+ Opcode == ARM::VSETLNi8 ? ESize8
+ : (Opcode == ARM::VSETLNi16 ? ESize16
+ : ESize32);
+
+ // Dd = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRn(insn))));
+
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+ NumOpsAdded = 4;
+ return true;
+}
+
+// Vector Duplicate Instructions (from ARM core register to all elements).
+// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
+static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 2 operands and first 2 as reg operand");
+
+ unsigned RegClass = OpInfo[0].RegClass;
+
+ // Qd/Dd = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass,
+ decodeNEONRn(insn))));
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ NumOpsAdded = 2;
+ return true;
+}
+
+static inline bool PreLoadOpcode(unsigned Opcode) {
+ switch(Opcode) {
+ case ARM::PLDi12: case ARM::PLDrs:
+ case ARM::PLDWi12: case ARM::PLDWrs:
+ case ARM::PLIi12: case ARM::PLIrs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // PLDi12, PLDWi12, PLIi12: addrmode_imm12
+ // PLDrs, PLDWrs, PLIrs: ldst_so_reg
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
+ || Opcode == ARM::PLIi12) {
+ unsigned Imm12 = slice(insn, 11, 0);
+ bool Negative = getUBit(insn) == 0;
+
+ // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12.
+ if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) {
+ DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n");
+ MI.setOpcode(ARM::PLDi12);
+ }
+
+ // -0 is represented specially. All other values are as normal.
+ int Offset = Negative ? -1 * Imm12 : Imm12;
+ if (Imm12 == 0 && Negative)
+ Offset = INT32_MIN;
+
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ NumOpsAdded = 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
+ NumOpsAdded = 3;
+ }
+
+ return true;
+}
+
+static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) {
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ unsigned opt = slice(insn, 3, 0);
+ switch (opt) {
+ case ARM_MB::SY: case ARM_MB::ST:
+ case ARM_MB::ISH: case ARM_MB::ISHST:
+ case ARM_MB::NSH: case ARM_MB::NSHST:
+ case ARM_MB::OSH: case ARM_MB::OSHST:
+ MI.addOperand(MCOperand::CreateImm(opt));
+ NumOpsAdded = 1;
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ switch (Opcode) {
+ case ARM::CLREX:
+ case ARM::NOP:
+ case ARM::TRAP:
+ case ARM::YIELD:
+ case ARM::WFE:
+ case ARM::WFI:
+ case ARM::SEV:
+ return true;
+ case ARM::SWP:
+ case ARM::SWPB:
+ // SWP, SWPB: Rd Rm Rn
+ // Delegate to DisassembleLdStExFrm().
+ return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ default:
+ break;
+ }
+
+ if (Opcode == ARM::SETEND) {
+ NumOpsAdded = 1;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
+ return true;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::CPS3p) { // M = 1
+ // Let's reject these impossible imod values by returning false:
+ // 1. (imod=0b01)
+ //
+ // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an
+ // invalid combination, so we reject imod=0b00 here as well.
+ if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::CPS2p) { // mode = 0, M = 0
+ // Let's reject these impossible imod values by returning false:
+ // 1. (imod=0b00,M=0)
+ // 2. (imod=0b01)
+ if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // DBG has its option specified in Inst{3-0}.
+ if (Opcode == ARM::DBG) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}).
+ if (Opcode == ARM::BKPT) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 |
+ slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ if (PreLoadOpcode(Opcode))
+ return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ assert(0 && "Unexpected misc instruction!");
+ return false;
+}
+
+/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
+/// We divide the disassembly task into different categories, with each one
+/// corresponding to a specific instruction encoding format. There could be
+/// exceptions when handling a specific format, and that is why the Opcode is
+/// also present in the function prototype.
+static const DisassembleFP FuncPtrs[] = {
+ &DisassemblePseudo,
+ &DisassembleMulFrm,
+ &DisassembleBrFrm,
+ &DisassembleBrMiscFrm,
+ &DisassembleDPFrm,
+ &DisassembleDPSoRegFrm,
+ &DisassembleLdFrm,
+ &DisassembleStFrm,
+ &DisassembleLdMiscFrm,
+ &DisassembleStMiscFrm,
+ &DisassembleLdStMulFrm,
+ &DisassembleLdStExFrm,
+ &DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
+ &DisassembleExtFrm,
+ &DisassembleVFPUnaryFrm,
+ &DisassembleVFPBinaryFrm,
+ &DisassembleVFPConv1Frm,
+ &DisassembleVFPConv2Frm,
+ &DisassembleVFPConv3Frm,
+ &DisassembleVFPConv4Frm,
+ &DisassembleVFPConv5Frm,
+ &DisassembleVFPLdStFrm,
+ &DisassembleVFPLdStMulFrm,
+ &DisassembleVFPMiscFrm,
+ &DisassembleThumbFrm,
+ &DisassembleMiscFrm,
+ &DisassembleNGetLnFrm,
+ &DisassembleNSetLnFrm,
+ &DisassembleNDupFrm,
+
+ // VLD and VST (including one lane) Instructions.
+ &DisassembleNLdSt,
+
+ // A7.4.6 One register and a modified immediate value
+ // 1-Register Instructions with imm.
+ // LLVM only defines VMOVv instructions.
+ &DisassembleN1RegModImmFrm,
+
+ // 2-Register Instructions with no imm.
+ &DisassembleN2RegFrm,
+
+ // 2-Register Instructions with imm (vector convert float/fixed point).
+ &DisassembleNVCVTFrm,
+
+ // 2-Register Instructions with imm (vector dup lane).
+ &DisassembleNVecDupLnFrm,
+
+ // Vector Shift Left Instructions.
+ &DisassembleN2RegVecShLFrm,
+
+ // Vector Shift Right Instructions, which have a different interpretation of
+ // the shift amount encoded in the imm6 field.
+ &DisassembleN2RegVecShRFrm,
+
+ // 3-Register Data-Processing Instructions.
+ &DisassembleN3RegFrm,
+
+ // Vector Shift (Register) Instructions.
+ // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand)
+ &DisassembleN3RegVecShFrm,
+
+ // Vector Extract Instructions.
+ &DisassembleNVecExtractFrm,
+
+ // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
+ // By Scalar Instructions.
+ &DisassembleNVecMulScalarFrm,
+
+ // Vector Table Lookup uses byte indexes in a control vector to look up byte
+ // values in a table and generate a new vector.
+ &DisassembleNVTBLFrm,
+
+ NULL
+};
+
+/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+/// The general idea is to set the Opcode for the MCInst, followed by adding
+/// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
+/// to the Format-specific disassemble function for disassembly, followed by
+/// TryPredicateAndSBitModifier(), which adds the PredicateOperand and
+/// OptionalDefOperand that follow the Dst/Src operands.
+bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
+ // Stage 1 sets the Opcode.
+ MI.setOpcode(Opcode);
+ // If the number of operands is zero, we're done!
+ if (NumOps == 0)
+ return true;
+
+ // Stage 2 calls the format-specific disassemble function to build the operand
+ // list.
+ if (Disasm == NULL)
+ return false;
+ unsigned NumOpsAdded = 0;
+ bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
+
+ if (!OK || this->Err != 0) return false;
+ if (NumOpsAdded >= NumOps)
+ return true;
+
+ // Stage 3 deals with operands unaccounted for after stage 2 is finished.
+ // FIXME: Should this be done selectively?
+ return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
+}
+
+// A8.3 Conditional execution
+// A8.3.1 Pseudocode details of conditional execution
+// Condition bits '111x' indicate the instruction is always executed.
+static uint32_t CondCode(uint32_t CondField) {
+ if (CondField == 0xF)
+ return ARMCC::AL;
+ return CondField;
+}
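+// E.g., CondCode(0xF) maps the 0b1111 field to ARMCC::AL, the same value an
+// explicit 0b1110 (AL) condition field would produce.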
+
+/// DoPredicateOperands - DoPredicateOperands processes the predicate operands
+/// of some Thumb instructions which come before the reglist operands. It
+/// returns true if the two predicate operands have been processed.
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t /* insn */, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned Idx = MI.getNumOperands();
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of MCOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return true;
+ }
+
+ return false;
+}
+
+/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+/// the possible Predicate and SBitModifier, to build the remaining MCOperand
+/// constituents.
+bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const std::string &Name = ARMInsts[Opcode].Name;
+ unsigned Idx = MI.getNumOperands();
+ uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of MCOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else {
+ if (Name.length() > 1 && Name[0] == 't') {
+ // Thumb conditional branch instructions have their cond field embedded,
+ // like ARM.
+ //
+ // A8.6.16 B
+ // Check for undefined encodings.
+ unsigned cond;
+ if (Name == "t2Bcc") {
+ if ((cond = slice(insn, 25, 22)) >= 14)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+ } else if (Name == "tBcc") {
+ if ((cond = slice(insn, 11, 8)) == 14)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+ } else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ } else {
+ // ARM instructions get their condition field from Inst{31-28}.
+ // We should reject Inst{31-28} = 0b1111 as invalid encoding.
+ if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
+ }
+ }
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ Idx += 2;
+ NumOpsRemaining -= 2;
+ }
+
+ if (NumOpsRemaining == 0)
+ return true;
+
+ // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set.
+ if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0));
+ --NumOpsRemaining;
+ }
+
+ if (NumOpsRemaining == 0)
+ return true;
+ else
+ return false;
+}
+
+/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
+/// after BuildIt is finished.
+bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
+ uint32_t insn) {
+
+ if (!SP) return Status;
+
+ if (Opcode == ARM::t2IT)
+ Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false;
+ else if (InITBlock())
+ SP->UpdateIT();
+
+ return Status;
+}
+
+/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
+ unsigned short num)
+ : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
+ unsigned Idx = (unsigned)format;
+ assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
+ Disasm = FuncPtrs[Idx];
+}
+
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing the allocated memory. Caching can be
+/// performed by the API clients to improve performance.
+ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
+ // For "Unknown format", fail by returning a NULL pointer.
+ if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
+ DEBUG(errs() << "Unknown format\n");
+ return 0;
+ }
+
+ return new ARMBasicMCBuilder(Opcode, Format,
+ ARMInsts[Opcode].getNumOperands());
+}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+/// operand in place of the immediate Value in the MCInst. The immediate
+/// Value has had any PC adjustment made by the caller. If the getOpInfo()
+/// function was set as part of the setupBuilderForSymbolicDisassembly() call
+/// then that function is called to get any symbolic information at the
+/// builder's Address for this instruction. If that returns non-zero then the
+/// symbolic information it returns is used to create an MCExpr and that is
+/// added as an operand to the MCInst. This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
+ uint64_t InstSize,
+ MCInst &MI) {
+ if (!GetOpInfo)
+ return false;
+
+ struct LLVMOpInfo1 SymbolicOp;
+ SymbolicOp.Value = Value;
+ if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
+ return false;
+
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ assert("bad SymbolicOp.VariantKind");
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
new file mode 100644
index 000000000000..a7ba14141c0a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -0,0 +1,336 @@
+//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+//
+// The first part defines the enumeration type of ARM instruction format, which
+// specifies the encoding used by the instruction, as well as a helper function
+// to convert the enums to printable char strings.
+//
+// It also contains code to represent the concepts of Builder and DisassembleFP
+// to solve the problem of disassembling an ARM instr.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMDISASSEMBLERCORE_H
+#define ARMDISASSEMBLERCORE_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm-c/Disassembler.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMDisassembler.h"
+
+namespace llvm {
+class MCContext;
+
+class ARMUtils {
+public:
+ static const char *OpcodeName(unsigned Opcode);
+};
+
+/////////////////////////////////////////////////////
+// //
+// Enums and Utilities for ARM Instruction Format //
+// //
+/////////////////////////////////////////////////////
+
+#define ARM_FORMATS \
+ ENTRY(ARM_FORMAT_PSEUDO, 0) \
+ ENTRY(ARM_FORMAT_MULFRM, 1) \
+ ENTRY(ARM_FORMAT_BRFRM, 2) \
+ ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
+ ENTRY(ARM_FORMAT_DPFRM, 4) \
+ ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \
+ ENTRY(ARM_FORMAT_LDFRM, 6) \
+ ENTRY(ARM_FORMAT_STFRM, 7) \
+ ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
+ ENTRY(ARM_FORMAT_STMISCFRM, 9) \
+ ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
+ ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
+ ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
+
+// ARM instruction format specifies the encoding used by the instruction.
+#define ENTRY(n, v) n = v,
+typedef enum {
+ ARM_FORMATS
+ ARM_FORMAT_NA
+} ARMFormat;
+#undef ENTRY
+
+// Converts enum to const char*.
+static inline const char *stringForARMFormat(ARMFormat form) {
+#define ENTRY(n, v) case n: return #n;
+ switch(form) {
+ ARM_FORMATS
+ case ARM_FORMAT_NA:
+ default:
+ return "";
+ }
+#undef ENTRY
+}
+
+/// Expands on the enum definitions from ARMBaseInstrInfo.h.
+/// These are used by the disassembler implementation.
+namespace ARMII {
+ enum {
+ NEONRegMask = 15,
+ GPRRegMask = 15,
+ NEON_RegRdShift = 12,
+ NEON_D_BitShift = 22,
+ NEON_RegRnShift = 16,
+ NEON_N_BitShift = 7,
+ NEON_RegRmShift = 0,
+ NEON_M_BitShift = 5
+ };
+}
+
+/// Utility function for extracting [From, To] bits from a uint32_t.
+static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
+ assert(From < 32 && To < 32 && From >= To);
+ return (Bits >> To) & ((1 << (From - To + 1)) - 1);
+}
+
+/// Utility function for setting [From, To] bits to Val for a uint32_t.
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
+ assert(From < 32 && To < 32 && From >= To);
+ uint32_t Mask = ((1 << (From - To + 1)) - 1);
+ Bits &= ~(Mask << To);
+ Bits |= (Val & Mask) << To;
+}
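+
+// As an illustration: slice(0xE59F1004, 15, 12) extracts the 4-bit field
+// Inst{15-12} and returns 0x1, while setSlice(Bits, 15, 12, 0x3) overwrites
+// that same field with 0b0011.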
+
+// Return an integer result equal to the number of bits of x that are ones.
+static inline uint32_t
+BitCount (uint64_t x)
+{
+ // c accumulates the total bits set in x
+ uint32_t c;
+ for (c = 0; x; ++c)
+ {
+ x &= x - 1; // clear the least significant bit set
+ }
+ return c;
+}
+
+static inline bool
+BitIsSet (const uint64_t value, const uint64_t bit)
+{
+ return (value & (1ull << bit)) != 0;
+}
+
+static inline bool
+BitIsClear (const uint64_t value, const uint64_t bit)
+{
+ return (value & (1ull << bit)) == 0;
+}
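+
+// E.g., BitCount(0xF0) == 4, BitIsSet(0x10, 4) is true, and
+// BitIsClear(0x10, 3) is true.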
+
+/// Various utilities for checking the target specific flags.
+
+/// A unary data processing instruction doesn't have an Rn operand.
+static inline bool isUnaryDP(uint64_t TSFlags) {
+ return (TSFlags & ARMII::UnaryDP);
+}
+
+/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
+static inline bool isNEONDomain(uint64_t TSFlags) {
+ return (TSFlags & ARMII::DomainNEON) ||
+ (TSFlags & ARMII::DomainNEONA8);
+}
+
+/// This four-bit field describes the addressing mode used.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getAddrMode(uint64_t TSFlags) {
+ return (TSFlags & ARMII::AddrModeMask);
+}
+
+/// {IndexModePre, IndexModePost}
+/// Only valid for load and store ops.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getIndexMode(uint64_t TSFlags) {
+ return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+}
+
+/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
+ return (TSFlags & ARMII::IndexModeMask) != 0;
+}
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+// Builder Object is mostly ignored except in some Thumb disassemble functions.
+typedef ARMBasicMCBuilder *BO;
+
+/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
+/// and builds the MCOperand list upon disassembly. It returns false on failure
+/// or true on success. The number of operands added is updated upon success.
+typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
+
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing the allocated memory. Caching can be
+/// performed by the API clients to improve performance.
+extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+
+/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
+/// knows how to build up the MCOperand list.
+class ARMBasicMCBuilder {
+ friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+ unsigned Opcode;
+ ARMFormat Format;
+ unsigned short NumOps;
+ DisassembleFP Disasm;
+ Session *SP;
+ int Err; // !=0 if the builder encounters some error condition during build.
+
+private:
+ /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
+
+public:
+ ARMBasicMCBuilder(ARMBasicMCBuilder &B)
+ : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
+ SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
+ Err = 0;
+ }
+
+ virtual ~ARMBasicMCBuilder() {}
+
+ void SetSession(Session *sp) {
+ SP = sp;
+ }
+
+ void SetErr(int ErrCode) {
+ Err = ErrCode;
+ }
+
+ /// DoPredicateOperands - DoPredicateOperands processes the predicate operands
+ /// of some Thumb instructions which come before the reglist operands. It
+ /// returns true if the two predicate operands have been processed.
+ bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaning);
+
+ /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+ /// the possible Predicate and SBitModifier, to build the remaining MCOperand
+ /// constituents.
+ bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaning);
+
+ /// InITBlock - InITBlock returns true if we are inside an IT block.
+ bool InITBlock() {
+ if (SP)
+ return SP->ITCounter > 0;
+
+ return false;
+ }
+
+ /// Build - Build delegates to BuildIt to perform the heavy lifting. After
+ /// that, it invokes RunBuildAfterHook where some housekeeping can be done.
+ virtual bool Build(MCInst &MI, uint32_t insn) {
+ bool Status = BuildIt(MI, insn);
+ return RunBuildAfterHook(Status, MI, insn);
+ }
+
+ /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+ /// The general idea is to set the Opcode for the MCInst, followed by adding
+ /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
+ /// to the Format-specific disassemble function for disassembly, followed by
+ /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
+ /// which follow the Dst/Src Operands.
+ virtual bool BuildIt(MCInst &MI, uint32_t insn);
+
+ /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
+ /// after BuildIt is finished.
+ virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);
+
+private:
+ /// Get condition of the current IT instruction.
+ unsigned GetITCond() {
+ assert(SP);
+ return slice(SP->ITState, 7, 4);
+ }
+
+private:
+ //
+ // Hooks for symbolic disassembly via the public 'C' interface.
+ //
+ // The function to get the symbolic information for operands.
+ LLVMOpInfoCallback GetOpInfo;
+ // The pointer to the block of symbolic information for above call back.
+ void *DisInfo;
+ // The assembly context for creating symbols and MCExprs in place of
+ // immediate operands when there is symbolic information.
+ MCContext *Ctx;
+ // The address of the instruction being disassembled.
+ uint64_t Address;
+
+public:
+ void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
+ void *disInfo, MCContext *ctx,
+ uint64_t address) {
+ GetOpInfo = getOpInfo;
+ DisInfo = disInfo;
+ Ctx = ctx;
+ Address = address;
+ }
+
+ uint64_t getBuilderAddress() const { return Address; }
+
+ /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+ /// operand in place of the immediate Value in the MCInst. The immediate
+ /// Value has had any PC adjustment made by the caller. If the getOpInfo()
+ /// function was set as part of the setupBuilderForSymbolicDisassembly() call
+ /// then that function is called to get any symbolic information at the
+ /// builder's Address for this instruction. If that returns non-zero then the
+ /// symbolic information it returns is used to create an MCExpr and that is
+ /// added as an operand to the MCInst. This function returns true if it adds
+ /// an operand to the MCInst and false otherwise.
+ bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
+
+};
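+
+// Minimal usage sketch of the builder API above; the driver code is not shown
+// in this header, and the Opcode/Format/insn values as well as TheSession are
+// hypothetical placeholders:
+//
+//   ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+//   if (Builder) {
+//     Builder->SetSession(&TheSession);   // Session is defined elsewhere
+//     MCInst MI;
+//     bool OK = Builder->Build(MI, insn); // BuildIt + RunBuildAfterHook
+//     delete Builder;                     // API clients free the builder
+//   }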
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
new file mode 100644
index 000000000000..834c6f65295d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -0,0 +1,2459 @@
+//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code for disassembling a Thumb instr. It is to be included by
+// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm()
+// function which acts as the dispatcher to disassemble a Thumb instruction.
+//
+//===----------------------------------------------------------------------===//
+
+///////////////////////////////
+// //
+// Utility Functions //
+// //
+///////////////////////////////
+
+// Utilities for 16-bit Thumb instructions.
+/*
+15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ [ tRt ]
+ [ tRm ] [ tRn ] [ tRd ]
+ D [ Rm ] [ Rd ]
+
+ [ imm3]
+ [ imm5 ]
+ i [ imm5 ]
+ [ imm7 ]
+ [ imm8 ]
+ [ imm11 ]
+
+ [ cond ]
+*/
+
+// Extract tRt: Inst{10-8}.
+static inline unsigned getT1tRt(uint32_t insn) {
+ return slice(insn, 10, 8);
+}
+
+// Extract tRm: Inst{8-6}.
+static inline unsigned getT1tRm(uint32_t insn) {
+ return slice(insn, 8, 6);
+}
+
+// Extract tRn: Inst{5-3}.
+static inline unsigned getT1tRn(uint32_t insn) {
+ return slice(insn, 5, 3);
+}
+
+// Extract tRd: Inst{2-0}.
+static inline unsigned getT1tRd(uint32_t insn) {
+ return slice(insn, 2, 0);
+}
+
+// Extract [D:Rd]: Inst{7:2-0}.
+static inline unsigned getT1Rd(uint32_t insn) {
+ return slice(insn, 7, 7) << 3 | slice(insn, 2, 0);
+}
+
+// Extract Rm: Inst{6-3}.
+static inline unsigned getT1Rm(uint32_t insn) {
+ return slice(insn, 6, 3);
+}
+
+// Extract imm3: Inst{8-6}.
+static inline unsigned getT1Imm3(uint32_t insn) {
+ return slice(insn, 8, 6);
+}
+
+// Extract imm5: Inst{10-6}.
+static inline unsigned getT1Imm5(uint32_t insn) {
+ return slice(insn, 10, 6);
+}
+
+// Extract i:imm5: Inst{9:7-3}.
+static inline unsigned getT1Imm6(uint32_t insn) {
+ return slice(insn, 9, 9) << 5 | slice(insn, 7, 3);
+}
+
+// Extract imm7: Inst{6-0}.
+static inline unsigned getT1Imm7(uint32_t insn) {
+ return slice(insn, 6, 0);
+}
+
+// Extract imm8: Inst{7-0}.
+static inline unsigned getT1Imm8(uint32_t insn) {
+ return slice(insn, 7, 0);
+}
+
+// Extract imm11: Inst{10-0}.
+static inline unsigned getT1Imm11(uint32_t insn) {
+ return slice(insn, 10, 0);
+}
+
+// Extract cond: Inst{11-8}.
+static inline unsigned getT1Cond(uint32_t insn) {
+ return slice(insn, 11, 8);
+}
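+
+// Worked example: decoding the 16-bit Thumb word 0x1C52 (ADDS r2, r2, #1; the
+// value is chosen purely for illustration) with the extractors above gives
+//   getT1Imm3(0x1C52) == 1   // Inst{8-6}
+//   getT1tRn(0x1C52)  == 2   // Inst{5-3}
+//   getT1tRd(0x1C52)  == 2   // Inst{2-0}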
+
+static inline bool IsGPR(unsigned RegClass) {
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
+}
+
+// Utilities for 32-bit Thumb instructions.
+
+static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; }
+
+// Extract imm4: Inst{19-16}.
+static inline unsigned getImm4(uint32_t insn) {
+ return slice(insn, 19, 16);
+}
+
+// Extract imm3: Inst{14-12}.
+static inline unsigned getImm3(uint32_t insn) {
+ return slice(insn, 14, 12);
+}
+
+// Extract imm8: Inst{7-0}.
+static inline unsigned getImm8(uint32_t insn) {
+ return slice(insn, 7, 0);
+}
+
+// A8.6.61 LDRB (immediate, Thumb) and friends
+// +/-: Inst{9}
+// imm8: Inst{7-0}
+static inline int decodeImm8(uint32_t insn) {
+ int Offset = getImm8(insn);
+ return slice(insn, 9, 9) ? Offset : -Offset;
+}
+
+// Extract imm12: Inst{11-0}.
+static inline unsigned getImm12(uint32_t insn) {
+ return slice(insn, 11, 0);
+}
+
+// A8.6.63 LDRB (literal) and friends
+// +/-: Inst{23}
+// imm12: Inst{11-0}
+static inline int decodeImm12(uint32_t insn) {
+ int Offset = getImm12(insn);
+ return slice(insn, 23, 23) ? Offset : -Offset;
+}
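+
+// Sign-handling examples for the decoders above (illustrative field values):
+// with Inst{23} == 0 and imm12 == 0x004, decodeImm12() returns -4; with
+// Inst{9} == 1 and imm8 == 0x10, decodeImm8() returns +16.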
+
+// Extract imm2: Inst{7-6}.
+static inline unsigned getImm2(uint32_t insn) {
+ return slice(insn, 7, 6);
+}
+
+// For BFI, BFC, t2SBFX, and t2UBFX.
+// Extract lsb: Inst{14-12:7-6}.
+static inline unsigned getLsb(uint32_t insn) {
+ return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// For BFI and BFC.
+// Extract msb: Inst{4-0}.
+static inline unsigned getMsb(uint32_t insn) {
+ return slice(insn, 4, 0);
+}
+
+// For t2SBFX and t2UBFX.
+// Extract widthminus1: Inst{4-0}.
+static inline unsigned getWidthMinus1(uint32_t insn) {
+ return slice(insn, 4, 0);
+}
+
+// For t2ADDri12 and t2SUBri12.
+// imm12 = i:imm3:imm8;
+static inline unsigned getIImm3Imm8(uint32_t insn) {
+ return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn);
+}
+
+// For t2MOVi16 and t2MOVTi16.
+// imm16 = imm4:i:imm3:imm8;
+static inline unsigned getImm16(uint32_t insn) {
+ return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 |
+ getImm3(insn) << 8 | getImm8(insn);
+}
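+
+// Composition example for getImm16() (illustrative field values): with
+// imm4 = 0xA (Inst{19-16}), i = 1 (Inst{26}), imm3 = 0x5 (Inst{14-12}) and
+// imm8 = 0x3C (Inst{7-0}), the result is
+//   0xA << 12 | 1 << 11 | 0x5 << 8 | 0x3C == 0xAD3C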
+
+// Inst{5-4} encodes the shift type.
+static inline unsigned getShiftTypeBits(uint32_t insn) {
+ return slice(insn, 5, 4);
+}
+
+// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount.
+static inline unsigned getShiftAmtBits(uint32_t insn) {
+ return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// A8.6.17 BFC
+// Encoding T1 ARMv6T2, ARMv7
+// LLVM-specific encoding for #<lsb> and #<width>
+static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
+ uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
+ uint32_t msb = getMsb(insn);
+ uint32_t Val = 0;
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+ for (uint32_t i = lsb; i <= msb; ++i)
+ Val |= (1 << i);
+ mask = ~Val;
+ return true;
+}
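+
+// Example for the inverted-mask helper above (illustrative fields): with
+// lsb = 4 and msb = 7, Val becomes 0x000000F0 and mask is set to
+// ~0xF0 == 0xFFFFFF0F, i.e. bits [7:4] form the field affected by BFC/BFI.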
+
+// A8.4 Shifts applied to a register
+// A8.4.1 Constant shifts
+// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
+//
+// decodeImmShift() returns the shift amount and the shift opcode.
+// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
+static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
+ ARM_AM::ShiftOpc &ShOp) {
+
+ assert(imm5 < 32 && "Invalid imm5 argument");
+ switch (bits2) {
+ default: assert(0 && "No such value");
+ case 0:
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
+ return imm5;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ return (imm5 == 0 ? 32 : imm5);
+ case 2:
+ ShOp = ARM_AM::asr;
+ return (imm5 == 0 ? 32 : imm5);
+ case 3:
+ ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
+ return (imm5 == 0 ? 1 : imm5);
+ }
+}
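+
+// Examples for decodeImmShift() (illustrative inputs): bits2 = 1, imm5 = 0
+// yields ShOp = ARM_AM::lsr with a shift amount of 32 (LSR #32 is encoded as
+// an imm5 of 0); bits2 = 3, imm5 = 0 yields ShOp = ARM_AM::rrx with amount 1.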
+
+// A6.3.2 Modified immediate constants in Thumb instructions
+//
+// ThumbExpandImm() returns the modified immediate constant given an imm12 for
+// Thumb data-processing instructions with modified immediate.
+// See also A6.3.1 Data-processing (modified immediate).
+static inline unsigned ThumbExpandImm(unsigned imm12) {
+ assert(imm12 <= 0xFFF && "Invalid imm12 argument");
+
+ // If the leading two bits are 0b00, the modified immediate constant is
+ // obtained by splatting the low 8 bits into the first byte, every other byte,
+ // or every byte of a 32-bit value.
+ //
+ // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
+ // performed.
+
+ if (slice(imm12, 11, 10) == 0) {
+ unsigned short control = slice(imm12, 9, 8);
+ unsigned imm8 = slice(imm12, 7, 0);
+ switch (control) {
+ default:
+ assert(0 && "No such value");
+ return 0;
+ case 0:
+ return imm8;
+ case 1:
+ return imm8 << 16 | imm8;
+ case 2:
+ return imm8 << 24 | imm8 << 8;
+ case 3:
+ return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
+ }
+ } else {
+ // A rotate is required.
+ unsigned Val = 1 << 7 | slice(imm12, 6, 0);
+ unsigned Amt = slice(imm12, 11, 7);
+ return ARM_AM::rotr32(Val, Amt);
+ }
+}
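+
+// Worked examples for ThumbExpandImm() (illustrative imm12 values):
+//   imm12 = 0x0AB (control = 0b00): returns 0x000000AB
+//   imm12 = 0x3AB (control = 0b11): returns 0xABABABAB
+//   imm12 = 0x8FF (rotate path):    Val = 0xFF, Amt = 17, so the result is
+//                                   rotr32(0xFF, 17) == 0x007F8000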
+
+static inline int decodeImm32_B_EncodingT3(uint32_t insn) {
+ bool S = slice(insn, 26, 26);
+ bool J1 = slice(insn, 13, 13);
+ bool J2 = slice(insn, 11, 11);
+ unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm21 |= 1 << 20;
+ if (J2) Imm21 |= 1 << 19;
+ if (J1) Imm21 |= 1 << 18;
+
+ return SignExtend32<21>(Imm21);
+}
+
+static inline int decodeImm32_B_EncodingT4(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+static inline int decodeImm32_BL(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+static inline int decodeImm32_BLX(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+// See, for example, A8.6.221 SXTAB16.
+static inline unsigned decodeRotate(uint32_t insn) {
+ unsigned rotate = slice(insn, 5, 4);
+ return rotate << 3;
+}
+
+///////////////////////////////////////////////
+// //
+// Thumb1 instruction disassembly functions. //
+// //
+///////////////////////////////////////////////
+
+// See "Utilities for 16-bit Thumb instructions" for register naming convention.
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare
+//
+// shift immediate: tRd CPSR tRn imm5
+// add/sub register: tRd CPSR tRn tRm
+// add/sub 3-bit immediate: tRd CPSR tRn imm3
+// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8
+// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov)
+//
+// Special case:
+// tMOVSr: tRd tRn
+static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID
+ && "Invalid arguments");
+
+ bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3);
+
+ // Using Rt implies that the immediate operand is imm8.
+ bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 ||
+ Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8);
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::tGPRRegClassID,
+ UseRt ? getT1tRt(insn) : getT1tRd(insn))));
+ ++OpIdx;
+
+ // Check whether the next operand to be added is a CCR Register.
+ if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
+ assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
+ ++OpIdx;
+ }
+
+ // Check whether the next operand to be added is a Thumb1 Register.
+ assert(OpIdx < NumOps && "More operands expected");
+ if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // For UseRt, the reg operand is tied to the first reg operand.
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::tGPRRegClassID,
+ UseRt ? getT1tRt(insn) : getT1tRn(insn))));
+ ++OpIdx;
+ }
+
+ // Special case for tMOVSr.
+ if (OpIdx == NumOps)
+ return true;
+
+ // The next available operand is either a reg operand or an imm operand.
+ if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // Three register operand instructions.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
+ } else {
+ assert(OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+ unsigned Imm = 0;
+ if (UseRt)
+ Imm = getT1Imm8(insn);
+ else if (Imm3)
+ Imm = getT1Imm3(insn);
+ else {
+ Imm = getT1Imm5(insn);
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11));
+ getImmShiftSE(ShOp, Imm);
+ }
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ }
+ ++OpIdx;
+
+ return true;
+}
+
+// A6.2.2 Data-processing
+//
+// tCMPr, tTST, tCMN: tRd tRn
+// tMVN, tRSB: tRd CPSR tRn
+// Others: tRd CPSR tRd(TIED_TO) tRn
+static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass == ARM::CCRRegClassID
+ || OpInfo[1].RegClass == ARM::tGPRRegClassID)
+ && "Invalid arguments");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+ ++OpIdx;
+
+ // Check whether the next operand to be added is a CCR Register.
+ if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
+ assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
+ ++OpIdx;
+ }
+
+ // We have either { tRd(TIED_TO), tRn } or { tRn } remaining.
+ // Process the TIED_TO operand first.
+
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Process possible next reg operand.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // Add tRn operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.2.3 Special data instructions and branch and exchange
+//
+// tADDhirr: Rd Rd(TIED_TO) Rm
+// tCMPhir: Rd Rm
+// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn
+// tBX: Rm
+// tBX_RET: 0 operand
+// tBX_RET_vararg: Rm
+// tBLXr_r9: Rm
+// tBRIND: Rm
+static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // tBX_RET has 0 operand.
+ if (NumOps == 0)
+ return true;
+
+ // BX/BLX/tBRIND (indirect branch, i.e., mov pc, Rm) has 1 reg operand: Rm.
+ if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX || Opcode==ARM::tBRIND) {
+ if (Opcode == ARM::tBLXr_r9) {
+ // Handling the two predicate operands before the reg operand.
+ if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ return false;
+ NumOpsAdded += 2;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ getT1Rm(insn))));
+ NumOpsAdded += 1;
+
+ if (Opcode == ARM::tBX) {
+ // Handling the two predicate operands after the reg operand.
+ if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ return false;
+ NumOpsAdded += 2;
+ }
+
+ return true;
+ }
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Add the destination operand.
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass,
+ IsGPR(RegClass) ? getT1Rd(insn)
+ : getT1tRd(insn))));
+ ++OpIdx;
+
+ // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining.
+ // Process the TIED_TO operand first.
+
+ assert(OpIdx < NumOps && "More operands expected");
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // The next reg operand is either Rm or tRn.
+ assert(OpIdx < NumOps && "More operands expected");
+ RegClass = OpInfo[OpIdx].RegClass;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass,
+ IsGPR(RegClass) ? getT1Rm(insn)
+ : getT1tRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A8.6.59 LDR (literal)
+//
+// tLDRpci: tRt imm8*4
+static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 &&
+ !OpInfo[1].isPredicate() &&
+ !OpInfo[1].isOptionalDef())
+ && "Invalid arguments");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+
+ // And the (imm8 << 2) operand.
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+// Thumb specific addressing modes (see ARMInstrThumb.td):
+//
+// t_addrmode_rr := reg + reg
+//
+// t_addrmode_s4 := reg + reg
+// reg + imm5 * 4
+//
+// t_addrmode_s2 := reg + reg
+// reg + imm5 * 2
+//
+// t_addrmode_s1 := reg + reg
+// reg + imm5
+//
+// t_addrmode_sp := sp + imm8 * 4
+//
+
+// A8.6.63 LDRB (literal)
+// A8.6.79 LDRSB (literal)
+// A8.6.75 LDRH (literal)
+// A8.6.83 LDRSH (literal)
+// A8.6.59 LDR (literal)
+//
+// These instrs calculate an address from the PC value and an immediate offset.
+// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
+static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass < 0 &&
+ "Expect >= 2 operands, first as reg, and second as imm operand");
+
+ // Build the register operand, followed by the (+/-)imm12 immediate.
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+
+// A6.2.4 Load/store single data item
+//
+// Load/Store Register (reg|imm): tRd tRn imm5|tRm
+// Load Register Signed Byte|Halfword: tRd tRn tRm
+static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::tGPRRegClassID
+ && OpInfo[1].RegClass == ARM::tGPRRegClassID
+ && "Expect >= 2 operands and first two as thumb reg operands");
+
+ // Add the destination reg and the base reg.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ OpIdx = 2;
+
+ // We have either { imm5 } or { tRm } remaining.
+ // Note that STR/LDR (register) should skip the imm5 offset operand for
+ // t_addrmode_s[1|2|4].
+
+ assert(OpIdx < NumOps && "More operands expected");
+
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
+ !OpInfo[OpIdx].isOptionalDef()) {
+ // Table A6-5 16-bit Thumb Load/store instructions
+ // opA = 0b0101 for STR/LDR (register) and friends.
+ // Otherwise, we have STR/LDR (immediate) and friends.
+ assert(opA != 5 && "Immediate operand expected for this opcode");
+ MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn)));
+ ++OpIdx;
+ } else {
+ // The next reg operand is tRm, the offset.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
+ ++OpIdx;
+ }
+ return true;
+}
+
+// A6.2.4 Load/store single data item
+//
+// Load/Store Register SP relative: tRt ARM::SP imm8
+static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
+ && "Unexpected opcode");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ (OpInfo[2].RegClass < 0 &&
+ !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 3;
+ return true;
+}
+
+// Table A6-1 16-bit Thumb instruction encoding
+// A8.6.10 ADR
+//
+// tADDrPCi: tRt imm8
+static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 &&
+ !OpInfo[1].isPredicate() &&
+ !OpInfo[1].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// Table A6-1 16-bit Thumb instruction encoding
+// A8.6.8 ADD (SP plus immediate)
+//
+// tADDrSPi: tRt ARM::SP imm8
+static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ (OpInfo[2].RegClass < 0 &&
+ !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 3;
+ return true;
+}
+
+// tPUSH, tPOP: Pred-Imm Pred-CCR register_list
+//
+// where register_list = low registers + [lr] for PUSH or
+// low registers + [pc] for POP
+//
+// "low registers" is specified by Inst{7-0}
+// lr|pc is specified by Inst{8}
+static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ OpIdx += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15)
+ | slice(insn, 7, 0);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 16; ++i) {
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++OpIdx;
+ }
+ }
+
+ return true;
+}
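+
+// Reglist example for the function above (illustrative encoding): a tPUSH
+// with Inst{8} == 1 and Inst{7-0} == 0xF0 (push {r4-r7, lr}) gives
+// RegListBits == (1 << 14) | 0xF0, so the loop adds R4-R7 and LR after the
+// two predicate operands.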
+
+// A6.2.5 Miscellaneous 16-bit instructions
+// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP.
+//
+// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7
+// t2IT: firstcond=Inst{7-4} mask=Inst{3-0}
+// tCBNZ, tCBZ: tRd imm6*2
+// tBKPT: imm8
+// tNOP, tSEV, tYIELD, tWFE, tWFI:
+// no operand (except predicate pair)
+// tSETENDBE, tSETENDLE:
+// no operand
+// Others: tRd tRn
+static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (NumOps == 0)
+ return true;
+
+ if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
+ return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ // Predicate operands are handled elsewhere.
+ if (NumOps == 2 &&
+ OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
+ OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
+ return true;
+ }
+
+ if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) {
+ // Special case handling for tADDspi and tSUBspi.
+ // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate)
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn)));
+ NumOpsAdded = 3;
+ return true;
+ }
+
+ if (Opcode == ARM::t2IT) {
+ // Special case handling for If-Then.
+ // A8.6.50 IT
+ // Tag the (firstcond[0] bit << 4) along with mask.
+
+ // firstcond
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4)));
+
+ // firstcond[0] and mask
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ NumOpsAdded = 2;
+ return true;
+ }
+
+ if (Opcode == ARM::tBKPT) {
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // CPS has a singleton $opt operand that contains the following information:
+ // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
+ // but in Thumb it is 0 as enable and 1 as disable, so map it to the ARM
+ // encoding. The second operand gets the AIF flags from Inst{2-0}.
+ if (Opcode == ARM::tCPS) {
+ MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
+ NumOpsAdded = 2;
+ return true;
+ }
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
+ && "Expect >=2 operands");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+
+ if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
+ // Two register instructions.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ } else {
+ // CBNZ, CBZ
+ assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode");
+ MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
+ }
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+// A8.6.53 LDM / LDMIA
+// A8.6.189 STM / STMIA
+//
+// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+ assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
+ Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
+
+ unsigned tRt = getT1tRt(insn);
+ NumOpsAdded = 0;
+
+ // WB register, if necessary.
+ if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ tRt)));
+ ++NumOpsAdded;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ tRt)));
+ ++NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 7, 0);
+ if (BitCount(RegListBits) < 1) {
+ DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
+ return false;
+ }
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+
+ return true;
+}
+
+static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+}
+
+static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+}
+
+// A8.6.16 B Encoding T1
+// cond = Inst{11-8} & imm8 = Inst{7-0}
+// imm32 = SignExtend(imm8:'0', 32)
+//
+// tBcc: offset Pred-Imm Pred-CCR
+// tSVC: imm8 Pred-Imm Pred-CCR
+// tTRAP: 0 operand (early return)
+static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ if (Opcode == ARM::tTRAP)
+ return true;
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
+ OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
+ && "Exactly 3 operands expected");
+
+ unsigned Imm8 = getT1Imm8(insn);
+ MI.addOperand(MCOperand::CreateImm(
+ Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1)
+ : (int)Imm8));
+
+ // Predicate operands are handled later by
+ // ARMBasicMCBuilder::TryPredicateAndSBitModifier().
+ // But note that for tBcc, if cond = '1110' then UNDEFINED.
+ if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) {
+ DEBUG(errs() << "if cond = '1110' then UNDEFINED\n");
+ return false;
+ }
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// A8.6.16 B Encoding T2
+// imm11 = Inst{10-0}
+// imm32 = SignExtend(imm11:'0', 32)
+//
+// tB: offset
+static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
+
+ unsigned Imm11 = getT1Imm11(insn);
+
+ MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1)));
+
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// See A6.2 16-bit Thumb instruction encoding for instruction classes
+// corresponding to op.
+//
+// Table A6-1 16-bit Thumb instruction encoding (abridged)
+// op Instruction or instruction class
+// ------ --------------------------------------------------------------------
+// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000 Data-processing on page A6-8
+// 010001 Special data instructions and branch and exchange on page A6-9
+// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx Load/store single data item on page A6-10
+// 011xxx
+// 100xxx
+// 10100x Generate PC-relative address, see ADR on page A8-32
+// 10101x Generate SP-relative address, see ADD (SP plus immediate) on
+// page A8-28
+// 1011xx Miscellaneous 16-bit instructions on page A6-11
+// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110
+// 1101xx Conditional branch, and Supervisor Call on page A6-13
+// 11100x Unconditional Branch, see B on page A8-44
+//
+static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ unsigned op1 = slice(op, 5, 4);
+ unsigned op2 = slice(op, 3, 2);
+ unsigned op3 = slice(op, 1, 0);
+ unsigned opA = slice(op, 5, 2);
+ switch (op1) {
+ case 0:
+ // A6.2.1 Shift (immediate), add, subtract, move, and compare
+ return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 1:
+ switch (op2) {
+ case 0:
+ switch (op3) {
+ case 0:
+ // A6.2.2 Data-processing
+ return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 1:
+ // A6.2.3 Special data instructions and branch and exchange
+ return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ default:
+ // A8.6.59 LDR (literal)
+ return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ break;
+ default:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ break;
+ }
+ break;
+ case 2:
+ switch (op2) {
+ case 0:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ case 1:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 2:
+ if (op3 <= 1) {
+ // A8.6.10 ADR
+ return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ } else {
+ // A8.6.8 ADD (SP plus immediate)
+ return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ default:
+ // A6.2.5 Miscellaneous 16-bit instructions
+ return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ break;
+ case 3:
+ switch (op2) {
+ case 0:
+ if (op3 <= 1) {
+ // A8.6.189 STM / STMIA / STMEA
+ return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else {
+ // A8.6.53 LDM / LDMIA / LDMFD
+ return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ case 1:
+ // A6.2.6 Conditional branch, and Supervisor Call
+ return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 2:
+ // Unconditional Branch, see B on page A8-44
+ return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ default:
+ assert(0 && "Unreachable code");
+ break;
+ }
+ break;
+ default:
+ assert(0 && "Unreachable code");
+ break;
+ }
+
+ return false;
+}
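+
+// Routing example for the dispatcher above (assuming, per Table A6-1, that
+// the op argument carries Inst{15-10}): for the 16-bit word 0x1C52 the op
+// field is 0b000111, so op1 == 0 and control goes to
+// DisassembleThumb1General(), the shift/add/subtract/move/compare class.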
+
+///////////////////////////////////////////////
+// //
+// Thumb2 instruction disassembly functions. //
+// //
+///////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////
+// //
+// Note: the register naming follows the ARM convention! //
+// //
+///////////////////////////////////////////////////////////
+
+static inline bool Thumb2SRSOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2SRSDBW: case ARM::t2SRSDB:
+ case ARM::t2SRSIAW: case ARM::t2SRSIA:
+ return true;
+ }
+}
+
+static inline bool Thumb2RFEOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2RFEDBW: case ARM::t2RFEDB:
+ case ARM::t2RFEIAW: case ARM::t2RFEIA:
+ return true;
+ }
+}
+
+// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0}
+static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ NumOpsAdded = 1;
+ return true;
+}
+
+// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
+static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ unsigned Rn = decodeRn(insn);
+ if (Rn == 15) {
+ DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+ return false;
+ }
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn)));
+ NumOpsAdded = 1;
+ return true;
+}
+
+static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (Thumb2SRSOpcode(Opcode))
+ return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded);
+
+ if (Thumb2RFEOpcode(Opcode))
+ return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
+ Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
+ Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
+ && "Unexpected opcode");
+ assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4");
+
+ NumOpsAdded = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base.
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = insn & ((1 << 16) - 1);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 16; ++i)
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+
+ return true;
+}
+
+// t2LDREX: Rd Rn
+// t2LDREXD: Rd Rs Rn
+// t2LDREXB, t2LDREXH: Rd Rn
+// t2STREX: Rs Rd Rn
+// t2STREXD: Rm Rd Rs Rn
+// t2STREXB, t2STREXH: Rm Rd Rn
+static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass > 0
+ && OpInfo[1].RegClass > 0
+ && "Expect >=2 operands and first two as reg operands");
+
+ bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
+ bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
+ bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
+
+ unsigned Rt = decodeRd(insn);
+ unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
+ unsigned Rd = decodeRm(insn);
+ unsigned Rn = decodeRn(insn);
+
+ // Some sanity checking first.
+ if (isStore) {
+ // if d == n || d == t then UNPREDICTABLE
+ // if d == n || d == t || d == t2 then UNPREDICTABLE
+ if (isDW) {
+ if (Rd == Rn || Rd == Rt || Rd == Rt2) {
+ DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ } else {
+ if (isSW) {
+ if (Rt2 == Rn || Rt2 == Rt) {
+ DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+ return false;
+ }
+ } else {
+ if (Rd == Rn || Rd == Rt) {
+ DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+ }
+ } else {
+ // Load
+ // A8.6.71 LDREXD
+ // if t == t2 then UNPREDICTABLE
+ if (isDW && Rt == Rt2) {
+ DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+
+ // Add the destination operand for store.
+ if (isStore) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ isSW ? Rt2 : Rd)));
+ ++OpIdx;
+ }
+
+ // Source operand for store and destination operand for load.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ Rt)));
+ ++OpIdx;
+
+ // Thumb2 doubleword complication: an extra source/destination operand.
+ if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ Rt2)));
+ ++OpIdx;
+ }
+
+ // Finally add the pointer operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ Rn)));
+ ++OpIdx;
+
+ return true;
+}
+
+// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
+// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
+// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
+//
+// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
+// disassembly only and do not have a tied_to writeback base register operand.
+static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 4
+ && OpInfo[0].RegClass > 0
+ && OpInfo[0].RegClass == OpInfo[1].RegClass
+ && OpInfo[2].RegClass > 0
+ && OpInfo[3].RegClass < 0
+ && "Expect >= 4 operands and first 3 as reg operands");
+
+ // Thumb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1).
+ unsigned Rt = decodeRd(insn);
+ unsigned Rt2 = decodeRs(insn);
+ unsigned Rn = decodeRn(insn);
+
+ // Some sanity checking first.
+
+ // A8.6.67 LDRD (literal) has its W bit as (0).
+ if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) {
+ if (Rn == 15 && slice(insn, 21, 21) != 0)
+ return false;
+ } else {
+ // For Dual Store, PC cannot be used as the base register.
+ if (Rn == 15) {
+ DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+ if (Rt == Rt2) {
+ DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) {
+ if (Rn == Rt || Rn == Rt2) {
+ DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+
+ // Add the <Rt> <Rt2> operands.
+ unsigned RegClassPair = OpInfo[0].RegClass;
+ unsigned RegClassBase = OpInfo[2].RegClass;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
+ decodeRs(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase,
+ decodeRn(insn))));
+
+ // Finally add (+/-)imm8*4, depending on the U bit.
+ int Offset = getImm8(insn) * 4;
+ if (getUBit(insn) == 0)
+ Offset = -Offset;
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ NumOpsAdded = 4;
+
+ return true;
+}
+
+// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
+static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "Expect >= 2 operands");
+
+ // The generic version of TBB/TBH needs a base register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ // Add the index register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2MOVCClsl: case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr: case ARM::t2MOVCCror:
+ case ARM::t2LSLri: case ARM::t2LSRri:
+ case ARM::t2ASRri: case ARM::t2RORri:
+ return true;
+ }
+}
+
+// A6.3.11 Data-processing (shifted register)
+//
+// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm
+// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm
+// Three register operands: Rs Rn Rm
+// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm
+//
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms: (Rm, ConstantShiftSpecifier).
+// Constant shift specifier: Imm = (ShOp | ShAmt<<3).
+//
+// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which
+// only require two register operands: Rd, Rm in ARM Reference Manual terms, and
+// nothing else, because the shift amount is already specified.
+// Similar case holds for t2MOVrx, t2ADDrr, ..., etc.
+static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Special case handling.
+ if (Opcode == ARM::t2BR_JT) {
+ assert(NumOps == 4
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass < 0
+ && OpInfo[3].RegClass < 0
+ && "Exactly 4 operands expect and first two as reg operands");
+ // Only need to populate the src reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(0));
+ MI.addOperand(MCOperand::CreateImm(0));
+ MI.addOperand(MCOperand::CreateImm(0));
+ NumOpsAdded = 4;
+ return true;
+ }
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first two as reg operands");
+
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
+ bool NoDstReg = (decodeRs(insn) == 0xF);
+
+ // Build the register operands, followed by the constant shift specifier.
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[0].RegClass,
+ NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // Process tied_to operand constraint.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ } else if (!NoDstReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
+ decodeRn(insn))));
+ ++OpIdx;
+ } else {
+ DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
+ return false;
+ }
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (NumOps == OpIdx)
+ return true;
+
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()) {
+
+ if (Thumb2ShiftOpcode(Opcode)) {
+ unsigned Imm = getShiftAmtBits(insn);
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
+ getImmShiftSE(ShOp, Imm);
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ } else {
+ // Build the constant shift specifier operand.
+ unsigned bits2 = getShiftTypeBits(insn);
+ unsigned imm5 = getShiftAmtBits(insn);
+ ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
+ unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ }
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.1 Data-processing (modified immediate)
+//
+// Two register operands: Rs Rn ModImm
+// One register operand (Rs=0b1111 no explicit dest reg): Rn ModImm
+// One register operand (Rn=0b1111 no explicit src reg): Rs ModImm -
+// {t2MOVi, t2MVNi}
+//
+// ModImm = ThumbExpandImm(i:imm3:imm8)
+static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first one as reg operand");
+
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
+ bool NoDstReg = (decodeRs(insn) == 0xF);
+
+ // Build the register operands, followed by the modified immediate.
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RdRegClassID,
+ NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+ ++OpIdx;
+
+ if (TwoReg) {
+ if (NoDstReg) {
+ DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
+ return false;
+ }
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ } else {
+ // Add second reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ }
+ ++OpIdx;
+ }
+
+ // The modified immediate operand should come next.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ // i:imm3:imm8
+ // A6.3.2 Modified immediate constants in Thumb instructions
+ unsigned imm12 = getIImm3Imm8(insn);
+ MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12)));
+ ++OpIdx;
+
+ return true;
+}
+
+static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
+// A6.3.3 Data-processing (plain binary immediate)
+//
+// o t2ADDri12, t2SUBri12: Rs Rn imm12
+// o t2LEApcrel (ADR): Rs imm12
+// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
+// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm
+// o t2MOVi16: Rs imm16
+// o t2MOVTi16: Rs imm16
+// o t2SBFX (SBFX): Rs Rn lsb width
+// o t2UBFX (UBFX): Rs Rn lsb width
+// o t2BFI (BFI): Rs Rn lsb width
+static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first one as reg operand");
+
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
+
+ // Build the register operand(s), followed by the immediate(s).
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
+ decodeRs(insn))));
+ ++OpIdx;
+
+ if (TwoReg) {
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // Process tied_to operand constraint.
+ MI.addOperand(MI.getOperand(Idx));
+ } else {
+ // Add src reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ }
+ ++OpIdx;
+ }
+
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ // Pre-increment OpIdx.
+ ++OpIdx;
+
+ if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
+ || Opcode == ARM::t2LEApcrel)
+ MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
+ else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
+ if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
+ MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
+ } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
+ uint32_t mask = 0;
+ if (getBitfieldInvMask(insn, mask))
+ MI.addOperand(MCOperand::CreateImm(mask));
+ else
+ return false;
+ } else {
+ // Handle the case of: lsb width
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
+ MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.4 Table A6-15 Miscellaneous control instructions
+// A8.6.41 DMB
+// A8.6.42 DSB
+// A8.6.49 ISB
+static inline bool t2MiscCtrlInstr(uint32_t insn) {
+ if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 &&
+ slice(insn, 12, 12) == 0)
+ return true;
+
+ return false;
+}
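+
+// Illustrative check (encoding value taken as an example): for DMB SY,
+// insn = 0xf3bf8f5f, so Inst{31-20} = 0xf3b, Inst{15-14} = 0b10 and
+// Inst{12} = 0, and t2MiscCtrlInstr() returns true.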
+
+// A6.3.4 Branches and miscellaneous control
+//
+// A8.6.16 B
+// Branches: t2B, t2Bcc -> imm operand
+//
+// Branches: t2TPsoft -> no operand
+//
+// A8.6.23 BL, BLX (immediate)
+// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand
+//
+// A8.6.26
+// t2BXJ -> Rn
+//
+// Miscellaneous control:
+// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
+//
+// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
+// -> no operand (except pred-imm pred-ccr)
+//
+// t2DBG -> imm4 = Inst{3-0}
+//
+// t2MRS/t2MRSsys -> Rs
+// t2MSR/t2MSRsys -> Rn mask=Inst{11-8}
+// t2SMC -> imm4 = Inst{19-16}
+static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (NumOps == 0)
+ return true;
+
+ if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) {
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ unsigned opt = slice(insn, 3, 0);
+ switch (opt) {
+ case ARM_MB::SY: case ARM_MB::ST:
+ case ARM_MB::ISH: case ARM_MB::ISHST:
+ case ARM_MB::NSH: case ARM_MB::NSHST:
+ case ARM_MB::OSH: case ARM_MB::OSHST:
+ MI.addOperand(MCOperand::CreateImm(opt));
+ NumOpsAdded = 1;
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (t2MiscCtrlInstr(insn))
+ return true;
+
+ switch (Opcode) {
+ case ARM::t2CLREX:
+ case ARM::t2NOP:
+ case ARM::t2YIELD:
+ case ARM::t2WFE:
+ case ARM::t2WFI:
+ case ARM::t2SEV:
+ return true;
+ default:
+ break;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::t2CPS3p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS2p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS1p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // DBG has its option specified in Inst{3-0}.
+ if (Opcode == ARM::t2DBG) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // MRS and MRSsys take one GPR reg Rs.
+ if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // BXJ takes one GPR reg Rn.
+ if (Opcode == ARM::t2BXJ) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+  // MSR takes a mask, followed by one GPR reg Rn. The mask contains the R Bit
+  // in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::t2MSR) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
+ slice(insn, 11, 8) /* Special Reg */));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 2;
+ return true;
+ }
+  // SMC takes imm4.
+ if (Opcode == ARM::t2SMC) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+  // Some instructions have predicate operands first, before the immediate.
+ if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
+ // Handling the two predicate operands before the imm operand.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ NumOpsAdded += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+ }
+
+ // Add the imm operand.
+ int Offset = 0;
+
+ switch (Opcode) {
+ default:
+ assert(0 && "Unexpected opcode");
+ return false;
+ case ARM::t2B:
+ Offset = decodeImm32_B_EncodingT4(insn);
+ break;
+ case ARM::t2Bcc:
+ Offset = decodeImm32_B_EncodingT3(insn);
+ break;
+ case ARM::tBLr9:
+ Offset = decodeImm32_BL(insn);
+ break;
+ case ARM::tBLXi_r9:
+ Offset = decodeImm32_BLX(insn);
+ break;
+ }
+
+ if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
+ MI.addOperand(MCOperand::CreateImm(Offset));
+
+ // This is an increment as some predicate operands may have been added first.
+ NumOpsAdded += 1;
+
+ return true;
+}
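+
+// Illustrative branch-target computation (addresses made up): if the builder
+// address is 0x8000 and decodeImm32_B_EncodingT4() yields +0x10 for a t2B,
+// the target offered to tryAddingSymbolicOperand() above is
+// 0x10 + 0x8000 + 4 = 0x8014, since the offset is relative to the Thumb PC
+// (current instruction address + 4).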
+
+static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2PLDi12: case ARM::t2PLDi8:
+ case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIi12: case ARM::t2PLIi8:
+ case ARM::t2PLIs:
+ return true;
+ }
+}
+
+static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
+ // t2PLDr: Rn Rm
+ // t2PLDs: Rn Rm imm2=Inst{5-4}
+ // Same pattern applies for t2PLDW* and t2PLI*.
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 2 operands and first one as reg operand");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ } else {
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+ int Offset = 0;
+ if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
+ Opcode == ARM::t2PLIi8) {
+ // A8.6.117 Encoding T2: add = FALSE
+ unsigned Imm8 = getImm8(insn);
+ Offset = -1 * Imm8;
+ } else {
+ // The i12 forms. See, for example, A8.6.117 Encoding T1.
+ // Note that currently t2PLDi12 also handles the previously named t2PLDpci
+ // opcode, that's why we use decodeImm12(insn) which returns +/- imm12.
+ Offset = decodeImm12(insn);
+ }
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
+ ++OpIdx;
+ }
+
+ return true;
+}
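+
+// Illustrative immediate (encoding values assumed): for t2PLDi8 with
+// imm8 = 4, the offset operand added above is -4, because encoding T2 is
+// subtract-only; the i12 forms instead rely on decodeImm12(insn) to carry
+// the sign.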
+
+static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
+ unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
+
+ // Inst{22-21} encodes the data item transferred for load/store.
+  // For single word, it is encoded as 0b10.
+ bool Word = (slice(insn, 22, 21) == 2);
+ bool Half = (slice(insn, 22, 21) == 1);
+ bool Byte = (slice(insn, 22, 21) == 0);
+
+ if (UseRm && BadReg(R2)) {
+ DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
+ return true;
+ }
+
+ if (Load) {
+ if (!Word && R0 == 13) {
+ DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (Byte) {
+ if (WB && R0 == 15 && slice(insn, 10, 8) == 3) {
+ // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
+ DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ // A6.3.8 Load halfword, memory hints
+ if (Half) {
+ if (WB) {
+ if (R0 == R1) {
+ // A8.6.82 LDRSH (immediate) Encoding T2
+ DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ if (R0 == 15 && slice(insn, 10, 8) == 3) {
+ // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
+ DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
+ if (R0 == 15 && slice(insn, 10, 8) == 4) {
+ // A8.6.82 LDRSH (immediate) Encoding T2
+ DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
+ << " \"Unallocated memory hints\"\n");
+ return true;
+ }
+ } else {
+ if (R0 == 15) {
+ // A8.6.82 LDRSH (immediate) Encoding T1
+ DEBUG(errs() << "if Rt == '1111' then SEE"
+ << " \"Unallocated memory hints\"\n");
+ return true;
+ }
+ }
+ }
+ }
+ } else {
+ if (WB && R0 == R1) {
+ DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ if ((WB && R0 == 15) || (!WB && R1 == 15)) {
+ DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
+ return true;
+ }
+ if (Word) {
+ if ((WB && R1 == 15) || (!WB && R0 == 15)) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) {
+ DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// A6.3.10 Store single data item
+// A6.3.9 Load byte, memory hints
+// A6.3.8 Load halfword, memory hints
+// A6.3.7 Load word
+//
+// For example,
+//
+// t2LDRi12: Rd Rn (+)imm12
+// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
+// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+//
+// t2STRi12: Rd Rn (+)imm12
+// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
+// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+//
+// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated
+// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first
+// operand is Rn; for all the other instructions, Rd is the first operand.
+//
+// Delegates to DisassembleThumb2PreLoad() for preload data/instruction.
+// Delegates to DisassembleThumb2Ldpci() for load * literal operations.
+static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ unsigned Rn = decodeRn(insn);
+
+ if (Thumb2PreloadOpcode(Opcode))
+ return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ // See, for example, A6.3.7 Load word: Table A6-18 Load word.
+ if (Load && Rn == 15)
+ return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass > 0 &&
+ OpInfo[1].RegClass > 0 &&
+ "Expect >= 3 operands and first two as reg operands");
+
+ bool ThreeReg = (OpInfo[2].RegClass > 0);
+ bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1;
+ bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
+
+ // Build the register operands, followed by the immediate.
+ unsigned R0 = 0, R1 = 0, R2 = 0;
+ unsigned Rd = decodeRd(insn);
+ int Imm = 0;
+
+ if (!Load && TIED_TO) {
+ R0 = Rn;
+ R1 = Rd;
+ } else {
+ R0 = Rd;
+ R1 = Rn;
+ }
+ if (ThreeReg) {
+ if (TIED_TO) {
+ R2 = Rn;
+ Imm = decodeImm8(insn);
+ } else {
+ R2 = decodeRm(insn);
+ // See, for example, A8.6.64 LDRB (register).
+ // And ARMAsmPrinter::printT2AddrModeSoRegOperand().
+ // LSL is the default shift opc, and LLVM does not expect it to be encoded
+ // as part of the immediate operand.
+ // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4));
+ Imm = slice(insn, 5, 4);
+ }
+ } else {
+ if (Imm12)
+ Imm = getImm12(insn);
+ else
+ Imm = decodeImm8(insn);
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ R0)));
+ ++OpIdx;
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ R1)));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ // This could be an offset register or a TIED_TO register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ R2)));
+ ++OpIdx;
+ }
+
+  if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg && !TIED_TO,
+                        TIED_TO))
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+
+ return true;
+}
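+
+// Illustrative operand ordering (registers made up): for a pre-indexed store
+// such as "t2STR_PRE r1, [r2, #-4]!", Load is false and TIED_TO is true, so
+// the operands are built as R0 = Rn (r2, the writeback def), R1 = Rd (r1, the
+// stored value), R2 = Rn again (tied to the def), then the imm8 offset --
+// matching the t2STR_PRE line in the comment block above.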
+
+// A6.3.12 Data-processing (register)
+//
+// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))]
+// Three register operands only: Rs Rn Rm
+// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))]
+//
+// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm
+//
+// Miscellaneous operations: Rs [Rn] Rm
+static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass > 0 &&
+ OpInfo[1].RegClass > 0 &&
+ "Expect >= 2 operands and first two as reg operands");
+
+ // Build the register operands, followed by the optional rotation amount.
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRs(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Add the rotation amount immediate.
+ MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.16 Multiply, multiply accumulate, and absolute difference
+//
+// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12}
+// t2MUL, t2SMMUL: Rs Rn Rm
+// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12}
+// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm
+//
+// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]:
+// Rs Rn Rm Ra=Inst{15-12}
+//
+// Unsigned Sum of Absolute Differences [and Accumulate]
+// Rs Rn Rm [Ra=Inst{15-12}]
+static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 3 operands and first three as reg operands");
+
+ // Build the register operands.
+
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+
+ if (FourReg)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRd(insn))));
+
+ NumOpsAdded = FourReg ? 4 : 3;
+
+ return true;
+}
+
+// A6.3.17 Long multiply, long multiply accumulate, and divide
+//
+// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Halfword multiply accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Dual halfword multiply: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
+static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 3 operands and first three as reg operands");
+
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
+
+ // Build the register operands.
+
+ if (FourReg)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+
+ if (FourReg)
+ NumOpsAdded = 4;
+ else
+ NumOpsAdded = 3;
+
+ return true;
+}
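+
+// Illustrative ordering (registers made up): for "t2UMULL r0, r1, r2, r3" the
+// four operands added above are RdLo = Inst{15-12} = r0, RdHi = Inst{11-8} =
+// r1, then Rn = r2 and Rm = r3, in that order.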
+
+// See A6.3 32-bit Thumb instruction encoding for instruction classes
+// corresponding to (op1, op2, op).
+//
+// Table A6-9 32-bit Thumb instruction encoding
+// op1 op2 op Instruction class, see
+// --- ------- -- -----------------------------------------------------------
+// 01 00xx0xx - Load/store multiple on page A6-23
+// 00xx1xx - Load/store dual, load/store exclusive, table branch on
+// page A6-24
+// 01xxxxx - Data-processing (shifted register) on page A6-31
+// 1xxxxxx - Coprocessor instructions on page A6-40
+// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
+// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
+// - 1 Branches and miscellaneous control on page A6-20
+// 11 000xxx0 - Store single data item on page A6-30
+// 001xxx0 - Advanced SIMD element or structure load/store instructions
+// on page A7-27
+// 00xx001 - Load byte, memory hints on page A6-28
+// 00xx011 - Load halfword, memory hints on page A6-26
+// 00xx101 - Load word on page A6-25
+// 00xx111 - UNDEFINED
+// 010xxxx - Data-processing (register) on page A6-33
+// 0110xxx - Multiply, multiply accumulate, and absolute difference on
+// page A6-38
+// 0111xxx - Long multiply, long multiply accumulate, and divide on
+// page A6-39
+// 1xxxxxx - Coprocessor instructions on page A6-40
+//
+static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
+ MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+
+ switch (op1) {
+ case 1:
+ if (slice(op2, 6, 5) == 0) {
+ if (slice(op2, 2, 2) == 0) {
+ // Load/store multiple.
+ return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+
+      // Otherwise: load/store dual, load/store exclusive, or table branch.
+ assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
+ if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
+ (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
+ // Load/store exclusive.
+ return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ if (Opcode == ARM::t2LDRDi8 ||
+ Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST ||
+ Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) {
+ // Load/store dual.
+ return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
+ // Table branch.
+ return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ } else if (slice(op2, 6, 5) == 1) {
+ // Data-processing (shifted register).
+ return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+
+ // FIXME: A6.3.18 Coprocessor instructions
+ // But see ThumbDisassembler::getInstruction().
+
+ break;
+ case 2:
+ if (op == 0) {
+ if (slice(op2, 5, 5) == 0)
+ // Data-processing (modified immediate)
+ return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
+
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ case 3:
+ switch (slice(op2, 6, 5)) {
+ case 0:
+ // Load/store instructions...
+ if (slice(op2, 0, 0) == 0) {
+ if (slice(op2, 4, 4) == 0) {
+ // Store single data item on page A6-30
+ return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
+ B);
+ } else {
+ // FIXME: Advanced SIMD element or structure load/store instructions.
+ // But see ThumbDisassembler::getInstruction().
+ ;
+ }
+ } else {
+ // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
+ return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
+ NumOpsAdded, B);
+ }
+ break;
+ case 1:
+ if (slice(op2, 4, 4) == 0) {
+ // A6.3.12 Data-processing (register)
+ return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else if (slice(op2, 3, 3) == 0) {
+ // A6.3.16 Multiply, multiply accumulate, and absolute difference
+ return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else {
+ // A6.3.17 Long multiply, long multiply accumulate, and divide
+ return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ break;
+ default:
+ // FIXME: A6.3.18 Coprocessor instructions
+ // But see ThumbDisassembler::getInstruction().
+ ;
+ break;
+ }
+
+ break;
+ default:
+ assert(0 && "Thumb2 encoding error!");
+ break;
+ }
+
+ return false;
+}
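+
+// Illustrative dispatch (encoding value chosen as an example): for a DMB SY,
+// insn = 0xf3bf8f5f, DisassembleThumbFrm() below extracts op1 = 0b10,
+// op2 = 0x3b and op = 1 from the halfwords 0xf3bf / 0x8f5f; with op1 = 2 and
+// op = 1 the switch above routes to DisassembleThumb2BrMiscCtrl(), matching
+// the "op1 = 10, op = 1" row of Table A6-9.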
+
+static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+
+ uint16_t HalfWord = slice(insn, 31, 16);
+
+ if (HalfWord == 0) {
+ // A6.2 16-bit Thumb instruction encoding
+ // op = bits[15:10]
+ uint16_t op = slice(insn, 15, 10);
+ return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
+ }
+
+ unsigned bits15_11 = slice(HalfWord, 15, 11);
+
+ // A6.1 Thumb instruction set encoding
+ if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
+    assert(0 &&
+           "Bits[15:11] of first halfword of Thumb2 instruction out of range");
+ return false;
+ }
+
+ // A6.3 32-bit Thumb instruction encoding
+
+ uint16_t op1 = slice(HalfWord, 12, 11);
+ uint16_t op2 = slice(HalfWord, 10, 4);
+ uint16_t op = slice(insn, 15, 15);
+
+ return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 000000000000..78d3e477975c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,759 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMBaseInfo.h"
+#include "ARMInstPrinter.h"
+#include "ARMAddressingModes.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "ARMGenAsmWriter.inc"
+
+StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ unsigned Opcode = MI->getOpcode();
+
+ // Check for MOVs and print canonical forms, instead.
+ if (Opcode == ARM::MOVs) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6, O);
+ printPredicateOperand(MI, 4, O);
+
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
+ return;
+
+ O << ", ";
+
+ if (MO2.getReg()) {
+ O << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+ return;
+ }
+
+ // A8.6.123 PUSH
+ if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.122 POP
+ if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.355 VPUSH
+ if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.354 VPOP
+ if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ printInstruction(MI, O);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+// so_reg is a 3-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+ if (MO2.getReg()) {
+ O << ' ' << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+  } else {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (!MO2.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM2PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM2PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+ << " #" << ShImm;
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (MO2.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << '[' << getRegisterName(MO1.getReg());
+
+ if (MO2.getReg()) {
+ O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg()) << ']';
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+ O << ']';
+}
+
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM3PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM3PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << getRegisterName(MO1.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+ .getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ImmOffs * 4;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << (MO2.getImm() << 3);
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ O << "[" << getRegisterName(MO1.getReg()) << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else
+ O << ", " << getRegisterName(MO.getReg());
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+  uint32_t v = ~MO.getImm();
+  int32_t lsb = CountTrailingZeros_32(v);
+  int32_t width = (32 - CountLeadingZeros_32(v)) - lsb;
+  O << '#' << lsb << ", #" << width;
+}
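+
+// Illustrative decode (mask value made up): a bf_inv_mask_imm of 0xfff00fff
+// describes a BFC/BFI field at bits 19:12, so v = ~imm = 0x000ff000,
+// lsb = 12, width = (32 - 12) - 12 = 8, and "#12, #8" is printed.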
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum) O << ", ";
+ O << getRegisterName(MI->getOperand(i).getReg());
+ }
+ O << "}";
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+
+ if (SpecRegRBit)
+ O << "spsr";
+ else
+ O << "cpsr";
+
+ if (Mask) {
+ O << '_';
+ if (Mask & 8) O << 'f';
+ if (Mask & 4) O << 's';
+ if (Mask & 2) O << 'x';
+ if (Mask & 1) O << 'c';
+ }
+}
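+
+// Illustrative print (operand value made up): an MSR mask operand of 0x19 has
+// the R bit set (0x19 >> 4 == 1) and Mask == 0x9, so "spsr_fc" is printed
+// (8 -> 'f', 1 -> 'c').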
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNum).getReg()) {
+ assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+ "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
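+
+// Illustrative print (operand value made up): for an IT-mask operand of 0x0c,
+// CondBit0 = 0 and NumTZ = 2, so only Pos = 3 is examined; that bit is 1,
+// which differs from CondBit0, so 'e' is printed and the block reads "ITE".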
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned RegNum = MO2.getReg())
+ O << ", " << getRegisterName(RegNum);
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+// t2_so_reg (constant shifts) is a 2-operand unit corresponding to the Thumb2
+// register-with-shift forms:
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ O << getRegisterName(Reg);
+
+ // Print the shift opc.
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << ", #" << OffImm * 4;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", " << getRegisterName(MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl #" << ShAmt;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << (float)MO.getFPImm();
+ } else {
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.F;
+ }
+}
+
+void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << MO.getFPImm();
+ } else {
+ // We expect the binary encoding of a floating point number here.
+ union {
+ uint64_t I;
+ double D;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.D;
+ }
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 000000000000..d5f238bb8a61
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,120 @@
+//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTPRINTER_H
+#define ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+ ARMInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..18645c0864a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmPrinter
+ ARMInstPrinter.cpp
+ )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
new file mode 100644
index 000000000000..65d372e44b88
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
new file mode 100644
index 000000000000..53b4c95d3801
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -0,0 +1,78 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+cl::opt<bool>
+EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables"),
+ cl::init(false));
+
+
+static const char *const arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+ AsmTransCBE = arm_asm_table;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::SjLj;
+}
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ Data64bitsDirective = 0;
+ CommentString = "@";
+
+ HasLEB128 = true;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ HasLCOMMDirective = true;
+
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ if (EnableARMEHABI)
+ ExceptionsType = ExceptionHandling::ARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
new file mode 100644
index 000000000000..90f7822ea580
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit ARMMCAsmInfoDarwin();
+ };
+
+ struct ARMELFMCAsmInfo : public MCAsmInfo {
+ explicit ARMELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 000000000000..f8fcf2b8aff1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,144 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "ARMMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::string ARM_MC::ParseARMTriple(StringRef TT) {
+  // Work out the subtarget feature string implied by the target triple. The
+  // result is empty (apart from "+thumb-mode" for Thumb triples) when the
+  // architecture version cannot be determined.
+ unsigned Len = TT.size();
+ unsigned Idx = 0;
+
+  // FIXME: Enhance Triple helper class to extract ARM version.
+ bool isThumb = false;
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ isThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+
+ std::string ARMArchFeature;
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer >= '7' && SubVer <= '9') {
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv";
+ } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk";
+ } else
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp";
+ } else if (SubVer == '6') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchFeature = "+v6t2";
+ else
+ ARMArchFeature = "+v6";
+ } else if (SubVer == '5') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchFeature = "+v5te";
+ else
+ ARMArchFeature = "+v5t";
+ } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
+ ARMArchFeature = "+v4t";
+ }
+
+ if (isThumb) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+thumb-mode";
+ else
+ ARMArchFeature += ",+thumb-mode";
+ }
+
+ return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
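+ // Append any explicitly requested features after the ones derived from the
+ // triple; features later in the string win when they conflict, so an
+ // explicit "-neon", for instance, overrides a triple-derived "+neon".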
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitARMMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeARMMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitARMMCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeARMMCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return new ARMMCAsmInfoDarwin();
+
+ return new ARMELFMCAsmInfo();
+}
+
+extern "C" void LLVMInitializeARMMCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 000000000000..74701e3516dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,52 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCTARGETDESC_H
+#define ARMMCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheARMTarget, TheThumbTarget;
+
+namespace ARM_MC {
+ std::string ParseARMTriple(StringRef TT);
+
+ /// createARMMCSubtargetInfo - Create an ARM MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..68daf42c9191
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMARMDesc
+ ARMMCTargetDesc.cpp
+ ARMMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..448ed9df2bff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 000000000000..2df00538b39f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,331 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each into a pair
+// of multiply and add / sub instructions) when special VMLx hazards are
+// detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExpand("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
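+ // The pass scans each basic block bottom-up. LastMIs is a 4-entry circular
+ // buffer of recently visited instructions (MIIdx is the next write slot);
+ // IgnoreStall marks MLx instructions that no longer need expanding because
+ // a dependent MLx has already been broken up.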
+ bool isA9;
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+ SmallPtrSet<MachineInstr*, 4> IgnoreStall;
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI);
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
+ return false;
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+ if (ForceExpand)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ IgnoreStall.insert(DefMI);
+ return true;
+ }
+
+ if (IgnoreStall.count(MI))
+ return false;
+
+ // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
+ // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+ // preserves the in-order retirement of the instructions.
+ // Look at the next few instructions; if *most* of them can cause hazards,
+ // the scheduler can't *fix* this, so we'd better break up the VMLA.
+ unsigned Limit1 = isA9 ? 1 : 4;
+ unsigned Limit2 = isA9 ? 1 : 4;
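+ // Heuristic: on Cortex-A9 only the instruction immediately following the
+ // MLx is considered; on other cores the window is four instructions.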
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+ if (i <= Limit1)
+ return true;
+ }
+
+ // Look for VMLx RAW hazard.
+ if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const MCInstrDesc &MCID1 = TII->get(MulOpc);
+ const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+ unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
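+ // TmpReg receives the product of the standalone multiply; the add / sub
+ // built below then combines it with the original accumulator.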
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = llvm::prior(MII);
+ MachineInstr &MI2 = *MII;
+ MII = llvm::prior(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+ IgnoreStall.clear();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+ // Assume dual issues of non-VFP / NEON instructions.
+ pushStack(0);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(MCID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ isA9 = STI->isCortexA9();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= ExpandFPMLxInstructions(MBB);
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
new file mode 100644
index 000000000000..c85d1e99705a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
@@ -0,0 +1,149 @@
+//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "neon-mov-fix"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumVMovs, "Number of reg-reg moves converted");
+
+namespace {
+ struct NEONMoveFixPass : public MachineFunctionPass {
+ static char ID;
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NEON reg-reg move conversion";
+ }
+
+ private:
+ const TargetRegisterInfo *TRI;
+ const ARMBaseInstrInfo *TII;
+ bool isA8;
+
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ bool InsertMoves(MachineBasicBlock &MBB);
+ };
+ char NEONMoveFixPass::ID = 0;
+}
+
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+ return (Domain & ARMII::DomainNEON) ||
+ (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
+bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
+ RegMap Defs;
+ bool Modified = false;
+
+ // Walk over MBB tracking the def points of the registers.
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+ MachineInstr *MI = &*MII;
+
+ if (MI->getOpcode() == ARM::VMOVD &&
+ !TII->isPredicated(MI)) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ // If we do not find an instruction defining the reg, this means the
+ // register should be live-in for this BB. It's always better to use
+ // NEON reg-reg moves.
+ unsigned Domain = ARMII::DomainNEON;
+ RegMap::iterator DefMI = Defs.find(SrcReg);
+ if (DefMI != Defs.end()) {
+ Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
+ // Instructions in general domain are subreg accesses.
+ // Map them to NEON reg-reg moves.
+ if (Domain == ARMII::DomainGeneral)
+ Domain = ARMII::DomainNEON;
+ }
+
+ if (inNEONDomain(Domain, isA8)) {
+ // Convert VMOVD to VORRd
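+ // (VORR Dd, Dm, Dm copies Dm into Dd entirely inside the NEON domain,
+ // avoiding a cross-domain VFP move.)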
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ DEBUG({errs() << "vmov convert: "; MI->dump();});
+
+ // It's safe to ignore imp-defs / imp-uses here, since:
+ // - We're running late, no intelligent codegen passes should be run
+ // afterwards
+ // - The imp-defs / imp-uses are superregs only, we don't care about
+ // them.
+ AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::VORRd), DestReg)
+ .addReg(SrcReg).addReg(SrcReg));
+ MBB.erase(MI);
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+
+ DEBUG({errs() << " into: "; MI->dump();});
+
+ Modified = true;
+ ++NumVMovs;
+ } else {
+ assert((Domain & ARMII::DomainVFP) && "Invalid domain!");
+ // Do nothing.
+ }
+ }
+
+ // Update def information.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+
+ Defs[MOReg] = MI;
+ // Catch aliases as well.
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
+ Defs[*R] = MI;
+ }
+ }
+
+ return Modified;
+}
+
+bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
+ ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
+ const TargetMachine &TM = Fn.getTarget();
+
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+
+ TRI = TM.getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= InsertMoves(MBB);
+ }
+
+ return Modified;
+}
+
+/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
+/// pass.
+FunctionPass *llvm::createNEONMoveFixPass() {
+ return new NEONMoveFixPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 000000000000..163a0a987584
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMTarget, llvm::TheThumbTarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMTarget, "arm", "ARM");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ Y(TheThumbTarget, "thumb", "Thumb");
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 000000000000..c258870e48a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,361 @@
+//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only small immediate offsets to
+ // address the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
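+ // The sp-relative Thumb1 load/store encodings take an 8-bit word offset,
+ // so they reach at most 255 * 4 = 1020 bytes; only reserve the call frame
+ // when it stays within half of that range.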
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
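+ // Area 1 holds the low GPRs (r4-r7) and LR; area 2 holds r8-r11 (split out
+ // only on Darwin); the DPR area holds any spilled D registers.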
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+ // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NumBytes > 508)
+ // If offset is > 508 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP));
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ if (MI->getOpcode() == ARM::tLDRspi &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert((MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ } else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (VARegSaveSize) {
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. Therefore, we
+ // pop the old LR into R3 as a temporary.
+
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill));
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NumRegs = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
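+ // Popping the saved LR directly into PC turns this restore into the
+ // function return; the original return instruction is erased below.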
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NumRegs = true;
+ }
+
+ // It's illegal to emit pop instruction without operands.
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 000000000000..bcfc5165fad0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1FRAMELOWERING_H
+#define THUMB1FRAMELOWERING_H
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 000000000000..218311d78d30
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,100 @@
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1InstrInfo.h"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "Thumb1InstrInfo.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ return 0;
+}
+
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 000000000000..17ef2f758ef4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,59 @@
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1INSTRUCTIONINFO_H
+#define THUMB1INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+ Thumb1RegisterInfo RI;
+public:
+ explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+};
+}
+
+#endif // THUMB1INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
new file mode 100644
index 000000000000..4eb0b6c93e1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -0,0 +1,714 @@
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
+
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+const TargetRegisterClass*
+Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ if (ARM::tGPRRegClass.hasSubClassEq(RC))
+ return ARM::tGPRRegisterClass;
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+const TargetRegisterClass *
+Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::tGPRRegisterClass;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = MachineInstr::NoFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have a high register version. Load the negative value
+ // if either the base or dest register is a high register. Also, do not
+ // issue the sub as part of the sequence if the condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+ ARMCC::AL, 0, MIFlags);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specified add / sub r, c instruction.
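+/// For example (illustrative): adding 1100 to sp via tADDrSPi (8-bit offset
+/// scaled by 4, i.e. chunks of 1020) takes one tADDrSPi plus one tADDi8 for
+/// the remaining 80 bytes, so calcNumMI returns 2.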
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+ bool NeedCC = false;
+ bool NeedPred = false;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ if (DestReg == ARM::SP) {
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ } else {
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NumBits = 8;
+ NeedPred = NeedCC = true;
+ }
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+ DestReg, BaseReg, NumBytes, true,
+ TII, MRI, MIFlags);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MachineInstrBuilder MIB =
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags));
+ AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
+ } else {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill))
+ .setMIFlags(MIFlags);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(DestReg).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+ }
+ else {
+ bool isKill = BaseReg != ARM::SP;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+
+ BaseReg = DestReg;
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NeedPred = NeedCC = isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc) {
+ const MCInstrDesc &MCID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3)
+ .setMIFlags(MIFlags));
+ }
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI);
+}
+
+void Thumb1RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
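+/// For example (illustrative), materializing -300 emits:
+///   movs rD, #255 ; adds rD, #45 ; rsbs rD, rD, #0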
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const Thumb1RegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
+ DestReg))
+ .addImm(ThisVal));
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
+ if (isSub) {
+ const MCInstrDesc &MCID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill));
+ }
+}
+
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+ if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ MI.setDesc(TII.get(Opcode));
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and add predicate operands.
+ MI.RemoveOperand(FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
+ .addImm(Offset / Scale));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
+ }
+ return true;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
+ return true;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
+ }
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = llvm::next(II);
+ emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+ *this);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructions)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
+ if (Opcode == ARM::tADDi3) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+ }
+ return true;
+ } else {
+ if (AddrMode != ARMII::AddrModeT1_s)
+ llvm_unreachable("Unsupported addressing mode!");
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
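+ // sp-relative loads/stores encode an 8-bit word offset (0-1020 bytes),
+ // while the register-base forms only encode a 5-bit word offset (0-124).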
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with the frame register (e.g., sp).
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we ended up with a base register other than SP (e.g. the frame
+ // pointer), convert the instruction to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
+ return true;
+ }
+
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ ImmOp.ChangeToImmediate(0);
+ } else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask * Scale);
+ }
+ }
+
+ return Offset == 0;
+}
+
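A note on the arithmetic above: the tADDrSPi path folds the offset only when it is a multiple of the scale and the scaled value fits the immediate field; anything larger is split into the largest encodable chunk plus a remainder. A minimal standalone sketch of that test (the helper name and sample values are illustrative, not part of the patch):

    static bool fitsThumb1AddImm(int Offset, unsigned NumBits, unsigned Scale) {
      // Mirrors the "small offset" test in rewriteFrameIndex: Offset must be a
      // non-negative multiple of Scale whose scaled value fits in NumBits.
      if (Offset < 0 || (Offset % Scale) != 0)
        return false;
      unsigned Mask = (1u << NumBits) - 1;             // 0xff for the SP form
      return (((unsigned)Offset / Scale) & ~Mask) == 0;
    }

    // For offsets that do not fit, the positive path above emits the largest
    // encodable chunk (Mask * Scale, i.e. 255 * 4 = 1020 for "add rd, sp, #imm")
    // and leaves the remainder to emitThumbRegPlusImmediate:
    //   fitsThumb1AddImm(1020, 8, 4) -> true
    //   fitsThumb1AddImm(1100, 8, 4) -> false; split as 1020 + 80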
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function), the offset will be negative. Use R12 instead, since
+ // that's a call-clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill));
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
+ if (Desc.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tLDRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
+ UseRR = true;
+ }
+ } else {
+ emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+ *this);
+ }
+
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else if (Desc.mayStore()) {
+ VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSTRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+ *this);
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
+ MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else {
+ assert(false && "Unexpected opcode!");
+ }
+
+ // Add predicate back if it's needed.
+ if (MI.getDesc().isPredicable()) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+}
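When the folded offset still does not fit, eliminateFrameIndex above materializes SP/FP plus the large immediate into a scratch register. A hedged sketch of how that scratch register is chosen (free-standing for illustration only; the real logic is inline in the function above):

    static unsigned pickScratchReg(MachineInstr &MI, MachineFunction &MF) {
      // Loads can reuse (and clobber) their own destination register; stores
      // need a fresh low virtual register for the address computation.
      if (MI.getDesc().mayLoad())
        return MI.getOperand(0).getReg();
      return MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
    }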
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
new file mode 100644
index 000000000000..9060e59e5980
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -0,0 +1,69 @@
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1REGISTERINFO_H
+#define THUMB1REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+
+ /// Code Generation virtual methods...
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // Rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+};
+}
+
+#endif // THUMB1REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 000000000000..360ec009e201
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,252 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "thumb2-it"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+
+namespace {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+/// TrackDefUses - Track which registers are defined and used by instructions
+/// in the IT block. This also tracks "dependencies", i.e. uses in the IT
+/// block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ if (!isCopy(MI))
+ return false;
+ // llvm models selects as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects, we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ return false;
+}
+
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
+ ++MBBI;
+
+ // Form IT block.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block, so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
+ }
+
+ // Finalize IT mask.
+ Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
+ MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
+ MachineBasicBlock &MBB = *MFI;
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
+ }
+
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
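The mask built in InsertITInstructions packs the low bit of each trailing condition starting at bit 3, plants a terminator bit at the final position, and, as the in-line comment notes, tags firstcond[0] along in bit 4. A standalone sketch of just that arithmetic (names are illustrative; the sample relies on ARMCC::EQ == 0 and ARMCC::NE == 1):

    static unsigned buildITMask(ARMCC::CondCodes CC,
                                const SmallVectorImpl<ARMCC::CondCodes> &Trailing) {
      unsigned Mask = 0, Pos = 3;
      for (unsigned i = 0, e = Trailing.size(); i != e && Pos; ++i, --Pos)
        Mask |= (Trailing[i] & 1) << Pos;   // low bit of each trailing condition
      Mask |= (1 << Pos);                   // terminator bit
      Mask |= (CC & 1) << 4;                // firstcond[0] tagged along
      return Mask;
    }

    // buildITMask(ARMCC::EQ, {EQ, NE}) == 0x6, matching the two operands the
    // pass gives t2IT for an EQ block with one more "then" and one "else"
    // instruction.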
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
new file mode 100644
index 000000000000..51b56aaeb008
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -0,0 +1,610 @@
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2InstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
+
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
+ return 0;
+}
+
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Control flow can reach here if branch folding is run before the IT
+ // block formation pass.
+ }
+}
+
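A hedged sketch of the mask fix-up performed just above when the tail of an IT block is replaced: the bits below bit Count are cleared and a new terminator bit is planted there, while the higher bits (including the bit-4 firstcond tag) are preserved. The helper and sample values are illustrative only.

    static unsigned shortenITMask(unsigned Mask, unsigned Count) {
      unsigned MaskOn  = 1u << Count;       // new terminator position
      unsigned MaskOff = ~(MaskOn - 1);     // wipe the now-unused low bits
      return (Mask & MaskOff) | MaskOn;
    }

    // shortenITMask(0x6 /*terminator at bit 1*/, 2) == 0x4: the IT block now
    // covers one fewer trailing instruction.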
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Handle SPR, DPR, and QPR copies.
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+}
+
+void Thumb2InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
+}
+
+void Thumb2InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
+}
+
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ }
+ return;
+ }
+ }
+
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags));
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ bool HasCCOut = true;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ // FIXME: Fix Thumb1 immediate encoding.
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
+
+ // Build the new ADD / SUB.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)).setMIFlags(MIFlags);
+ if (HasCCOut)
+ AddDefaultCC(MIB);
+
+ BaseReg = DestReg;
+ }
+}
+
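Both the loop above and rewriteT2FrameIndex below peel off "8 adjacent bits" of a constant so that each chunk is encodable as a Thumb2 modified immediate. A standalone sketch of that step, using a compiler clz builtin in place of CountLeadingZeros_32/ARM_AM::rotr32 (assumption: the value is non-zero):

    static unsigned takeTopByteChunk(unsigned Value) {
      unsigned RotAmt = __builtin_clz(Value);          // leading zeros of Value
      // An 8-bit window anchored at Value's most significant set bit,
      // i.e. rotr32(0xff000000, RotAmt).
      unsigned Window = (0xff000000u >> RotAmt) |
                        (RotAmt ? 0xff000000u << (32 - RotAmt) : 0);
      return Value & Window;
    }

    // takeTopByteChunk(0x1234) == 0x1220; the remaining 0x14 is folded or
    // emitted by a follow-up add/sub, exactly as NumBytes &= ~ThisVal does
    // in the loop above.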
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ bool HasCCOut = Opcode != ARM::t2ADDri12;
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096 &&
+ (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
+ unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Remove the cc_out operand.
+ if (HasCCOut)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ } else {
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // The i8 form supports only negative offsets and the i12 form only
+ // positive ones, so convert Opcode to the appropriate instruction
+ // based on the sign of Offset.
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else if (AddrMode == ARMII::AddrMode5) {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ } else {
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, the offset doesn't fit. Pull in what we can to simplify the immediate.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
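For the t2ADDri/t2SUBri path above, the rewrite tries three encodings in order before leaving a residual offset for the caller. A compact restatement as code (the enum and helper are illustrative; the cc_out caveat on the imm12 path is elided):

    enum T2AddOffsetFold { FoldSOImm, FoldImm12, FoldPartial };

    static T2AddOffsetFold classifyT2AddOffset(int Offset) {
      if (ARM_AM::getT2SOImmVal(Offset) != -1)
        return FoldSOImm;     // keep t2ADDri/t2SUBri, offset fully folded
      if (Offset < 4096)
        return FoldImm12;     // switch to t2ADDri12/t2SUBri12 (drops cc_out)
      return FoldPartial;     // fold the top 8-bit chunk, leave the rest in Offset
    }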
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI which can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 000000000000..f2637d7fbcab
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,78 @@
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2INSTRUCTIONINFO_H
+#define THUMB2INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+
+namespace llvm {
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+ Thumb2RegisterInfo RI;
+public:
+ explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post-incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-address instruction inserted by the two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
+}
+
+#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
new file mode 100644
index 000000000000..355c3bf0352c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,52 @@
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
new file mode 100644
index 000000000000..824378aeab4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -0,0 +1,43 @@
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2REGISTERINFO_H
+#define THUMB2REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+};
+}
+
+#endif // THUMB2REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 000000000000..c741a6e8a5b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,881 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "t2-reduce-size"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
+
+namespace {
+ /// ReduceTable - A static table with information on mapping from wide
+ /// opcodes to narrow ones.
+ struct ReduceEntry {
+ unsigned WideOpc; // Wide opcode
+ unsigned NarrowOpc1; // Narrow opcode to transform to
+ unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+ unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
+ // 1 - No cc field.
+ // 2 - Always set CPSR.
+ unsigned PredCC2 : 2;
+ unsigned PartFlag : 1; // 16-bit instruction does partial flag update
+ unsigned Special : 1; // Needs to be dealt with specially
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: tMOVi8 and tMVN also partially update CPSR, but they are less
+ // likely to cause issues in a loop. As a size / performance workaround,
+ // they are not marked as such.
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ };
+
+ class Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce();
+
+ const Thumb2InstrInfo *TII;
+ const ARMSubtarget *STI;
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ MachineInstr *CPSRDef);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ }
+}
+
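The constructor above builds ReduceOpcodeMap so the pass can go from a wide opcode straight to its table row when it later visits each instruction. A hedged sketch of that lookup (the helper is illustrative; the pass performs the equivalent lookup inline):

    static const ReduceEntry *lookupReduceEntry(
        const DenseMap<unsigned, unsigned> &ReduceOpcodeMap, unsigned WideOpc) {
      DenseMap<unsigned, unsigned>::const_iterator I = ReduceOpcodeMap.find(WideOpc);
      if (I == ReduceOpcodeMap.end())
        return 0;                        // no 16-bit counterpart for this opcode
      return &ReduceTable[I->second];    // NarrowOpc1 / NarrowOpc2, limits, flags
    }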
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+ for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the flag-setting ('s') 16-bit instructions partially update CPSR. Abort the
+/// transformation to avoid adding a false dependency on the last CPSR-setting
+/// instruction, which hurts the out-of-order execution engine's ability
+/// to do register renaming magic.
+/// This function checks if there is a read-after-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If there
+/// is, then there is no harm done since the instruction cannot be retired
+/// before the CPSR-setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of compile
+/// time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+/// = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls since there
+/// is only an indirect RAW dependency between the muls and the mul.w.
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
+ if (!Def || !STI->avoidCPSRPartialUpdate())
+ return false;
+
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Def->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ Defs.insert(Reg);
+ }
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Use->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Defs.count(Reg))
+ return false;
+ }
+
+ // No read-after-write dependency. The narrowing will add a false dependency.
+ return true;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+ // Old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
+ Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
+ Opc == ARM::t2LDMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isSPOk = isPCOk || isLROk;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool HasOffReg = true;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
+
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ HasOffReg = false;
+ }
+
+ Scale = 4;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB: {
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
+ return false;
+
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+
+ if (!isOK)
+ return false;
+
+ OpNum = 0;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_RET: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg != ARM::SP)
+ return false;
+ Opc = Entry.NarrowOpc2; // tPOP_RET
+ OpNum = 2;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ OpNum = 0;
+
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP &&
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
+ Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
+ return false;
+ }
+
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ }
+
+ // Transfer the rest of operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::t2ADDri) {
+ // If the source register is SP, try to reduce to tADDrSPi, otherwise
+ // it's a normal reduce.
+ if (MI->getOperand(1).getReg() != ARM::SP) {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ // Try to reduce to tADDrSPi.
+ unsigned Imm = MI->getOperand(2).getImm();
+    // The immediate must be in range, the destination register must be a low
+    // register, the predicate must be "always", and the instruction must not
+    // set the condition flags.
+ if (Imm & 3 || Imm > 1020)
+ return false;
+ if (!isARMLowRegister(MI->getOperand(0).getReg()))
+ return false;
+ if (MI->getOperand(3).getImm() != ARMCC::AL)
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+    DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+ }
+
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayLoad() || MCID.mayStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+      { ARM::t2CMPrr, ARM::tCMPr, 0, 0, 0, 1, 1, 2, 0, 0, 1 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which have the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && MCID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ unsigned Limit = ~0U;
+ if (Entry.Imm1Limit)
+ Limit = (1 << Entry.Imm1Limit) - 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !MCID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which have the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((MCID.getOpcode() == ARM::t2RSBSri ||
+ MCID.getOpcode() == ARM::t2RSBri) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ if (!MCID.isPredicable() && NewMCID.isPredicable())
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+}
+
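+/// UpdateCPSRDef - Update CPSR liveness after MI executes. DefCPSR is set if
+/// MI defines CPSR at all; the return value is true if CPSR is live afterwards
+/// (MI has a non-dead CPSR def or CPSR was already live).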
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
+ bool HasDef = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+
+ DefCPSR = true;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
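+/// UpdateCPSRUse - Update CPSR liveness for MI's uses: a kill of CPSR ends
+/// its live range.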
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Yes, CPSR could be livein.
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
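+  // CPSRDef tracks the most recent CPSR-defining instruction seen in this
+  // block; it is used to avoid reductions that would add a false dependency
+  // on a partial flag update.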
+ MachineInstr *CPSRDef = 0;
+
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+
+ MachineInstr *MI = &*MII;
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI != ReduceOpcodeMap.end()) {
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+ // Ignore "special" cases for now.
+ if (Entry.Special) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ }
+
+ ProcessNext:
+ bool DefCPSR = false;
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+ if (MI->getDesc().isCall())
+ // Calls don't really set CPSR.
+ CPSRDef = 0;
+ else if (DefCPSR)
+ // This is the last CPSR defining instruction.
+ CPSRDef = MI;
+ }
+
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ STI = &TM.getSubtarget<ARMSubtarget>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Modified |= ReduceMBB(*I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() {
+ return new Thumb2SizeReduce();
+}
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.h b/contrib/llvm/lib/Target/Alpha/Alpha.h
new file mode 100644
index 000000000000..6ffaf45f4ed1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.h
@@ -0,0 +1,43 @@
+//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Alpha back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ALPHA_H
+#define TARGET_ALPHA_H
+
+#include "MCTargetDesc/AlphaMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ namespace Alpha {
+ // These describe LDAx
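+    // LDA adds a 16-bit signed displacement; LDAH scales its displacement by
+    // IMM_MULT (65536), so the pair can materialize 32-bit offsets.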
+
+ static const int IMM_LOW = -32768;
+ static const int IMM_HIGH = 32767;
+ static const int IMM_MULT = 65536;
+ }
+
+ class AlphaTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
+ FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE);
+ FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
+ FunctionPass *createAlphaBranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.td b/contrib/llvm/lib/Target/Alpha/Alpha.td
new file mode 100644
index 000000000000..ae79c2e4b70e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.td
@@ -0,0 +1,68 @@
+//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//Alpha is little endian
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+ "Enable CIX extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6" , Alpha21264Itineraries, []>;
+def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AlphaInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
new file mode 100644
index 000000000000..46ae286895a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct AlphaAsmPrinter : public AsmPrinter {
+
+ explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : AsmPrinter(tm, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Alpha Assembly Printer";
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ void EmitStartOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ };
+} // end of anonymous namespace
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ assert(MO.getImm() < (1 << 30));
+ } else {
+ printOp(MO, O);
+ }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ return;
+
+ case MachineOperand::MO_Immediate:
+ assert(0 && "printOp() does not handle immediate values");
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ return;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
+}
+
+void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
+ OutStreamer.EmitRawText(StringRef("\t.set noat"));
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaAsmPrinter() {
+ RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp b/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 000000000000..376811709536
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,66 @@
+//===-- AlphaBranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace pseudo COND_BRANCH_* instructions with their appropriate real
+// branches. This is a simplified version of the PPC Branch Selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+namespace {
+ struct AlphaBSel : public MachineFunctionPass {
+ static char ID;
+ AlphaBSel() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Alpha Branch Selection";
+ }
+ };
+ char AlphaBSel::ID = 0;
+}
+
+/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createAlphaBranchSelectionPass() {
+ return new AlphaBSel();
+}
+
+bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI) {
+ if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
+ MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
+
+ // condbranch operands:
+ // 0. bc opcode
+ // 1. reg
+ // 2. target MBB
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
+ }
+ }
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td b/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td
new file mode 100644
index 000000000000..bde8819f46e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td
@@ -0,0 +1,38 @@
+//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the Alpha architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Alpha Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Alpha : CallingConv<[
+ // i64 is returned in register R0
+ // R1 is an llvm extension, I don't know what gcc does
+ CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
+
+ // f32 / f64 are returned in F0/F1
+ CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Alpha Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Alpha : CallingConv<[
+ // The first 6 arguments are passed in registers, whether integer or
+ // floating-point
+ CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
+ [F16, F17, F18, F19, F20, F21]>>,
+
+ CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
+ [R16, R17, R18, R19, R20, R21]>>,
+
+ // Stack slots are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
new file mode 100644
index 000000000000..690cd1da9c1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
@@ -0,0 +1,143 @@
+//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaFrameLowering.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
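+// Split an immediate into the 16-bit halves used by an LDAH / LDA pair:
+// getUpper16(l) * IMM_MULT + getLower16(l) == l, with the low half kept in the
+// signed range [IMM_LOW, IMM_HIGH]. For example, 100000 splits into an upper
+// half of 2 (2 * 65536 = 131072) and a lower half of -31072.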
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. For Alpha this is the case only when the function has
+// variable sized allocas.
+//
+bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
+ bool FP = hasFP(MF);
+
+  // Handle GP offset
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(MF.getFunction());
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
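+  // A 16-bit displacement fits in a single LDA; larger frames need an
+  // LDAH / LDA pair built from the upper and lower 16-bit halves.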
+ if (NumBytes >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+
+ // Now if we need to, save the old FP and set the new
+ if (FP) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ // This must be the last instr in the prolog
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ assert((MBBI->getOpcode() == Alpha::RETDAG ||
+ MBBI->getOpcode() == Alpha::RETDAGp)
+ && "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+ .addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
new file mode 100644
index 000000000000..ebd9e1bac190
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
@@ -0,0 +1,43 @@
+//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Alpha.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class AlphaSubtarget;
+
+class AlphaFrameLowering : public TargetFrameLowering {
+ const AlphaSubtarget &STI;
+  // FIXME: This should end up in MachineFunctionInfo, not here!
+ mutable int curgpdist;
+public:
+ explicit AlphaFrameLowering(const AlphaSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 000000000000..7b91fea54af4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,426 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to an Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+ //===--------------------------------------------------------------------===//
+ /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+ /// instructions for SelectionDAG operations.
+ class AlphaDAGToDAGISel : public SelectionDAGISel {
+ static const int64_t IMM_LOW = -32768;
+ static const int64_t IMM_HIGH = 32767;
+ static const int64_t IMM_MULT = 65536;
+ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+ static int64_t get_ldah16(int64_t x) {
+ int64_t y = x / IMM_MULT;
+ if (x % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+ }
+
+ static int64_t get_lda16(int64_t x) {
+ return x - get_ldah16(x) * IMM_MULT;
+ }
+
+ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+ /// instruction (if not, return 0). Note that this code accepts partial
+    /// zap masks. For example (and LHS, 1) is a valid zap, as long as we know
+ /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
+ /// in checking mode. If LHS is null, we assume that the mask has already
+ /// been validated before.
+ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const {
+ uint64_t BitsToCheck = 0;
+ unsigned Result = 0;
+ for (unsigned i = 0; i != 8; ++i) {
+ if (((Constant >> 8*i) & 0xFF) == 0) {
+ // nothing to do.
+ } else {
+ Result |= 1 << i;
+ if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+ // If the entire byte is set, zapnot the byte.
+ } else if (LHS.getNode() == 0) {
+            // Otherwise, if the mask was previously validated, we know it's okay
+ // to zapnot this entire byte even though all the bits aren't set.
+ } else {
+            // Otherwise we don't know that it's okay to zapnot this entire
+            // byte. Only do this if we can prove that the missing bits are
+            // already zero, so the bytezap doesn't need to really zero them.
+ BitsToCheck |= ~Constant & (0xFF << 8*i);
+ }
+ }
+ }
+
+      // If there are missing bits in a byte (for example, X & 0xEF00), check to
+      // see if the missing bits (0x1000) are already known zero. If not, the
+      // zap isn't okay to do, as it won't clear all the required bits.
+ if (BitsToCheck &&
+ !CurDAG->MaskedValueIsZero(LHS,
+ APInt(LHS.getValueSizeInBits(),
+ BitsToCheck)))
+ return 0;
+
+ return Result;
+ }
+
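+    /// get_zapImm - Return the zapnot byte mask for x if every byte of x is
+    /// either all ones or all zeros; otherwise return 0. For example,
+    /// 0x00FF00FF yields mask 0x5 (bytes 0 and 2).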
+ static uint64_t get_zapImm(uint64_t x) {
+ unsigned build = 0;
+ for(int i = 0; i != 8; ++i) {
+ if ((x & 0x00FF) == 0x00FF)
+ build |= 1 << i;
+ else if ((x & 0x00FF) != 0)
+ return 0;
+ x >>= 8;
+ }
+ return build;
+ }
+
+
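+    /// getNearPower2 - Return the power of two closest to x, preferring the
+    /// smaller one on a tie.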
+ static uint64_t getNearPower2(uint64_t x) {
+ if (!x) return 0;
+ unsigned at = CountLeadingZeros_64(x);
+ uint64_t complow = 1ULL << (63 - at);
+ uint64_t comphigh = 1ULL << (64 - at);
+ //cerr << x << ":" << complow << ":" << comphigh << "\n";
+ if (abs64(complow - x) <= abs64(comphigh - x))
+ return complow;
+ else
+ return comphigh;
+ }
+
+ static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
+ uint64_t y = getNearPower2(x);
+ if (swap)
+ return (y - x) == r;
+ else
+ return (x - y) == r;
+ }
+
+ public:
+ explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
+ : SelectionDAGISel(TM)
+ {}
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(int64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ virtual const char *getPassName() const {
+ return "Alpha DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ Op0 = Op;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ return false;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "AlphaGenDAGISel.inc"
+
+private:
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const AlphaTargetMachine &getTargetMachine() {
+ return static_cast<const AlphaTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const AlphaInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *getGlobalRetAddr();
+ void SelectCALL(SDNode *Op);
+
+ };
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+///
+SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// getGlobalRetAddr - Grab the return address.
+///
+SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
+ unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF);
+ return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case AlphaISD::CALL:
+ SelectCALL(N);
+ return NULL;
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i32),
+ getI64Imm(0));
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+ case AlphaISD::GlobalRetAddr:
+ return getGlobalRetAddr();
+
+ case AlphaISD::DivCall: {
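+    // Integer divide / remainder is lowered to a runtime call: the routine
+    // address goes in R27, the two operands in R24 and R25, and the result is
+    // read back from R27 after the JSRs.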
+ SDValue Chain = CurDAG->getEntryNode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
+ SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
+ Chain.getValue(1));
+ SDNode *CNode =
+ CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue,
+ Chain, Chain.getValue(1));
+ Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
+ SDValue(CNode, 1));
+ return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
+ }
+
+ case ISD::READCYCLECOUNTER: {
+ SDValue Chain = N->getOperand(0);
+ return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
+ Chain);
+ }
+
+ case ISD::Constant: {
+ uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (uval == 0) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Alpha::R31, MVT::i64);
+ ReplaceUses(SDValue(N, 0), Result);
+ return NULL;
+ }
+
+ int64_t val = (int64_t)uval;
+ int32_t val32 = (int32_t)val;
+ if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
+ val >= IMM_LOW + IMM_LOW * IMM_MULT)
+ break; //(LDAH (LDA))
+ if ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
+ // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
+ break; //(zext (LDAH (LDA)))
+ //Else use the constant pool
+ ConstantInt *C = ConstantInt::get(
+ Type::getInt64Ty(*CurDAG->getContext()), uval);
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+ SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI,
+ SDValue(getGlobalBaseReg(), 0));
+ return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
+ CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ EVT T = isDouble ? MVT::f64 : MVT::f32;
+ if (CN->getValueAPF().isPosZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else if (CN->getValueAPF().isNegZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else {
+ report_fatal_error("Unhandled FP constant type");
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
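+      // Compute the comparison with CMPT* in an FP register (swapping or
+      // inverting operands where needed), move the result to an integer
+      // register with FTOIT, then normalize it to 0 / 1 by comparing it
+      // against R31 (always zero) with CMPULT.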
+ unsigned Opc = Alpha::WTF;
+ bool rev = false;
+ bool inv = false;
+ switch(CC) {
+ default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!");
+ case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
+ Opc = Alpha::CMPTEQ; break;
+ case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
+ Opc = Alpha::CMPTLT; break;
+ case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
+ Opc = Alpha::CMPTLE; break;
+ case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
+ Opc = Alpha::CMPTLT; rev = true; break;
+ case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
+ Opc = Alpha::CMPTLE; rev = true; break;
+ case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
+ Opc = Alpha::CMPTEQ; inv = true; break;
+ case ISD::SETO:
+ Opc = Alpha::CMPTUN; inv = true; break;
+ case ISD::SETUO:
+ Opc = Alpha::CMPTUN; break;
+ };
+ SDValue tmp1 = N->getOperand(rev?1:0);
+ SDValue tmp2 = N->getOperand(rev?0:1);
+ SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2);
+ if (inv)
+ cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl,
+ MVT::f64, SDValue(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
+ switch(CC) {
+ case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
+ case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
+ {
+ SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64,
+ SDValue(cmp2, 0), SDValue(cmp, 0));
+ break;
+ }
+ default: break;
+ }
+
+ SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl,
+ MVT::i64, SDValue(cmp, 0));
+ return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDValue(LD,0));
+ }
+ break;
+
+ case ISD::AND: {
+ ConstantSDNode* SC = NULL;
+ ConstantSDNode* MC = NULL;
+ if (N->getOperand(0).getOpcode() == ISD::SRL &&
+ (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
+ (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
+ uint64_t sval = SC->getZExtValue();
+ uint64_t mval = MC->getZExtValue();
+      // If the result is a zap, let the auto-generated patterns handle it.
+ if (get_zapImm(N->getOperand(0), mval))
+ break;
+      // Given mask X and shift S, we want to see if there is any zap in the
+      // mask if we play around with the bottom S bits.
+ uint64_t dontcare = (~0ULL) >> (64 - sval);
+ uint64_t mask = mval << sval;
+
+ if (get_zapImm(mask | dontcare))
+ mask = mask | dontcare;
+
+ if (get_zapImm(mask)) {
+ SDValue Z =
+ SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z,
+ getI64Imm(sval));
+ }
+ }
+ break;
+ }
+
+ }
+
+ return SelectCode(N);
+}
+
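+/// SelectCALL - Select an AlphaISD::CALL node. GP-relative callees are reached
+/// with BSR after the GOT address is copied into R29; all other callees are
+/// copied into R27 (the procedure value register) and called through JSR.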
+void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
+  // TODO: add flag stuff to prevent nondeterministic breakage!
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) {
+ SDValue GOT = SDValue(getGlobalBaseReg(), 0);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
+ MVT::Glue, Addr.getOperand(0),
+ Chain, InFlag), 0);
+ } else {
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
+ MVT::Glue, Chain, InFlag), 0);
+ }
+ InFlag = Chain.getValue(1);
+
+ ReplaceUses(SDValue(N, 0), Chain);
+ ReplaceUses(SDValue(N, 1), InFlag);
+}
+
+
+/// createAlphaISelDag - This pass converts a legalized DAG into a
+/// Alpha-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
+ return new AlphaDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
new file mode 100644
index 000000000000..de003fb4c65e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -0,0 +1,956 @@
+//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaISelLowering.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the TargetLowering object.
+ //I am having problems with shr n i8 1
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
+ addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
+ addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+
+ if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM , MVT::i64, Custom);
+ setOperationAction(ISD::UREM , MVT::i64, Custom);
+ setOperationAction(ISD::SDIV , MVT::i64, Custom);
+ setOperationAction(ISD::UDIV , MVT::i64, Custom);
+
+ setOperationAction(ISD::ADDC , MVT::i64, Expand);
+ setOperationAction(ISD::ADDE , MVT::i64, Expand);
+ setOperationAction(ISD::SUBC , MVT::i64, Expand);
+ setOperationAction(ISD::SUBE , MVT::i64, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Promote);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Promote);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Not implemented yet.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool and
+ // ExternalSymbols nodes into the appropriate instructions to
+ // materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i32, Custom);
+
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(Alpha::R30);
+
+ setJumpBufSize(272);
+ setJumpBufAlignment(16);
+
+ setMinFunctionAlignment(4);
+
+ computeRegisterProperties();
+}
+
+MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i64;
+}
+
+const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
+ case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
+ case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
+ case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
+ case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
+ case AlphaISD::RelLit: return "Alpha::RelLit";
+ case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
+ case AlphaISD::CALL: return "Alpha::CALL";
+ case AlphaISD::DivCall: return "Alpha::DivCall";
+ case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
+ case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
+ case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
+ }
+}
+
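+// LowerJumpTable - Materialize the address of a jump table as a GP-relative
+// high / low pair: GPRelHi computes the high part against the GOT and GPRelLo
+// adds the low part.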
+static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
+ return Lo;
+}
+
+//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
+//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
+
+//For now, just use variable size stack frame format
+
+//In a standard call, the first six items are passed in registers $16
+//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
+//of argument-to-register correspondence.) The remaining items are
+//collected in a memory argument list that is a naturally aligned
+//array of quadwords. In a standard call, this list, if present, must
+//be passed at 0(SP).
+//7 ... n 0(SP) ... (n-7)*8(SP)
+
+// //#define FP $15
+// //#define RA $26
+// //#define PV $27
+// //#define GP $29
+// //#define SP $30
+
+#include "AlphaGenCallingConv.inc"
+
+SDValue
+AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Alpha target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are collected in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into registers. The InFlag
+  // is necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
+ unsigned args_int[] = {
+ Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+ unsigned args_float[] = {
+ Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue argt;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ SDValue ArgVal;
+
+ if (ArgNo < 6) {
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::f64:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::f32:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F4RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::i64:
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ &Alpha::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
+ break;
+ }
+    } else { // Remaining arguments are passed on the stack.
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
+
+      // Create the SelectionDAG nodes corresponding to a load from this
+      // parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ }
+ InVals.push_back(ArgVal);
+ }
+
+  // If the function takes a variable number of arguments, copy all argument
+  // registers to the stack.
+ if (isVarArg) {
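+    // Spill the six integer and six FP argument registers to fixed frame
+    // slots (at offsets -8*(6-i) and -8*(12-i) respectively) so va_arg can
+    // walk them later.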
+ FuncInfo->setVarArgsOffset(Ins.size() * 8);
+ std::vector<SDValue> LS;
+ for (int i = 0; i < 6; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
+ args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+ SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
+ if (i == 0) FuncInfo->setVarArgsBase(FI);
+ SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
+ false, false, 0));
+
+ if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
+ args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+ argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
+ SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ //Set up a token factor with all the stack traffic
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
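+  // Copy the saved return address back into R26 for the return, then copy up
+  // to two return values into R0/R1 (integer) or F0/F1 (floating point).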
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ DebugLoc(), MVT::i64),
+ SDValue());
+ switch (Outs.size()) {
+ default:
+ llvm_unreachable("Do not know how to return this many arguments!");
+ case 0:
+ break;
+ //return SDValue(); // ret void is legal
+ case 1: {
+ EVT ArgVT = Outs[0].VT;
+ unsigned ArgReg;
+ if (ArgVT.isInteger())
+ ArgReg = Alpha::R0;
+ else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg = Alpha::F0;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+ OutVals[0], Copy.getValue(1));
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
+ break;
+ }
+ case 2: {
+ EVT ArgVT = Outs[0].VT;
+ unsigned ArgReg1, ArgReg2;
+ if (ArgVT.isInteger()) {
+ ArgReg1 = Alpha::R0;
+ ArgReg2 = Alpha::R1;
+ } else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg1 = Alpha::F0;
+ ArgReg2 = Alpha::F1;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+ OutVals[0], Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+ OutVals[1], Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
+ break;
+ }
+ }
+ return DAG.getNode(AlphaISD::RET_FLAG, dl,
+ MVT::Other, Copy, Copy.getValue(1));
+}
+
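+/// LowerVAARG - Compute the address of the next va_arg value.  The Alpha
+/// va_list is a (base pointer, 32-bit offset) pair; a floating-point argument
+/// whose offset still lies in the register save area is fetched 6*8 bytes
+/// below the integer slots, and the stored offset is advanced by 8.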
+void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
+ SDValue &DataPtr,
+ SelectionDAG &DAG) const {
+ Chain = N->getOperand(0);
+ SDValue VAListP = N->getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
+ MachinePointerInfo(VAListS),
+ false, false, 0);
+ SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ Tmp, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+ DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
+ if (N->getValueType(0).isFloatingPoint())
+ {
+ //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
+ SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
+ DAG.getConstant(8*6, MVT::i64));
+ SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
+ DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
+ DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
+ }
+
+ SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
+ DAG.getConstant(8, MVT::i64));
+ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: break; // Don't custom lower most intrinsics.
+ case Intrinsic::alpha_umulh:
+ return DAG.getNode(ISD::MULHU, dl, MVT::i64,
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+
+ case ISD::SRL_PARTS: {
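+    // Double-wide logical shift right: if the shift amount is 64 or more, the
+    // high half becomes zero and the low half is ShOpHi >> (ShAmt - 64);
+    // otherwise the halves are shifted separately and the bits that cross the
+    // boundary are OR'd into the low half.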
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(64, MVT::i64), ShAmt);
+ SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
+ DAG.getConstant(0, MVT::i64), ISD::SETLE);
+ // if 64 - shAmt <= 0
+ SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
+ SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(0, MVT::i64), bm);
+ SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
+ // else
+ SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
+ SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
+ SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
+ Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
+ // Merge
+ SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
+ SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // case ISD::SRA_PARTS:
+
+ // case ISD::SHL_PARTS:
+
+
+ case ISD::SINT_TO_FP: {
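+    // Bitcast the i64 into an FP register, then convert the quadword integer
+    // with cvtqt (to f64) or cvtqs (to f32).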
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ SDValue LD;
+ bool isDouble = Op.getValueType() == MVT::f64;
+ LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
+ isDouble?MVT::f64:MVT::f32, LD);
+ return FP;
+ }
+ case ISD::FP_TO_SINT: {
+ bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
+ SDValue src = Op.getOperand(0);
+
+ if (!isDouble) //Promote
+ src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
+
+ src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
+ }
+ case ISD::ConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
+ // FIXME there isn't really any debug info here
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
+ return Lo;
+ }
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Alpha.");
+ case ISD::GlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
+ GSDN->getOffset());
+ // FIXME there isn't really any debug info here
+
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
+ // && !GV->hasLinkOnceLinkage()) {
+ if (GV->hasLocalLinkage()) {
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
+ return Lo;
+ } else
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+ case ISD::ExternalSymbol: {
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
+ DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
+ ->getSymbol(), MVT::i64),
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+    // Expand only in the constant-divisor case.
+ if (Op.getOperand(1).getOpcode() == ISD::Constant) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
+ BuildUDIV(Op.getNode(), DAG, NULL) :
+ BuildSDIV(Op.getNode(), DAG, NULL);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
+ return Tmp1;
+ }
+ //fall through
+ case ISD::SDIV:
+ case ISD::UDIV:
+ if (Op.getValueType().isInteger()) {
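+      // Divides by a constant are expanded via BuildSDIV/BuildUDIV; anything
+      // else becomes a call to the __divq/__divqu/__remq/__remqu support
+      // routines through the DivCall node.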
+ if (Op.getOperand(1).getOpcode() == ISD::Constant)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
+ : BuildUDIV(Op.getNode(), DAG, NULL);
+ const char* opstr = 0;
+ switch (Op.getOpcode()) {
+ case ISD::UREM: opstr = "__remqu"; break;
+ case ISD::SREM: opstr = "__remq"; break;
+ case ISD::UDIV: opstr = "__divqu"; break;
+ case ISD::SDIV: opstr = "__divq"; break;
+ }
+ SDValue Tmp1 = Op.getOperand(0),
+ Tmp2 = Op.getOperand(1),
+ Addr = DAG.getExternalSymbol(opstr, MVT::i64);
+ return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
+ }
+ break;
+
+ case ISD::VAARG: {
+ SDValue Chain, DataPtr;
+ LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
+
+ SDValue Result;
+ if (Op.getValueType() == MVT::i32)
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ MachinePointerInfo(), MVT::i32, false, false, 0);
+ else
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return Result;
+ }
+ case ISD::VACOPY: {
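+    // A va_list is a (pointer, 32-bit offset) pair: copy the base pointer,
+    // then copy the offset word stored 8 bytes past each pointer.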
+ SDValue Chain = Op.getOperand(0);
+ SDValue DestP = Op.getOperand(1);
+ SDValue SrcP = Op.getOperand(2);
+ const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
+ MachinePointerInfo(SrcS),
+ false, false, 0);
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
+ MachinePointerInfo(DestS),
+ false, false, 0);
+ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
+ DAG.getConstant(8, MVT::i64));
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ NP, MachinePointerInfo(), MVT::i32, false, false, 0);
+ SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
+ MachinePointerInfo(), MVT::i32,
+ false, false, 0);
+ }
+ case ISD::VASTART: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAListP = Op.getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+    // va_start stores the frame address of VarArgsBase followed by the
+    // VarArgsOffset value.
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP,
+ MachinePointerInfo(VAListS), false, false, 0);
+ SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(S1, dl,
+ DAG.getConstant(FuncInfo->getVarArgsOffset(),
+ MVT::i64),
+ SA2, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+ }
+ case ISD::RETURNADDR:
+ return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
+ //FIXME: implement
+ case ISD::FRAMEADDR: break;
+ }
+
+ return SDValue();
+}
+
+void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ assert(N->getValueType(0) == MVT::i32 &&
+ N->getOpcode() == ISD::VAARG &&
+ "Unknown node to custom promote!");
+
+ SDValue Chain, DataPtr;
+ LowerVAARG(N, Chain, DataPtr, DAG);
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ Results.push_back(Res);
+ Results.push_back(SDValue(Res.getNode(), 1));
+}
+
+
+//Inline Asm
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'f':
+ case 'r':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+AlphaTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'f':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
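+  // 'r' selects the general-purpose registers; 'f' selects the FP register
+  // class that matches the operand width (F8RC for f64, F4RC otherwise).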
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, Alpha::GPRCRegisterClass);
+ case 'f':
+ return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
+ std::make_pair(0U, Alpha::F4RCRegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ assert((MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::CAS64 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::LAS64 ||
+ MI->getOpcode() == Alpha::SWAP32 ||
+ MI->getOpcode() == Alpha::SWAP64) &&
+ "Unexpected instr type to insert");
+
+ bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::SWAP32;
+
+  // The load-locked/store-conditional expansions for these atomic ops all
+  // take the same form:
+  //   start:
+  //     ll
+  //     do stuff (maybe branch to exit)
+  //     sc
+  //     test sc and maybe branch back to start
+  //   exit:
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ sinkMBB->splice(sinkMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ F->insert(It, llscMBB);
+ F->insert(It, sinkMBB);
+
+ BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
+
+ unsigned reg_res = MI->getOperand(0).getReg(),
+ reg_ptr = MI->getOperand(1).getReg(),
+ reg_v2 = MI->getOperand(2).getReg(),
+ reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+ reg_res).addImm(0).addReg(reg_ptr);
+ switch (MI->getOpcode()) {
+ case Alpha::CAS32:
+ case Alpha::CAS64: {
+ unsigned reg_cmp
+ = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
+ .addReg(reg_v2).addReg(reg_res);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+ break;
+ }
+ case Alpha::LAS32:
+ case Alpha::LAS64: {
+ BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+ .addReg(reg_res).addReg(reg_v2);
+ break;
+ }
+ case Alpha::SWAP32:
+ case Alpha::SWAP64: {
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(reg_v2).addReg(reg_v2);
+ break;
+ }
+ }
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+ .addReg(reg_store).addImm(0).addReg(reg_ptr);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_store).addMBB(llscMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(sinkMBB);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return sinkMBB;
+}
+
+bool
+AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Alpha target isn't yet aware of offsets.
+ return false;
+}
+
+bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ // +0.0 F31
+ // +0.0f F31
+ // -0.0 -F31
+ // -0.0f -F31
+ return Imm.isZero() || Imm.isNegZero();
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 000000000000..13383f4430f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,142 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+ namespace AlphaISD {
+ enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+      // These correspond to the identically named Alpha instructions.
+ CVTQT_, CVTQS_, CVTTQ_,
+
+ /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+ /// parts of a global address respectively.
+ GPRelHi, GPRelLo,
+
+      /// RelLit - Literal relocation of a global
+ RelLit,
+
+ /// GlobalRetAddr - used to restore the return address
+ GlobalRetAddr,
+
+ /// CALL - Normal call.
+ CALL,
+
+ /// DIVCALL - used for special library calls for div and rem
+ DivCall,
+
+ /// return flag operand
+ RET_FLAG,
+
+ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction.
+ /// *PRC is the input register to compare to zero,
+ /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+ /// DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH_I, COND_BRANCH_F
+
+ };
+ }
+
+ class AlphaTargetLowering : public TargetLowering {
+ public:
+ explicit AlphaTargetLowering(TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
+ /// getSetCCResultType - Get the SETCC result ValueType
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ // Friendly names for dumps
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ private:
+ // Helpers for custom lowering.
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ };
+}
+
+#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 000000000000..6f4ebf279643
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,268 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//3.3:
+//Memory
+//Branch
+//Operate
+//Floating-point
+//PALcode
+
+def u8imm : Operand<i64>;
+def s14imm : Operand<i64>;
+def s16imm : Operand<i64>;
+def s21imm : Operand<i64>;
+def s64imm : Operand<i64>;
+def u64imm : Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+// Alpha instruction baseline
+class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "Alpha";
+ let AsmString = asmstr;
+ let Inst{31-26} = op;
+ let Itinerary = itin;
+}
+
+
+//3.3.1
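+//Memory instruction format: Ra, Rb (base register) and a 16-bit displacement.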
+class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let canFoldAsLoad = load;
+ let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
+
+ bits<5> Ra;
+ bits<16> disp;
+ bits<5> Rb;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+
+ let OutOperandList = (outs GPRC:$RA);
+ let InOperandList = (ins);
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+
+class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern=pattern;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+
+//3.3.2
+def target : Operand<OtherVT> {}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+ bits<64> Opc; //dummy
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs);
+ let InOperandList = (ins target:$DISP);
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+
+//3.3.3
+class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = 31;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+// let Constraints = "$RFALSE = $RDEST";
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+
+class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, u8imm:$L);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+// let Constraints = "$RFALSE = $RDEST";
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+//3.3.4
+class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+
+ bits<5> Fc;
+ bits<5> Fa;
+ bits<5> Fb;
+ bits<11> Function = fun;
+
+ let Inst{25-21} = Fa;
+ let Inst{20-16} = Fb;
+ let Inst{15-5} = Function;
+ let Inst{4-0} = Fc;
+}
+
+//3.3.5
+class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+ bits<26> Function;
+
+ let Inst{25-0} = Function;
+}
+
+
+// Pseudo instructions.
+class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<0, nm, itin> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let Pattern = pattern;
+
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
new file mode 100644
index 000000000000..4dcec8f31750
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -0,0 +1,383 @@
+//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AlphaGenInstrInfo.inc"
+using namespace llvm;
+
+AlphaInstrInfo::AlphaInstrInfo()
+ : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ RI(*this) {
+}
+
+
+unsigned
+AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::LDL:
+ case Alpha::LDQ:
+ case Alpha::LDBU:
+ case Alpha::LDWU:
+ case Alpha::LDS:
+ case Alpha::LDT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned
+AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::STL:
+ case Alpha::STQ:
+ case Alpha::STB:
+ case Alpha::STW:
+ case Alpha::STS:
+ case Alpha::STT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static bool isAlphaIntCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ:
+ case Alpha::BNE:
+ case Alpha::BGE:
+ case Alpha::BGT:
+ case Alpha::BLE:
+ case Alpha::BLT:
+ case Alpha::BLBC:
+ case Alpha::BLBS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Alpha branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
+ else // Conditional branch
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
+ return 2;
+}
+
+void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
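+  // Integer copies are emitted as BIS (OR) of the source with itself; FP
+  // copies use CPYSS/CPYST for the 32- and 64-bit register classes.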
+ if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ llvm_unreachable("Attempt to copy register that is not GPR or FPR");
+ }
+}
+
+void
+AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
+ // << FrameIdx << "\n";
+ //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STT))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STQ))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ llvm_unreachable("Unhandled register class");
+}
+
+void
+AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
+ // << FrameIdx << "\n";
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ llvm_unreachable("Unhandled register class");
+}
+
+static unsigned AlphaRevCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ: return Alpha::BNE;
+ case Alpha::BNE: return Alpha::BEQ;
+ case Alpha::BGE: return Alpha::BLT;
+ case Alpha::BGT: return Alpha::BLE;
+ case Alpha::BLE: return Alpha::BGT;
+ case Alpha::BLT: return Alpha::BGE;
+ case Alpha::BLBC: return Alpha::BLBS;
+ case Alpha::BLBS: return Alpha::BLBC;
+ case Alpha::FBEQ: return Alpha::FBNE;
+ case Alpha::FBNE: return Alpha::FBEQ;
+ case Alpha::FBGE: return Alpha::FBLT;
+ case Alpha::FBGT: return Alpha::FBLE;
+ case Alpha::FBLE: return Alpha::FBGT;
+ case Alpha::FBLT: return Alpha::FBGE;
+ default:
+ llvm_unreachable("Unknown opcode");
+ }
+ return 0; // Not reached
+}
+
+// Branch analysis.
+bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Alpha::BR) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
+ if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Alpha::BRs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Alpha::BR &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != Alpha::BR &&
+ I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+}
+
+bool AlphaInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
+ Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
+ unsigned GlobalBaseReg = AlphaFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Alpha::R29);
+ RegInfo.addLiveIn(Alpha::R29);
+
+ AlphaFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+/// getGlobalRetAddr - Return a virtual register initialized with the
+/// global return address register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
+ AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
+ unsigned GlobalRetAddr = AlphaFI->getGlobalRetAddr();
+ if (GlobalRetAddr != 0)
+ return GlobalRetAddr;
+
+ // Insert the set of GlobalRetAddr into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalRetAddr).addReg(Alpha::R26);
+ RegInfo.addLiveIn(Alpha::R26);
+
+ AlphaFI->setGlobalRetAddr(GlobalRetAddr);
+ return GlobalRetAddr;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h
new file mode 100644
index 000000000000..337a85cdf22d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h
@@ -0,0 +1,85 @@
+//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAINSTRUCTIONINFO_H
+#define ALPHAINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AlphaRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AlphaGenInstrInfo.inc"
+
+namespace llvm {
+
+class AlphaInstrInfo : public AlphaGenInstrInfo {
+ const AlphaRegisterInfo RI;
+public:
+ AlphaInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// getGlobalBaseReg - Return a virtual register initialized with the
+  /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+  /// getGlobalRetAddr - Return a virtual register initialized with the
+  /// global return address register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalRetAddr(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
new file mode 100644
index 000000000000..b20171224e29
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
@@ -0,0 +1,1157 @@
+//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+ SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
+
+def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
+def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//********************
+//Patterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ //invert
+ return getI64Imm(~N->getZExtValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ //negate
+ return getI64Imm(~N->getZExtValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ //lda part of constant
+ return getI64Imm(get_lda16(N->getZExtValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
+ return getI64Imm(get_ldah16(N->getZExtValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
+ ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+ return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+ return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+ uint64_t x =
+ abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
+ return getI64Imm(Log2_64(x));
+}]>;
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+ return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+ return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+ return ((uint64_t)~N->getZExtValue() + 1) ==
+ (uint8_t)((uint64_t)~N->getZExtValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ (int64_t)N->getZExtValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ ((int64_t)N->getZExtValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS) return 0;
+ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
+ return build != 0;
+}]>;
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+ (void)N; // silence warning.
+ return true;
+}]>;
+
+def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
+def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
+def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
+def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
+def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
+def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
+def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
+def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
+def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
+def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
+
+def immRemP2n : PatLeaf<(imm), [{
+ return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
+ N->getZExtValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+ return isPowerOf2_64(N->getZExtValue() -
+ getNearPower2((uint64_t)N->getZExtValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+ int64_t d = abs64((int64_t)N->getZExtValue() -
+ (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
+ if (isPowerOf2_64(d)) return false;
+ switch (d) {
+ case 1: case 3: case 5: return false;
+ default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+ };
+}]>;
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 2), node:$op2)>;
+def sub4 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 2), node:$op2)>;
+def add8 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 3), node:$op2)>;
+def sub8 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
+
+//Pseudo ops for selection
+
+def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
+
+let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
+ "; ADJUP $amt",
+ [(callseq_start timm:$amt)], s_pseudo>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
+ "; ADJDOWN $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
+}
+
+def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+ "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+
+def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+
+def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+}
+
+//***********************
+//Real instructions
+//***********************
+
+//Operation Form:
+
+//conditional moves, int
+
+multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
+def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+}
+
+defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
+defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
+defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
+defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
+defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
+defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
+defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
+defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
+
+//General pattern for cmov
+def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
+ (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
+def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
+ (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
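+// Note: a cmov only overwrites $RDEST when its condition holds; the false
+// value comes in through the operand that OForm4 presumably ties to $RDEST.
+// That is why the generic selects above pass $src2 (the false value) first
+// and $src1 (the true value) second, inverting the sense where needed.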
+
+//Invert sense when we can for constants:
+def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+
+multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
+ string asmstr, PatFrag OpNode, InstrItinClass itin> {
+ def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
+ def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
+ def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+ def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
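+// Each all_inst entry expands to four instructions: the L forms match the op
+// wrapped in intop (sext_inreg to i32), giving the canonically sign-extended
+// 32-bit result, while the Q forms are plain 64-bit; the r/i suffixes select
+// a register RB or an 8-bit zero-extended immediate.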
+
+defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
+defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
+defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
+defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
+defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
+defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
+defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
+//Const cases since legalize does sub x, int -> add x, inv(int) + 1
+def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
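+// e.g. legalization turns "sub $x, 100" into "add $x, -100"; -100 does not
+// fit immUExt8, but its negation does, so the patterns above recover the
+// immediate subtract forms (SUBQi etc.) instead of materializing -100.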
+
+multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
+}
+
+defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
+defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
+defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
+defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
+defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
+defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
+
+defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
+defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
+defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
+defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
+
+def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
+ [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
+def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
+ [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
+def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
+ [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
+def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
+def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
+def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
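+// Note: the three patterns above model "shift $RA right by 8*$RB and mask to
+// the element width", which is what extbl/extwl/extll compute when $RB holds
+// a byte offset (the hardware only looks at the low three bits of $RB).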
+def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
+def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
+
+//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
+//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
+//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
+//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
+//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
+//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
+//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
+//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
+//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
+//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
+//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
+
+//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
+//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
+//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
+//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
+//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
+//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
+//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
+//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
+//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
+//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
+//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
+//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
+//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
+//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
+
+//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
+//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
+//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
+//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
+//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
+//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
+//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
+//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
+//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
+//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
+//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
+//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
+//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
+//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
+
+def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
+
+// Define the pattern that produces ZAPNOTi.
+def : Pat<(zappat:$imm GPRC:$RA),
+ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
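+// zapnot keeps the bytes selected by an 8-bit mask and clears the rest, so an
+// AND whose constant has only all-ones or all-zeros byte lanes (zappat) fits
+// in one instruction; iZAPX derives the mask, e.g. "and $x, 0xffffffff"
+// becomes "zapnot $x, 15, $dst".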
+
+
+//Comparison, int
+//Using cmpbge only to compare the low bytes wastes most of what the instruction can do, but it still saves an instruction here
+def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
+def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
+def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
+
+//Patterns for unsupported int comparisons
+def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
+def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
+
+def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
+
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+ def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+ def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
+def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0",
+ [(brind GPRC:$RS)], s_jsr>; //Jump
+
+let isCall = 1, Ra = 26,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
+ def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
+}
+let isCall = 1, Ra = 26, Rb = 27, disp = 0,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
+ def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+}
+
+let isCall = 1, Ra = 23, Rb = 27, disp = 0,
+ Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
+ def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+
+
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+
+
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
+ [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
+ [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+}
+
+
+let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
+ [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
+ [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
+ [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
+ [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
+ [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+}
+
+//Load address
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
+ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
+def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
+def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
+ [], s_lda>; //Load address high
+def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
+ [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
+}
+
+let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
+ [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
+ [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
+ [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
+ [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
+ [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
+ [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
+ [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
+ [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+
+
+//constant pool relocation patterns
+def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDQr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDLr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDBUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDWUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
+ (LDAr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
+ (LDAHr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDSr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDTr tconstpool:$DISP, GPRC:$RB)>;
+
+//jump table relocation patterns
+def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
+ (LDAHr tjumptable:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
+ (LDAr tjumptable:$DISP, GPRC:$RB)>;
+
+
+//misc ext patterns
+def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDBU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDWU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDL immSExt16:$DISP, GPRC:$RB)>;
+
+//0 disp patterns
+def : Pat<(i64 (load GPRC:$addr)),
+ (LDQ 0, GPRC:$addr)>;
+def : Pat<(f64 (load GPRC:$addr)),
+ (LDT 0, GPRC:$addr)>;
+def : Pat<(f32 (load GPRC:$addr)),
+ (LDS 0, GPRC:$addr)>;
+def : Pat<(i64 (sextloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+
+def : Pat<(store GPRC:$DATA, GPRC:$addr),
+ (STQ GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+ (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+ (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+ (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+ (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+ (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
+
+//load address, relocated gpdist form
+let OutOperandList = (outs GPRC:$RA),
+ InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+ mayLoad = 1 in {
+def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, relocated literal form
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+ [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+ (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+let OutOperandList = (outs GPRC:$RR),
+ InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+ Constraints = "$RA = $RR",
+ DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
+def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
+}
+let OutOperandList = (outs GPRC:$RA),
+ InOperandList = (ins s64imm:$DISP, GPRC:$RB),
+ mayLoad = 1 in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
+}
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
+def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
+
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
+ (WMB)>;
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
+ (MB)>;
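+// The more specific pattern is matched first: a membarrier whose fourth
+// operand is the constant 1 (which appears to mark a write-only barrier)
+// lowers to the cheaper wmb; every other combination gets a full mb.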
+
+//Basic Floating point ops
+
+//Floats
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+ [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+ [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
+def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
+def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
+def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
+
+def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
+
+def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
+def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
+def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
+def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
+}
+
+//More CPYS forms:
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in {
+def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in {
+def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
+}
+
+//conditional moves, floats
+let OutOperandList = (outs F4RC:$RDEST),
+ InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQS : FPForm<0x17, 0x02A,
+ "fcmoveq $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D,
+ "fcmovge $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F,
+ "fcmovgt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E,
+ "fcmovle $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C,
+ "fcmovlt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B,
+ "fcmovne $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if != zero
+}
+//conditional moves, doubles
+let OutOperandList = (outs F8RC:$RDEST),
+ InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+}
+
+//misc FP selects
+//Select double
+
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+//Select single
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
+ [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+ [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+ [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+ [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+ [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+
+def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
+ (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
+def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
+ (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+let Ra = 31, isBarrier = 1 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+ s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+ s_fbr>;
+//Branches, int
+def BEQ : br_icc<0x39, "beq">;
+def BGE : br_icc<0x3E, "bge">;
+def BGT : br_icc<0x3F, "bgt">;
+def BLBC : br_icc<0x38, "blbc">;
+def BLBS : br_icc<0x3C, "blbs">;
+def BLE : br_icc<0x3B, "ble">;
+def BLT : br_icc<0x3A, "blt">;
+def BNE : br_icc<0x3D, "bne">;
+
+//Branches, float
+def FBEQ : br_fcc<0x31, "fbeq">;
+def FBGE : br_fcc<0x36, "fbge">;
+def FBGT : br_fcc<0x37, "fbgt">;
+def FBLE : br_fcc<0x33, "fble">;
+def FBLT : br_fcc<0x32, "fblt">;
+def FBNE : br_fcc<0x35, "fbne">;
+}
+
+//An ugly trick to encode the real branch opcode as an immediate we can use
+def immBRCond : SDNodeXForm<imm, [{
+ switch((uint64_t)N->getZExtValue()) {
+ default: assert(0 && "Unknown branch type");
+ case 0: return getI64Imm(Alpha::BEQ);
+ case 1: return getI64Imm(Alpha::BNE);
+ case 2: return getI64Imm(Alpha::BGE);
+ case 3: return getI64Imm(Alpha::BGT);
+ case 4: return getI64Imm(Alpha::BLE);
+ case 5: return getI64Imm(Alpha::BLT);
+ case 6: return getI64Imm(Alpha::BLBS);
+ case 7: return getI64Imm(Alpha::BLBC);
+ case 20: return getI64Imm(Alpha::FBEQ);
+ case 21: return getI64Imm(Alpha::FBNE);
+ case 22: return getI64Imm(Alpha::FBGE);
+ case 23: return getI64Imm(Alpha::FBGT);
+ case 24: return getI64Imm(Alpha::FBLE);
+ case 25: return getI64Imm(Alpha::FBLT);
+ }
+}]>;
+
+//Int cond patterns
+def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+
+def : Pat<(brcond GPRC:$RA, bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
+
+//FP cond patterns
+def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
+
+
+def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+
+def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+
+//End Branches
+
+//S_floating : IEEE Single
+//T_floating : IEEE Double
+
+//Unused instructions
+//Mnemonic Format Opcode Description
+//CALL_PAL Pcd 00 Trap to PALcode
+//ECB Mfc 18.E800 Evict cache block
+//EXCB Mfc 18.0400 Exception barrier
+//FETCH Mfc 18.8000 Prefetch data
+//FETCH_M Mfc 18.A000 Prefetch data, modify intent
+//LDQ_U Mem 0B Load unaligned quadword
+//MB Mfc 18.4000 Memory barrier
+//STQ_U Mem 0F Store unaligned quadword
+//TRAPB Mfc 18.0000 Trap barrier
+//WH64 Mfc 18.F800 Write hint - 64 bytes
+//WMB Mfc 18.4400 Write memory barrier
+//MF_FPCR F-P 17.025 Move from FPCR
+//MT_FPCR F-P 17.024 Move to FPCR
+//These are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
+//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
+//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
+//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
+//CVTLQ F-P 17.010 Convert longword to quadword
+//CVTQL F-P 17.030 Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair
+ int64_t val = (int64_t)N->getZExtValue();
+ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair with zeroext
+ uint64_t uval = N->getZExtValue();
+ int32_t val32 = (int32_t)uval;
+ return ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_FULLHIGH);
+// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
+def : Pat<(i64 immConst2Part:$imm),
+ (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+ (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+ (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+ (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))),
+ (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>;
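+// Rough example: 0x12345678 splits into "ldah $r,0x1234($31)" followed by
+// "lda $r,0x5678($r)" (LH16/LL16 pick the two halves, bumping the high part
+// by one when the low half would sign-extend negative); the *Int variants
+// also zapnot with 15 to clear the upper 32 bits.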
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0),
+// (R31)>;
+//def : Pat<(f64 0.0),
+// (F31)>;
+//def : Pat<(f64 -0.0),
+// (CPYSNT F31, F31)>;
+//def : Pat<(f32 0.0),
+// (F31)>;
+//def : Pat<(f32 -0.0),
+// (CPYSNS F31, F31)>;
+
+//Misc Patterns:
+
+def : Pat<(sext_inreg GPRC:$RB, i32),
+ (ADDLi GPRC:$RB, 0)>;
+
+def : Pat<(fabs F8RC:$RB),
+ (CPYST F31, F8RC:$RB)>;
+def : Pat<(fabs F4RC:$RB),
+ (CPYSS F31, F4RC:$RB)>;
+def : Pat<(fneg F8RC:$RB),
+ (CPYSNT F8RC:$RB, F8RC:$RB)>;
+def : Pat<(fneg F4RC:$RB),
+ (CPYSNS F4RC:$RB, F4RC:$RB)>;
+
+def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
+ (CPYSNS F4RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
+ (CPYSNT F8RC:$B, F8RC:$A)>;
+def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
+ (CPYSNSt F8RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
+ (CPYSNTs F4RC:$B, F8RC:$A)>;
+
+//Yes, signed multiply high is ugly
+def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
+ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
+ (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
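+// This uses the identity mulhs(a,b) = umulh(a,b) - (a<0 ? b : 0) - (b<0 ? a : 0)
+// (mod 2^64): each CMOVGEr yields $31 (zero) when its condition register is
+// non-negative and the other operand otherwise; the two corrections are added
+// and subtracted from the unsigned high half.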
+
+//Strength-reduction tricks for multiplication by constants:
+let AddedComplexity = 1 in {
+def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
+
+//slight tree expansion if we are multiplying near to a power of 2
+//n is above a power of 2
+def : Pat<(mul GPRC:$RA, immRem1:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4:$imm),
+ (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
+def : Pat<(mul GPRC:$RA, immRem5:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
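+// Worked example: 33 is one above 2^5, so it matches immRem1 and "mul $x, 33"
+// roughly lowers to "sll $x,5,$t; addq $t,$x,$dst"; 68 = 2^6 + 2^2 matches
+// immRemP2 and becomes a shift-by-6 added to a shift-by-2.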
+
+//n is below a power of 2
+//FIXME: figure out why something is truncating the imm to 32bits
+// this will fix 2007-11-27-mulneg3
+//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
+//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
+//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
+} //Added complexity
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp b/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 000000000000..85fbfd1affe2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,158 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller.
+// We also align some branch targets if we can do so for free.
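+// (Descriptive note: two memory operations hitting the same base/displacement
+//  within one aligned group of four instructions can trigger a replay trap on
+//  Alpha; the pass watches stack accesses (base register $30, the usual
+//  spiller output) and inserts "bis $31,$31,$31" nops so the second access
+//  falls into the next fetch group.)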
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+ cl::opt<bool>
+ AlignAll("alpha-align-all", cl::Hidden,
+ cl::desc("Align all blocks"));
+
+ struct AlphaLLRPPass : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ AlphaTargetMachine &TM;
+
+ static char ID;
+ AlphaLLRPPass(AlphaTargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Alpha NOP inserter";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
+ bool Changed = false;
+ MachineInstr* prev[3] = {0,0,0};
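+ // prev[0..2] remember the memory instructions already issued in the current
+ // aligned group of four; they are cleared again at each fetch boundary.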
+ DebugLoc dl;
+ unsigned count = 0;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ MachineBasicBlock& MBB = *FI;
+ bool ub = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (count%4 == 0)
+ prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
+ ++count;
+ MachineInstr *MI = I++;
+ switch (MI->getOpcode()) {
+ case Alpha::LDQ: case Alpha::LDL:
+ case Alpha::LDWU: case Alpha::LDBU:
+ case Alpha::LDT: case Alpha::LDS:
+ case Alpha::STQ: case Alpha::STL:
+ case Alpha::STW: case Alpha::STB:
+ case Alpha::STT: case Alpha::STS:
+ if (MI->getOperand(2).getReg() == Alpha::R30) {
+ if (prev[0] &&
+ prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
+ prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 1;
+ count += 1;
+ } else if (prev[1]
+ && prev[1]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[1]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[2];
+ prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 2;
+ count += 2;
+ } else if (prev[2]
+ && prev[2]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[2]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ Changed = true; nopintro += 3;
+ count += 3;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = MI;
+ break;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ case Alpha::ALTENT:
+ case Alpha::MEMLABEL:
+ case Alpha::PCLABEL:
+ --count;
+ break;
+ case Alpha::BR:
+ case Alpha::JMP:
+ ub = true;
+ //fall through
+ default:
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ }
+ }
+ if (ub || AlignAll) {
+ //the block ended in an unconditional branch (or -alpha-align-all is set),
+ //so padding to a 4-instruction fetch boundary here is free
+ while (count % 4) {
+ BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ ++count;
+ ++nopalign;
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ }
+ }
+ }
+ return Changed;
+ }
+ };
+ char AlphaLLRPPass::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
+ return new AlphaLLRPPass(tm);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h
new file mode 100644
index 000000000000..186738c20c70
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -0,0 +1,62 @@
+//===- AlphaMachineFunctionInfo.h - Alpha machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Alpha-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMACHINEFUNCTIONINFO_H
+#define ALPHAMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// AlphaMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private Alpha target-specific information for each
+/// MachineFunction.
+class AlphaMachineFunctionInfo : public MachineFunctionInfo {
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+  /// use as the global base register. This is used in some PIC
+  /// relocation models.
+ unsigned GlobalBaseReg;
+
+  /// GlobalRetAddr - keeps track of the virtual register initialized for
+ /// the return address value.
+ unsigned GlobalRetAddr;
+
+  /// VarArgsOffset - The offset to the first vararg argument.
+ int VarArgsOffset;
+  /// VarArgsBase - The base FrameIndex used for vararg access.
+ int VarArgsBase;
+
+public:
+ AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0),
+ VarArgsOffset(0), VarArgsBase(0) {}
+
+ explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
+ GlobalRetAddr(0),
+ VarArgsOffset(0),
+ VarArgsBase(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ unsigned getGlobalRetAddr() const { return GlobalRetAddr; }
+ void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
+
+ int getVarArgsOffset() const { return VarArgsOffset; }
+ void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
+
+ int getVarArgsBase() const { return VarArgsBase; }
+ void setVarArgsBase(int Base) { VarArgsBase = Base; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
new file mode 100644
index 000000000000..df8f157266e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -0,0 +1,215 @@
+//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(),
+ TII(tii) {
+}
+
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
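+// Illustrative example (not from the original source): assuming IMM_MULT is
+// 65536 and IMM_HIGH is 32767 (per Alpha.h), l == 0x12348000 gives
+// getUpper16(l) == 0x1235 and getLower16(l) == -0x8000, and indeed
+// 0x1235 * 65536 - 0x8000 == 0x12348000. Rounding the upper half up keeps the
+// lower half within the signed 16-bit range of LDA/LDAH displacements.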
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ Alpha::R9, Alpha::R10,
+ Alpha::R11, Alpha::R12,
+ Alpha::R13, Alpha::R14,
+ Alpha::F2, Alpha::F3,
+ Alpha::F4, Alpha::F5,
+ Alpha::F6, Alpha::F7,
+ Alpha::F8, Alpha::F9, 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+  Reserved.set(Alpha::R15); // frame pointer
+  Reserved.set(Alpha::R29); // global pointer
+  Reserved.set(Alpha::R30); // stack pointer
+  Reserved.set(Alpha::R31); // zero register
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+    // If we have a frame pointer, turn the ADJUSTSTACKDOWN pseudo into an SP
+    // decrement and the ADJUSTSTACKUP pseudo into an SP increment, both
+    // implemented as 'lda $30, +/-<amt>($30)'.
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
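+      // For example (illustrative only): with a 16-byte stack alignment, an
+      // Amount of 20 rounds up to 32, since (20 + 15) / 16 * 16 == 32.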
+
+ MachineInstr *New;
+ if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(-Amount).addReg(Alpha::R30);
+ } else {
+ assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(Amount).addReg(Alpha::R30);
+ }
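+      // LDA computes Ra = Rb + sext(disp), so the single LDA on $30 above is
+      // enough to move the stack pointer up or down by Amount.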
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+// Alpha has a slightly unusual stack layout:
+//   Args
+//   <- incoming SP
+//   fixed locals (and spills, callee-saved registers, etc.)
+//   <- FP
+//   variable-sized locals
+//   <- SP
+
+void
+AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ bool FP = TFI->hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Add the base register of R30 (SP) or R15 (FP).
+ MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+ // Now add the frame object offset to the offset from the virtual frame index.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
+
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ DEBUG(errs() << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
+
+ if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
+ DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n");
+    // The offset does not fit in a signed 16-bit displacement, so use the
+    // temporary register R28 and move the original instruction off of the
+    // SP/FP base.
+    // Fix up the original instruction:
+ MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+ MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+    // Insert the new instruction that loads the high half of the offset:
+ MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
+ TII.get(Alpha::LDAH), Alpha::R28)
+ .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+ MBB.insert(II, nMI);
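+    // Net effect (sketch): the LDAH sets R28 = base + getUpper16(Offset)*65536,
+    // and the rewritten instruction then addresses getLower16(Offset)(R28), so
+    // the full offset is reachable even though each displacement field is only
+    // 16 bits wide.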
+ } else {
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+unsigned AlphaRegisterInfo::getRARegister() const {
+ return Alpha::R26;
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
+}
+
+unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+std::string AlphaRegisterInfo::getPrettyName(unsigned reg) {
+ std::string s(AlphaRegDesc[reg].Name);
+ return s;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
new file mode 100644
index 000000000000..1072bf73f199
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -0,0 +1,60 @@
+//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAREGISTERINFO_H
+#define ALPHAREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AlphaGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ AlphaRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ static std::string getPrettyName(unsigned reg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td
new file mode 100644
index 000000000000..32120d750413
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -0,0 +1,133 @@
+//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Alpha register set.
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 64-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
+def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
+def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
+def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
+def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
+def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
+def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
+def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
+def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
+def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
+def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
+
+// Reminder: FP = $15, RA = $26, PV = $27, GP = $29, SP = $30.
+// $28 is undefined after any and all calls.
+
+/// Register classes
+def GPRC : RegisterClass<"Alpha", [i64], 64, (add
+ // Volatile
+ R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
+ //Special meaning, but volatile
+ R27, //procedure address
+ R26, //return address
+ R29, //global offset table address
+ // Non-volatile
+ R9, R10, R11, R12, R13, R14,
+  // R15 (FP), R30 (SP), and R31 (zero) are reserved and never allocated.
+  R15, R30, R31)>;
+
+def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31)>; //zero
+
+def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h b/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h
new file mode 100644
index 000000000000..4c92045d4696
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h
@@ -0,0 +1,31 @@
+//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHARELOCATIONS_H
+#define ALPHARELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Alpha {
+ enum RelocationType {
+ reloc_literal,
+ reloc_gprellow,
+ reloc_gprelhigh,
+ reloc_gpdist,
+ reloc_bsr
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
new file mode 100644
index 000000000000..3703dd4fa9f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
@@ -0,0 +1,85 @@
+//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// This is Table 2-2 from the 21264 Compiler Writer's Guide, with some
+// modifications.
+
+//Pipelines
+
+def L0 : FuncUnit;
+def L1 : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0 : FuncUnit;
+def U1 : FuncUnit;
+def FA : FuncUnit;
+def FM : FuncUnit;
+
+def s_ild : InstrItinClass;
+def s_fld : InstrItinClass;
+def s_ist : InstrItinClass;
+def s_fst : InstrItinClass;
+def s_lda : InstrItinClass;
+def s_rpcc : InstrItinClass;
+def s_rx : InstrItinClass;
+def s_mxpr : InstrItinClass;
+def s_icbr : InstrItinClass;
+def s_ubr : InstrItinClass;
+def s_jsr : InstrItinClass;
+def s_iadd : InstrItinClass;
+def s_ilog : InstrItinClass;
+def s_ishf : InstrItinClass;
+def s_cmov : InstrItinClass;
+def s_imul : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr : InstrItinClass;
+def s_fadd : InstrItinClass;
+def s_fmul : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi : InstrItinClass;
+def s_itof : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+// Table 2-4: Instruction Class Latency in Cycles, with some modifications.
+
+def Alpha21264Itineraries : ProcessorItineraries<
+ [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
+ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
+ InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
+ InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
+ InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
+ InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
+ InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
+ InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
+ InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
+ InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
+ InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
+ InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
+ InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
+ InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
+ InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+ InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+ InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
+ InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
+ InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..f1958fe6b5ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-selectiondag-info"
+#include "AlphaTargetMachine.h"
+using namespace llvm;
+
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h
new file mode 100644
index 000000000000..3405cc0cdedd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASELECTIONDAGINFO_H
+#define ALPHASELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AlphaTargetMachine;
+
+class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
+ ~AlphaSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 000000000000..624a5e2ebd09
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,36 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AlphaGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 000000000000..70b311683f8b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,49 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASUBTARGET_H
+#define ALPHASUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AlphaGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class AlphaSubtarget : public AlphaGenSubtargetInfo {
+protected:
+
+ bool HasCT;
+
+ InstrItineraryData InstrItins;
+
+public:
+  /// This constructor initializes the data members to match those
+  /// of the specified triple.
+ ///
+ AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+  /// ParseSubtargetFeatures - Parses the features string, setting the
+  /// specified subtarget options. The definition of this function is
+  /// auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasCT() const { return HasCT; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
new file mode 100644
index 000000000000..3b65d41be892
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -0,0 +1,53 @@
+//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeAlphaTarget() {
+ // Register the target.
+ RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
+}
+
+AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ DataLayout("e-f128:128:128-n64"),
+ FrameLowering(Subtarget),
+ Subtarget(TT, CPU, FS),
+ TLInfo(*this),
+ TSInfo(*this) {
+ setRelocationModel(Reloc::PIC_);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createAlphaISelDag(*this));
+ return false;
+}
+bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer
+ PM.add(createAlphaBranchSelectionPass());
+ PM.add(createAlphaLLRPPass(*this));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
new file mode 100644
index 000000000000..cf00e5875d34
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
@@ -0,0 +1,65 @@
+//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_TARGETMACHINE_H
+#define ALPHA_TARGETMACHINE_H
+
+#include "AlphaInstrInfo.h"
+#include "AlphaISelLowering.h"
+#include "AlphaFrameLowering.h"
+#include "AlphaSelectionDAGInfo.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+class AlphaTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ AlphaInstrInfo InstrInfo;
+ AlphaFrameLowering FrameLowering;
+ AlphaSubtarget Subtarget;
+ AlphaTargetLowering TLInfo;
+ AlphaSelectionDAGInfo TSInfo;
+
+public:
+ AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const AlphaRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const AlphaTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
new file mode 100644
index 000000000000..a35e8846e072
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
@@ -0,0 +1,23 @@
+//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCAsmInfo.h"
+using namespace llvm;
+
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = ".gprel32";
+ WeakRefDirective = "\t.weak\t";
+ HasSetDirective = false;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
new file mode 100644
index 000000000000..837844bd29a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct AlphaMCAsmInfo : public MCAsmInfo {
+ explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
new file mode 100644
index 000000000000..562052b6df67
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCTargetDesc.h"
+#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AlphaGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createAlphaMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAlphaMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+}
+
+static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+ createAlphaMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeAlphaMCAsmInfo() {
+ RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
new file mode 100644
index 000000000000..b0619e6cb011
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMCTARGETDESC_H
+#define ALPHAMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheAlphaTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AlphaGenRegisterInfo.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AlphaGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ad0dd26aafb1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMAlphaDesc
+ AlphaMCTargetDesc.cpp
+ AlphaMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..d55175fa69dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Alpha/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
new file mode 100644
index 000000000000..f7099b9ae975
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+llvm::Target llvm::TheAlphaTarget;
+
+extern "C" void LLVMInitializeAlphaTargetInfo() {
+ RegisterTarget<Triple::alpha, /*HasJIT=*/true>
+ X(TheAlphaTarget, "alpha", "Alpha [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/Blackfin.h b/contrib/llvm/lib/Target/Blackfin/Blackfin.h
new file mode 100644
index 000000000000..a00ff4cc3275
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/Blackfin.h
@@ -0,0 +1,31 @@
+//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Blackfin back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_BLACKFIN_H
+#define TARGET_BLACKFIN_H
+
+#include "MCTargetDesc/BlackfinMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ class FunctionPass;
+ class BlackfinTargetMachine;
+
+ FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/Blackfin.td b/contrib/llvm/lib/Target/Blackfin/Blackfin.td
new file mode 100644
index 000000000000..cd90962a9540
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/Blackfin.td
@@ -0,0 +1,202 @@
+//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Blackfin Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true",
+ "Build for SDRAM">;
+
+def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true",
+ "Assume instruction cache lookaside buffers are enabled at runtime">;
+
+//===----------------------------------------------------------------------===//
+// Bugs in the silicon become workarounds in the compiler.
+// See http://www.analog.com/ for the full list of IC anomalies.
+//===----------------------------------------------------------------------===//
+
+def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true",
+ "Work around 05000074 - "
+ "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">;
+
+def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true",
+ "Work around 05000244 - "
+ "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">;
+
+def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true",
+ "Work around 05000245 - "
+ "Access in the Shadow of a Conditional Branch">;
+
+def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true",
+ "Work around 05000257 - "
+ "Interrupt/Exception During Short Hardware Loop">;
+
+def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true",
+ "Work around 05000283 - "
+ "System MMR Write Is Stalled Indefinitely when Killed">;
+
+def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true",
+ "Work around 05000312 - "
+ "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">;
+
+def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly",
+ "wa_killed_mmr", "true",
+ "Work around 05000315 - "
+ "Killed System MMR Write Completes Erroneously on Next System MMR Access">;
+
+def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true",
+ "Work around 05000371 - "
+ "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">;
+
+def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true",
+ "Work around 05000426 - "
+ "Speculative Fetches of Indirect-Pointer Instructions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "BlackfinRegisterInfo.td"
+include "BlackfinCallingConv.td"
+include "BlackfinIntrinsics.td"
+include "BlackfinInstrInfo.td"
+
+def BlackfinInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Blackfin processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, string Suffix, list<SubtargetFeature> Features>
+ : Processor<!strconcat(Name, Suffix), NoItineraries, Features>;
+
+def : Proc<"generic", "", []>;
+
+multiclass Core<string Name,string Suffix,
+ list<SubtargetFeature> Features> {
+ def : Proc<Name, Suffix, Features>;
+ def : Proc<Name, "", Features>;
+ def : Proc<Name, "-none", []>;
+}
+
+multiclass CoreEdinburgh<string Name>
+ : Core<Name, "-0.6", [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS]> {
+ def : Proc<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS]>;
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+}
+multiclass CoreBraemar<string Name>
+ : Core<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreStirling<string Name>
+ : Core<Name, "-0.5", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMoab<string Name>
+ : Core<Name, "-0.3", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreTeton<string Name>
+ : Core<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreKookaburra<string Name>
+ : Core<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMockingbird<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+multiclass CoreBrodie<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+
+defm BF512 : CoreBrodie<"bf512">;
+defm BF514 : CoreBrodie<"bf514">;
+defm BF516 : CoreBrodie<"bf516">;
+defm BF518 : CoreBrodie<"bf518">;
+defm BF522 : CoreMockingbird<"bf522">;
+defm BF523 : CoreKookaburra<"bf523">;
+defm BF524 : CoreMockingbird<"bf524">;
+defm BF525 : CoreKookaburra<"bf525">;
+defm BF526 : CoreMockingbird<"bf526">;
+defm BF527 : CoreKookaburra<"bf527">;
+defm BF531 : CoreEdinburgh<"bf531">;
+defm BF532 : CoreEdinburgh<"bf532">;
+defm BF533 : CoreEdinburgh<"bf533">;
+defm BF534 : CoreBraemar<"bf534">;
+defm BF536 : CoreBraemar<"bf536">;
+defm BF537 : CoreBraemar<"bf537">;
+defm BF538 : CoreStirling<"bf538">;
+defm BF539 : CoreStirling<"bf539">;
+defm BF542 : CoreMoab<"bf542">;
+defm BF544 : CoreMoab<"bf544">;
+defm BF548 : CoreMoab<"bf548">;
+defm BF549 : CoreMoab<"bf549">;
+defm BF561 : CoreTeton<"bf561">;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Blackfin : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = BlackfinInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
new file mode 100644
index 000000000000..6ba258beb2b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
@@ -0,0 +1,156 @@
+//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Blackfin.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class BlackfinAsmPrinter : public AsmPrinter {
+ public:
+ BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Blackfin Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ };
+} // end of anonymous namespace
+
+#include "BlackfinGenAsmWriter.inc"
+
+extern "C" void LLVMInitializeBlackfinAsmPrinter() {
+ RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
+}
+
+void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ O << getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ }
+}
+
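+/// printMemoryOperand - Print a base-plus-offset memory reference. As a sketch
+/// of the output, operands (P0, 8) print as "P0 + 8", while a zero offset
+/// prints as just "P0"; PrintAsmMemoryOperand below additionally wraps the
+/// reference in square brackets.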
+void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << " + ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td b/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td
new file mode 100644
index 000000000000..0abc84c3c405
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td
@@ -0,0 +1,30 @@
+//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Blackfin architectures.
+//
+//===----------------------------------------------------------------------===//
+
+// Blackfin C Calling convention.
+def CC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCIfSRet<CCAssignToReg<[P0]>>,
+ CCAssignToReg<[R0, R1, R2]>,
+ CCAssignToStack<4, 4>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Blackfin C return-value convention.
+def RetCC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCAssignToReg<[R0, R1]>
+]>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
new file mode 100644
index 000000000000..0b0984d2f777
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
@@ -0,0 +1,130 @@
+//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinFrameLowering.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) ||
+ MFI->adjustsStack() || MFI->hasVarSizedObjects();
+}
+
+// Always reserve a call frame. We don't have enough registers to adjust SP.
+bool BlackfinFrameLowering::
+hasReservedCallFrame(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers
+void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ if (FrameSize%4) {
+ FrameSize = (FrameSize+3) & ~3;
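+    // Round the frame size up to the next multiple of 4 (e.g. 13 becomes 16);
+    // adding 3 and clearing the two low bits is equivalent to 4-byte alignment.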
+ MFI->setStackSize(FrameSize);
+ }
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
+ return;
+ }
+
+ // emit a LINK instruction
+ if (FrameSize <= 0x3ffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
+ return;
+ }
+
+ // Frame is too big, do a manual LINK:
+ // [--SP] = RETS;
+ // [--SP] = FP;
+ // FP = SP;
+ // P1 = -FrameSize;
+ // SP = SP + P1;
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::RETS, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::FP, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
+ .addReg(BF::SP);
+ RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
+ .addReg(BF::SP, RegState::Kill)
+ .addReg(BF::P1, RegState::Kill);
+
+}
+
+void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
+ return;
+ }
+
+ // emit an UNLINK instruction
+ BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
+}
+
+void BlackfinFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const TargetRegisterClass *RC = BF::DPRegisterClass;
+
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
new file mode 100644
index 000000000000..726fa2c063f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -0,0 +1,47 @@
+//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Blackfin implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_FRAMELOWERING_H
+#define BLACKFIN_FRAMELOWERING_H
+
+#include "Blackfin.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class BlackfinSubtarget;
+
+class BlackfinFrameLowering : public TargetFrameLowering {
+protected:
+ const BlackfinSubtarget &STI;
+
+public:
+ explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
new file mode 100644
index 000000000000..215ca43ea338
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -0,0 +1,180 @@
+//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Blackfin target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinTargetMachine.h"
+#include "BlackfinRegisterInfo.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// BlackfinDAGToDAGISel - Blackfin-specific code to select Blackfin machine
+/// instructions for SelectionDAG operations.
+namespace {
+ class BlackfinDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we
+ /// can make the right decision when generating code for different targets.
+ //const BlackfinSubtarget &Subtarget;
+ public:
+ BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ virtual void PostprocessISelDAG();
+
+ virtual const char *getPassName() const {
+ return "Blackfin DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "BlackfinGenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Walk the DAG after instruction selection, fixing register class issues.
+ void FixRegisterClasses(SelectionDAG &DAG);
+
+ const BlackfinInstrInfo &getInstrInfo() {
+ return *static_cast<const BlackfinTargetMachine&>(TM).getInstrInfo();
+ }
+ const BlackfinRegisterInfo *getRegisterInfo() {
+ return static_cast<const BlackfinTargetMachine&>(TM).getRegisterInfo();
+ }
+ };
+} // end anonymous namespace
+
+FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new BlackfinDAGToDAGISel(TM, OptLevel);
+}
+
+void BlackfinDAGToDAGISel::PostprocessISelDAG() {
+ FixRegisterClasses(*CurDAG);
+}
+
+SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+    // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or
+    // ADDpp SP, Px.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ }
+ }
+
+ return SelectCode(N);
+}
+
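+// SelectADDRspii - Match the ADDRspii addressing mode: either a bare frame
+// index, or a frame index plus a non-negative, word-aligned constant offset.
+// On a match, Base and Offset are filled in with target operands.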
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
+ SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) &&
+ (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline bool isCC(const TargetRegisterClass *RC) {
+ return BF::AnyCCRegClass.hasSubClassEq(RC);
+}
+
+static inline bool isDCC(const TargetRegisterClass *RC) {
+ return BF::DRegClass.hasSubClassEq(RC) || isCC(RC);
+}
+
+static void UpdateNodeOperand(SelectionDAG &DAG,
+ SDNode *N,
+ unsigned Num,
+ SDValue Val) {
+ SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
+ ops[Num] = Val;
+ SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
+ DAG.ReplaceAllUsesWith(N, New);
+}
+
+// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
+// choosing the proper register classes.
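+// CC can only be copied to and from D registers. When a machine node defines a
+// value in a CC-only class and a user expects a class that is neither CC nor D
+// (or vice versa), a COPY_TO_REGCLASS into the D register class is inserted
+// between the def and the use.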
+void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
+ const BlackfinInstrInfo &TII = getInstrInfo();
+ const BlackfinRegisterInfo *TRI = getRegisterInfo();
+ DAG.AssignTopologicalOrder();
+ HandleSDNode Dummy(DAG.getRoot());
+
+ for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin();
+ NI != DAG.allnodes_end(); ++NI) {
+ if (NI->use_empty() || !NI->isMachineOpcode())
+ continue;
+ const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
+ for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
+ if (!UI->isMachineOpcode())
+ continue;
+
+ if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
+ continue;
+ const TargetRegisterClass *DefRC =
+ TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
+
+ const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
+ if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
+ continue;
+ const TargetRegisterClass *UseRC =
+ TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
+ if (!DefRC || !UseRC)
+ continue;
+ // We cannot copy CC <-> !(CC/D)
+ if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) {
+ SDNode *Copy =
+ DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ NI->getDebugLoc(),
+ MVT::i32,
+ UI.getUse().get(),
+ DAG.getTargetConstant(BF::DRegClassID, MVT::i32));
+ UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0));
+ }
+ }
+ }
+ DAG.setRoot(Dummy.getValue());
+}
+
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
new file mode 100644
index 000000000000..d5728324de87
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -0,0 +1,643 @@
+//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Blackfin uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinISelLowering.h"
+#include "BlackfinTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setStackPointerRegisterToSaveRestore(BF::SP);
+ setIntDivIsCheap(false);
+
+ // Set up the legal register classes.
+ addRegisterClass(MVT::i32, BF::DRegisterClass);
+ addRegisterClass(MVT::i16, BF::D16RegisterClass);
+
+ computeRegisterProperties();
+
+ // Blackfin doesn't have i1 loads or stores
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+  // i16 registers support only a limited set of operations.
+ setOperationAction(ISD::AND, MVT::i16, Promote);
+ setOperationAction(ISD::OR, MVT::i16, Promote);
+ setOperationAction(ISD::XOR, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote
+ // immediately.
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::SETCC, MVT::i16, Promote);
+
+ // Blackfin has no division
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ // No carry-in operations.
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+
+ // Blackfin has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // i32 has native CTPOP, but not CTLZ/CTTZ
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+
+ // READCYCLECOUNTER needs special type legalization.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setMinFunctionAlignment(2);
+}
+
+const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case BFISD::CALL: return "BFISD::CALL";
+ case BFISD::RET_FLAG: return "BFISD::RET_FLAG";
+ case BFISD::Wrapper: return "BFISD::Wrapper";
+ }
+}
+
+MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
+ // SETCC always sets the CC register. Technically that is an i1 register, but
+ // that type is not legal, so we treat it as an i32 register.
+ return MVT::i32;
+}
+
+SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ int JTI = cast<JumpTableSDNode>(Op)->getIndex();
+
+ Op = DAG.getTargetJumpTable(JTI, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue
+BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ?
+ BF::PRegisterClass : BF::DRegisterClass;
+ assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState");
+ assert(RC->hasType(RegVT) && "Unexpected regclass in CCState");
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ unsigned ObjSize = VA.getLocVT().getStoreSize();
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+BlackfinTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+  // CCValAssign - represents the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue Opi = OutVals[i];
+
+ // Expand to i32 if necessary
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::ZExt:
+ Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::AExt:
+ Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ }
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue());
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode()) {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ } else {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain);
+ }
+}
+
+SDValue
+BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Blackfin target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are collected in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ int Offset = VA.getLocMemOffset();
+ assert(Offset%4 == 0 && "Unaligned LocMemOffset");
+ assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type");
+ SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
+ SDValue OffsetN = DAG.getIntPtrConstant(Offset);
+ OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+  // Since all the store nodes are independent of each other, merge them into a
+  // single TokenFactor node.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<EVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+ SDValue Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
+ InFlag.getNode() ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &RV = RVLocs[i];
+ unsigned Reg = RV.getLocReg();
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getLocVT(), InFlag);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ Chain = Chain.getValue(1);
+
+ // Callee is responsible for extending any i16 return values.
+ switch (RV.getLocInfo()) {
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate to valtype
+ if (RV.getLocInfo() != CCValAssign::Full)
+ Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+// Expansion of ADDE / SUBE. This is a bit involved since Blackfin doesn't have
+// add-with-carry instructions.
+SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
+ // Operands: lhs, rhs, carry-in (AC0 flag)
+ // Results: sum, carry-out (AC0 flag)
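+  //
+  // The expansion: materialize the incoming AC0 carry into a 32-bit register,
+  // do the base add/sub (which updates AC0), capture that intermediate carry,
+  // add the incoming carry to the partial sum (updating AC0 again), then OR
+  // the captured intermediate carry back into AC0 so the outgoing flag
+  // reflects a carry from either step.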
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB;
+
+ // zext incoming carry flag in AC0 to 32 bits
+ SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ Op.getOperand(2));
+ CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32,
+ SDValue(CarryIn, 0));
+
+ // Add operands, produce sum and carry flag
+ SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ Op.getOperand(0), Op.getOperand(1));
+
+ // Store intermediate carry from Sum
+ SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ SDValue(Sum, 1));
+
+ // Add incoming carry, again producing an output flag
+ Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ SDValue(Sum, 0), SDValue(CarryIn, 0));
+
+ // Update AC0 with the intermediate carry, producing a flag.
+ SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
+ SDValue(Carry1, 0));
+
+ // Compose (i32, flag) pair
+ SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) };
+ return DAG.getMergeValues(ops, 2, dl);
+}
+
+SDValue BlackfinTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Blackfin.");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+  // Frame & Return address. Currently unimplemented.
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::ADDE:
+ case ISD::SUBE: return LowerADDE(Op, DAG);
+ }
+}
+
+void
+BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::READCYCLECOUNTER: {
+ // The low part of the cycle counter is in CYCLES, the high part in
+ // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read
+ // CYCLES2 last.
+ SDValue TheChain = N->getOperand(0);
+ SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32);
+ SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32);
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi));
+ // Outgoing chain. If we were to use the chain from lo instead, it would be
+ // possible to entirely eliminate the CYCLES2 read in (i32 (trunc
+ // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2
+ // read beyond the next CYCLES read, leading to invalid results.
+ Results.push_back(hi.getValue(1));
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Blackfin Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+BlackfinTargetLowering::ConstraintType
+BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() != 1)
+ return TargetLowering::getConstraintType(Constraint);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return C_RegisterClass;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return C_RegisterClass;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return C_Register;
+ }
+
+  // Not implemented: q0-q7, qA. Use {R2} etc instead.
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+BlackfinTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return CW_Register;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return CW_SpecificReg;
+ }
+ return weight;
+}
+
+/// getRegForInlineAsmConstraint - Return the register number and class for a
+/// C_Register constraint.
+std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ typedef std::pair<unsigned, const TargetRegisterClass*> Pair;
+ using namespace BF;
+
+ if (Constraint.size() != 1)
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass);
+
+ // Blackfin-specific constraints
+ case 'a': return Pair(0U, PRegisterClass);
+ case 'd': return Pair(0U, DRegisterClass);
+ case 'e': return Pair(0U, AccuRegisterClass);
+ case 'A': return Pair(A0, AccuRegisterClass);
+ case 'B': return Pair(A1, AccuRegisterClass);
+ case 'b': return Pair(0U, IRegisterClass);
+ case 'v': return Pair(0U, BRegisterClass);
+ case 'f': return Pair(0U, MRegisterClass);
+ case 'C': return Pair(CC, JustCCRegisterClass);
+ case 'x': return Pair(0U, GRRegisterClass);
+ case 'w': return Pair(0U, ALLRegisterClass);
+ case 'Z': return Pair(P3, PRegisterClass);
+ case 'Y': return Pair(P1, PRegisterClass);
+ case 'z': return Pair(0U, zConsRegisterClass);
+ case 'D': return Pair(0U, DConsRegisterClass);
+ case 'W': return Pair(0U, WConsRegisterClass);
+ case 'c': return Pair(0U, cConsRegisterClass);
+ case 't': return Pair(0U, tConsRegisterClass);
+ case 'u': return Pair(0U, uConsRegisterClass);
+ case 'k': return Pair(0U, kConsRegisterClass);
+ case 'y': return Pair(0U, yConsRegisterClass);
+ }
+
+ // Not implemented: q0-q7, qA. Use {R2} etc instead.
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool BlackfinTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Blackfin target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
new file mode 100644
index 000000000000..b65775b9285d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -0,0 +1,83 @@
+//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Blackfin uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_ISELLOWERING_H
+#define BLACKFIN_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Blackfin.h"
+
+namespace llvm {
+
+ namespace BFISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ Wrapper // Address wrapper
+ };
+ }
+
+ class BlackfinTargetLowering : public TargetLowering {
+ public:
+ BlackfinTargetLowering(TargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ };
+} // end namespace llvm
+
+#endif // BLACKFIN_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td
new file mode 100644
index 000000000000..d8e6e252e787
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td
@@ -0,0 +1,34 @@
+//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstBfin<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "BF";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Single-word (16-bit) instructions
+class F1<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
+
+// Double-word (32-bit) instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
new file mode 100644
index 000000000000..d190ae7984b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -0,0 +1,256 @@
+//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "BlackfinGenInstrInfo.inc"
+
+using namespace llvm;
+
+BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
+ : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
+ RI(ST, *this),
+ Subtarget(ST) {}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::LOAD32fi:
+ case BF::LOAD16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::STORE32fi:
+ case BF::STORE16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned BlackfinInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ llvm_unreachable("Implement conditional branches!");
+}
+
+void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (BF::D16RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (BF::DRegClass.contains(DestReg)) {
+ if (SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
+ return;
+ }
+ if (SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ }
+
+ if (BF::DRegClass.contains(SrcReg)) {
+ if (DestReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+ return;
+ }
+ if (DestReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ }
+
+ if (DestReg == BF::NCC && SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (DestReg == BF::CC && SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ llvm_unreachable("Bad reg-to-reg copy");
+}
+
+static bool inClass(const TargetRegisterClass &Test,
+ unsigned Reg,
+ const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Test.contains(Reg);
+ else
+ return Test.hasSubClassEq(RC);
+}
+
+void
+BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg,
+ bool isKill,
+ int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+
+ if (inClass(BF::DPRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE32fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE16fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE8fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
+ RC->getName()).c_str());
+}
+
+void BlackfinInstrInfo::
+storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("storeRegToAddr not implemented");
+}
+
+void
+BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (inClass(BF::DPRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable("Cannot load regclass from stack slot");
+}
+
+void BlackfinInstrInfo::
+loadRegFromAddr(MachineFunction &MF,
+ unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("loadRegFromAddr not implemented");
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h
new file mode 100644
index 000000000000..d22ddf0d7313
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -0,0 +1,81 @@
+//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFININSTRUCTIONINFO_H
+#define BLACKFININSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "BlackfinRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "BlackfinGenInstrInfo.inc"
+
+namespace llvm {
+
+ class BlackfinInstrInfo : public BlackfinGenInstrInfo {
+ const BlackfinRegisterInfo RI;
+ const BlackfinSubtarget& Subtarget;
+ public:
+ explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned
+ InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
new file mode 100644
index 000000000000..5b59d7769c7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -0,0 +1,862 @@
+//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Blackfin instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "BlackfinInstrFormats.td"
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Transformations
+//===----------------------------------------------------------------------===//
+
+def trailingZeros_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ MVT::i32);
+}]>;
+
+def trailingOnes_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+ MVT::i32);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediates
+//===----------------------------------------------------------------------===//
+
+def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
+
+def uimm5m2 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 2 == 0 && isUInt<5>(value);
+}]>;
+
+def uimm6m4 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 4 == 0 && isUInt<6>(value);
+}]>;
+
+def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
+def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
+
+def ximm16 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value < (1<<16) && value >= -(1<<15);
+}]>;
+
+def imm17m2 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 2 == 0 && isInt<17>(value);
+}]>;
+
+def imm18m4 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 4 == 0 && isInt<18>(value);
+}]>;
+
+// 32-bit bitmask transformed to a bit number
+def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(N->getZExtValue());
+}], trailingZeros_xform>;
+
+// 32-bit inverse bitmask transformed to a bit number
+def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(~N->getZExtValue());
+}], trailingOnes_xform>;
+
+//===----------------------------------------------------------------------===//
+// Operands
+//===----------------------------------------------------------------------===//
+
+def calltarget : Operand<iPTR>;
+
+def brtarget : Operand<OtherVT>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemoryOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "${:comment}ADJCALLSTACKDOWN $amt",
+ [(BfinCallseqStart timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment}ADJCALLSTACKUP $amt1 $amt2",
+ [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-9. Program Flow Control Instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+
+let isIndirectBranch = 1 in
+def JUMPp : F1<(outs), (ins P:$target),
+ "JUMP ($target);",
+ [(brind P:$target)]>;
+
+// TODO JUMP (PC-P)
+
+// NOTE: assembler chooses between JUMP.S and JUMP.L
+def JUMPa : F1<(outs), (ins brtarget:$target),
+ "jump $target;",
+ [(br bb:$target)]>;
+
+def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
+ "if $cc jump $target;",
+ [(brcond AnyCC:$cc, bb:$target)]>;
+}
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
+def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
+ "call $func;", []>;
+def CALLp: F1<(outs), (ins P:$func, variable_ops),
+ "call ($func);", [(BfinCall P:$func)]>;
+}
+
+let isReturn = 1,
+ isTerminator = 1,
+ isBarrier = 1,
+ Uses = [RETS] in
+def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-10. Load / Store Instructions
+//===----------------------------------------------------------------------===//
+
+// Immediate constant loads
+
+// sext immediate, i32 D/P regs
+def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set DP:$dst, imm7:$src)]>;
+
+// zext immediate, i32 reg groups 0-3
+def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (z);",
+ [(set GR:$dst, uimm16:$src)]>;
+
+// sext immediate, i32 reg groups 0-3
+def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set GR:$dst, imm16:$src)]>;
+
+// Pseudo-instruction for loading a general 32-bit constant.
+def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
+ [(set GR:$dst, imm:$src)]>;
+
+def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = $src; $dst.l = $src;", []>;
+
+
+// 16-bit immediate, i16 reg groups 0-3
+def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
+ "$dst = $src;", []>;
+
+def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
+ (LOAD32sym tglobaladdr:$addr)>;
+
+def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
+ (LOAD32sym tjumptable:$addr)>;
+
+// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
+// emit GR16 instructions. As a hack, we use this fake instruction instead.
+def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
+ "$dst = $src;",
+ [(set D16:$dst, ximm16:$src)]>;
+
+// Memory loads with patterns
+
+def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
+ "$dst = [$ptr];",
+ [(set DP:$dst, (load P:$ptr))]>;
+
+// Pseudo-instruction for loading a stack slot
+def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set DP:$dst, (load ADDRspii:$mem))]>;
+
+// Note: Expands to multiple insns
+def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set D16:$dst, (load ADDRspii:$mem))]>;
+
+// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
+// Replaced with Load D + CC=D
+def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = B[$mem];",
+ [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
+
+def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
+
+def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
+
+def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = W[$ptr] (z);",
+ [(set D:$dst, (zextloadi16 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>;
+
+def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi16 (add P:$ptr,
+ uimm5m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
+ (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst,
+ (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
+ (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = w[$ptr] (x);",
+ [(set D:$dst, (sextloadi16 P:$ptr))]>;
+
+def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
+
+def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
+ "$dst = w[$ptr];",
+ [(set D16:$dst, (load PI:$ptr))]>;
+
+def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = B[$ptr] (z);",
+ [(set D:$dst, (zextloadi8 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
+def : Pat<(i16 (extloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
+def : Pat<(i16 (zextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
+
+def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
+ (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
+def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ lo16)>;
+def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ lo16)>;
+
+def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = b[$ptr] (x);",
+ [(set D:$dst, (sextloadi8 P:$ptr))]>;
+
+def : Pat<(i16 (sextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
+
+def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (x);",
+ [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
+ lo16)>;
+// Memory loads without patterns
+
+let mayLoad = 1 in {
+
+multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";"> {
+ def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
+ def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
+}
+multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";">
+ : LOAD_incdec<drc, prc, mem, suf> {
+ def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>;
+}
+
+defm LOAD32p: LOAD_incdec<DP, P>;
+defm LOAD32i: LOAD_incdec<D, I>;
+defm LOAD8z32p: LOAD_incdec<D, P, "b", " (z);">;
+defm LOAD8s32p: LOAD_incdec<D, P, "b", " (x);">;
+defm LOADhi: LOAD_incdec<D16, I, "w">;
+defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
+defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
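+// For illustration, the multiclasses above expand each defm into _inc/_dec (and
+// _post) variants; e.g. LOAD8z32p_inc prints as "$dst = b[$ptr++] (z);" and
+// LOAD16z32p_post as "$dst = w[$ptr++$off] (z);".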
+
+def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+// Note: $fp MUST be FP
+def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
+ "$dst = [$fp - $off];", []>;
+
+def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
+ "$dst = [$ptr];", []>;
+def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+
+
+def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = w[$ptr ++ $off];", []>;
+
+
+}
+
+// Memory stores with patterns
+def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
+ "[$ptr] = $val;",
+ [(store DP:$val, P:$ptr)]>;
+
+// Pseudo-instructions for storing to a stack slot
+def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store DP:$val, ADDRspii:$mem)]>;
+
+// Note: This stack-storing pseudo-instruction is expanded to multiple insns
+def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store D16:$val, ADDRspii:$mem)]>;
+
+// Pseudo-instruction for storing an AnyCC register to a stack slot.
+// Replaced with D=CC + STORE byte
+def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
+ "${:comment}FI b[$mem] = $val;",
+ [(store AnyCC:$val, ADDRspii:$mem)]>;
+
+def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
+
+def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
+
+def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
+ "w[$ptr] = $val;",
+ [(store D16:$val, PI:$ptr)]>;
+
+def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
+ "b[$ptr] = $val;",
+ [(truncstorei8 D:$val, P:$ptr)]>;
+
+def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
+ "b[$ptr + $off] = $val;",
+ [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
+
+let Constraints = "$ptr = $ptr_wb" in {
+
+multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
+ int off=4, string pre=""> {
+ def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr++] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
+ def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr--] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
+ (ineg off)))]>;
+}
+
+defm STORE32p: STORE_incdec<DP, P>;
+defm STORE16i: STORE_incdec<D16, I, 2, "w">;
+defm STORE8p: STORE_incdec<D, P, 1, "b">;
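+// For illustration, defm STORE8p above yields STORE8p_inc ("b[$ptr++] = $val;")
+// and STORE8p_dec ("b[$ptr--] = $val;"), matching post_store with offsets of
+// +1 and -1 respectively.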
+
+def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
+ "[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
+
+def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
+ "w[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
+}
+
+// Memory stores without patterns
+
+let mayStore = 1 in {
+
+// Note: only works for $fp == FP
+def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
+ "[$fp - $off] = $val;", []>;
+
+def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
+ "[$ptr] = $val;", []>;
+
+def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr++] = $val;", []>;
+
+def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr--] = $val;", []>;
+
+def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
+ "[$ptr ++ $off] = $val;", []>;
+}
+
+def : Pat<(truncstorei16 D:$val, PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
+ lo16), PI:$ptr)>;
+
+def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
+ hi16), PI:$ptr)>;
+
+def : Pat<(truncstorei8 D16L:$val, P:$ptr),
+ (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
+ lo16),
+ P:$ptr)>;
+
+//===----------------------------------------------------------------------===//
+// Table C-11. Move Instructions.
+//===----------------------------------------------------------------------===//
+
+def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
+ "$dst = $src;",
+ []>;
+
+let Constraints = "$src1 = $dst" in
+def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
+ "if $cc $dst = $src2;",
+ [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
+
+let Defs = [AZ, AN, AC0, V] in {
+def MOVEzext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (z);",
+ [(set D:$dst, (zext D16L:$src))]>;
+
+def MOVEsext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (x);",
+ [(set D:$dst, (sext D16L:$src))]>;
+
+def MOVEzext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (z);",
+ [(set D:$dst, (and D:$src, 0xff))]>;
+
+def MOVEsext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (x);",
+ [(set D:$dst, (sext_inreg D:$src, i8))]>;
+
+}
+
+def : Pat<(sext_inreg D16L:$src, i8),
+ (EXTRACT_SUBREG (MOVEsext8
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ D16L:$src,
+ lo16)),
+ lo16)>;
+
+def : Pat<(sext_inreg D:$src, i16),
+ (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
+
+def : Pat<(and D:$src, 0xffff),
+ (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
+
+def : Pat<(i32 (anyext D16L:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
+ lo16)>;
+
+// TODO Dreg = Dreg_byte (X/Z)
+
+// TODO Accumulator moves
+
+//===----------------------------------------------------------------------===//
+// Table C-12. Stack Control Instructions
+//===----------------------------------------------------------------------===//
+
+let Uses = [SP], Defs = [SP] in {
+def PUSH: F1<(outs), (ins ALL:$src),
+ "[--sp] = $src;", []> { let mayStore = 1; }
+
+// NOTE: POP does not work for DP regs; use LOAD instead.
+def POP: F1<(outs ALL:$dst), (ins),
+ "$dst = [sp++];", []> { let mayLoad = 1; }
+}
+
+// TODO: push/pop multiple
+
+def LINK: F2<(outs), (ins i32imm:$amount),
+ "link $amount;", []>;
+
+def UNLINK: F2<(outs), (ins),
+ "unlink;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-13. Control Code Bit Management Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
+ def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
+
+ def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
+
+ def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ []>;
+
+ def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
+}
+
+defm SETEQ : SETCC<seteq, setne, "==">;
+defm SETLT : SETCC<setlt, setge, "<">;
+defm SETLE : SETCC<setle, setgt, "<=">;
+defm SETULT : SETCC<setult, setuge, "<", " (iu);">;
+defm SETULE : SETCC<setule, setugt, "<=", " (iu);">;
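+// For illustration, defm SETULT above produces SETULTdd/SETULTri/SETULTpp and
+// SETULTri_not, all printed as "cc = $a < $b (iu);"; the signed variants use
+// the default ";" suffix.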
+
+def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
+ "cc = $a == $b;",
+ [(set NotCC:$cc, (setne D:$a, D:$b))]>;
+
+def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
+def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
+def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
+def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
+
+// TODO: compare pointer for P-P comparisons
+// TODO: compare accumulator
+
+let Defs = [AC0] in
+def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
+ "ac0 \\|= cc;", []>;
+
+let Uses = [AC0] in
+def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
+ "cc = ac0;", []>;
+
+def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
+ "$dst = $cc;", []>;
+
+def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
+ "$dst = cc;", []>;
+
+def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
+ "cc = $src;",
+ [(set AnyCC:$cc, (setne D:$src, 0))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-14. Logical Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 & $src2;",
+ [(set D:$dst, (and D:$src1, D:$src2))]>;
+
+def NOT: F1<(outs D:$dst), (ins D:$src),
+ "$dst = ~$src;",
+ [(set D:$dst, (not D:$src))]>;
+
+def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 \\| $src2;",
+ [(set D:$dst, (or D:$src1, D:$src2))]>;
+
+def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 ^ $src2;",
+ [(set D:$dst, (xor D:$src1, D:$src2))]>;
+
+// missing: BXOR, BXORSHIFT
+
+//===----------------------------------------------------------------------===//
+// Table C-15. Bit Operations Instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
+ "bitclr($dst, $src2);",
+ [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
+
+def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bitset($dst, $src2);",
+ [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
+
+def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bittgl($dst, $src2);",
+ [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
+}
+
+def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = bittst($src1, $src2);",
+ [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = !bittst($src1, $src2);",
+ [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+// TODO: DEPOSIT, EXTRACT, BITMUX
+
+def ONES: F2<(outs D16L:$dst), (ins D:$src),
+ "$dst = ones $src;",
+ [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
+
+def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Table C-16. Shift / Rotate Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHIFT32<SDNode opnode, string ops> {
+ def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
+ def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, D:$amount))]>;
+}
+
+let Defs = [AZ, AN, V, VS],
+ Constraints = "$src = $dst" in {
+defm SRA : SHIFT32<sra, ">>>">;
+defm SRL : SHIFT32<srl, ">>">;
+defm SLL : SHIFT32<shl, "<<">;
+}
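+// For illustration, defm SRA above yields SRAi/SRAr, both printed as
+// "$dst >>>= $amount;", matching sra with an immediate or a register amount.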
+
+// TODO: automatic switching between 2-addr and 3-addr (?)
+
+let Defs = [AZ, AN, V, VS] in {
+def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D:$dst, (shl D:$src, D16L:$amount))]>;
+
+// Arithmetic left shifts saturate on overflow.
+def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = ashift $src by $amount;",
+ [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
+
+def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >>> $amount;",
+ [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
+
+def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >> $amount;",
+ [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
+
+// Arithmetic left shifts saturate on overflow.
+def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = ashift $src BY $amount;",
+ [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>;
+
+def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src << $amount;",
+ [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
+
+def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-17. Arithmetic Operations Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: ABS
+
+let Defs = [AZ, AN, AC0, V, VS] in {
+
+def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D:$dst, (add D:$src1, D:$src2))]>;
+
+def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
+
+let Constraints = "$src1 = $dst" in
+def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
+ "$dst += $src2;",
+ [(set D:$dst, (add D:$src1, imm7:$src2))]>;
+
+def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D:$dst, (sub D:$src1, D:$src2))]>;
+
+def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
+
+}
+
+def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
+def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
+
+let Defs = [AZ, AN, V, VS] in
+def NEG: F1<(outs D:$dst), (ins D:$src),
+ "$dst = -$src;",
+ [(set D:$dst, (ineg D:$src))]>;
+
+// No pattern; it would confuse isel to have two i32 = i32+i32 patterns.
+def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
+ "$dst = $src1 + $src2;", []>;
+
+let Constraints = "$src1 = $dst" in
+def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
+ "$dst += $src2;", []>;
+
+let Defs = [AZ, AN, V] in
+def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2 (rnd20);", []>;
+
+let Defs = [V, VS] in {
+def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
+
+def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (ih);",
+ [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
+
+def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
+
+def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
+}
+
+
+let Constraints = "$src1 = $dst" in
+def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst *= $src2;",
+ [(set D:$dst, (mul D:$src1, D:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-18. External Event Management Instructions
+//===----------------------------------------------------------------------===//
+
+def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>;
+def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>;
+def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>;
+def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
+def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>;
+def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>;
+def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
+def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
+def NOP : F1<(outs), (ins), "nop;", []>;
+def MNOP : F2<(outs), (ins), "mnop;", []>;
+def ABORT : F1<(outs), (ins), "abort;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-19. Cache Control Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Table C-20. Video Pixel Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align8($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 24)),
+ (srl D:$src2, (i32 8))))]>;
+
+def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 16)),
+ (srl D:$src2, (i32 16))))]>;
+
+def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 8)),
+ (srl D:$src2, (i32 24))))]>;
+
+def DISALGNEXCPT : F2<(outs), (ins), "disalgnexcpt;", []>;
+
+// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
+// BYTEPACK, BYTEUNPACK
+
+// Table C-21. Vector Operations Instructions
+
+// Patterns
+def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
+ (CALLa tglobaladdr:$dst)>;
+def : Pat<(BfinCall (i32 texternalsym:$dst)),
+ (CALLa texternalsym:$dst)>;
+def : Pat<(i16 (trunc D:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
new file mode 100644
index 000000000000..ae8ee9e2a1a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -0,0 +1,104 @@
+//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace bfinIntrinsic {
+
+ enum ID {
+ last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_bfin_intrinsics
+ };
+
+}
+
+std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return std::string();
+ assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned
+BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static const FunctionType *getType(LLVMContext &Context, unsigned id) {
+ const Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ const Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
new file mode 100644
index 000000000000..7c4b5a9967d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
@@ -0,0 +1,32 @@
+//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef BLACKFININTRINSICS_H
+#define BLACKFININTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td
new file mode 100644
index 000000000000..ce21b082376f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td
@@ -0,0 +1,34 @@
+//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Blackfin-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "bfin", isTarget = 1 in {
+
+//===----------------------------------------------------------------------===//
+// Core synchronisation etc.
+//
+// These intrinsics have side effects. Each represents a single instruction, but
+// workarounds are sometimes required depending on the CPU.
+
+// Execute csync instruction with workarounds
+def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
+ Intrinsic<[]>;
+
+// Execute ssync instruction with workarounds
+def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
+ Intrinsic<[]>;
+
+// Execute idle instruction with workarounds
+def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
+ Intrinsic<[]>;
+
+}
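+// From LLVM IR these are reachable as calls such as (illustrative):
+//   call void @llvm.bfin.csync()
+// which the CSYNC/SSYNC/IDLE definitions in BlackfinInstrInfo.td select into
+// the corresponding single instructions.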
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
new file mode 100644
index 000000000000..3a7c104ee055
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -0,0 +1,360 @@
+//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinRegisterInfo.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
+ const TargetInstrInfo &tii)
+ : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {}
+
+const unsigned*
+BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ using namespace BF;
+ static const unsigned CalleeSavedRegs[] = {
+ FP,
+ R4, R5, R6, R7,
+ P3, P4, P5,
+ 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector
+BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ using namespace BF;
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AZ);
+ Reserved.set(AN);
+ Reserved.set(AQ);
+ Reserved.set(AC0);
+ Reserved.set(AC1);
+ Reserved.set(AV0);
+ Reserved.set(AV0S);
+ Reserved.set(AV1);
+ Reserved.set(AV1S);
+ Reserved.set(V);
+ Reserved.set(VS);
+ Reserved.set(CYCLES).set(CYCLES2);
+ Reserved.set(L0);
+ Reserved.set(L1);
+ Reserved.set(L2);
+ Reserved.set(L3);
+ Reserved.set(SP);
+ Reserved.set(RETS);
+ if (TFI->hasFP(MF))
+ Reserved.set(FP);
+ return Reserved;
+}
+
+bool BlackfinRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit instructions to add delta to a D or P register. ScratchReg must be in
+// the same register class as Reg (D or P).
+void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const {
+ if (!delta)
+ return;
+ if (isInt<7>(delta)) {
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg)
+ .addReg(Reg) // No kill on two-addr operand
+ .addImm(delta);
+ return;
+ }
+
+ // We must load delta into ScratchReg and add that.
+ loadConstant(MBB, I, DL, ScratchReg, delta);
+ if (BF::PRegClass.contains(Reg)) {
+ assert(BF::PRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a P register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register");
+ assert(BF::DRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a D register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ }
+}
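+// Illustrative example: adjustRegister(MBB, I, DL, BF::SP, BF::P1, -128) does
+// not fit the 7-bit immediate form, so it emits roughly
+//   p1 = -128 (x);
+//   sp = sp + p1;
+// while small deltas collapse to a single "sp += imm;" (ADDpp_imm7).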
+
+// Emit instructions to load a constant into D/P register
+void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const {
+ if (isInt<7>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value);
+ return;
+ }
+
+ if (isUInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
+ return;
+ }
+
+ if (isInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value);
+ return;
+ }
+
+ // We must split into halves
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
+ .addImm((value >> 16) & 0xffff)
+ .addReg(Reg, RegState::ImplicitDefine);
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
+ .addImm(value & 0xffff)
+ .addReg(Reg, RegState::ImplicitKill)
+ .addReg(Reg, RegState::ImplicitDefine);
+}
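+// Illustrative example: loadConstant(MBB, I, DL, BF::P1, 0x12345678) takes the
+// split path above, loading 0x1234 into p1.h and 0x5678 into p1.l with two
+// LOAD16i instructions; values fitting in 7 or 16 bits use the
+// single-instruction forms instead.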
+
+void BlackfinRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (Amount != 0) {
+ assert(Amount%4 == 0 && "Unaligned call frame size");
+ if (I->getOpcode() == BF::ADJCALLSTACKDOWN) {
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount);
+ } else {
+ assert(I->getOpcode() == BF::ADJCALLSTACKUP &&
+ "Unknown call frame pseudo instruction");
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
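+// Illustrative example: an ADJCALLSTACKDOWN of 16 bytes fits in 7 bits and is
+// lowered by adjustRegister to a single "sp += -16;"; larger adjustments go
+// through the P1 scratch register as shown above.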
+
+/// findScratchRegister - Find a 'free' register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+void
+BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned FIPos;
+ for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) {
+ assert(FIPos < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ int FrameIndex = MI.getOperand(FIPos).getIndex();
+ assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm());
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
+ + MI.getOperand(FIPos+1).getImm();
+ unsigned BaseReg = BF::FP;
+ if (TFI->hasFP(MF)) {
+ assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
+ } else {
+ BaseReg = BF::SP;
+ Offset += MF.getFrameInfo()->getStackSize() + SPAdj;
+ }
+
+ bool isStore = false;
+
+ switch (MI.getOpcode()) {
+ case BF::STORE32fi:
+ isStore = true;
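+ // FALLTHROUGH: shares the i32 stack-slot handling below.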
+ case BF::LOAD32fi: {
+ assert(Offset%4 == 0 && "Unaligned i32 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ if (isUInt<6>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_uimm6m4
+ : BF::LOAD32p_uimm6m4));
+ return;
+ }
+ if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32fp_nimm7m4
+ : BF::LOAD32fp_nimm7m4));
+ MI.getOperand(FIPos+1).setImm(-Offset);
+ return;
+ }
+ if (isInt<18>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_imm18m4
+ : BF::LOAD32p_imm18m4));
+ return;
+ }
+ // TODO: use the RegScavenger to materialize offsets that do not fit above.
+ MI.dump();
+ llvm_unreachable("Stack frame offset too big");
+ break;
+ }
+ case BF::ADDpp: {
+ assert(MI.getOperand(0).isReg() && "ADD instruction needs a register");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ // We need to produce a stack offset in a P register. We emit:
+ // P0 = offset;
+ // P0 = BR + P0;
+ assert(FIPos==1 && "Bad frame index operand");
+ loadConstant(MBB, II, DL, DestReg, Offset);
+ MI.getOperand(1).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(2).ChangeToRegister(BaseReg, false);
+ break;
+ }
+ case BF::STORE16fi:
+ isStore = true;
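+ // FALLTHROUGH: shares the i16 stack-slot handling below.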
+ case BF::LOAD16fi: {
+ assert(Offset%2 == 0 && "Unaligned i16 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ // We need a P register to use as an address
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ loadConstant(MBB, II, DL, ScratchReg, Offset);
+ BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(BaseReg);
+ MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi));
+ MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true);
+ MI.RemoveOperand(2);
+ break;
+ }
+ case BF::STORE8fi: {
+ // This is an AnyCC spill; we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ if (SpillReg.getReg()==BF::NCC) {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
+ .addOperand(SpillReg);
+ BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
+ .addReg(ScratchReg).addImm(0);
+ } else {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
+ .addOperand(SpillReg);
+ }
+ // STORE D
+ MI.setDesc(TII.get(BF::STORE8p_imm16));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ break;
+ }
+ case BF::LOAD8fi: {
+ // This is an AnyCC restore; we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ ++II;
+ if (SpillReg.getReg()==BF::CC) {
+ // CC = D
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ // Restore NCC (CC = D==0)
+ BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Cannot eliminate frame index");
+ break;
+ }
+}
+
+unsigned BlackfinRegisterInfo::getRARegister() const {
+ return BF::RETS;
+}
+
+unsigned
+BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? BF::FP : BF::SP;
+}
+
+unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum,
+ bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
new file mode 100644
index 000000000000..86f45c17c625
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -0,0 +1,81 @@
+//===- BlackfinRegisterInfo.h - Blackfin Register Information --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINREGISTERINFO_H
+#define BLACKFINREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "BlackfinGenRegisterInfo.inc"
+
+namespace llvm {
+
+ class BlackfinSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
+ BlackfinSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ // getSubReg implemented by tablegen
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const {
+ return &BF::PRegClass;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getRARegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ // Utility functions
+ void adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const;
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
new file mode 100644
index 000000000000..1c42205eb780
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -0,0 +1,277 @@
+//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Blackfin register file
+//===----------------------------------------------------------------------===//
+
+// Subregs are:
+// 1: .L
+// 2: .H
+// 3: .W (32 low bits of 40-bit accu)
+let Namespace = "BF" in {
+def lo16 : SubRegIndex;
+def hi16 : SubRegIndex;
+def lo32 : SubRegIndex;
+def hi32 : SubRegIndex;
+}
+
+// Registers are identified with 3-bit group and 3-bit ID numbers.
+class BlackfinReg<string n> : Register<n> {
+ field bits<3> Group;
+ field bits<3> Num;
+ let Namespace = "BF";
+}
+
+// Rc - 1-bit registers
+class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
+ field bits<5> BitNum = bitno;
+}
+
+// Rs - 16-bit integer registers
+class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+ field bits<1> High = hi;
+}
+
+// Ri - 32-bit integer registers (no subregs)
+class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+}
+
+// Ra - 40-bit accumulator registers
+class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
+ let SubRegs = subs;
+ let SubRegIndices = [hi32, lo32];
+ let Group = 4;
+ let Num = num;
+}
+
+// Two halves of 32-bit register
+multiclass Rss<bits<3> group, bits<3> num, string n> {
+ def H : Rs<group, num, 1, !strconcat(n, ".h")>;
+ def L : Rs<group, num, 0, !strconcat(n, ".l")>;
+}
+
+// Rii - 32-bit integer registers with subregs
+class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
+ : BlackfinReg<n> {
+ let SubRegs = subs;
+ let SubRegIndices = [hi16, lo16];
+ let Group = group;
+ let Num = num;
+}
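+// For example, R0 below is an Rii whose SubRegs are [R0H, R0L] under the
+// [hi16, lo16] indices, which is what the EXTRACT_SUBREG/INSERT_SUBREG uses of
+// lo16 in BlackfinInstrInfo.td rely on.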
+
+// Status bits are all part of ASTAT
+def AZ : Rc<0, "az">;
+def AN : Rc<1, "an">;
+def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
+def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
+def AQ : Rc<6, "aq">;
+def AC0 : Rc<12, "ac0">;
+def AC1 : Rc<13, "ac1">;
+def AV0 : Rc<16, "av0">;
+def AV0S : Rc<17, "av0s">;
+def AV1 : Rc<18, "av1">;
+def AV1S : Rc<19, "av1s">;
+def V : Rc<24, "v">;
+def VS : Rc<25, "vs">;
+// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
+
+// Group 0: Integer registers
+defm R0 : Rss<0, 0, "r0">;
+def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
+defm R1 : Rss<0, 1, "r1">;
+def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
+defm R2 : Rss<0, 2, "r2">;
+def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
+defm R3 : Rss<0, 3, "r3">;
+def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
+defm R4 : Rss<0, 4, "r4">;
+def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
+defm R5 : Rss<0, 5, "r5">;
+def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
+defm R6 : Rss<0, 6, "r6">;
+def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
+defm R7 : Rss<0, 7, "r7">;
+def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
+
+// Group 1: Pointer registers
+defm P0 : Rss<1, 0, "p0">;
+def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
+defm P1 : Rss<1, 1, "p1">;
+def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
+defm P2 : Rss<1, 2, "p2">;
+def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
+defm P3 : Rss<1, 3, "p3">;
+def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
+defm P4 : Rss<1, 4, "p4">;
+def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
+defm P5 : Rss<1, 5, "p5">;
+def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
+defm SP : Rss<1, 6, "sp">;
+def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
+defm FP : Rss<1, 7, "fp">;
+def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
+
+// Group 2: Index registers
+defm I0 : Rss<2, 0, "i0">;
+def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
+defm I1 : Rss<2, 1, "i1">;
+def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
+defm I2 : Rss<2, 2, "i2">;
+def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
+defm I3 : Rss<2, 3, "i3">;
+def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
+defm M0 : Rss<2, 4, "m0">;
+def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
+defm M1 : Rss<2, 5, "m1">;
+def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
+defm M2 : Rss<2, 6, "m2">;
+def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
+defm M3 : Rss<2, 7, "m3">;
+def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>;
+
+// Group 3: Cyclic indexing registers
+defm B0 : Rss<3, 0, "b0">;
+def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>;
+defm B1 : Rss<3, 1, "b1">;
+def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>;
+defm B2 : Rss<3, 2, "b2">;
+def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>;
+defm B3 : Rss<3, 3, "b3">;
+def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>;
+defm L0 : Rss<3, 4, "l0">;
+def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>;
+defm L1 : Rss<3, 5, "l1">;
+def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>;
+defm L2 : Rss<3, 6, "l2">;
+def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>;
+defm L3 : Rss<3, 7, "l3">;
+def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>;
+
+// Accumulators
+def A0X : Ri <4, 0, "a0.x">;
+defm A0 : Rss<4, 1, "a0">;
+def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>;
+def A0 : Ra <0, "a0", [A0X, A0W]>;
+
+def A1X : Ri <4, 2, "a1.x">;
+defm A1 : Rss<4, 3, "a1">;
+def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>;
+def A1 : Ra <2, "a1", [A1X, A1W]>;
+
+def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>;
+def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>;
+def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>;
+def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
+def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
+
+def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
+ let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+}
+
+def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
+def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>;
+def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>;
+def SYSCFG : Ri<7, 2, "syscfg">;
+def CYCLES : Ri<6, 6, "cycles">;
+def CYCLES2 : Ri<6, 7, "cycles2">;
+
+// Hardware loops
+def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>;
+def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>;
+def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>;
+def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
+def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
+def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
+
+// Register classes.
+def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
+
+def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
+
+def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
+
+def P16L : RegisterClass<"BF", [i16], 16,
+ (add (sequence "P%uL", 0, 5), SPL, FPL)>;
+
+def P16H : RegisterClass<"BF", [i16], 16,
+ (add (sequence "P%uH", 0, 5), SPH, FPH)>;
+
+def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
+
+def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
+
+def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
+
+def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
+
+def GR16 : RegisterClass<"BF", [i16], 16,
+ (add DP16,
+ I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
+ M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
+ B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
+ L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
+
+def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
+ let SubRegClasses = [(D16L lo16), (D16H hi16)];
+}
+
+def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
+ let SubRegClasses = [(P16L lo16), (P16H hi16)];
+}
+
+def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
+ let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
+}
+
+def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
+def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
+def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
+def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
+
+def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
+
+def ALL : RegisterClass<"BF", [i32], 32,
+ (add GR,
+ A0X, A0W, A1X, A1W, ASTAT, RETS,
+ LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
+ USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
+
+def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
+
+// We are going to pretend that CC and !CC are 32-bit registers, even though
+// they can only hold 1 bit.
+let CopyCost = -1, Size = 8 in {
+def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
+def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
+def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
+def StatBit : RegisterClass<"BF", [i1], 8,
+ (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
+}
+
+// Should be i40, but that isn't defined. It is not a legal type yet anyway.
+def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
+
+// Register classes to match inline asm constraints.
+def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
+def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
+def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
+def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
+ B0, B1, B2, B3,
+ L0, L1, L2, L3)>;
+def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
+def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
+def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
+def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
+ RETE, ASTAT, SEQSTAT,
+ USP)>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a21f696a62eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -0,0 +1,24 @@
+//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BlackfinSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "blackfin-selectiondag-info"
+#include "BlackfinTargetMachine.h"
+using namespace llvm;
+
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
+ const BlackfinTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
new file mode 100644
index 000000000000..f1ce3482f90f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Blackfin subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINSELECTIONDAGINFO_H
+#define BLACKFINSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class BlackfinTargetMachine;
+
+class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
+ ~BlackfinSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
new file mode 100644
index 000000000000..ec919cdf0b90
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -0,0 +1,44 @@
+//===- BlackfinSubtarget.cpp - Blackfin Subtarget Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Blackfin-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "BlackfinGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
+ icplb(false),
+ wa_mi_shift(false),
+ wa_csync(false),
+ wa_specld(false),
+ wa_mmr_stall(false),
+ wa_lcregs(false),
+ wa_hwloop(false),
+ wa_ind_call(false),
+ wa_killed_mmr(false),
+ wa_rets(false)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h
new file mode 100644
index 000000000000..1a01a81116d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -0,0 +1,49 @@
+//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_SUBTARGET_H
+#define BLACKFIN_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "BlackfinGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
+ bool sdram;
+ bool icplb;
+ bool wa_mi_shift;
+ bool wa_csync;
+ bool wa_specld;
+ bool wa_mmr_stall;
+ bool wa_lcregs;
+ bool wa_hwloop;
+ bool wa_ind_call;
+ bool wa_killed_mmr;
+ bool wa_rets;
+ public:
+ BlackfinSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the specified
+ /// subtarget options. The definition of this function is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
new file mode 100644
index 000000000000..a1c9f1c05e0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -0,0 +1,41 @@
+//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinTargetMachine.h"
+#include "Blackfin.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeBlackfinTarget() {
+ RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
+}
+
+BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ DataLayout("e-p:32:32-i64:32-f64:32-n32"),
+ Subtarget(TT, CPU, FS),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
+}
+
+bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createBlackfinISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
new file mode 100644
index 000000000000..bd7dc84f04ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -0,0 +1,67 @@
+//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETMACHINE_H
+#define BLACKFINTARGETMACHINE_H
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinIntrinsicInfo.h"
+#include "BlackfinISelLowering.h"
+#include "BlackfinFrameLowering.h"
+#include "BlackfinSubtarget.h"
+#include "BlackfinSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+ class BlackfinTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout;
+ BlackfinSubtarget Subtarget;
+ BlackfinTargetLowering TLInfo;
+ BlackfinSelectionDAGInfo TSInfo;
+ BlackfinInstrInfo InstrInfo;
+ BlackfinFrameLowering FrameLowering;
+ BlackfinIntrinsicInfo IntrinsicInfo;
+ public:
+ BlackfinTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const BlackfinSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const BlackfinRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const BlackfinTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ const TargetIntrinsicInfo *getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
new file mode 100644
index 000000000000..5b9d4a29794e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the BlackfinMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCAsmInfo.h"
+
+using namespace llvm;
+
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
+ GlobalPrefix = "_";
+ CommentString = "//";
+ HasSetDirective = false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
new file mode 100644
index 000000000000..c372aa247e04
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BlackfinMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETASMINFO_H
+#define BLACKFINTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct BlackfinMCAsmInfo : public MCAsmInfo {
+ explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
new file mode 100644
index 000000000000..0fa1471ae3e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCTargetDesc.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "BlackfinGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createBlackfinMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitBlackfinMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+ createBlackfinMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+ createBlackfinMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeBlackfinMCAsmInfo() {
+ RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
new file mode 100644
index 000000000000..5bffe94fc582
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINMCTARGETDESC_H
+#define BLACKFINMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheBlackfinTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Blackfin registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "BlackfinGenRegisterInfo.inc"
+
+// Defines symbolic names for the Blackfin instructions.
+#define GET_INSTRINFO_ENUM
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "BlackfinGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..8cd924f9236f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMBlackfinDesc
+ BlackfinMCTargetDesc.cpp
+ BlackfinMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..6b26101f4473
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Blackfin/MCTargetDesc/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
new file mode 100644
index 000000000000..402e0afde81d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- BlackfinTargetInfo.cpp - Blackfin Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheBlackfinTarget;
+
+extern "C" void LLVMInitializeBlackfinTargetInfo() {
+ RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
+ "Analog Devices Blackfin [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
new file mode 100644
index 000000000000..415beb1dd1cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
@@ -0,0 +1,3611 @@
+//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to C code, compilable by GCC and other C
+// compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+// Some MS headers define setjmp as _setjmp; undo this for this file.
+#ifdef _MSC_VER
+#undef setjmp
+#endif
+using namespace llvm;
+
+extern "C" void LLVMInitializeCBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
+}
+
+extern "C" void LLVMInitializeCBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCInstrInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {}
+
+namespace {
+ class CBEMCAsmInfo : public MCAsmInfo {
+ public:
+ CBEMCAsmInfo() {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = "";
+ }
+ };
+
+ /// CWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C translation unit.
+ class CWriter : public FunctionPass, public InstVisitor<CWriter> {
+ formatted_raw_ostream &Out;
+ IntrinsicLowering *IL;
+ Mangler *Mang;
+ LoopInfo *LI;
+ const Module *TheModule;
+ const MCAsmInfo* TAsm;
+ MCContext *TCtx;
+ const TargetData* TD;
+
+ std::map<const ConstantFP *, unsigned> FPConstantMap;
+ std::set<Function*> intrinsicPrototypesAlreadyGenerated;
+ std::set<const Argument*> ByValParams;
+ unsigned FPCounter;
+ unsigned OpaqueCounter;
+ DenseMap<const Value*, unsigned> AnonValueNumbers;
+ unsigned NextAnonValueNumber;
+
+ /// UnnamedStructIDs - This contains a unique ID for each struct that is
+ /// either anonymous or has no name.
+ DenseMap<const StructType*, unsigned> UnnamedStructIDs;
+
+ public:
+ static char ID;
+ explicit CWriter(formatted_raw_ostream &o)
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
+ TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
+ NextAnonValueNumber(0) {
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+ FPCounter = 0;
+ }
+
+ virtual const char *getPassName() const { return "C backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+      // Do not codegen any 'available_externally' functions at all; they have
+      // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+
+ // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F);
+
+ // Output all floating point constants that cannot be printed accurately.
+ printFloatingPointConstants(F);
+
+ printFunction(F);
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) {
+ // Free memory...
+ delete IL;
+ delete TD;
+ delete Mang;
+ delete TCtx;
+ delete TAsm;
+ FPConstantMap.clear();
+ ByValParams.clear();
+ intrinsicPrototypesAlreadyGenerated.clear();
+ UnnamedStructIDs.clear();
+ return false;
+ }
+
+ raw_ostream &printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *Ty);
+
+ std::string getStructName(const StructType *ST);
+
+ /// writeOperandDeref - Print the result of dereferencing the specified
+ /// operand with '*'. This is equivalent to printing '*' then using
+ /// writeOperand, but avoids excess syntax in some cases.
+ void writeOperandDeref(Value *Operand) {
+ if (isAddressExposed(Operand)) {
+ // Already something with an address exposed.
+ writeOperandInternal(Operand);
+ } else {
+ Out << "*(";
+ writeOperand(Operand);
+ Out << ")";
+ }
+ }
+
+ void writeOperand(Value *Operand, bool Static = false);
+ void writeInstComputationInline(Instruction &I);
+ void writeOperandInternal(Value *Operand, bool Static = false);
+ void writeOperandWithCast(Value* Operand, unsigned Opcode);
+ void writeOperandWithCast(Value* Operand, const ICmpInst &I);
+ bool writeInstructionCast(const Instruction &I);
+
+ void writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment);
+
+ private :
+ std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
+
+ void lowerIntrinsics(Function &F);
+ /// Prints the definition of the intrinsic function F. Supports the
+ /// intrinsics which need to be explicitly defined in the CBackend.
+ void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
+
+ void printModuleTypes();
+ void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &);
+ void printFloatingPointConstants(Function &F);
+ void printFloatingPointConstants(const Constant *C);
+ void printFunctionSignature(const Function *F, bool Prototype);
+
+ void printFunction(Function &);
+ void printBasicBlock(BasicBlock *BB);
+ void printLoop(Loop *L);
+
+ void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printConstant(Constant *CPV, bool Static);
+ void printConstantWithCast(Constant *CPV, unsigned Opcode);
+ bool printConstExprCast(const ConstantExpr *CE, bool Static);
+ void printConstantArray(ConstantArray *CPA, bool Static);
+ void printConstantVector(ConstantVector *CV, bool Static);
+
+ /// isAddressExposed - Return true if the specified value's name needs to
+ /// have its address taken in order to get a C value of the correct type.
+ /// This happens for global variables, byval parameters, and direct allocas.
+ bool isAddressExposed(const Value *V) const {
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return ByValParams.count(A);
+ return isa<GlobalVariable>(V) || isDirectAlloca(V);
+ }
+
+ // isInlinableInst - Attempt to inline instructions into their uses to build
+ // trees as much as possible. To do this, we have to consistently decide
+ // what is acceptable to inline, so that variable declarations don't get
+ // printed and an extra copy of the expr is not emitted.
+ //
+ static bool isInlinableInst(const Instruction &I) {
+ // Always inline cmp instructions, even if they are shared by multiple
+ // expressions. GCC generates horrible code if we don't.
+ if (isa<CmpInst>(I))
+ return true;
+
+ // Must be an expression, must be used exactly once. If it is dead, we
+ // emit it inline where it would go.
+ if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
+ isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
+ isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
+ isa<InsertValueInst>(I))
+ // Don't inline a load across a store or other bad things!
+ return false;
+
+ // Must not be used in inline asm, extractelement, or shufflevector.
+ if (I.hasOneUse()) {
+ const Instruction &User = cast<Instruction>(*I.use_back());
+ if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
+ isa<ShuffleVectorInst>(User))
+ return false;
+ }
+
+      // Only inline an instruction if its use is in the same BB as the inst.
+ return I.getParent() == cast<Instruction>(I.use_back())->getParent();
+ }
+
+ // isDirectAlloca - Define fixed sized allocas in the entry block as direct
+ // variables which are accessed with the & operator. This causes GCC to
+ // generate significantly better code than to emit alloca calls directly.
+ //
+ static const AllocaInst *isDirectAlloca(const Value *V) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI) return 0;
+ if (AI->isArrayAllocation())
+ return 0; // FIXME: we can also inline fixed size array allocas!
+ if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
+ return 0;
+ return AI;
+ }
+
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
+ static bool isInlineAsm(const Instruction& I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ return isa<InlineAsm>(CI->getCalledValue());
+ return false;
+ }
+
+ // Instruction visitation functions
+ friend class InstVisitor<CWriter>;
+
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitInvokeInst(InvokeInst &I) {
+ llvm_unreachable("Lowerinvoke pass didn't work!");
+ }
+
+ void visitUnwindInst(UnwindInst &I) {
+ llvm_unreachable("Lowerinvoke pass didn't work!");
+ }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitPHINode(PHINode &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitCallInst (CallInst &I);
+ void visitInlineAsm(CallInst &I);
+ bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
+
+ void visitAllocaInst(AllocaInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitVAArgInst (VAArgInst &I);
+
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI);
+
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+
+ void visitInstruction(Instruction &I) {
+#ifndef NDEBUG
+ errs() << "C Writer does not know about " << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ void outputLValue(Instruction *I) {
+ Out << " " << GetValueName(I) << " = ";
+ }
+
+ bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
+ void printPHICopiesForSuccessor(BasicBlock *CurBlock,
+ BasicBlock *Successor, unsigned Indent);
+ void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
+ unsigned Indent);
+ void printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static);
+
+ std::string GetValueName(const Value *Operand);
+ };
+}
+
+char CWriter::ID = 0;
+
+
+
+static std::string CBEMangle(const std::string &S) {
+ std::string Result;
+
+ for (unsigned i = 0, e = S.size(); i != e; ++i)
+ if (isalnum(S[i]) || S[i] == '_') {
+ Result += S[i];
+ } else {
+ Result += '_';
+ Result += 'A'+(S[i]&15);
+ Result += 'A'+((S[i]>>4)&15);
+ Result += '_';
+ }
+ return Result;
+}
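CBEMangle keeps C identifier characters and expands every other byte into a four-character escape: '_', the low nibble offset from 'A', the high nibble likewise, then a closing '_'. A minimal standalone sketch of the same scheme (the DemoMangle helper and the driver are made up for illustration; they are not part of the backend):

#include <cctype>
#include <iostream>
#include <string>

// Same scheme as CBEMangle above: keep [A-Za-z0-9_]; encode anything else as
// '_' + (low nibble as 'A'..'P') + (high nibble as 'A'..'P') + '_'.
static std::string DemoMangle(const std::string &S) {
  std::string R;
  for (unsigned char C : S) {
    if (std::isalnum(C) || C == '_') {
      R += C;
    } else {
      R += '_';
      R += static_cast<char>('A' + (C & 15));
      R += static_cast<char>('A' + ((C >> 4) & 15));
      R += '_';
    }
  }
  return R;
}

int main() {
  // '.' is 0x2E: low nibble 14 -> 'O', high nibble 2 -> 'C'.
  std::cout << DemoMangle("foo.bar") << "\n"; // prints "foo_OC_bar"
}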
+
+std::string CWriter::getStructName(const StructType *ST) {
+ if (!ST->isAnonymous() && !ST->getName().empty())
+ return CBEMangle("l_"+ST->getName().str());
+
+ return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
+}
+
+
+/// printStructReturnPointerFunctionType - This is like printType for a struct
+/// return type: instead of printing the type as void (*)(Struct*, ...), it is
+/// printed as "Struct (*)(...)" for struct return functions.
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *TheTy) {
+ const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+ FunctionInnards << " (*) (";
+ bool PrintedType = false;
+
+ FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
+ const Type *RetTy = cast<PointerType>(*I)->getElementType();
+ unsigned Idx = 1;
+ for (++I, ++Idx; I != E; ++I, ++Idx) {
+ if (PrintedType)
+ FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ PrintedType = true;
+ }
+ if (FTy->isVarArg()) {
+ if (!PrintedType)
+      FunctionInnards << " int"; // dummy argument for empty vararg functions
+ FunctionInnards << ", ...";
+ } else if (!PrintedType) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
+}
+
+raw_ostream &
+CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::X86_MMXTyID:
+ return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
+ " __attribute__((vector_size(64))) " + NameSoFar);
+
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+#ifndef NDEBUG
+ errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+// Given a Type* and a variable name, this prints out the variable
+// declaration.
+//
+raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (!FTy->getNumParams())
+        FunctionInnards << " int"; // dummy argument for empty vararg functions
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+
+ // Check to see if the type is named.
+ if (!IgnoreName)
+ return Out << getStructName(STy) << ' ' << NameSoFar;
+
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (PTy->getElementType()->isArrayTy() ||
+ PTy->getElementType()->isVectorTy())
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ default:
+ llvm_unreachable("Unhandled case in getTypeProps!");
+ }
+
+ return Out;
+}
+
+void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
+
+  // As a special case, print the array as a string if it is an array of
+  // i8 values (which covers the old 'ubyte' and 'sbyte' cases).
+  //
+  const Type *ETy = CPA->getType()->getElementType();
+  bool isString = ETy == Type::getInt8Ty(CPA->getContext());
+
+ // Make sure the last character is a null char, as automatically added by C
+ if (isString && (CPA->getNumOperands() == 0 ||
+ !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
+ isString = false;
+
+ if (isString) {
+ Out << '\"';
+ // Keep track of whether the last number was a hexadecimal escape.
+ bool LastWasHex = false;
+
+ // Do not include the last character, which we know is null
+ for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
+ unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
+
+ // Print it out literally if it is a printable character. The only thing
+ // to be careful about is when the last letter output was a hex escape
+ // code, in which case we have to be careful not to print out hex digits
+ // explicitly (the C compiler thinks it is a continuation of the previous
+ // character, sheesh...)
+ //
+ if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
+ LastWasHex = false;
+ if (C == '"' || C == '\\')
+ Out << "\\" << (char)C;
+ else
+ Out << (char)C;
+ } else {
+ LastWasHex = false;
+ switch (C) {
+ case '\n': Out << "\\n"; break;
+ case '\t': Out << "\\t"; break;
+ case '\r': Out << "\\r"; break;
+ case '\v': Out << "\\v"; break;
+ case '\a': Out << "\\a"; break;
+ case '\"': Out << "\\\""; break;
+ case '\'': Out << "\\\'"; break;
+ default:
+ Out << "\\x";
+ Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+ Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ LastWasHex = true;
+ break;
+ }
+ }
+ }
+ Out << '\"';
+ } else {
+ Out << '{';
+ if (CPA->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPA->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPA->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+}
+
+void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
+ Out << '{';
+ if (CP->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CP->getOperand(0)), Static);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CP->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+}
+
+// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
+// textually as a double (rather than as a reference to a stack-allocated
+// variable). We decide this by converting CFP to a string and back into a
+// double, and then checking whether the conversion results in a bit-equal
+// double to the original value of CFP. This depends on us and the target C
+// compiler agreeing on the conversion process (which is pretty likely since we
+// only deal in IEEE FP).
+//
+static bool isFPCSafeToPrint(const ConstantFP *CFP) {
+ bool ignored;
+ // Do long doubles in hex for now.
+ if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
+ CFP->getType() != Type::getDoubleTy(CFP->getContext()))
+ return false;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%a", APF.convertToDouble());
+ if (!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3))
+ return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
+ return false;
+#else
+ std::string StrVal = ftostr(APF);
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like "Inf"
+ // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9')))
+ // Reparse stringized version!
+ return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
+ return false;
+#endif
+}
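The round-trip test above can be mimicked outside LLVM: print the value, parse it back, and accept the constant only if the bits match. A small hypothetical sketch of that idea (SafeToPrint and the plain decimal "%e" format are illustrative stand-ins for the APFloat/ftostr path):

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Print V in decimal, reparse it, and compare bit patterns for exact equality,
// mirroring the idea behind isFPCSafeToPrint.
static bool SafeToPrint(double V) {
  char Buf[64];
  std::snprintf(Buf, sizeof(Buf), "%e", V); // 6 digits after the decimal point
  double Back = std::strtod(Buf, 0);
  return std::memcmp(&Back, &V, sizeof(double)) == 0;
}

int main() {
  // 1.5 survives the round trip; 1.0/3.0 loses bits at this precision.
  std::printf("%d %d\n", SafeToPrint(1.5), SafeToPrint(1.0 / 3.0)); // "1 0"
}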
+
+/// Print out the casting for a cast operation. This emits the double cast
+/// needed for conversion to the destination type, where one is required.
+/// @brief Print a cast
+void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+ // Print the destination type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::IntToPtr:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
+ Out << '(';
+ printType(Out, DstTy);
+ Out << ')';
+ break;
+ case Instruction::ZExt:
+ case Instruction::PtrToInt:
+ case Instruction::FPToUI: // For these, make sure we get an unsigned dest
+ Out << '(';
+ printSimpleType(Out, DstTy, false);
+ Out << ')';
+ break;
+ case Instruction::SExt:
+ case Instruction::FPToSI: // For these, make sure we get a signed dest
+ Out << '(';
+ printSimpleType(Out, DstTy, true);
+ Out << ')';
+ break;
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ }
+
+ // Print the source type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::ZExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, false);
+ Out << ')';
+ break;
+ case Instruction::SIToFP:
+ case Instruction::SExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, true);
+ Out << ')';
+ break;
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // Avoid "cast to pointer from integer of different size" warnings
+ Out << "(unsigned long)";
+ break;
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ break; // These don't need a source cast.
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ break;
+ }
+}
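The reason printCast emits a second, source-side cast is easiest to see in the C it produces. A hand-written analogue for an i8 value widened to i32 (the variable names are made up for illustration; this is not verbatim backend output):

#include <cstdio>

int main() {
  signed char x = -1;
  // zext i8 -> i32: unsigned destination cast plus unsigned source cast,
  // so the high bits are zero-filled and the value becomes 255.
  unsigned int z = (unsigned int)(unsigned char)x;
  // sext i8 -> i32: both casts are signed, so the sign bit is propagated.
  signed int s = (signed int)(signed char)x;
  std::printf("%u %d\n", z, s); // prints "255 -1"
}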
+
+// printConstant - The LLVM Constant to C Constant converter.
+void CWriter::printConstant(Constant *CPV, bool Static) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Out << "(";
+ printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+ if (CE->getOpcode() == Instruction::SExt &&
+ CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
+ // Make sure we really sext from bool here by subtracting from 0
+ Out << "0-";
+ }
+ printConstant(CE->getOperand(0), Static);
+ if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
+ (CE->getOpcode() == Instruction::Trunc ||
+ CE->getOpcode() == Instruction::FPToUI ||
+ CE->getOpcode() == Instruction::FPToSI ||
+ CE->getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really truncate to bool here by anding with 1
+ Out << "&1u";
+ }
+ Out << ')';
+ return;
+
+ case Instruction::GetElementPtr:
+ Out << "(";
+ printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
+ gep_type_end(CPV), Static);
+ Out << ")";
+ return;
+ case Instruction::Select:
+ Out << '(';
+ printConstant(CE->getOperand(0), Static);
+ Out << '?';
+ printConstant(CE->getOperand(1), Static);
+ Out << ':';
+ printConstant(CE->getOperand(2), Static);
+ Out << ')';
+ return;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl: Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ case Instruction::ICmp:
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: Out << " < "; break;
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: Out << " <= "; break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: Out << " > "; break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: Out << " >= "; break;
+ default: llvm_unreachable("Illegal ICmp predicate");
+ }
+ break;
+ default: llvm_unreachable("Illegal opcode here!");
+ }
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ case Instruction::FCmp: {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+ Out << "0";
+ else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+ Out << "1";
+ else {
+ const char* op = 0;
+ switch (CE->getPredicate()) {
+ default: llvm_unreachable("Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+ Out << "llvm_fcmp_" << op << "(";
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ Out << ", ";
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ Out << ")";
+ }
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ default:
+#ifndef NDEBUG
+ errs() << "CWriter Error: Unhandled constant expression: "
+ << *CE << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*UNDEF*/";
+ if (!CPV->getType()->isVectorTy()) {
+ Out << "0)";
+ } else {
+ Out << "{})";
+ }
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ const Type* Ty = CI->getType();
+ if (Ty == Type::getInt1Ty(CPV->getContext()))
+ Out << (CI->getZExtValue() ? '1' : '0');
+ else if (Ty == Type::getInt32Ty(CPV->getContext()))
+ Out << CI->getZExtValue() << 'u';
+ else if (Ty->getPrimitiveSizeInBits() > 32)
+ Out << CI->getZExtValue() << "ull";
+ else {
+ Out << "((";
+ printSimpleType(Out, Ty, false) << ')';
+ if (CI->isMinValue(true))
+ Out << CI->getZExtValue() << 'u';
+ else
+ Out << CI->getSExtValue();
+ Out << ')';
+ }
+ return;
+ }
+
+ switch (CPV->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+ if (I != FPConstantMap.end()) {
+ // Because of FP precision problems we must load from a stack allocated
+ // value that holds the value in hex.
+ Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
+ "float" :
+ FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+ "double" :
+ "long double")
+ << "*)&FPConstant" << I->second << ')';
+ } else {
+ double V;
+ if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
+ V = FPC->getValueAPF().convertToFloat();
+ else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
+ V = FPC->getValueAPF().convertToDouble();
+ else {
+ // Long double. Convert the number to double, discarding precision.
+ // This is not awesome, but it at least makes the CBE output somewhat
+ // useful.
+ APFloat Tmp = FPC->getValueAPF();
+ bool LosesInfo;
+ Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
+ V = Tmp.convertToDouble();
+ }
+
+ if (IsNAN(V)) {
+ // The value is NaN
+
+ // FIXME the actual NaN bits should be emitted.
+ // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+ // it's 0x7ff4.
+ const unsigned long QuietNaN = 0x7ff8UL;
+ //const unsigned long SignalNaN = 0x7ff4UL;
+
+ // We need to grab the first part of the FP #
+ char Buffer[100];
+
+ uint64_t ll = DoubleToBits(V);
+ sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+ std::string Num(&Buffer[0], &Buffer[6]);
+ unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+ if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+ << Buffer << "\") /*nan*/ ";
+ else
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+ << Buffer << "\") /*nan*/ ";
+ } else if (IsInf(V)) {
+ // The value is Inf
+ if (V < 0) Out << '-';
+ Out << "LLVM_INF" <<
+ (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
+ << " /*inf*/ ";
+ } else {
+ std::string Num;
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ // Print out the constant as a floating point number.
+ char Buffer[100];
+ sprintf(Buffer, "%a", V);
+ Num = Buffer;
+#else
+ Num = ftostr(FPC->getValueAPF());
+#endif
+ Out << Num;
+ }
+ }
+ break;
+ }
+
+ case Type::ArrayTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ Out << "{ "; // Arrays are wrapped in struct types.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
+ printConstantArray(CA, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ }
+ Out << " }";
+ }
+ Out << " }"; // Arrays are wrapped in struct types.
+ break;
+
+ case Type::VectorTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
+ printConstantVector(CV, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const VectorType *VT = cast<VectorType>(CPV->getType());
+ Out << "{ ";
+ Constant *CZ = Constant::getNullValue(VT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::StructTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const StructType *ST = cast<StructType>(CPV->getType());
+ Out << '{';
+ if (ST->getNumElements()) {
+ Out << ' ';
+ printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
+ for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
+ }
+ }
+ Out << " }";
+ } else {
+ Out << '{';
+ if (CPV->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPV->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPV->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(CPV)) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*NULL*/0)";
+ break;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+ writeOperand(GV, Static);
+ break;
+ }
+ // FALL THROUGH
+ default:
+#ifndef NDEBUG
+ errs() << "Unknown constant type: " << *CPV << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+// Some constant expressions need to be casted back to the original types
+// because their operands were casted to the expected type. This function takes
+// care of detecting that case and printing the cast for the ConstantExpr.
+bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
+ bool NeedsExplicitCast = false;
+ const Type *Ty = CE->getOperand(0)->getType();
+ bool TypeIsSigned = false;
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv: NeedsExplicitCast = true; break;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+ case Instruction::SExt:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ TypeIsSigned = true;
+ break;
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ break;
+ default: break;
+ }
+ if (NeedsExplicitCast) {
+ Out << "((";
+ if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext()))
+ printSimpleType(Out, Ty, TypeIsSigned);
+ else
+ printType(Out, Ty); // not integer, sign doesn't matter
+ Out << ")(";
+ }
+ return NeedsExplicitCast;
+}
+
+// Print a constant assuming that it is the operand for a given Opcode. The
+// opcodes that care about sign need to cast their operands to the expected
+// type before the operation proceeds. This function does the casting.
+void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = CPV->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+ bool typeIsSigned = false;
+
+ // Based on the Opcode for which this Constant is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true so it gets
+ // casted below.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ shouldCast = true;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ shouldCast = true;
+ typeIsSigned = true;
+ break;
+ }
+
+ // Write out the casted constant if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, typeIsSigned);
+ Out << ")";
+ printConstant(CPV, false);
+ Out << ")";
+ } else
+ printConstant(CPV, false);
+}
+
+std::string CWriter::GetValueName(const Value *Operand) {
+
+ // Resolve potential alias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+ if (const Value *V = GA->resolveAliasedGlobal(false))
+ Operand = V;
+ }
+
+ // Mangle globals with the standard mangler interface for LLC compatibility.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
+ SmallString<128> Str;
+ Mang->getNameWithPrefix(Str, GV, false);
+ return CBEMangle(Str.str().str());
+ }
+
+ std::string Name = Operand->getName();
+
+ if (Name.empty()) { // Assign unique names to local temporaries.
+ unsigned &No = AnonValueNumbers[Operand];
+ if (No == 0)
+ No = ++NextAnonValueNumber;
+ Name = "tmp__" + utostr(No);
+ }
+
+ std::string VarName;
+ VarName.reserve(Name.capacity());
+
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
+
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_')) {
+      char buffer[5];
+      sprintf(buffer, "_%x_", (unsigned char)ch); // avoid sign-extending ch
+ VarName += buffer;
+ } else
+ VarName += ch;
+ }
+
+ return "llvm_cbe_" + VarName;
+}
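GetValueName prefixes local names with "llvm_cbe_" and hex-escapes every character that is not legal in a C identifier (globals instead go through the Mangler, as above). A self-contained sketch of that sanitisation (the DemoValueName helper is hypothetical):

#include <cstdio>
#include <string>

// Mirrors the loop above: identifier characters pass through, anything else
// becomes "_<hex>_", and the result gets the "llvm_cbe_" prefix.
static std::string DemoValueName(const std::string &Name) {
  std::string Var;
  for (char ch : Name) {
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
        (ch >= '0' && ch <= '9') || ch == '_') {
      Var += ch;
    } else {
      char buf[8];
      std::snprintf(buf, sizeof(buf), "_%x_", (unsigned char)ch);
      Var += buf;
    }
  }
  return "llvm_cbe_" + Var;
}

int main() {
  std::printf("%s\n", DemoValueName("a.b").c_str()); // prints "llvm_cbe_a_2e_b"
}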
+
+/// writeInstComputationInline - Emit the computation for the specified
+/// instruction inline, with no destination provided.
+void CWriter::writeInstComputationInline(Instruction &I) {
+ // We can't currently support integer types other than 1, 8, 16, 32, 64.
+ // Validate this.
+ const Type *Ty = I.getType();
+ if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
+ Ty!=Type::getInt8Ty(I.getContext()) &&
+ Ty!=Type::getInt16Ty(I.getContext()) &&
+ Ty!=Type::getInt32Ty(I.getContext()) &&
+ Ty!=Type::getInt64Ty(I.getContext()))) {
+ report_fatal_error("The C backend does not currently support integer "
+ "types of widths other than 1, 8, 16, 32, 64.\n"
+ "This is being tracked as PR 4158.");
+ }
+
+ // If this is a non-trivial bool computation, make sure to truncate down to
+ // a 1 bit value. This is important because we want "add i1 x, y" to return
+ // "0" when x and y are true, not "2" for example.
+ bool NeedBoolTrunc = false;
+ if (I.getType() == Type::getInt1Ty(I.getContext()) &&
+ !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+ NeedBoolTrunc = true;
+
+ if (NeedBoolTrunc)
+ Out << "((";
+
+ visit(I);
+
+ if (NeedBoolTrunc)
+ Out << ")&1)";
+}
+
+
+void CWriter::writeOperandInternal(Value *Operand, bool Static) {
+ if (Instruction *I = dyn_cast<Instruction>(Operand))
+ // Should we inline this instruction to build a tree?
+ if (isInlinableInst(*I) && !isDirectAlloca(I)) {
+ Out << '(';
+ writeInstComputationInline(*I);
+ Out << ')';
+ return;
+ }
+
+ Constant* CPV = dyn_cast<Constant>(Operand);
+
+ if (CPV && !isa<GlobalValue>(CPV))
+ printConstant(CPV, Static);
+ else
+ Out << GetValueName(Operand);
+}
+
+void CWriter::writeOperand(Value *Operand, bool Static) {
+ bool isAddressImplicit = isAddressExposed(Operand);
+ if (isAddressImplicit)
+ Out << "(&"; // Global variables are referenced as their addresses by llvm
+
+ writeOperandInternal(Operand, Static);
+
+ if (isAddressImplicit)
+ Out << ')';
+}
+
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
+// for the Instruction.
+bool CWriter::writeInstructionCast(const Instruction &I) {
+ const Type *Ty = I.getOperand(0)->getType();
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, false);
+ Out << ")(";
+ return true;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, true);
+ Out << ")(";
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+// Write the operand with a cast to another type based on the Opcode being used.
+// This will be used in cases where an instruction has specific type
+// requirements (usually signedness) for its operands.
+void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem: // Cast to unsigned first
+ shouldCast = true;
+ castIsSigned = false;
+ break;
+ case Instruction::GetElementPtr:
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem: // Cast to signed first
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
+void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
+  // This has to do a cast to ensure the operand has the right signedness.
+  // Also, if the operand is a pointer, we make sure to cast it to an integer
+  // when doing the comparison, both for signedness and so that the C compiler
+  // doesn't optimize things like "p < NULL" to false (p may, for example,
+  // hold an integer value).
+ bool shouldCast = Cmp.isRelational();
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (!shouldCast) {
+ writeOperand(Operand);
+ return;
+ }
+
+ // Should this be a signed comparison? If so, convert to signed.
+ bool castIsSigned = Cmp.isSigned();
+
+ // If the operand was a pointer, convert to a large integer type.
+ const Type* OpTy = Operand->getType();
+ if (OpTy->isPointerTy())
+ OpTy = TD->getIntPtrType(Operand->getContext());
+
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+}
+
+// generateCompilerSpecificCode - This is where we add conditional compilation
+// directives to cater to specific compilers as need be.
+//
+static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
+ const TargetData *TD) {
+ // Alloca is hard to get, and we don't want to include stdlib.h here.
+ Out << "/* get a declaration for alloca */\n"
+ << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+ << "#define alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#elif defined(__APPLE__)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#define longjmp _longjmp\n"
+ << "#define setjmp _setjmp\n"
+ << "#elif defined(__sun__)\n"
+ << "#if defined(__sparcv9)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#else\n"
+ << "extern void *__builtin_alloca(unsigned int);\n"
+ << "#endif\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(_MSC_VER)\n"
+ << "#define inline _inline\n"
+ << "#define alloca(x) _alloca(x)\n"
+ << "#else\n"
+ << "#include <alloca.h>\n"
+ << "#endif\n\n";
+
+ // We output GCC specific attributes to preserve 'linkonce'ness on globals.
+ // If we aren't being compiled with GCC, just drop these attributes.
+ Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
+ << "#define __attribute__(X)\n"
+ << "#endif\n\n";
+
+ // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __EXTERNAL_WEAK__\n"
+ << "#endif\n\n";
+
+ // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#endif\n\n";
+
+ // Add hidden visibility support. FIXME: APPLE_CC?
+ Out << "#if defined(__GNUC__)\n"
+ << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+ << "#endif\n\n";
+
+ // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
+ // From the GCC documentation:
+ //
+ // double __builtin_nan (const char *str)
+ //
+ // This is an implementation of the ISO C99 function nan.
+ //
+ // Since ISO C99 defines this function in terms of strtod, which we do
+ // not implement, a description of the parsing is in order. The string is
+ // parsed as by strtol; that is, the base is recognized by leading 0 or
+ // 0x prefixes. The number parsed is placed in the significand such that
+ // the least significant bit of the number is at the least significant
+ // bit of the significand. The number is truncated to fit the significand
+ // field provided. The significand is forced to be a quiet NaN.
+ //
+ // This function, if given a string literal, is evaluated early enough
+ // that it is considered a compile-time constant.
+ //
+ // float __builtin_nanf (const char *str)
+ //
+ // Similar to __builtin_nan, except the return type is float.
+ //
+ // double __builtin_inf (void)
+ //
+ // Similar to __builtin_huge_val, except a warning is generated if the
+ // target floating-point format does not support infinities. This
+ // function is suitable for implementing the ISO C99 macro INFINITY.
+ //
+ // float __builtin_inff (void)
+ //
+ // Similar to __builtin_inf, except the return type is float.
+ Out << "#ifdef __GNUC__\n"
+ << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
+ << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+ << "#define LLVM_INF __builtin_inf() /* Double */\n"
+ << "#define LLVM_INFF __builtin_inff() /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) "
+ "__builtin_prefetch(addr,rw,locality)\n"
+ << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+ << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+ << "#define LLVM_ASM __asm__\n"
+ << "#else\n"
+ << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_INF ((double)0.0) /* Double */\n"
+ << "#define LLVM_INFF 0.0F /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
+ << "#define __ATTRIBUTE_CTOR__\n"
+ << "#define __ATTRIBUTE_DTOR__\n"
+ << "#define LLVM_ASM(X)\n"
+ << "#endif\n\n";
+
+ Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
+ << "#define __builtin_stack_save() 0 /* not implemented */\n"
+ << "#define __builtin_stack_restore(X) /* noop */\n"
+ << "#endif\n\n";
+
+ // Output typedefs for 128-bit integers. If these are needed with a
+ // 32-bit target or with a C compiler that doesn't support mode(TI),
+ // more drastic measures will be needed.
+ Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
+ << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
+ << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
+ << "#endif\n\n";
+
+ // Output target-specific code that should be inserted into main.
+ Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
+}
+
+/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
+/// the StaticTors set.
+static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ Constant *FP = CS->getOperand(1);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP))
+ StaticTors.insert(F);
+ }
+}
+
+enum SpecialGlobalClass {
+ NotSpecial = 0,
+ GlobalCtors, GlobalDtors,
+ NotPrinted
+};
+
+/// getGlobalVariableClass - If this is a global that is specially recognized
+/// by LLVM, return a code that indicates how we should handle it.
+static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
+ // If this is a global ctors/dtors list, handle it now.
+ if (GV->hasAppendingLinkage() && GV->use_empty()) {
+ if (GV->getName() == "llvm.global_ctors")
+ return GlobalCtors;
+ else if (GV->getName() == "llvm.global_dtors")
+ return GlobalDtors;
+ }
+
+ // Otherwise, if it is other metadata, don't print it. This catches things
+ // like debug information.
+ if (GV->getSection() == "llvm.metadata")
+ return NotPrinted;
+
+ return NotSpecial;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const char *Str, unsigned Length,
+ raw_ostream &Out) {
+ for (unsigned i = 0; i != Length; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else if (C == '\\')
+ Out << "\\\\";
+ else if (C == '\"')
+ Out << "\\\"";
+ else if (C == '\t')
+ Out << "\\t";
+ else
+ Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ PrintEscapedString(Str.c_str(), Str.size(), Out);
+}
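+
+// For illustration, escaping derived purely from the routine above: the bytes
+// {'a', '\t', '"', 0x01} are printed as
+//   a\t\"\x01
+// so that the result can be re-emitted inside a quoted C string literal.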
+
+bool CWriter::doInitialization(Module &M) {
+ FunctionPass::doInitialization(M);
+
+ // Initialize
+ TheModule = &M;
+
+ TD = new TargetData(&M);
+ IL = new IntrinsicLowering(*TD);
+ IL->AddPrototypes(M);
+
+#if 0
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
+ std::string E;
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TAsm = Match->createMCAsmInfo(Triple);
+#endif
+ TAsm = new CBEMCAsmInfo();
+ TCtx = new MCContext(*TAsm, NULL);
+ Mang = new Mangler(*TCtx, *TD);
+
+ // Keep track of which functions are static ctors/dtors so they can have
+ // an attribute added to their prototypes.
+ std::set<Function*> StaticCtors, StaticDtors;
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ switch (getGlobalVariableClass(I)) {
+ default: break;
+ case GlobalCtors:
+ FindStaticTors(I, StaticCtors);
+ break;
+ case GlobalDtors:
+ FindStaticTors(I, StaticDtors);
+ break;
+ }
+ }
+
+  // Emit the standard includes and the compiler-specific glue (alloca, etc.).
+ Out << "/* Provide Declarations */\n";
+ Out << "#include <stdarg.h>\n"; // Varargs support
+ Out << "#include <setjmp.h>\n"; // Unwind support
+ Out << "#include <limits.h>\n"; // With overflow intrinsics support.
+ generateCompilerSpecificCode(Out, TD);
+
+ // Provide a definition for `bool' if not compiling with a C++ compiler.
+ Out << "\n"
+ << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+ << "\n\n/* Support for floating point constants */\n"
+ << "typedef unsigned long long ConstantDoubleTy;\n"
+ << "typedef unsigned int ConstantFloatTy;\n"
+ << "typedef struct { unsigned long long f1; unsigned short f2; "
+ "unsigned short pad[3]; } ConstantFP80Ty;\n"
+ // This is used for both kinds of 128-bit long double; meaning differs.
+ << "typedef struct { unsigned long long f1; unsigned long long f2; }"
+ " ConstantFP128Ty;\n"
+ << "\n\n/* Global Declarations */\n";
+
+ // First output all the declarations for the program, because C requires
+ // Functions & globals to be declared before they are used.
+ //
+ if (!M.getModuleInlineAsm().empty()) {
+ Out << "/* Module asm statements */\n"
+ << "asm(";
+
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M.getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\\n\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
+ Out << "\");\n"
+ << "/* End Module asm statements */\n";
+ }
+
+ // Loop over the symbol table, emitting all named constants.
+ printModuleTypes();
+
+ // Global variable declarations...
+ if (!M.global_empty()) {
+ Out << "\n/* External Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+ I->hasCommonLinkage())
+ Out << "extern ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else
+ continue; // Internal Global
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ Out << ";\n";
+ }
+ }
+
+ // Function declarations
+ Out << "\n/* Function Declarations */\n";
+ Out << "double fmod(double, double);\n"; // Support for FP rem
+ Out << "float fmodf(float, float);\n";
+ Out << "long double fmodl(long double, long double);\n";
+
+ // Store the intrinsics which will be declared/defined below.
+ SmallVector<const Function*, 8> intrinsicsToDefine;
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ // Don't print declarations for intrinsic functions.
+ // Store the used intrinsics, which need to be explicitly defined.
+ if (I->isIntrinsic()) {
+ switch (I->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ intrinsicsToDefine.push_back(I);
+ break;
+ }
+ continue;
+ }
+
+ if (I->getName() == "setjmp" ||
+ I->getName() == "longjmp" || I->getName() == "_setjmp")
+ continue;
+
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+
+ Out << ";\n";
+ }
+
+ // Output the global variable declarations
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else
+ Out << "extern ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasCommonLinkage()) // FIXME is this right?
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable definitions and contents...
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Definitions and Initialization */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else if (I->hasDLLExportLinkage())
+ Out << "__declspec(dllexport) ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasCommonLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ // If the initializer is not null, emit the initializer. If it is null,
+ // we try to avoid emitting large amounts of zeros. The problem with
+ // this, however, occurs when the variable has weak linkage. In this
+ // case, the assembler will complain about the variable being both weak
+ // and common, so we disable this optimization.
+ // FIXME common linkage should avoid this problem.
+ if (!I->getInitializer()->isNullValue()) {
+ Out << " = " ;
+ writeOperand(I->getInitializer(), true);
+ } else if (I->hasWeakLinkage()) {
+ // We have to specify an initializer, but it doesn't have to be
+ // complete. If the value is an aggregate, print out { 0 }, and let
+ // the compiler figure out the rest of the zeros.
+ Out << " = " ;
+ if (I->getInitializer()->getType()->isStructTy() ||
+ I->getInitializer()->getType()->isVectorTy()) {
+ Out << "{ 0 }";
+ } else if (I->getInitializer()->getType()->isArrayTy()) {
+ // As with structs and vectors, but with an extra set of braces
+ // because arrays are wrapped in structs.
+ Out << "{ { 0 } }";
+ } else {
+ // Just print it out normally.
+ writeOperand(I->getInitializer(), true);
+ }
+ }
+ Out << ";\n";
+ }
+ }
+
+ if (!M.empty())
+ Out << "\n\n/* Function Bodies */\n";
+
+ // Emit some helper functions for dealing with FCMP instruction's
+ // predicates
+ Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
+ Out << "return X == X && Y == Y; }\n";
+ Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
+ Out << "return X != X || Y != Y; }\n";
+ Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
+ Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
+ Out << "return X != Y; }\n";
+ Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
+ Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
+ Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
+ Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
+ Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
+ Out << "return X == Y ; }\n";
+ Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
+ Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
+ Out << "return X < Y ; }\n";
+ Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
+ Out << "return X > Y ; }\n";
+ Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
+ Out << "return X <= Y ; }\n";
+ Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
+ Out << "return X >= Y ; }\n";
+
+ // Emit definitions of the intrinsics.
+ for (SmallVector<const Function*, 8>::const_iterator
+ I = intrinsicsToDefine.begin(),
+ E = intrinsicsToDefine.end(); I != E; ++I) {
+ printIntrinsicDefinition(**I, Out);
+ }
+
+ return false;
+}
+
+
+/// Output all floating point constants that cannot be printed accurately...
+void CWriter::printFloatingPointConstants(Function &F) {
+  // Scan the function for floating point constants.  If any FP constant is
+  // used in the function, we want to redirect it here so that we do not
+  // depend on the precision of the printed form, unless the printed form
+  // preserves precision.
+  //
+ for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
+ I != E; ++I)
+ printFloatingPointConstants(*I);
+
+ Out << '\n';
+}
+
+void CWriter::printFloatingPointConstants(const Constant *C) {
+ // If this is a constant expression, recursively check for constant fp values.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ printFloatingPointConstants(CE->getOperand(i));
+ return;
+ }
+
+ // Otherwise, check for a FP constant that we need to print.
+ const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
+ if (FPC == 0 ||
+ // Do not put in FPConstantMap if safe.
+ isFPCSafeToPrint(FPC) ||
+ // Already printed this constant?
+ FPConstantMap.count(FPC))
+ return;
+
+ FPConstantMap[FPC] = FPCounter; // Number the FP constants
+
+ if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
+ double Val = FPC->getValueAPF().convertToDouble();
+ uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "ULL; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
+ float Val = FPC->getValueAPF().convertToFloat();
+ uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ Out << "static const ConstantFloatTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "U; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
+ // api needed to prevent premature destruction
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
+ << " = { 0x" << utohexstr(p[0])
+ << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
+ << "}; /* Long double constant */\n";
+ } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
+ FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
+ << " = { 0x"
+ << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
+ << "}; /* Long double constant */\n";
+
+ } else {
+ llvm_unreachable("Unknown float type!");
+ }
+}
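+
+// For illustration, a double constant such as 0.1, whose decimal form does not
+// round-trip exactly, is expected to be emitted by the code above roughly as
+//   static const ConstantDoubleTy FPConstant0 = 0x3FB999999999999AULL; /* 0.1 */
+// and later uses of the constant are redirected through that variable.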
+
+
+/// printModuleTypes - Emit a typedef for each struct type used in the module,
+/// then print out the struct definitions themselves.
+///
+void CWriter::printModuleTypes() {
+ Out << "/* Helper union for bitcasts */\n";
+ Out << "typedef union {\n";
+ Out << " unsigned int Int32;\n";
+ Out << " unsigned long long Int64;\n";
+ Out << " float Float;\n";
+ Out << " double Double;\n";
+ Out << "} llvmBitCastUnion;\n";
+
+ // Get all of the struct types used in the module.
+ std::vector<StructType*> StructTypes;
+ TheModule->findUsedStructTypes(StructTypes);
+
+ if (StructTypes.empty()) return;
+
+ Out << "/* Structure forward decls */\n";
+
+ unsigned NextTypeID = 0;
+
+ // If any of them are missing names, add a unique ID to UnnamedStructIDs.
+ // Print out forward declarations for structure types.
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *ST = StructTypes[i];
+
+ if (ST->isAnonymous() || ST->getName().empty())
+ UnnamedStructIDs[ST] = NextTypeID++;
+
+ std::string Name = getStructName(ST);
+
+ Out << "typedef struct " << Name << ' ' << Name << ";\n";
+ }
+
+ Out << '\n';
+
+ // Keep track of which structures have been printed so far.
+ SmallPtrSet<const Type *, 16> StructPrinted;
+
+ // Loop over all structures then push them into the stack so they are
+ // printed in the correct order.
+ //
+ Out << "/* Structure contents */\n";
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
+ if (StructTypes[i]->isStructTy())
+ // Only print out used types!
+ printContainedStructs(StructTypes[i], StructPrinted);
+}
+
+// Recursively print all of the struct types that the given type depends on,
+// then print the struct type itself (each type is printed exactly once).
+//
+// TODO: Make this work properly with vector types
+//
+void CWriter::printContainedStructs(const Type *Ty,
+ SmallPtrSet<const Type *, 16> &StructPrinted) {
+ // Don't walk through pointers.
+ if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // Print all contained types first.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ printContainedStructs(*I, StructPrinted);
+
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ // Check to see if we have already printed this struct.
+ if (!StructPrinted.insert(Ty)) return;
+
+ // Print structure type out.
+ printType(Out, ST, false, getStructName(ST), true);
+ Out << ";\n\n";
+ }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F->hasStructRetAttr();
+
+ if (F->hasLocalLinkage()) Out << "static ";
+ if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Out << "__attribute__((stdcall)) ";
+ break;
+ case CallingConv::X86_FastCall:
+ Out << "__attribute__((fastcall)) ";
+ break;
+ case CallingConv::X86_ThisCall:
+ Out << "__attribute__((thiscall)) ";
+ break;
+ default:
+ break;
+ }
+
+ // Loop over the arguments, printing them...
+ const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ const AttrListPtr &PAL = F->getAttributes();
+
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+
+ // Print out the name...
+ FunctionInnards << GetValueName(F) << '(';
+
+ bool PrintedArg = false;
+ if (!F->isDeclaration()) {
+ if (!F->arg_empty()) {
+ Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ std::string ArgName;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ if (I->hasName() || !Prototype)
+ ArgName = GetValueName(I);
+ else
+ ArgName = "";
+ const Type *ArgTy = I->getType();
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ ByValParams.insert(I);
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
+ ArgName);
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+ } else {
+ // Loop over the arguments, printing them.
+ FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+
+ if (!PrintedArg && FT->isVarArg()) {
+ FunctionInnards << "int vararg_dummy_arg";
+ PrintedArg = true;
+ }
+
+ // Finish printing arguments... if this is a vararg function, print the ...,
+ // unless there are no known types, in which case, we just emit ().
+ //
+ if (FT->isVarArg() && PrintedArg) {
+ FunctionInnards << ",..."; // Output varargs portion of signature!
+ } else if (!FT->isVarArg() && !PrintedArg) {
+ FunctionInnards << "void"; // ret() -> ret(void) in C.
+ }
+ FunctionInnards << ')';
+
+  // Get the return type for the function.
+ const Type *RetTy;
+ if (!isStructReturn)
+ RetTy = F->getReturnType();
+ else {
+ // If this is a struct-return function, print the struct-return type.
+ RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
+ }
+
+ // Print out the return type and the signature built above.
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
+ FunctionInnards.str());
+}
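+
+// Rough sketch of the result (type rendering and the "llvm_cbe_" argument name
+// are the usual CBE conventions, shown here only for illustration): a plain
+// "i32 @foo(i32 %x)" with no attributes or special calling convention comes
+// out as
+//   unsigned int foo(unsigned int llvm_cbe_x)
+// with integer types printed unsigned unless the parameter carries SExt.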
+
+static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
+ (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
+}
+
+void CWriter::printFunction(Function &F) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F.hasStructRetAttr();
+
+ printFunctionSignature(&F, false);
+ Out << " {\n";
+
+ // If this is a struct return function, handle the result with magic.
+ if (isStructReturn) {
+ const Type *StructTy =
+ cast<PointerType>(F.arg_begin()->getType())->getElementType();
+ Out << " ";
+ printType(Out, StructTy, false, "StructReturn");
+ Out << "; /* Struct return temporary */\n";
+
+ Out << " ";
+ printType(Out, F.arg_begin()->getType(), false,
+ GetValueName(F.arg_begin()));
+ Out << " = &StructReturn;\n";
+ }
+
+ bool PrintedVar = false;
+
+ // print local variable information for the function
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ Out << " ";
+ printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
+ Out << "; /* Address-exposed local */\n";
+ PrintedVar = true;
+ } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
+ !isInlinableInst(*I)) {
+ Out << " ";
+ printType(Out, I->getType(), false, GetValueName(&*I));
+ Out << ";\n";
+
+ if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
+ Out << " ";
+ printType(Out, I->getType(), false,
+ GetValueName(&*I)+"__PHI_TEMPORARY");
+ Out << ";\n";
+ }
+ PrintedVar = true;
+ }
+ // We need a temporary for the BitCast to use so it can pluck a value out
+ // of a union to do the BitCast. This is separate from the need for a
+ // variable to hold the result of the BitCast.
+ if (isFPIntBitCast(*I)) {
+ Out << " llvmBitCastUnion " << GetValueName(&*I)
+ << "__BITCAST_TEMPORARY;\n";
+ PrintedVar = true;
+ }
+ }
+
+ if (PrintedVar)
+ Out << '\n';
+
+ if (F.hasExternalLinkage() && F.getName() == "main")
+ Out << " CODE_FOR_MAIN();\n";
+
+ // print the basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (L->getHeader() == BB && L->getParentLoop() == 0)
+ printLoop(L);
+ } else {
+ printBasicBlock(BB);
+ }
+ }
+
+ Out << "}\n\n";
+}
+
+void CWriter::printLoop(Loop *L) {
+ Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
+ << "' to make GCC happy */\n";
+ for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ Loop *BBLoop = LI->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+ printLoop(BBLoop);
+ }
+ Out << " } while (1); /* end of syntactic loop '"
+ << L->getHeader()->getName() << "' */\n";
+}
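+
+// Illustrative shape of the emitted code: each natural loop becomes a
+// syntactic loop of the form
+//   do {     /* Syntactic loop 'bb' to make GCC happy */
+//     ... the loop's blocks, nested loops printed recursively ...
+//   } while (1); /* end of syntactic loop 'bb' */
+// and control leaves it through explicit gotos printed by the branch code.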
+
+void CWriter::printBasicBlock(BasicBlock *BB) {
+
+ // Don't print the label for the basic block if there are no uses, or if
+ // the only terminator use is the predecessor basic block's terminator.
+ // We have to scan the use list because PHI nodes use basic blocks too but
+ // do not require a label to be generated.
+ //
+ bool NeedsLabel = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (isGotoCodeNecessary(*PI, BB)) {
+ NeedsLabel = true;
+ break;
+ }
+
+ if (NeedsLabel) Out << GetValueName(BB) << ":\n";
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
+ ++II) {
+ if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
+ if (II->getType() != Type::getVoidTy(BB->getContext()) &&
+ !isInlineAsm(*II))
+ outputLValue(II);
+ else
+ Out << " ";
+ writeInstComputationInline(*II);
+ Out << ";\n";
+ }
+ }
+
+ // Don't emit prefix or suffix for the terminator.
+ visit(*BB->getTerminator());
+}
+
+
+// Specific Instruction type classes... note that all of the casts are
+// necessary because we use the instruction classes as opaque types...
+//
+void CWriter::visitReturnInst(ReturnInst &I) {
+ // If this is a struct return function, return the temporary struct.
+ bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
+
+ if (isStructReturn) {
+ Out << " return StructReturn;\n";
+ return;
+ }
+
+  // Don't output a void return if this is the last basic block in the function
+  // and the block contains more than just this return instruction.
+  if (I.getNumOperands() == 0 &&
+      &*--I.getParent()->getParent()->end() == I.getParent() &&
+      I.getParent()->size() != 1) {
+    return;
+  }
+
+ Out << " return";
+ if (I.getNumOperands()) {
+ Out << ' ';
+ writeOperand(I.getOperand(0));
+ }
+ Out << ";\n";
+}
+
+void CWriter::visitSwitchInst(SwitchInst &SI) {
+
+ Out << " switch (";
+ writeOperand(SI.getOperand(0));
+ Out << ") {\n default:\n";
+ printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
+ printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
+ Out << ";\n";
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+ Out << " case ";
+ writeOperand(SI.getOperand(i));
+ Out << ":\n";
+ BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
+ printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
+ printBranchToBlock(SI.getParent(), Succ, 2);
+ if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
+ Out << " break;\n";
+ }
+ Out << " }\n";
+}
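+
+// Illustrative shape of the output (labels and the case value are
+// placeholders):
+//   switch (llvm_cbe_x) {
+//   default:
+//     goto <default-dest>;
+// ;
+//   case <case-value>:
+//     goto <case-dest>;
+//     break;   /* printed only when <case-dest> is the textually next block */
+//   }
+// PHI temporary copies, when present, are printed just before each goto.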
+
+void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
+ Out << " goto *(void*)(";
+ writeOperand(IBI.getOperand(0));
+ Out << ");\n";
+}
+
+void CWriter::visitUnreachableInst(UnreachableInst &I) {
+ Out << " /*UNREACHABLE*/;\n";
+}
+
+bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
+  /// FIXME: This should be re-enabled, but only once it is made safe with
+  /// respect to loop reordering.
+  return true;
+
+ if (llvm::next(Function::iterator(From)) != Function::iterator(To))
+ return true; // Not the direct successor, we need a goto.
+
+ //isa<SwitchInst>(From->getTerminator())
+
+ if (LI->getLoopFor(From) != LI->getLoopFor(To))
+ return true;
+ return false;
+}
+
+void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
+ BasicBlock *Successor,
+ unsigned Indent) {
+ for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Now we have to do the printing.
+ Value *IV = PN->getIncomingValueForBlock(CurBlock);
+ if (!isa<UndefValue>(IV)) {
+ Out << std::string(Indent, ' ');
+ Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
+ writeOperand(IV);
+ Out << "; /* for PHI node */\n";
+ }
+ }
+}
+
+void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
+ unsigned Indent) {
+ if (isGotoCodeNecessary(CurBB, Succ)) {
+ Out << std::string(Indent, ' ') << " goto ";
+ writeOperand(Succ);
+ Out << ";\n";
+ }
+}
+
+// Branch instruction printing - Avoid printing out a branch to a basic block
+// that immediately succeeds the current one.
+//
+void CWriter::visitBranchInst(BranchInst &I) {
+
+ if (I.isConditional()) {
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
+ Out << " if (";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
+
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
+ Out << " } else {\n";
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+ } else {
+ // First goto not necessary, assume second one is...
+ Out << " if (!";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+
+ Out << " }\n";
+ } else {
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
+ }
+ Out << "\n";
+}
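+
+// Illustrative shape for a conditional branch where both targets need gotos
+// (PHI temporary copies, if any, appear just before each goto):
+//   if (llvm_cbe_cond) {
+//     goto <true-dest>;
+//   } else {
+//     goto <false-dest>;
+//   }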
+
+// PHI nodes get copied into temporary values at the end of predecessor basic
+// blocks. We now need to copy these temporary values into the REAL value for
+// the PHI.
+void CWriter::visitPHINode(PHINode &I) {
+ writeOperand(&I);
+ Out << "__PHI_TEMPORARY";
+}
+
+
+void CWriter::visitBinaryOperator(Instruction &I) {
+ // binary instructions, shift instructions, setCond instructions.
+ assert(!I.getType()->isPointerTy());
+
+ // We must cast the results of binary operations which might be promoted.
+ bool needsCast = false;
+  if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
+      (I.getType() == Type::getInt16Ty(I.getContext())) ||
+      (I.getType() == Type::getFloatTy(I.getContext()))) {
+ needsCast = true;
+ Out << "((";
+ printType(Out, I.getType(), false);
+ Out << ")(";
+ }
+
+ // If this is a negation operation, print it out as such. For FP, we don't
+ // want to print "-0.0 - X".
+ if (BinaryOperator::isNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (BinaryOperator::isFNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (I.getOpcode() == Instruction::FRem) {
+ // Output a call to fmod/fmodf instead of emitting a%b
+ if (I.getType() == Type::getFloatTy(I.getContext()))
+ Out << "fmodf(";
+ else if (I.getType() == Type::getDoubleTy(I.getContext()))
+ Out << "fmod(";
+ else // all 3 flavors of long double
+ Out << "fmodl(";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ } else {
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+    // Certain instructions require the operand to be forced to a specific
+    // type, so we use writeOperandWithCast here instead of writeOperand.
+    // Similarly below for operand 1.
+ writeOperandWithCast(I.getOperand(0), I.getOpcode());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl : Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid operator type!" << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ }
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
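+
+// A sketch of the result for an i8 add (operand names illustrative): the
+// needsCast logic above casts the promoted result back to i8 and
+// writeOperandWithCast casts each operand to unsigned, giving roughly
+//   ((unsigned char)(((unsigned char)llvm_cbe_a) + ((unsigned char)llvm_cbe_b)))
+// possibly with one more wrapping cast supplied by writeInstructionCast.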
+
+void CWriter::visitICmpInst(ICmpInst &I) {
+ // We must cast the results of icmp which might be promoted.
+ bool needsCast = false;
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+  // Certain icmp predicates require the operand to be forced to a specific
+  // type, so we use writeOperandWithCast here instead of writeOperand.
+  // Similarly below for operand 1.
+ writeOperandWithCast(I.getOperand(0), I);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE: Out << " <= "; break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE: Out << " >= "; break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: Out << " < "; break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: Out << " > "; break;
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid icmp predicate!" << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ writeOperandWithCast(I.getOperand(1), I);
+ if (NeedsClosingParens)
+ Out << "))";
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitFCmpInst(FCmpInst &I) {
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
+ Out << "0";
+ return;
+ }
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
+ Out << "1";
+ return;
+ }
+
+ const char* op = 0;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+
+ Out << "llvm_fcmp_" << op << "(";
+ // Write the first operand
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ // Write the second operand
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
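+
+// For example, an ordered "olt" comparison of two doubles becomes a call to
+// one of the helpers emitted in doInitialization above:
+//   llvm_fcmp_olt(llvm_cbe_x, llvm_cbe_y)
+// (operand names illustrative); the helper reduces to "X < Y", with the
+// unordered predicates handled by the llvm_fcmp_u* variants.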
+
+static const char * getFloatBitCastField(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ default: llvm_unreachable("Invalid Type");
+ case Type::FloatTyID: return "Float";
+ case Type::DoubleTyID: return "Double";
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits <= 32)
+ return "Int32";
+ else
+ return "Int64";
+ }
+ }
+}
+
+void CWriter::visitCastInst(CastInst &I) {
+ const Type *DstTy = I.getType();
+ const Type *SrcTy = I.getOperand(0)->getType();
+ if (isFPIntBitCast(I)) {
+ Out << '(';
+ // These int<->float and long<->double casts need to be handled specially
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
+ writeOperand(I.getOperand(0));
+ Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getType());
+ Out << ')';
+ return;
+ }
+
+ Out << '(';
+ printCast(I.getOpcode(), SrcTy, DstTy);
+
+ // Make a sext from i1 work by subtracting the i1 from 0 (an int).
+ if (SrcTy == Type::getInt1Ty(I.getContext()) &&
+ I.getOpcode() == Instruction::SExt)
+ Out << "0-";
+
+ writeOperand(I.getOperand(0));
+
+ if (DstTy == Type::getInt1Ty(I.getContext()) &&
+ (I.getOpcode() == Instruction::Trunc ||
+ I.getOpcode() == Instruction::FPToUI ||
+ I.getOpcode() == Instruction::FPToSI ||
+ I.getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really get a trunc to bool by anding the operand with 1
+ Out << "&1u";
+ }
+ Out << ')';
+}
+
+void CWriter::visitSelectInst(SelectInst &I) {
+ Out << "((";
+ writeOperand(I.getCondition());
+ Out << ") ? (";
+ writeOperand(I.getTrueValue());
+ Out << ") : (";
+ writeOperand(I.getFalseValue());
+ Out << "))";
+}
+
+// printLimitValue - Print the limits.h macro name (or literal value) for the
+// maximum or minimum of the given integer type.
+static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+ raw_ostream &Out) {
+ const char* type;
+ const char* sprefix = "";
+
+ unsigned NumBits = Ty.getBitWidth();
+ if (NumBits <= 8) {
+ type = "CHAR";
+ sprefix = "S";
+ } else if (NumBits <= 16) {
+ type = "SHRT";
+ } else if (NumBits <= 32) {
+ type = "INT";
+ } else if (NumBits <= 64) {
+ type = "LLONG";
+ } else {
+ llvm_unreachable("Bit widths > 64 not implemented yet");
+ }
+
+  if (isSigned)
+    Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+  else if (isMax)
+    Out << "U" << type << "_MAX";
+  else
+    Out << "0"; // The minimum of an unsigned type is simply 0.
+}
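+
+// For example, a signed i32 prints as INT_MAX or INT_MIN and a signed i8 as
+// SCHAR_MAX or SCHAR_MIN; these macros feed the overflow checks generated in
+// printIntrinsicDefinition below.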
+
+#ifndef NDEBUG
+static bool isSupportedIntegerSize(const IntegerType &T) {
+ return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+ T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+#endif
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+ const FunctionType *funT = F.getFunctionType();
+ const Type *retT = F.getReturnType();
+ const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+ assert(isSupportedIntegerSize(*elemT) &&
+ "CBackend does not support arbitrary size integers.");
+ assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+ elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+ switch (F.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported Intrinsic.");
+ case Intrinsic::uadd_with_overflow:
+ // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+ // Rty r;
+ // r.field0 = a + b;
+ // r.field1 = (r.field0 < a);
+ // return r;
+ // }
+ Out << "static inline ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, false);
+ Out << "a,";
+ printSimpleType(Out, elemT, false);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field0 = a + b;\n";
+ Out << " r.field1 = (r.field0 < a);\n";
+ Out << " return r;\n}\n";
+ break;
+
+ case Intrinsic::sadd_with_overflow:
+ // static inline Rty sadd_ixx(ixx a, ixx b) {
+ // Rty r;
+ // r.field1 = (b > 0 && a > XX_MAX - b) ||
+ // (b < 0 && a < XX_MIN - b);
+ // r.field0 = r.field1 ? 0 : a + b;
+ // return r;
+ // }
+ Out << "static ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, true);
+ Out << "a,";
+ printSimpleType(Out, elemT, true);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field1 = (b > 0 && a > ";
+ printLimitValue(*elemT, true, true, Out);
+ Out << " - b) || (b < 0 && a < ";
+ printLimitValue(*elemT, true, false, Out);
+ Out << " - b);\n";
+ Out << " r.field0 = r.field1 ? 0 : a + b;\n";
+ Out << " return r;\n}\n";
+ break;
+ }
+}
+
+void CWriter::lowerIntrinsics(Function &F) {
+  // This is used to keep track of the functions that intrinsic calls get
+  // lowered to.  We must emit their prototypes before the function body, since
+  // the lowering is only expanded on first use (by the loop below).
+ std::vector<Function*> prototypesToGen;
+
+ // Examine all the instructions in this function to find the intrinsics that
+ // need to be lowered.
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ case Intrinsic::memory_barrier:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::setjmp:
+ case Intrinsic::longjmp:
+ case Intrinsic::prefetch:
+ case Intrinsic::powi:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ case Intrinsic::ppc_altivec_lvsl:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // We directly implement these intrinsics
+ break;
+ default:
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we handle it.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ // If we handle it, don't lower it.
+ if (BuiltinName[0]) break;
+
+ // All other intrinsic calls we must lower.
+ Instruction *Before = 0;
+ if (CI != &BB->front())
+ Before = prior(BasicBlock::iterator(CI));
+
+ IL->LowerIntrinsicCall(CI);
+ if (Before) { // Move iterator to instruction after call
+ I = Before; ++I;
+ } else {
+ I = BB->begin();
+ }
+ // If the intrinsic got lowered to another call, and that call has
+ // a definition then we need to make sure its prototype is emitted
+ // before any calls to it.
+ if (CallInst *Call = dyn_cast<CallInst>(I))
+ if (Function *NewF = Call->getCalledFunction())
+ if (!NewF->isDeclaration())
+ prototypesToGen.push_back(NewF);
+
+ break;
+ }
+
+ // We may have collected some prototypes to emit in the loop above.
+ // Emit them now, before the function that uses them is emitted. But,
+ // be careful not to emit them twice.
+ std::vector<Function*>::iterator I = prototypesToGen.begin();
+ std::vector<Function*>::iterator E = prototypesToGen.end();
+ for ( ; I != E; ++I) {
+ if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
+ Out << '\n';
+ printFunctionSignature(*I, true);
+ Out << ";\n";
+ }
+ }
+}
+
+void CWriter::visitCallInst(CallInst &I) {
+ if (isa<InlineAsm>(I.getCalledValue()))
+ return visitInlineAsm(I);
+
+ bool WroteCallee = false;
+
+ // Handle intrinsic function calls first...
+ if (Function *F = I.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ if (visitBuiltinCall(I, ID, WroteCallee))
+ return;
+
+ Value *Callee = I.getCalledValue();
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ // If this is a call to a struct-return function, assign to the first
+ // parameter instead of passing it to the call.
+ const AttrListPtr &PAL = I.getAttributes();
+ bool hasByVal = I.hasByValArgument();
+ bool isStructRet = I.hasStructRetAttr();
+ if (isStructRet) {
+ writeOperandDeref(I.getArgOperand(0));
+ Out << " = ";
+ }
+
+ if (I.isTailCall()) Out << " /*tail*/ ";
+
+ if (!WroteCallee) {
+ // If this is an indirect call to a struct return function, we need to cast
+ // the pointer. Ditto for indirect calls with byval arguments.
+ bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
+
+ // GCC is a real PITA. It does not permit codegening casts of functions to
+ // function pointers if they are in a call (it generates a trap instruction
+ // instead!). We work around this by inserting a cast to void* in between
+ // the function and the function pointer cast. Unfortunately, we can't just
+ // form the constant expression here, because the folder will immediately
+ // nuke it.
+ //
+    // Note finally that this is completely unsafe.  ANSI C does not guarantee
+    // that void* and function pointers have the same size. :( To deal with
+    // this in the common case, we only handle casts where the number of
+    // arguments passed matches exactly.
+ //
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+ if (CE->isCast())
+ if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+ NeedsCast = true;
+ Callee = RF;
+ }
+
+ if (NeedsCast) {
+ // Ok, just cast the pointer type.
+ Out << "((";
+ if (isStructRet)
+ printStructReturnPointerFunctionType(Out, PAL,
+ cast<PointerType>(I.getCalledValue()->getType()));
+ else if (hasByVal)
+ printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
+ else
+ printType(Out, I.getCalledValue()->getType());
+ Out << ")(void*)";
+ }
+ writeOperand(Callee);
+ if (NeedsCast) Out << ')';
+ }
+
+ Out << '(';
+
+ bool PrintedArg = false;
+  if (FTy->isVarArg() && !FTy->getNumParams()) {
+ Out << "0 /*dummy arg*/";
+ PrintedArg = true;
+ }
+
+ unsigned NumDeclaredParams = FTy->getNumParams();
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ unsigned ArgNo = 0;
+ if (isStructRet) { // Skip struct return argument.
+ ++AI;
+ ++ArgNo;
+ }
+
+
+ for (; AI != AE; ++AI, ++ArgNo) {
+ if (PrintedArg) Out << ", ";
+ if (ArgNo < NumDeclaredParams &&
+ (*AI)->getType() != FTy->getParamType(ArgNo)) {
+ Out << '(';
+ printType(Out, FTy->getParamType(ArgNo),
+ /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
+ Out << ')';
+ }
+ // Check if the argument is expected to be passed by value.
+ if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ writeOperandDeref(*AI);
+ else
+ writeOperand(*AI);
+ PrintedArg = true;
+ }
+ Out << ')';
+}
+
+/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
+/// if the entire call is handled, return false if it wasn't handled, and
+/// optionally set 'WroteCallee' if the callee has already been printed out.
+bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
+ bool &WroteCallee) {
+ switch (ID) {
+ default: {
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we emit it here.
+ const char *BuiltinName = "";
+ Function *F = I.getCalledFunction();
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+ Out << BuiltinName;
+ WroteCallee = true;
+ return false;
+ }
+ case Intrinsic::memory_barrier:
+ Out << "__sync_synchronize()";
+ return true;
+ case Intrinsic::vastart:
+ Out << "0; ";
+
+ Out << "va_start(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ // Output the last argument to the enclosing function.
+ if (I.getParent()->getParent()->arg_empty())
+ Out << "vararg_dummy_arg";
+ else
+ writeOperand(--I.getParent()->getParent()->arg_end());
+ Out << ')';
+ return true;
+ case Intrinsic::vaend:
+ if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
+ Out << "0; va_end(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ } else {
+ Out << "va_end(*(va_list*)0)";
+ }
+ return true;
+ case Intrinsic::vacopy:
+ Out << "0; ";
+ Out << "va_copy(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", *(va_list*)";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::returnaddress:
+ Out << "__builtin_return_address(";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::frameaddress:
+ Out << "__builtin_frame_address(";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::powi:
+ Out << "__builtin_powi(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::setjmp:
+ Out << "setjmp(*(jmp_buf*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::longjmp:
+ Out << "longjmp(*(jmp_buf*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::prefetch:
+ Out << "LLVM_PREFETCH((const void *)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ", ";
+ writeOperand(I.getArgOperand(2));
+ Out << ")";
+ return true;
+ case Intrinsic::stacksave:
+ // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
+ // to work around GCC bugs (see PR1809).
+ Out << "0; *((void**)&" << GetValueName(&I)
+ << ") = __builtin_stack_save()";
+ return true;
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ // Multiple GCC builtins multiplex onto this intrinsic.
+ switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
+ default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
+ case 0: Out << "__builtin_ia32_cmpeq"; break;
+ case 1: Out << "__builtin_ia32_cmplt"; break;
+ case 2: Out << "__builtin_ia32_cmple"; break;
+ case 3: Out << "__builtin_ia32_cmpunord"; break;
+ case 4: Out << "__builtin_ia32_cmpneq"; break;
+ case 5: Out << "__builtin_ia32_cmpnlt"; break;
+ case 6: Out << "__builtin_ia32_cmpnle"; break;
+ case 7: Out << "__builtin_ia32_cmpord"; break;
+ }
+ if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
+ Out << 'p';
+ else
+ Out << 's';
+ if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
+ Out << 's';
+ else
+ Out << 'd';
+
+ Out << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
+ case Intrinsic::ppc_altivec_lvsl:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ Out << "__builtin_altivec_lvsl(0, (void*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ")";
+ return true;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ Out << GetValueName(I.getCalledFunction()) << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
+ }
+}
+
+// This converts the llvm constraint string to something gcc is expecting.
+// TODO: work out platform-independent constraints and factor those out of the
+//       per-target tables; handle multiple constraint codes.
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+ assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+ // Grab the translation table from MCAsmInfo if it exists.
+ const MCAsmInfo *TargetAsm;
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
+ std::string E;
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TargetAsm = Match->createMCAsmInfo(Triple);
+ else
+ return c.Codes[0];
+
+ const char *const *table = TargetAsm->getAsmCBE();
+
+ // Search the translation table if it exists.
+ for (int i = 0; table && table[i]; i += 2)
+ if (c.Codes[0] == table[i]) {
+ delete TargetAsm;
+ return table[i+1];
+ }
+
+ // Default is identity.
+ delete TargetAsm;
+ return c.Codes[0];
+}
+
+//TODO: import logic from AsmPrinter.cpp
+static std::string gccifyAsm(std::string asmstr) {
+ for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+ if (asmstr[i] == '\n')
+ asmstr.replace(i, 1, "\\n");
+ else if (asmstr[i] == '\t')
+ asmstr.replace(i, 1, "\\t");
+ else if (asmstr[i] == '$') {
+ if (asmstr[i + 1] == '{') {
+ std::string::size_type a = asmstr.find_first_of(':', i + 1);
+ std::string::size_type b = asmstr.find_first_of('}', i + 1);
+ std::string n = "%" +
+ asmstr.substr(a + 1, b - a - 1) +
+ asmstr.substr(i + 2, a - i - 2);
+ asmstr.replace(i, b - i + 1, n);
+ i += n.size() - 1;
+ } else
+ asmstr.replace(i, 1, "%");
+ }
+    else if (asmstr[i] == '%') { // Escape a literal '%' for the GCC asm string.
+      asmstr.replace(i, 1, "%%");
+      ++i;
+    }
+
+ return asmstr;
+}
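+
+// For illustration, the rewriting above turns an LLVM asm string such as
+//   "mov $1, $0\n"  into  "mov %1, %0\n"   (the newline becomes a literal \n),
+// turns an operand modifier such as "${0:x}" into "%x0", and doubles literal
+// '%' characters to "%%".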
+
+// TODO: the assumptions about which call operands are consumed as inline asm
+//       arguments are likely wrong; also handle commutativity.
+void CWriter::visitInlineAsm(CallInst &CI) {
+ InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
+ InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
+
+ std::vector<std::pair<Value*, int> > ResultVals;
+ if (CI.getType() == Type::getVoidTy(CI.getContext()))
+ ;
+ else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+ ResultVals.push_back(std::make_pair(&CI, (int)i));
+ } else {
+ ResultVals.push_back(std::make_pair(&CI, -1));
+ }
+
+ // Fix up the asm string for gcc and emit it.
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
+ Out << " :";
+
+ unsigned ValueCount = 0;
+ bool IsFirst = true;
+
+ // Convert over all the output constraints.
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+
+ if (I->Type != InlineAsm::isOutput) {
+ ++ValueCount;
+ continue; // Ignore non-output constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ // Unpack the dest.
+ Value *DestVal;
+ int DestValNo = -1;
+
+ if (ValueCount < ResultVals.size()) {
+ DestVal = ResultVals[ValueCount].first;
+ DestValNo = ResultVals[ValueCount].second;
+ } else
+ DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
+
+ if (I->isEarlyClobber)
+ C = "&"+C;
+
+ Out << "\"=" << C << "\"(" << GetValueName(DestVal);
+ if (DestValNo != -1)
+ Out << ".field" << DestValNo; // Multiple retvals.
+ Out << ")";
+ ++ValueCount;
+ }
+
+
+ // Convert over all the input constraints.
+ Out << "\n :";
+ IsFirst = true;
+ ValueCount = 0;
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isInput) {
+ ++ValueCount;
+ continue; // Ignore non-input constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
+ Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
+
+ Out << "\"" << C << "\"(";
+ if (!I->isIndirect)
+ writeOperand(SrcVal);
+ else
+ writeOperandDeref(SrcVal);
+ Out << ")";
+ }
+
+  // Convert over the clobber constraints.
+  Out << "\n        :";
+  IsFirst = true;
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+    if (I->Type != InlineAsm::isClobber)
+      continue; // Ignore non-clobber constraints.
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ Out << '\"' << C << '"';
+ }
+
+ Out << ")";
+}
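+// For reference, a sketch of what this routine emits (value names are
+// whatever GetValueName() produces; the "llvm_cbe_*" spellings below are
+// assumed). A call such as
+//
+//     %x = call i32 asm "bswap $0", "=r,r"(i32 %y)
+//
+// comes out roughly as:
+//
+//   __asm__ volatile ("bswap %0"
+//        :"=r"(llvm_cbe_x)
+//        :"r"(llvm_cbe_y))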
+
+void CWriter::visitAllocaInst(AllocaInst &I) {
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ") alloca(sizeof(";
+ printType(Out, I.getType()->getElementType());
+ Out << ')';
+ if (I.isArrayAllocation()) {
+ Out << " * " ;
+ writeOperand(I.getOperand(0));
+ }
+ Out << ')';
+}
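+// Illustrative output (the exact type spelling depends on printType and the
+// names are assumed): "%p = alloca i32, i32 %n" becomes roughly
+//
+//   (unsigned int*) alloca(sizeof(unsigned int) * llvm_cbe_n)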
+
+void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static) {
+
+ // If there are no indices, just print out the pointer.
+ if (I == E) {
+ writeOperand(Ptr);
+ return;
+ }
+
+ // Find out if the last index is into a vector. If so, we have to print this
+ // specially. Since vectors can't have elements of indexable type, only the
+ // last index could possibly be of a vector element.
+ const VectorType *LastIndexIsVector = 0;
+ {
+ for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
+ LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
+ }
+
+ Out << "(";
+
+ // If the last index is into a vector, we can't print it as &a[i][j] because
+ // we can't index into a vector with j in GCC. Instead, emit this as
+ // (((float*)&a[i])+j)
+ if (LastIndexIsVector) {
+ Out << "((";
+ printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
+ Out << ")(";
+ }
+
+ Out << '&';
+
+ // If the first index is 0 (very typical) we can do a number of
+ // simplifications to clean up the code.
+ Value *FirstOp = I.getOperand();
+ if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
+ // First index isn't simple, print it the hard way.
+ writeOperand(Ptr);
+ } else {
+ ++I; // Skip the zero index.
+
+ // Okay, emit the first operand. If Ptr is something that is already address
+ // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
+ if (isAddressExposed(Ptr)) {
+ writeOperandInternal(Ptr, Static);
+ } else if (I != E && (*I)->isStructTy()) {
+ // If we didn't already emit the first operand, see if we can print it as
+ // P->f instead of "P[0].f"
+ writeOperand(Ptr);
+ Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ ++I; // eat the struct index as well.
+ } else {
+ // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
+ Out << "(*";
+ writeOperand(Ptr);
+ Out << ")";
+ }
+ }
+
+ for (; I != E; ++I) {
+ if ((*I)->isStructTy()) {
+ Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ } else if ((*I)->isArrayTy()) {
+ Out << ".array[";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else if (!(*I)->isVectorTy()) {
+ Out << '[';
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else {
+ // If the last index is into a vector, then print it out as "+j)". This
+ // works with the 'LastIndexIsVector' code above.
+ if (isa<Constant>(I.getOperand()) &&
+ cast<Constant>(I.getOperand())->isNullValue()) {
+ Out << "))"; // avoid "+0".
+ } else {
+ Out << ")+(";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << "))";
+ }
+ }
+ }
+ Out << ")";
+}
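+// Two illustrative expansions of the logic above (value names are assumed
+// GetValueName-style placeholders; any casts writeOperandWithCast adds to the
+// index operands are omitted):
+//
+//   getelementptr %struct.S* %P, i32 0, i32 1   ==>  (&llvm_cbe_P->field1)
+//   getelementptr %struct.S* %P, i32 %i, i32 1  ==>  (&llvm_cbe_P[llvm_cbe_i].field1)
+//
+// i.e. a leading zero index is folded away, and struct members are always
+// addressed through the synthetic "fieldN" names this backend assigns.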
+
+void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment) {
+
+ bool IsUnaligned = Alignment &&
+ Alignment < TD->getABITypeAlignment(OperandType);
+
+ if (!IsUnaligned)
+ Out << '*';
+ if (IsVolatile || IsUnaligned) {
+ Out << "((";
+ if (IsUnaligned)
+ Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
+ printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
+ if (IsUnaligned) {
+ Out << "; } ";
+ if (IsVolatile) Out << "volatile ";
+ Out << "*";
+ }
+ Out << ")";
+ }
+
+ writeOperand(Operand);
+
+ if (IsVolatile || IsUnaligned) {
+ Out << ')';
+ if (IsUnaligned)
+ Out << "->data";
+ }
+}
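+// Sketch of the three shapes this produces (type spellings and value names
+// are illustrative):
+//
+//   naturally aligned:        *llvm_cbe_p
+//   volatile:                 *((unsigned int volatile*)llvm_cbe_p)
+//   under-aligned (align 2):  ((struct __attribute__ ((packed, aligned(2)))
+//                               {unsigned int data; } *)llvm_cbe_p)->data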
+
+void CWriter::visitLoadInst(LoadInst &I) {
+ writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+ I.getAlignment());
+
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+ writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+ I.isVolatile(), I.getAlignment());
+ Out << " = ";
+ Value *Operand = I.getOperand(0);
+ Constant *BitMask = 0;
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (!ITy->isPowerOf2ByteWidth())
+ // We have a bit width that doesn't match an even power-of-2 byte
+ // size. Consequently we must & the value with the type's bit mask
+ BitMask = ConstantInt::get(ITy, ITy->getBitMask());
+ if (BitMask)
+ Out << "((";
+ writeOperand(Operand);
+ if (BitMask) {
+ Out << ") & ";
+ printConstant(BitMask, false);
+ Out << ")";
+ }
+}
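+// Example of the masking path above (names illustrative): storing an i17
+// value is emitted roughly as
+//
+//   *llvm_cbe_p = ((llvm_cbe_v) & 0x1ffff)   // mask spelled however
+//                                            // printConstant renders it
+//
+// so that bits beyond the declared width never reach memory.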
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
+ gep_type_end(I), false);
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+ Out << "va_arg(*(va_list*)";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ printType(Out, I.getType());
+ Out << ");\n ";
+}
+
+void CWriter::visitInsertElementInst(InsertElementInst &I) {
+ const Type *EltTy = I.getType()->getElementType();
+ writeOperand(I.getOperand(0));
+ Out << ";\n ";
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(&I) << "))[";
+ writeOperand(I.getOperand(2));
+ Out << "] = (";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+ // We know that our operand is not inlined.
+ Out << "((";
+ const Type *EltTy =
+ cast<VectorType>(I.getOperand(0)->getType())->getElementType();
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
+ writeOperand(I.getOperand(1));
+ Out << "]";
+}
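+// Both element accessors above go through an element-typed pointer cast;
+// e.g. extracting element 1 of a <4 x float> %v prints roughly as (the
+// value name is assumed):
+//
+//   ((float*)(&llvm_cbe_v))[1]
+//
+// and visitInsertElementInst first copies the source vector into the result
+// variable, then assigns one element through the same kind of cast.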
+
+void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Out << "(";
+ printType(Out, SVI.getType());
+ Out << "){ ";
+ const VectorType *VT = SVI.getType();
+ unsigned NumElts = VT->getNumElements();
+ const Type *EltTy = VT->getElementType();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i) Out << ", ";
+ int SrcVal = SVI.getMaskValue(i);
+ if ((unsigned)SrcVal >= NumElts*2) {
+ Out << " 0/*undef*/ ";
+ } else {
+ Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
+ if (isa<Instruction>(Op)) {
+ // Do an extractelement of this value from the appropriate input.
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(Op)
+ << "))[" << (SrcVal & (NumElts-1)) << "]";
+ } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+ Out << "0";
+ } else {
+ printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+ (NumElts-1)),
+ false);
+ }
+ }
+ }
+ Out << "}";
+}
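+// The result is a compound literal of the shuffle's vector type: each mask
+// element either indexes one of the two sources through an element-typed
+// pointer cast, exactly as in visitExtractElementInst above, e.g.
+// "((float*)(&llvm_cbe_a))[2]" (names illustrative), or collapses to 0 for
+// undef and zero inputs.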
+
+void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
+ // Start by copying the entire aggregate value into the result variable.
+ writeOperand(IVI.getOperand(0));
+ Out << ";\n ";
+
+ // Then do the insert to update the field.
+ Out << GetValueName(&IVI);
+ for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
+ ArrayRef<unsigned>(b, i+1));
+ if (IndexedTy->isArrayTy())
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ Out << " = ";
+ writeOperand(IVI.getOperand(1));
+}
+
+void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
+ Out << "(";
+ if (isa<UndefValue>(EVI.getOperand(0))) {
+ Out << "(";
+ printType(Out, EVI.getType());
+ Out << ") 0/*UNDEF*/";
+ } else {
+ Out << GetValueName(EVI.getOperand(0));
+ for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
+ ArrayRef<unsigned>(b, i+1));
+ if (IndexedTy->isArrayTy())
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ }
+ Out << ")";
+}
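+// Aggregate accesses reuse the synthetic ".fieldN" / ".array[N]" members this
+// backend gives struct and array types; e.g. extracting index 1 of a
+// two-field struct %s prints roughly as (the value name is assumed):
+//
+//   (llvm_cbe_s.field1)
+//
+// while visitInsertValueInst above first copies the whole aggregate and then
+// assigns just the selected member.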
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerInvokePass());
+ PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
+ PM.add(new CWriter(o));
+ PM.add(createGCInfoDeleter());
+ return false;
+}
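+// Usage note (an illustration, not part of the pass setup above): this
+// backend is reached through the normal llc driver, e.g.
+//
+//   llc -march=c input.bc -o output.c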
diff --git a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 000000000000..e64216be0bdc
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,41 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+ CTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : TargetMachine(T, TT, CPU, FS) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
+
+ virtual const TargetData *getTargetData() const { return 0; }
+};
+
+extern Target TheCBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
new file mode 100644
index 000000000000..f7e8ff254848
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCBackendTarget;
+
+extern "C" void LLVMInitializeCBackendTargetInfo() {
+ RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
new file mode 100644
index 000000000000..9468aee067a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -0,0 +1,449 @@
+//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+///--==-- Arithmetic ops intrinsics --==--
+def CellSDKah:
+ RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
+def CellSDKahi:
+ RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
+def CellSDKa:
+ RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
+def CellSDKai:
+ RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
+def CellSDKsfh:
+ RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
+def CellSDKsfhi:
+ RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
+def CellSDKsf:
+ RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
+def CellSDKsfi:
+ RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
+def CellSDKaddx:
+ RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
+def CellSDKcg:
+ RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
+def CellSDKcgx:
+ RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
+def CellSDKsfx:
+ RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
+def CellSDKbg:
+ RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
+def CellSDKbgx:
+ RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
+
+def CellSDKmpy:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyu:
+ RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+def CellSDKmpyi:
+ RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyi $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpyui:
+ RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyui $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpya:
+ RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB),
+ (v8i16 VECREG:$rC)))]>;
+
+def CellSDKmpyh:
+ RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpys:
+ RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpys $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhh:
+ RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhha:
+ RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhha $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
+// as an intrinsic for the time being
+def CellSDKmpyhhu:
+ RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhhau:
+ RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKand:
+ RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "and\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandc:
+ RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "andc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandbi:
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "andbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKandhi:
+ RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKandi:
+ RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "or\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorc:
+    RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+           "orc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "orbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "orhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKxor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "xor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKxorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "xorbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKxorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xorhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKxori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKnor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKnand:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nand\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Shift/rotate intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKshli:
+ Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
+ (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqbi:
+ Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
+ (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqii:
+ Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
+ (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqby:
+ Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
+ (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbyi:
+ Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
+ (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+
+//===----------------------------------------------------------------------===//
+// Branch/compare intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKceq:
+ RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceq\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKceqi:
+ RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKceqb:
+ RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKceqbi:
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "ceqbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKceqh:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqh\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKceqhi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+          (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgth:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKcgthi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgt:
+ RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKcgti:
+ RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKcgtb:
+ RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKcgtbi:
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "cgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKclgth:
+ RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKclgthi:
+ RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKclgt:
+ RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKclgti:
+ RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKclgtb:
+ RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKclgtbi:
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "clgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKfa:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fa\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfs:
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fs\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfm:
+ RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfceq:
+ RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fceq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcgt:
+ RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmeq:
+ RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmeq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmgt:
+ RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfma:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfnms:
+ RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfms:
+ RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+//===----------------------------------------------------------------------===//
+// Double precision floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKdfa:
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfs:
+ RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfm:
+ RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfma:
+ RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnma:
+ RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnms:
+ RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfms:
+ RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..85fb258eac2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMCellSPUDesc
+ SPUMCTargetDesc.cpp
+ SPUMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..10d9a42239ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/CellSPU/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..8c1176a9d028
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCAsmInfo.h"
+using namespace llvm;
+
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = "\t.quad\t";
+ AlignmentIsInBytes = false;
+
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+
+ // Has leb128
+ HasLEB128 = true;
+
+ SupportsDebugInformation = true;
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ ExceptionsType = ExceptionHandling::None;
+
+ // SPU assembly requires ".section" before ".bss"
+ UsesELFSectionDirectiveForBSS = true;
+}
+
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
new file mode 100644
index 000000000000..7f850d347f56
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
@@ -0,0 +1,28 @@
+//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct SPULinuxMCAsmInfo : public MCAsmInfo {
+ explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..26c5a4bc7b33
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCTargetDesc.h"
+#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSPUMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeCellSPUMCAsmInfo() {
+ RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
new file mode 100644
index 000000000000..c5c037d4de44
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- SPUMCTargetDesc.h - Cell SPU Target Descriptions ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUMCTARGETDESC_H
+#define SPUMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheCellSPUTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPU instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.h b/contrib/llvm/lib/Target/CellSPU/SPU.h
new file mode 100644
index 000000000000..b51fbc7a5197
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,31 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "MCTargetDesc/SPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
+
+}
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.td b/contrib/llvm/lib/Target/CellSPU/SPU.td
new file mode 100644
index 000000000000..8327fe03d7f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+    SubtargetFeature<"large_mem","UseLargeMem", "true",
+                     "Use large (>256KB) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+ let InstructionSet = SPUInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 000000000000..3031fda54381
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+ (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 000000000000..f340edfb0f86
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,408 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector versus scalar differ by a
+// constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+// compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
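+// As a reading aid for the fragments below, the scalar i64 seteq sequence
+// boils down to (a sketch of the SPU instructions involved, not emitted
+// text):
+//
+//     ceq    t, a, b      ; four 32-bit equality results, one per slot
+//     gb     t, t         ; gather the four slot results into one nibble
+//     cgti   t, t, 0xb    ; "> 0b1011" holds only when both high slots match
+//
+// which is what CEQr64compare encodes via CEQv4i32/GBv4i32/CGTIv4i32; the
+// vector form instead requires all four slots to match (CEQIv4i32 ..., 0xf).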
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+ CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64compare:
+ CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+ CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+ CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
+ CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64LGTv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+ CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+ CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64LGEv2i64.Fragment>;
+
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+ CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64compare:
+ CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+ CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+ CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+                        (XSWDv2i64 CGTv2i64sgt.Fragment),
+ CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64GT: CompareGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64GTv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+ CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+ CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64GEv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS
+ v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+ SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+ [(set R64FP:$rT,
+ (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
new file mode 100644
index 000000000000..fd96694b32fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -0,0 +1,334 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SPUAsmPrinter : public AsmPrinter {
+ public:
+ explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
+ AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description.
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+
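+ /// EmitInstruction - Format MI with the tablegen'd printInstruction and
+ /// hand the resulting text to the streamer.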
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printOp(const MachineOperand &MO, raw_ostream &OS);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO, O);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+
+ void
+ printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value < (1 << 7) && "Invalid u7 argument");
+ O << value;
+ }
+
+ void
+ printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ char value = MI->getOperand(OpNo).getImm();
+ O << (int) value;
+ O << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (short) MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // Print a register-plus-register memory reference as "rA, rB".
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ O << getRegisterName(MO.getReg()) << ", ";
+ printOperand(MI, OpNo+1, O);
+ }
+
+ void
+ printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
+ O << value;
+ }
+
+ void
+ printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
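+ // Sign-extend the low 16 bits of the immediate before range checking.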
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+ && "Invalid s10 argument");
+ O << value;
+ }
+
+ void
+ printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+ O << value;
+ }
+
+ void
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ assert(MI->getOperand(OpNo).isImm() &&
+ "printDFormAddr first operand is not immediate");
+ int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+ int16_t value16 = int16_t(value);
+ assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+ && "Invalid dform s10 offset argument");
+ O << (value16 & ~0xf) << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ /* Note: operand 1 is an offset or symbol name. */
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ if (MI->getOperand(OpNo+1).isImm()) {
+ int displ = int(MI->getOperand(OpNo+1).getImm());
+ if (displ > 0)
+ O << "+" << displ;
+ else if (displ < 0)
+ O << displ;
+ }
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // Used to generate a ".-<target>", but it turns out that the assembler
+ // really wants the target.
+ //
+ // N.B.: This operand is used for call targets. Branch hints are another
+ // animal entirely.
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@h";
+ }
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@l";
+ }
+ }
+
+ /// Print local store address
+ void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value < 16)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
+ assert(MI->getOperand(OpNo).isImm() &&
+ "Invalid/non-immediate rotate amount in printROTNeg7Imm");
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value <= 32)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ }
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ report_fatal_error("printOp() does not handle immediate values");
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
+ << "$non_lazy_ptr";
+ return;
+ }
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // External or weakly linked global variables need non-lazily-resolved
+ // stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ const GlobalValue *GV = MO.getGlobal();
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ return;
+ }
+ }
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+ case MachineOperand::MO_MCSymbol:
+ O << *(MO.getMCSymbol());
+ return;
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemRegReg(MI, OpNo, O);
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() {
+ RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 000000000000..04fa2ae866d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,57 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: return values are passed in registers R3-R74.
+def RetCC_SPU : CallingConv<[
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CCC_SPU : CallingConv<[
+ CCIfType<[i8, i16, i32, i64, i128, f32, f64,
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
+]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 000000000000..a3e7e73ae30a
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,275 @@
+//===-- SPUFrameLowering.cpp - Cell SPU frame lowering implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame lowering (prologue/epilogue insertion) for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameLowering.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
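+ // The only recorded spill slot: the link register ($lr, R0) at offset 16
+ // from the incoming $sp.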
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function has a non-zero stack size and
+// either frame-pointer elimination is disabled or it uses variable-sized stack
+// objects.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return MFI->getStackSize() &&
+ (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
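+ // (e.g. a 72-byte frame rounds up to 80 under a 16-byte alignment mask).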
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI.hasDebugInfo();
+ MCSymbol *FrameLabel = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
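+ // Reserve the LR/SP spill area (minStackSize) on top of the locals and
+ // negate the total, since the stack grows down and $sp is adjusted by
+ // adding a negative amount.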
+ FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isInt<10>(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+ // Adjust $sp by the required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (isInt<16>(FrameSize)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ } else {
+ // This is a leaf function -- insert a branch hint iff the basic block
+ // contains a sufficient number of instructions. Note that this is just a
+ // best guess based on the basic block's size.
+ if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ dl = MBBI->getDebugLoc();
+
+ // Insert terminator label
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
+ .addSym(MMI.getContext().CreateTempSymbol());
+ }
+ }
+}
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
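+ // Mirror the prologue: the amount to pop is the frame size plus the LR/SP
+ // spill area.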
+ FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, then adjust $sp by the required amount.
+ // Note: reloading first creates a small dual-issue opportunity.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
+
+void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
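+ // Reserve an emergency spill slot (an R32C-sized stack object) for the
+ // register scavenger.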
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
new file mode 100644
index 000000000000..4fee72d946a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
@@ -0,0 +1,94 @@
+//===-- SPUFrameLowering.h - SPU Frame Lowering definitions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU implementation of TargetFrameLowering: frame
+// layout, prologue/epilogue emission, and related frame constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
+
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+ //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ //! Predicate: Target has dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //! Return a function's saved spill slots
+ /*!
+ For CellSPU, the only callee-saved spill slot is the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
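+ (That is 511 * 16 = 8176 bytes.)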
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //! Minimum frame offset representable by a signed 10-bit integer
+ static int minFrameOffset() {
+ return -(1 << 9) * stackSlotSize();
+ }
+ //! Minimum frame size (enough to spill LR + SP)
+ static int minStackSize() {
+ return (2 * stackSlotSize());
+ }
+ //! Convert frame index to stack offset
+ static int FItoStackOffset(int frame_index) {
+ return frame_index * stackSlotSize();
+ }
+ //! Number of instructions required to overcome hint-for-branch latency
+ /*!
+ HBR (hint-for-branch) instructions can be inserted when, for example, the
+ control flow graph tells us that a given function (such as printf()) is
+ about to be called. HBRs only pay off if a sufficient number of
+ instructions occurs between the HBR and the branch target. An HBR
+ currently takes 6 cycles, hence the magic number 6.
+ */
+ static int branchHintPenalty() {
+ return 6;
+ }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
new file mode 100644
index 000000000000..403d7ef1fd9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -0,0 +1,141 @@
+//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on Cell SPU
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched"
+
+#include "SPUHazardRecognizers.h"
+#include "SPU.h"
+#include "SPUInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Cell SPU hazard recognizer
+//
+// This is the pipeline hazard recognizer for the Cell SPU processor. It does
+// very little right now.
+//===----------------------------------------------------------------------===//
+
+SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
+ TII(tii),
+ EvenOdd(0)
+{
+}
+
+/// Return the pipeline hazard type encountered or generated by this
+/// instruction. Currently returns NoHazard.
+///
+/// \return NoHazard
+ScheduleHazardRecognizer::HazardType
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
+{
+ // Initial thoughts on how to do this, but this code cannot work unless the
+ // function's prolog and epilog code are also being scheduled so that we can
+ // accurately determine which pipeline is being scheduled.
+#if 0
+ assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ ScheduleHazardRecognizer::HazardType retval = NoHazard;
+ bool mustBeOdd = false;
+
+ switch (Node->getOpcode()) {
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16:
+ case SPU::LQAv16i8:
+ case SPU::LQAv8i16:
+ case SPU::LQAv4i32:
+ case SPU::LQAv4f32:
+ case SPU::LQAv2f64:
+ case SPU::LQAr128:
+ case SPU::LQAr64:
+ case SPU::LQAr32:
+ case SPU::LQXv4i32:
+ case SPU::LQXr128:
+ case SPU::LQXr64:
+ case SPU::LQXr32:
+ case SPU::LQXr16:
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8:
+ case SPU::STQAv16i8:
+ case SPU::STQAv8i16:
+ case SPU::STQAv4i32:
+ case SPU::STQAv4f32:
+ case SPU::STQAv2f64:
+ case SPU::STQAr128:
+ case SPU::STQAr64:
+ case SPU::STQAr32:
+ case SPU::STQAr16:
+ case SPU::STQAr8:
+ case SPU::STQXv16i8:
+ case SPU::STQXv8i16:
+ case SPU::STQXv4i32:
+ case SPU::STQXv4f32:
+ case SPU::STQXv2f64:
+ case SPU::STQXr128:
+ case SPU::STQXr64:
+ case SPU::STQXr32:
+ case SPU::STQXr16:
+ case SPU::STQXr8:
+ case SPU::RET:
+ mustBeOdd = true;
+ break;
+ default:
+ // Assume that this instruction can be on the even pipe
+ break;
+ }
+
+ if (mustBeOdd && !EvenOdd)
+ retval = Hazard;
+
+ DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
+ << retval << "\n");
+ EvenOdd ^= 1;
+ return retval;
+#else
+ return NoHazard;
+#endif
+}
+
+void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
+{
+}
+
+void SPUHazardRecognizer::AdvanceCycle()
+{
+ DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
+}
+
+void SPUHazardRecognizer::EmitNoop()
+{
+ AdvanceCycle();
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
new file mode 100644
index 000000000000..675632cc7f13
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -0,0 +1,41 @@
+//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on the Cell SPU
+// processor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+ const TargetInstrInfo &TII;
+ int EvenOdd;
+
+public:
+ SPUHazardRecognizer(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 000000000000..a297d036f03e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1203 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameLowering.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+ bool
+ isI32IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+ bool
+ isI32IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>((short) CN->getZExtValue());
+ }
+
+ //! ConstantSDNode predicate for signed 16-bit values
+ /*!
+ \arg CN The constant SelectionDAG node holding the value
+ \arg Imm The returned 16-bit value, if returning true
+
+ This predicate tests the value in \a CN to see whether it can be
+ represented as a 16-bit, sign-extended quantity. Returns true if
+ this is the case.
+ */
+ bool
+ isIntS16Immediate(ConstantSDNode *CN, short &Imm)
+ {
+ EVT vt = CN->getValueType(0);
+ Imm = (short) CN->getZExtValue();
+ if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
+ return true;
+ } else if (vt == MVT::i32) {
+ int32_t i_val = (int32_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ } else {
+ int64_t i_val = (int64_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ }
+
+ return false;
+ }
+
+ //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
+ static bool
+ isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
+ {
+ EVT vt = FPN->getValueType(0);
+ if (vt == MVT::f32) {
+ int val = FloatToBits(FPN->getValueAPF().convertToFloat());
+ int sval = (int) ((val << 16) >> 16);
+ Imm = (short) val;
+ return val == sval;
+ }
+
+ return false;
+ }
+
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
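+ // Selector bytes 0x04..0x07 and 0x0c..0x0f copy words 1 and 3 of the
+ // source into words 0 and 2; 0x80 control bytes make SHUFB emit constant
+ // 0x00 bytes.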
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
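+ // Same layout as the carry mask, except the 0xc0 control bytes make SHUFB
+ // emit constant 0xff bytes instead of zeros.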
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ const SPUTargetMachine &TM;
+ const SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+ }
+
+ SDNode *emitBuildVector(SDNode *bvNode) {
+ EVT vecVT = bvNode->getValueType(0);
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
+ HandleSDNode Dummy(SDValue(bvNode, 0));
+ if (SDNode *N = Select(bvNode))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
+ ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ }
+
+ const Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
+
+ HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment));
+ CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
+ SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+#if 1
+ llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
+#else
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+#endif
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
+
+ private:
+ SDValue getRC( MVT );
+
+ // Include the pieces autogenerated from the target description.
+#include "SPUGenDAGISel.inc"
+ };
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address to be tested
+ \arg Base The base address
+ \arg Index The base address index
+ */
+bool
+SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ // These match the addr256k operand type:
+ EVT OffsVT = MVT::i16;
+ SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+ int64_t val;
+
+ switch (N.getOpcode()) {
+ case ISD::Constant:
+ val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
+ Base = CurDAG->getTargetConstant( val , MVT::i32);
+ Index = Zero;
+ return true; break;
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
+ /*NOTREACHED*/
+
+ case ISD::TargetConstant:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetJumpTable:
+ report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
+ "not wrapped as A-form address.");
+ /*NOTREACHED*/
+
+ case SPUISD::AFormAddr:
+ // Just load from memory if there's only a single use of the location,
+ // otherwise, this will get handled below with D-form offset addresses
+ if (N.hasOneUse()) {
+ SDValue Op0 = N.getOperand(0);
+ switch (Op0.getOpcode()) {
+ case ISD::TargetConstantPool:
+ case ISD::TargetJumpTable:
+ Base = Op0;
+ Index = Zero;
+ return true;
+
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
+ const GlobalValue *GV = GSDN->getGlobal();
+ if (GV->getAlignment() == 16) {
+ Base = Op0;
+ Index = Zero;
+ return true;
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+bool
+SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ const int minDForm2Offset = -(1 << 7);
+ const int maxDForm2Offset = (1 << 7) - 1;
+ return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
+ maxDForm2Offset);
+}
+
+/*!
+ \arg Op The ISD instruction (ignored)
+ \arg N The address to be tested
+ \arg Base Base address register/pointer
+ \arg Index Base address index
+
+ Examine whether the input address can be expressed as a base register plus
+ a signed 10-bit displacement, [r+I10] (a D-form address).
+
+ \return true if \a N is a D-form address with \a Base and \a Index set
+ to non-empty SDValue instances.
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return DFormAddressPredicate(Op, N, Base, Index,
+ SPUFrameLowering::minFrameOffset(),
+ SPUFrameLowering::maxFrameOffset());
+}
+
+bool
+SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index, int minOffset,
+ int maxOffset) {
+ unsigned Opc = N.getOpcode();
+ EVT PtrTy = SPUtli.getPointerTy();
+
+ if (Opc == ISD::FrameIndex) {
+ // The frame index, scaled to a byte offset (FI * 16), must fit the D-form range:
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI << "\n");
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ // Generated by getelementptr
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
+ || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op0.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op0;
+ return true;
+ }
+ } else if (Op0.getOpcode() == ISD::Constant
+ || Op0.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op1.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op1;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Indirect with constant offset -> D-Form address
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::Hi
+ && Op1.getOpcode() == SPUISD::Lo) {
+ // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
+ int32_t offset = 0;
+ SDValue idxOp;
+
+ if (isa<ConstantSDNode>(Op1)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op0;
+ } else if (isa<ConstantSDNode>(Op0)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op1;
+ }
+
+ if (offset >= minOffset && offset <= maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = idxOp;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::AFormAddr) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == SPUISD::LDRESULT) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == ISD::Register
+ || Opc == ISD::CopyFromReg
+ || Opc == ISD::UNDEF
+ || Opc == ISD::Constant) {
+ unsigned OpOpc = Op->getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Offs;
+
+ Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = N;
+ return true;
+ }
+ } else {
+ /* If otherwise unadorned, default to D-form address with 0 offset: */
+ if (Opc == ISD::CopyFromReg) {
+ Index = N.getOperand(1);
+ } else {
+ Index = N;
+ }
+
+ Base = CurDAG->getTargetConstant(0, Index.getValueType());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, Base and Index that will become the
+ (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+ // If the address is neither A-form nor D-form, punt and use an X-form
+ // address:
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+/*!
+ Utility function for use with COPY_TO_REGCLASS instructions. Returns an
+ SDValue to be used as the last parameter of a
+ CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) call.
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+ switch( VT.SimpleTy ) {
+ case MVT::i8:
+ return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i16:
+ return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i32:
+ return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::f32:
+ return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i64:
+ return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i128:
+ return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+ break;
+ default:
+ assert( false && "add a new case here" );
+ }
+ return SDValue();
+}
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ int n_ops = -1;
+ unsigned NewOpc = 0;
+ EVT OpVT = N->getValueType(0);
+ SDValue Ops[8];
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ if (Opc == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
+
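+ // Small frame indices become (AIr32 frameindex, 0), leaving the real offset
+ // to frame-index elimination; larger ones materialize the frame address
+ // with ILAr32 and add it to $sp.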
+ if (FI < 128) {
+ NewOpc = SPU::AIr32;
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
+ n_ops = 2;
+ } else {
+ NewOpc = SPU::Ar32;
+ Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
+ N->getValueType(0), TFI),
+ 0);
+ n_ops = 2;
+ }
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(N, OpVT, N->getDebugLoc());
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = N->getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ Op0VT, (128 / Op0VT.getSizeInBits()));
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
+ /*NOTREACHED*/
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
+
+ HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0));
+
+ SDValue PromScalar;
+ if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
+ PromScalar = SDValue(N, 0);
+ else
+ PromScalar = PromoteScalar.getValue();
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ PromScalar, PromScalar,
+ SDValue(shufMaskLoad, 0));
+
+ HandleSDNode Dummy2(zextShuffle);
+ if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
+ zextShuffle = SDValue(N, 0);
+ else
+ zextShuffle = Dummy2.getValue();
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ SelectCode(Dummy.getValue().getNode());
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = N->getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+ // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
+ //
+ // Take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ Op0.getOperand(0), getRC(MVT::i32));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64)
+ return SelectSHLi64(N, OpVT);
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64)
+ return SelectSRLi64(N, OpVT);
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64)
+ return SelectSRAi64(N, OpVT);
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = N->getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::XORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::XORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst).getNode());
+ }
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec.getNode());
+ return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ }
+ } else if (Opc == SPUISD::LDRESULT) {
+ // Custom select instructions for LDRESULT
+ EVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+ SDValue Chain = N->getOperand(1);
+ SDNode *Result;
+
+ Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+ MVT::Other, Arg,
+ getRC( VT.getSimpleVT()), Chain);
+ return Result;
+
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
+ Ops[1] = Op1;
+ if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+ if (isInt<10>(CN->getSExtValue())) {
+ NewOpc = SPU::AIr32;
+ Ops[1] = Op1;
+ } else {
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+ N->getValueType(0),
+ Op1),
+ 0);
+ }
+ }
+ Ops[0] = Op0;
+ n_ops = 2;
+ }
+ }
+
+ if (n_ops > 0) {
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+ else
+ return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
+ } else
+ return SelectCode(N);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param N The shl operand
+ * @param OpVT N's machine value type (doesn't need to be passed, but
+ *        makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
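+//
+// For illustration, a constant shift amount of 35 yields bytes = 35 >> 3 = 4
+// and bits = 35 & 7 = 3, so the zero-filled quadword below is shifted with
+// SHLQBYIv2i64 by 4 bytes and then SHLQBIIv2i64 by 3 bits.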
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param N The srl operand
+ * @param OpVT N's machine value type (doesn't need to be passed, but
+ *        makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param N The sra operand
+ * @param OpVT N's machine value type (doesn't need to be passed, but
+ *        makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
+ // Promote Op0 to vector
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDNode *VecOp0 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ VecVT, N->getOperand(0), getRC(MVT::v2i64));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ Do the magic necessary to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
+ DebugLoc dl) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
+ DebugLoc dl) {
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
+
+ if (i64vec.getOpcode() == ISD::BITCAST) {
+ // The degenerate case where the upper and lower bits in the splat are
+ // identical:
+ SDValue Op0 = i64vec.getOperand(0);
+
+ ReplaceUses(i64vec, Op0);
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(Op0.getNode()), 0),
+ getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs.getNode()));
+
+ if (rhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs.getNode()));
+
+ if (shufmask.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask.getNode()));
+
+ SDValue shufNode =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0));
+ HandleSDNode Dummy(shufNode);
+ SDNode *SN = SelectCode(Dummy.getValue().getNode());
+ if (SN == 0) SN = Dummy.getValue().getNode();
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(SN, 0), getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(i64vec.getNode()), 0),
+ getRC(MVT::i64));
+ } else {
+ report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
+ "condition");
+ }
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+ return new SPUDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 000000000000..f0ceee214149
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,3257 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameLowering.h"
+#include "SPUMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+ std::map<unsigned, const char *> node_names;
+
+ // Byte offset of the preferred slot (counted from the MSB)
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
+
+ return retval;
+ }
+
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op.getDebugLoc());
+
+ return CallInfo.first;
+ }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+ SPUTM(TM) {
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+ // Set up the SPU's register classes:
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+ // SPU has no sign or zero extended loads for i1, i8, i16:
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // SPU constant load actions are custom lowered:
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ // SPU's loads and stores have to be custom lowered:
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ // Expand the jumptable branches
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ // SPU has no intrinsics for these particular operations:
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ // SPU has no division/remainder instructions
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // SPU can do rotate right and left, so legalize it... but customize for i8
+ // because instructions don't exist.
+
+ // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+ // .td files.
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
+ // SPU has no native version of shift left/right for i8
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+  // Custom lower i8 multiplications; i32 and i64 multiplications are legal
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+  // SPU does not have BSWAP, but it does support CTLZ for i32.
+ // CTPOP has to be custom lowered.
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
+ // select ought to work:
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+ // to expand to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
+
+ // FDIV on SPU requires custom lowering
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
+
+ // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
+
+ // Cell SPU has instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+    // mul is legal for the supported vector types as well.
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ // These operations need to be expanded:
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Custom lower build_vector, constant pool spills, insert and
+ // extract vector elements:
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ setStackPointerRegisterToSaveRestore(SPU::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ setMinFunctionAlignment(3);
+
+ computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+const char *
+SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ if (node_names.empty()) {
+ node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
+ node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
+ node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
+ node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+ node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
+ node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
+ node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
+ node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
+ node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+ node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+ node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
+ node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
+ node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+ "SPUISD::ROTBYTES_LEFT_BITS";
+ node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
+ node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
+ }
+
+ std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
+
+ return ((i != node_names.end()) ? i->second : 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32,ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
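+//
+// For example, an i32 located 8 bytes into its 16-byte line is loaded by
+// rotating the containing quadword left by 8 bytes so that the value lands
+// in the preferred slot (bytes 0..3).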
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ LoadSDNode *LN = cast<LoadSDNode>(Op);
+ SDValue the_chain = LN->getChain();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT InVT = LN->getMemoryVT();
+ EVT OutVT = Op.getValueType();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ unsigned alignment = LN->getAlignment();
+ int pso = prefslotOffset(InVT);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+ // two sanity checks
+  assert( LN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+ // clean aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(rotamt, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
+
+ // Do the load as a i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+  // When the size is no greater than the alignment, we get all of the data
+  // with just one load
+ if (alignment >= InVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+  // When the alignment is less than the size, we might need two loads
+  // (known only at run-time).
+  // TODO: if the memory address is composed only of constants, we have
+  // extra knowledge and might avoid the second load
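+  //
+  // For illustration: an i64 whose base pointer is 12 bytes into its 16-byte
+  // line takes bytes 12..15 from the low quadword and bytes 0..3 from the
+  // high quadword; the byte shifts below line both halves up before they are
+  // OR'ed together.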
+ else {
+ // storage position offset from lower 16 byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+    // Get a register full of ones (this implementation is a workaround: LLVM
+    // cannot handle 128-bit signed int constants).
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+    // Shift the (possible) high part right to compensate for the
+    // misalignment. If there is no high part (i.e. the value is i64 and the
+    // offset is 4), this will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+ // Shift the low similarly
+ // TODO: add SPUISD::SHL_BYTES
+ low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+
+ }
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
+ }
+
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+ SDValue retops[2] = {
+ result,
+ the_chain
+ };
+
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+ retops, sizeof(retops) / sizeof(retops[0]));
+ return result;
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
+ */
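+//
+// Roughly, the aligned case below loads the containing quadword, builds a
+// shuffle mask from the insertion offset (SHUFFLE_MASK), promotes the scalar
+// value to a vector (SCALAR_TO_VECTOR), merges it into the loaded data with
+// SHUFB, and then stores the whole quadword back.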
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ StoreSDNode *SN = cast<StoreSDNode>(Op);
+ SDValue Value = SN->getValue();
+ EVT VT = Value.getValueType();
+ EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+ // two sanity checks
+  assert( SN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+  // clean aligned stores can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+
+ // if we don't need to store over the 16 byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ LoadSDNode *LN = cast<LoadSDNode>(low);
+ SDValue theValue = SN->getValue();
+
+ if (StVT != VT
+ && (theValue.getOpcode() == ISD::AssertZext
+ || theValue.getOpcode() == ISD::AssertSext)) {
+ // Drill down and get the value for zero- and sign-extended
+ // quantities
+ theValue = theValue.getOperand(0);
+ }
+
+ // If the base pointer is already a D-form address, then just create
+  // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
+
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, insertEltOp));
+
+ result = DAG.getStore(the_chain, dl, result, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(),
+ 16);
+
+ }
+ // do the store when it might cross the 16 byte memory access boundary.
+ else {
+ // TODO issue a warning if SN->isVolatile()== true? This is likely not
+ // what the user wanted.
+
+    // address offset from the nearest lower 16-byte aligned address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+    // get a register full of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ // Create the 128 bit masks that have ones where the data to store is
+ // located.
+ SDValue lowmask, himask;
+    // If the value to store doesn't fill an entire 128 bits, zero out the
+    // last bits of the mask so that only the value we want to store is
+    // covered by it. This happens e.g. for a store of i32 with align 2.
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+    // This will be zero if no data goes to the high quadword.
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+    // Merge the old data and the new data and store the results.
+    // The vectors need to be converted to integers here, as OR'ing
+    // floating-point vectors asserts.
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
+ }
+
+ return result;
+}
+
+//! Generate the address of a constant pool entry.
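+//
+// Roughly: when the subtarget is not using large memory, the address is
+// emitted as a single A-form absolute address; otherwise it is split into
+// Hi/Lo halves that are combined through an IndirectAddr node, as the code
+// below shows.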
+static SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ // Just return the SDValue with the constant pool address in it.
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerConstantPool: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerJumpTable: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
+ const TargetMachine &TM = DAG.getTarget();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ } else {
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
+ "not supported.");
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Custom lower double precision floating point constants
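+//
+// For example, the f64 constant 1.0 is lowered by splatting its bit pattern
+// 0x3FF0000000000000 into a v2i64 BUILD_VECTOR, bitcasting to v2f64 and
+// extracting the preferred slot.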
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
+ uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
+ }
+
+ return SDValue();
+}
+
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
+
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
+ unsigned ArgRegIdx = 0;
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
+ // Add DAG nodes to load the arguments or copy them out of registers.
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
+
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *ArgRegClass;
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
+ case MVT::i8:
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
+ case MVT::i16:
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
+ case MVT::i32:
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
+ case MVT::i64:
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
+ case MVT::f32:
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
+ case MVT::f64:
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++ArgRegIdx;
+ } else {
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type
+ // or we're forced to do vararg
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ ArgOffset += StackSlotSize;
+ }
+
+ InVals.push_back(ArgVal);
+ // Update the chain
+ Chain = ArgVal.getOperand(0);
+ }
+
+ // vararg handling:
+ if (isVarArg) {
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const unsigned ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+ // size of ArgRegs array
+ unsigned NumArgRegs = 77;
+
+ // We will spill (79-3)+1 registers to the stack
+ SmallVector<SDValue, 79-3+1> MemOps;
+
+ // Create the frame slot
+ for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
+ false, false, 0);
+ Chain = Store.getOperand(0);
+ MemOps.push_back(Store);
+
+ // Increment address by stack slot size for the next stored argument
+ ArgOffset += StackSlotSize;
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ return Chain;
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as an LSA address.
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 14 >> 14) != Addr)
+ return 0; // Top 14 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+SDValue
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
+
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ unsigned NumOps = Outs.size();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
+
+ // Handy pointer type
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory.
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
+ unsigned ArgRegIdx = 0;
+
+ // Keep track of registers passing arguments
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ // And the arguments passed on the stack
+ SmallVector<SDValue, 8> MemOpChains;
+
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ }
+ }
+
+ // Accumulate how many bytes are to be pushed on the stack, including the
+  // linkage area and the parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
+
+ // Insert a call sequence start
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+ true));
+
+ if (!MemOpChains.empty()) {
+ // Adjust the stack pointer for the stack arguments.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = SPUISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
+
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+      // style calls; otherwise, external symbols are BRASL calls. This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+ }
+ } else {
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ // If this is an absolute destination address that appears to be a legal
+ // local store address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // If the function returns void, just return the chain.
+ if (Ins.empty())
+ return Chain;
+
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
+ // If the call has results, copy the values out of the ret val registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
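+/// getVecImm - If this BUILD_VECTOR node has a single (possibly repeated)
+/// non-undef operand and that operand is a ConstantSDNode, return it;
+/// otherwise return 0.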
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+ SDValue OpVal(0, 0);
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return 0;
+ }
+
+ if (OpVal.getNode() != 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ return CN;
+ }
+ }
+
+ return 0;
+}
+
+/// get_vec_u18imm - Test if this vector is a vector filled with the same value
+/// and the value fits into an unsigned 18-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
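+    // For i64 splats, the two 32-bit halves must be identical; the value is
+    // then reduced to that common 32-bit half before the range check.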
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value <= 0x3ffff)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 10-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (isInt<10>(Value))
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i8imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 8-bit constant, and if so, return the
+/// constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int Value = (int) CN->getZExtValue();
+ if (ValueType == MVT::i16
+ && Value <= 0xffff /* truncated from uint64_t */
+ && ((short) Value >> 8) == ((short) Value & 0xff))
+ return DAG.getTargetConstant(Value & 0xff, ValueType);
+ else if (ValueType == MVT::i8
+ && (Value & 0xff) == Value)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
+/// and the value's low 16 bits are zero (an ILHU-style immediate), and if so,
+/// return the upper halfword as the constant
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if ((ValueType == MVT::i32
+ && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+ return DAG.getTargetConstant(Value >> 16, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+ }
+
+ return SDValue();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
+ }
+
+ return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
+static SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
+ /*NOTREACHED*/
+ case MVT::v4f32: {
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
+ && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
+ break;
+ }
+ case MVT::v2f64: {
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+ break;
+ }
+ case MVT::v16i8: {
+ // 8-bit constants have to be expanded to 16-bits
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
+ }
+ case MVT::v8i16: {
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ }
+ case MVT::v4i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i64: {
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/*!
+ Lower a splatted v2i64 constant: build the splat directly when the upper and
+ lower 32-bit halves are equal, otherwise synthesize it via a SHUFB mask or a
+ constant pool load.
+ */
+SDValue
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+    // Magic constant that can be matched by IL, ILA, et al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
+
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
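+    // Build the 16-byte SHUFB control word one 32-bit word at a time: for a
+    // "special" half the control byte encodes one of SHUFB's constant
+    // patterns (0x80, 0xc0, or 0xe0), otherwise it indexes the corresponding
+    // byte of the HI32/LO32 operands.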
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
+ }
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector, V3, is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result is going to eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
+/// generate the control word for byte/halfword/word insertion. This takes
+/// care of a single
+/// element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V1 to V2, this can be handled
+ // using the C*[DX] compute mask instructions, but the vector elements have
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
+ EVT VecVT = V1.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned EltsFromV2 = 0;
+ unsigned V2EltOffset = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ bool monotonic = true;
+ bool rotate = true;
+  int rotamt = 0;
+ EVT maskVT; // which of the c?d instructions to use
+
+ if (EltVT == MVT::i8) {
+ V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
+ } else if (EltVT == MVT::i16) {
+ V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+ V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
+ } else
+ llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
+
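+  // Walk the shuffle mask once, simultaneously checking whether it is
+  // monotonic with a single element taken from V2 (the C*D insertion case)
+  // and whether it is a pure byte rotation of V1.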
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
+
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ // TODO: optimize for the monotonic case when several consecutive
+        // elements are taken from V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ } else {
+ rotate = false;
+ }
+      } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
+        // First time or after a "wrap around"
+        rotamt = SrcElt - i;
+        PrevElt = SrcElt;
+      } else {
+        // This isn't a rotation; it takes elements from vector 2
+        rotate = false;
+ }
+ }
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+    // As SHUFFLE_MASK becomes a c?d instruction, feed it an address;
+    // R1 ($sp) is used here only because it is guaranteed to have its low
+    // bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2EltOffset, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
+ } else if (rotate) {
+ if (rotamt < 0)
+      rotamt += MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
+ } else {
+ // Convert the SHUFFLE_VECTOR mask's input element units to the
+ // actual bytes.
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
+ }
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
+ }
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op0.getNode()->getOpcode() == ISD::Constant) {
+ // For a constant, build the appropriate constant vector, which will
+ // eventually simplify to a vector register load.
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+ SmallVector<SDValue, 16> ConstVecValues;
+ EVT VT;
+ size_t n_copies;
+
+ // Create a constant vector:
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ }
+
+ SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+ for (size_t j = 0; j < n_copies; ++j)
+ ConstVecValues.push_back(CValue);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+ &ConstVecValues[0], ConstVecValues.size());
+ } else {
+ // Otherwise, copy the value from one register to another:
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
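+    // Build a 16-byte shuffle mask that moves the requested element's bytes
+    // into the preferred slot: bytes before the slot are zero-filled (0x80
+    // control byte) and the pattern is repeated across the rest of the vector.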
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ for (int i = 0; i < 16; ++i) {
+      // zero fill upper part of preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
+
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
+
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ EVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ "vector type!");
+ }
+
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
+
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
+
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
+
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ "type");
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
+ }
+
+ return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ SDValue VecOp = Op.getOperand(0);
+ SDValue ValOp = Op.getOperand(1);
+ SDValue IdxOp = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
+
+ // use 0 when the lane to insert to is 'undef'
+ int64_t Offset=0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
+ }
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
+
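+  // SHUFB merges the scalar (broadcast via SCALAR_TO_VECTOR) into VecOp at
+  // the lane selected by the insertion mask generated above.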
+ SDValue result =
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+ VecOp,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
+
+ return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
+{
+ SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
+
+ assert(Op.getValueType() == MVT::i8);
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled i8 math operator");
+ /*NOTREACHED*/
+ break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
+ case ISD::SUB: {
+ // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::ROTR:
+ case ISD::ROTL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
+ SDValue ExpandArg =
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+ }
+ case ISD::SRL:
+ case ISD::SHL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+ SDValue ConstVec;
+ SDValue Arg;
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
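+  // The splatted constant may be either operand and may be wrapped in a
+  // BITCAST; peel that off so the BUILD_VECTOR can be inspected directly.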
+ ConstVec = Op.getOperand(0);
+ Arg = Op.getOperand(1);
+ if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ } else {
+ ConstVec = Op.getOperand(1);
+ Arg = Op.getOperand(0);
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ }
+ }
+ }
+
+ if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
+ }
+ }
+
+ // These operations (AND, OR, XOR) are legal, they just couldn't be custom
+ // lowered. Return the operation, rather than a null SDValue.
+ return Op;
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+ Custom lowering code that counts the number of ones in the input
+ operand. SPU has such an instruction, but it counts the number of
+ ones per byte, which then have to be accumulated.
+*/
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+ }
+
+ case MVT::i16: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+    // CNTB_result becomes the chain to which the virtual register
+    // CNTB_reg becomes associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ Tmp1, Shift1),
+ Tmp1),
+ Mask0);
+ }
+
+ case MVT::i32: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
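+    // CNTB yields a per-byte population count; the shift/add rounds below
+    // (by 16 and then 8 bits) fold all four byte counts of the preferred
+    // word into the low byte, which is finally masked with 0xff.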
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Comp1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
+
+ SDValue Sum1 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+ SDValue Sum1_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+ SDValue Comp2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+ Shift2);
+ SDValue Sum2 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+ }
+
+ case MVT::i64:
+ break;
+ }
+
+ return SDValue();
+}
+
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ DebugLoc dl = Op.getDebugLoc();
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ EVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+ EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ EVT IntVT(MVT::i64);
+
+  // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(dl, ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
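+  // For the ordering comparisons below, both doubles are bitcast to i64 and
+  // their sign-magnitude encodings are mapped onto two's complement
+  // (negative values become 0x8000000000000000 - bits), so that an ordinary
+  // integer compare orders the original values.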
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+ unsigned compareOp;
+
+ switch (CC->get()) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
+ case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
+ default:
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
+ }
+
+ SDValue result =
+ DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+ (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+ // Ordered comparison:
+ SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+ }
+
+ return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
+ SELB instruction.
+
+ \note Need to revisit this in the future: if the code path through the true
+ and false value computations is longer than the latency of a branch (6
+ cycles), then it would be more advantageous to branch and insert a new basic
+ block and branch on the condition. However, this code does not make that
+  assumption, given the simplistic uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue trueval = Op.getOperand(2);
+ SDValue falseval = Op.getOperand(3);
+ SDValue condition = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+ // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+ // where bits in $mask are 1. CCond will be inverted, having 1s where the
+ // condition was true and 0s where the condition was false. Hence, the
+ // arguments to SELB get reversed.
+
+ // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+ // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+ // with another "cannot select select_cc" assert:
+
+ SDValue compare = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(Op.getValueType()),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+ // Type to truncate to
+ EVT VT = Op.getValueType();
+ MVT simpleVT = VT.getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to truncate from
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
+ // Create shuffle mask, least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ Op0, Op0, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+ }
+
+ return SDValue(); // Leave the truncate unmolested
+}
+
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
+  // The type to extend to needs to be an i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+  // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ unsigned Opc = (unsigned) Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ switch (Opc) {
+ default: {
+#ifndef NDEBUG
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ Op.getNode()->dump();
+#endif
+ llvm_unreachable(0);
+ }
+ case ISD::LOAD:
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD:
+ case ISD::ZEXTLOAD:
+ return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantFP:
+ return LowerConstantFP(Op, DAG);
+
+ // i8, i64 math ops:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case ISD::SRL:
+ case ISD::SHL:
+ case ISD::SRA: {
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ break;
+ }
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+ // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return LowerByteImmed(Op, DAG);
+
+ // Vector and i8 multiply:
+ case ISD::MUL:
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const
+{
+#if 0
+ unsigned Opc = (unsigned) N->getOpcode();
+ EVT OpVT = N->getValueType(0);
+
+ switch (Opc) {
+ default: {
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ N->dump();
+ abort();
+ /*NOTREACHED*/
+ }
+ }
+#endif
+
+ /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+ TargetMachine &TM = getTargetMachine();
+#endif
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ EVT NodeVT = N->getValueType(0); // The node's value type
+ EVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: {
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+ // (any_extend (SPUextract_elt0 <arg>)) ->
+ // (SPUextract_elt0 <arg>)
+ // Types must match, however...
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\nReplace: ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ return Op0;
+ }
+ break;
+ }
+ case SPUISD::IndirectAddr: {
+ if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->isNullValue()) {
+ // (SPUindirect (SPUaform <addr>, 0), 0) ->
+ // (SPUaform <addr>, 0)
+
+ DEBUG(errs() << "Replace: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Op0.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+
+ return Op0;
+ }
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+ }
+ break;
+ }
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
+ case SPUISD::ROTBYTES_LEFT: {
+ SDValue Op1 = N->getOperand(1);
+
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
+ Result = Op0;
+ }
+ }
+ break;
+ }
+ case SPUISD::PREFSLOT2VEC: {
+ switch (Op0.getOpcode()) {
+ default:
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // <arg>
+ // but only if the SPUprefslot2vec and <arg> types match.
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+ SDValue Op000 = Op00.getOperand(0);
+ if (Op000.getValueType() == NodeVT) {
+ Result = Op000;
+ }
+ }
+ break;
+ }
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+ // <arg>
+ Result = Op0.getOperand(0);
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ // Otherwise, return unchanged.
+#ifndef NDEBUG
+ if (Result.getNode()) {
+ DEBUG(errs() << "\nReplace.SPU: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Result.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+ }
+#endif
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+ if (ConstraintLetter.size() == 1) {
+ switch (ConstraintLetter[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ // FIXME: The supported constraint letters appear to have been copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // Leaving it as is until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ // GCC constraint letters (carried over from the RS6000/PPC backend)
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64)
+ return std::make_pair(0U, SPU::R64CRegisterClass);
+ return std::make_pair(0U, SPU::R32CRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, SPU::R32FPRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, SPU::R64FPRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, SPU::GPRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth ) const {
+#if 0
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+ switch (Op.getOpcode()) {
+ default:
+ // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ break;
+ case CALL:
+ case SHUFB:
+ case SHUFFLE_MASK:
+ case CNTB:
+ case SPUISD::PREFSLOT2VEC:
+ case SPUISD::LDRESULT:
+ case SPUISD::VEC2PREFSLOT:
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_ROTL:
+ case SPUISD::VEC_ROTR:
+ case SPUISD::ROTBYTES_LEFT:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+
+ case ISD::SETCC: {
+ EVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Default, for the time being, to the base class handler
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+ const Type *Ty) const {
+ // SPU's addresses are 256K:
+ return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
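+// For reference: with (1 << 18) == 262144 the check above accepts offsets
+// strictly between -262144 and 262143, i.e. -262143 through 262142.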
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The SPU target isn't yet aware of offsets.
+ return false;
+}
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ //ceqi, cgti, etc. all take s10 operand
+ return isInt<10>(Imm);
+}
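+// For reference: isInt<10> accepts the signed 10-bit range -512..511; larger
+// comparison constants have to be materialized in a register first.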
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *) const {
+
+ // A-form: 18-bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+ // D-form: reg + 14-bit offset
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg + reg
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+ return true;
+
+ return false;
+}
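+// For reference, these shapes correspond to the load/store forms defined in
+// SPUInstrInfo.td: A-form (lqa/stqa, an 18-bit absolute local-store address
+// taken from a global), D-form (lqd/stqd, a base register plus a signed
+// 14-bit byte offset) and X-form (lqx/stqx, a base register plus an index
+// register).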
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 000000000000..d23f6cc48c71
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,186 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+ namespace SPUISD {
+ enum NodeType {
+ // Start the numbering where the built-in ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Pseudo instructions:
+ RET_FLAG, ///< Return with flag, matched by bi instruction
+
+ Hi, ///< High address component (upper 16)
+ Lo, ///< Low address component (lower 16)
+ PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
+ IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
+
+ LDRESULT, ///< Load result (value, chain)
+ CALL, ///< CALL instruction
+ SHUFB, ///< Vector shuffle (permute)
+ SHUFFLE_MASK, ///< Shuffle mask
+ CNTB, ///< Count ones in bytes (per-byte population count)
+ PREFSLOT2VEC, ///< Promote scalar->vector
+ VEC2PREFSLOT, ///< Extract element 0
+ SHL_BITS, ///< Shift quad left, by bits
+ SHL_BYTES, ///< Shift quad left, by bytes
+ SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
+ VEC_ROTL, ///< Vector rotate left
+ VEC_ROTR, ///< Vector rotate right
+ ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+ SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
+ LAST_SPUISD ///< Last user-defined instruction
+ };
+ }
+
+ //! Utility functions specific to CellSPU:
+ namespace SPU {
+ SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+ SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+ //! Simplify an EVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ DebugLoc dl);
+ }
+
+ class SPUTargetMachine; // forward dec'l.
+
+ class SPUTargetLowering :
+ public TargetLowering
+ {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ SPUTargetMachine &SPUTM;
+
+ public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
+ SPUTargetLowering(SPUTargetMachine &TM);
+
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ValueType for ISD::SETCC
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ //! Custom lowering hooks
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ //! Custom lowering hook for nodes with illegal result types.
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 000000000000..5e268f8767c2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference uses the frame index in place of a base register until it is
+/// resolved. A constant offset can be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
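+// Typical use, an illustrative sketch mirroring storeRegToStackSlot in
+// SPUInstrInfo.cpp (TII here stands for a SPUInstrInfo reference):
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(SPU::STQDr32))
+//                       .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);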
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 000000000000..bdbe2552dcdd
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,320 @@
+//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SPU";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+}
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin> {
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
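+// (The RR encoding above packs an 11-bit opcode plus three 7-bit register
+// fields into the 32-bit instruction word: 11 + 7 + 7 + 7 == 32.)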
+
+let RB = 0 in {
+ // RR Format, where RB is zeroed (don't care):
+ class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ let RA = 0 in {
+ // RR Format, where RA and RB are zeroed (don't care):
+ // Used for reads from status control registers (see FPSCRRr32)
+ class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+ }
+}
+
+let RT = 0 in {
+ // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+ // says, "RT is a false target." Used in "Halt if" instructions
+ class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RC;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-3} = opcode;
+ let Inst{4-10} = RT;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> i7;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = i7;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-9} = opcode;
+ let Inst{10-17} = 0;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ let RT = 0 in {
+ // Branch instruction format (without D/E flag settings)
+ class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+ : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+ pattern>
+ { }
+
+ let RB = 0 in {
+ // Return instruction (bi, branch indirect), RA is zero (LR):
+ class RETForm<string asmstr, list<dag> pattern>
+ : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+ pattern>
+ { }
+ }
+ }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+ : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+ bits<7> Rcalldest;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b11010101100;
+ let Inst{11} = 0;
+ let Inst{12-13} = DE_flag;
+ let Inst{14-17} = 0b0000;
+ let Inst{18-24} = Rcalldest;
+ let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<10> i10;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-7} = opcode;
+ let Inst{8-17} = i10;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+ class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+ class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<16> i16;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-8} = opcode;
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+ class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+}
+
+//===----------------------------------------------------------------------===//
+// Specialized versions of RI16:
+//===----------------------------------------------------------------------===//
+
+// RI18 Format
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<18> i18;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-24} = i18;
+ let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))] >;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ i32ImmSExt10:$val))] >;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))] >;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch hint formats
+//===----------------------------------------------------------------------===//
+// For hbrr and hbra
+class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
+ : Instruction {
+ field bits<32> Inst;
+ bits<16> i16;
+ bits<9> RO;
+
+ let Namespace = "SPU";
+ let InOperandList = IOL;
+ let OutOperandList = (outs); //no output
+ let AsmString = asmstr;
+ let Itinerary = BranchHints;
+
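+ // The 9-bit relative offset RO is split across the encoding: its two high
+ // bits land in Inst{7-8} and its low seven bits in Inst{25-31}.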
+ let Inst{0-6} = opcode;
+ let Inst{7-8} = RO{8-7};
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RO{6-0};
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 000000000000..e67b10c7984d
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,451 @@
+//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+namespace {
+ //! Predicate for an unconditional branch instruction
+ inline bool isUncondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BR
+ || opc == SPU::BRA
+ || opc == SPU::BI);
+ }
+
+ //! Predicate for a conditional branch instruction
+ inline bool isCondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
+ }
+}
+
+SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
+ : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
+ TM(tm),
+ RI(*TM.getSubtargetImpl(), *this)
+{ /* NOP */ }
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new SPUHazardRecognizer(*TII);
+}
+
+unsigned
+SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+unsigned
+SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+ // We support cross register class moves for our aliases, such as R3 in any
+ // reg class to any other reg class containing R3. This is required because
+ // we select, for example, bitconvert i64 -> f64 as a no-op, so our register
+ // types have no specific meaning.
+
+ BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void
+SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void
+SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
+}
+
+//! Branch analysis
+/*!
+ \note This code was kiped from PPC. There may be more branch analysis for
+ CellSPU than what's currently done here.
+ */
+bool
+SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranch(LastInst)) {
+ // Check for jump tables
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(LastInst)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing LastInst: ");
+ DEBUG(LastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a conditional and unconditional branch, handle it.
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing SecondLastInst: ");
+ DEBUG(SecondLastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+// Search MBB for branch hint labels and branch hint ops.
+static void removeHBR(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (I->getOpcode() == SPU::HBRA ||
+ I->getOpcode() == SPU::HBR_LABEL) {
+ // erase() returns the iterator following the removed instruction, so only
+ // advance the iterator when nothing was erased.
+ I = MBB.erase(I);
+ } else {
+ ++I;
+ }
+ }
+}
+
+unsigned
+SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ removeHBR(MBB);
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isCondBranch(I) && !isUncondBranch(I))
+ return 0;
+
+ // Remove the first branch.
+ DEBUG(errs() << "Removing branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ I = MBB.end();
+ if (I == MBB.begin())
+ return 1;
+
+ --I;
+ if (!(isCondBranch(I) || isUncondBranch(I)))
+ return 1;
+
+ // Remove the second branch.
+ DEBUG(errs() << "Removing second branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ return 2;
+}
+
+/** Find the optimal position for a hint branch instruction in a basic block.
+ * This should take into account:
+ *  - the branch hint delays
+ *  - congestion of the memory bus
+ *  - dual-issue scheduling (i.e. avoid insertion of nops)
+ * The current implementation is rather simplistic.
+ */
+static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
+{
+ MachineBasicBlock::iterator J = MBB.end();
+ for (int i = 0; i < 8; i++) {
+ if (J == MBB.begin()) return J;
+ J--;
+ }
+ return J;
+}
+
+unsigned
+SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SPU branch conditions have two components!");
+
+ MachineInstrBuilder MIB;
+ // TODO: use a more accurate placement algorithm.
+ bool haveHBR = MBB.size() > 8;
+
+ removeHBR(MBB);
+ MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
+ // Add a label just before the branch
+ if (haveHBR)
+ MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Unconditional branch
+ MIB = BuildMI(&MBB, DL, get(SPU::BR));
+ MIB.addMBB(TBB);
+
+ DEBUG(errs() << "Inserted one-way uncond branch: ");
+ DEBUG((*MIB).dump());
+
+ // Basic blocks have just one branch, so it is safe to add the hint here.
+ if (haveHBR) {
+ MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+ } else {
+ // Conditional branch
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+
+ if (haveHBR) {
+ MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+
+ DEBUG(errs() << "Inserted one-way cond branch: ");
+ DEBUG((*MIB).dump());
+ }
+ return 1;
+ } else {
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
+
+ // Two-way Conditional Branch.
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ MIB2.addMBB(FBB);
+
+ if (haveHBR) {
+ MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(FBB);
+ }
+
+ DEBUG(errs() << "Inserted conditional branch: ");
+ DEBUG((*MIB).dump());
+ DEBUG(errs() << "part 2: ");
+ DEBUG((*MIB2).dump());
+ return 2;
+ }
+}
+
+//! Reverses a branch's condition, returning false on success.
+bool
+SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // Pretty brainless way of inverting the condition, but it works, considering
+ // there are only two conditions...
+ static struct {
+ unsigned Opc; //! The incoming opcode
+ unsigned RevCondOpc; //! The reversed condition opcode
+ } revconds[] = {
+ { SPU::BRNZr32, SPU::BRZr32 },
+ { SPU::BRNZv4i32, SPU::BRZv4i32 },
+ { SPU::BRZr32, SPU::BRNZr32 },
+ { SPU::BRZv4i32, SPU::BRNZv4i32 },
+ { SPU::BRHNZr16, SPU::BRHZr16 },
+ { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
+ { SPU::BRHZr16, SPU::BRHNZr16 },
+ { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
+ };
+
+ unsigned Opc = unsigned(Cond[0].getImm());
+ // Pretty dull mapping between the two conditions that SPU can generate:
+ for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
+ if (revconds[i].Opc == Opc) {
+ Cond[0].setImm(revconds[i].RevCondOpc);
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
new file mode 100644
index 000000000000..bc1ba71f7a45
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
@@ -0,0 +1,84 @@
+//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CellSPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRUCTIONINFO_H
+#define SPU_INSTRUCTIONINFO_H
+
+#include "SPU.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SPURegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SPUGenInstrInfo.inc"
+
+namespace llvm {
+ //! Cell SPU instruction information class
+ class SPUInstrInfo : public SPUGenInstrInfo {
+ SPUTargetMachine &TM;
+ const SPURegisterInfo RI;
+ public:
+ explicit SPUInstrInfo(SPUTargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ //! Store a register to a stack slot, based on its register class.
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Reverses a branch's condition, returning false on success.
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 000000000000..e103c9b6a5af
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4482 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions:
+//===----------------------------------------------------------------------===//
+
+let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt)]>;
+ def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
+ "$targ:\t${:comment}branch hint target",[ ]>;
+}
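+// ADJCALLSTACKDOWN/UP are the opcodes handed to the SPUGenInstrInfo
+// constructor in SPUInstrInfo.cpp as the call-frame setup/destroy pair, so
+// they bracket each call sequence until call frames are finalized
+// (presumably during prologue/epilogue insertion).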
+
+//===----------------------------------------------------------------------===//
+// Loads:
+// NB: The ordering is actually important, since the instruction selection
+// will try each of the instructions in sequence, i.e., the D-form first with
+// the 10-bit displacement, then the A-form with the 16-bit displacement, and
+// finally the X-form with register-register addressing.
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1 in {
+ class LoadDFormVec<ValueType vectype>
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
+ { }
+
+ class LoadDForm<RegisterClass rclass>
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load dform_addr:$src))]>
+ { }
+
+ multiclass LoadDForms
+ {
+ def v16i8: LoadDFormVec<v16i8>;
+ def v8i16: LoadDFormVec<v8i16>;
+ def v4i32: LoadDFormVec<v4i32>;
+ def v2i64: LoadDFormVec<v2i64>;
+ def v4f32: LoadDFormVec<v4f32>;
+ def v2f64: LoadDFormVec<v2f64>;
+
+ def r128: LoadDForm<GPRC>;
+ def r64: LoadDForm<R64C>;
+ def r32: LoadDForm<R32C>;
+ def f32: LoadDForm<R32FP>;
+ def f64: LoadDForm<R64FP>;
+ def r16: LoadDForm<R16C>;
+ def r8: LoadDForm<R8C>;
+ }
+
+ class LoadAFormVec<ValueType vectype>
+ : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
+ { }
+
+ class LoadAForm<RegisterClass rclass>
+ : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load aform_addr:$src))]>
+ { }
+
+ multiclass LoadAForms
+ {
+ def v16i8: LoadAFormVec<v16i8>;
+ def v8i16: LoadAFormVec<v8i16>;
+ def v4i32: LoadAFormVec<v4i32>;
+ def v2i64: LoadAFormVec<v2i64>;
+ def v4f32: LoadAFormVec<v4f32>;
+ def v2f64: LoadAFormVec<v2f64>;
+
+ def r128: LoadAForm<GPRC>;
+ def r64: LoadAForm<R64C>;
+ def r32: LoadAForm<R32C>;
+ def f32: LoadAForm<R32FP>;
+ def f64: LoadAForm<R64FP>;
+ def r16: LoadAForm<R16C>;
+ def r8: LoadAForm<R8C>;
+ }
+
+ class LoadXFormVec<ValueType vectype>
+ : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
+ { }
+
+ class LoadXForm<RegisterClass rclass>
+ : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load xform_addr:$src))]>
+ { }
+
+ multiclass LoadXForms
+ {
+ def v16i8: LoadXFormVec<v16i8>;
+ def v8i16: LoadXFormVec<v8i16>;
+ def v4i32: LoadXFormVec<v4i32>;
+ def v2i64: LoadXFormVec<v2i64>;
+ def v4f32: LoadXFormVec<v4f32>;
+ def v2f64: LoadXFormVec<v2f64>;
+
+ def r128: LoadXForm<GPRC>;
+ def r64: LoadXForm<R64C>;
+ def r32: LoadXForm<R32C>;
+ def f32: LoadXForm<R32FP>;
+ def f64: LoadXForm<R64FP>;
+ def r16: LoadXForm<R16C>;
+ def r8: LoadXForm<R8C>;
+ }
+
+ defm LQA : LoadAForms;
+ defm LQD : LoadDForms;
+ defm LQX : LoadXForms;
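+ // Each defm above expands to one instruction per member suffix (LQDr32,
+ // LQDv16i8, and so on); these are the opcodes that isLoadFromStackSlot in
+ // SPUInstrInfo.cpp pattern-matches.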
+
+/* Load quadword, PC relative: Not much use at this point in time.
+ Might be of use later for relocatable code. It's effectively the
+ same as LQA, but uses PC-relative addressing.
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
+ "lqr\t$rT, $disp", LoadStore,
+ [(set VECREG:$rT, (load iaddr:$disp))]>;
+ */
+}
+
+//===----------------------------------------------------------------------===//
+// Stores:
+//===----------------------------------------------------------------------===//
+class StoreDFormVec<ValueType vectype>
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), dform_addr:$src)]>
+{ }
+
+class StoreDForm<RegisterClass rclass>
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, dform_addr:$src)]>
+{ }
+
+multiclass StoreDForms
+{
+ def v16i8: StoreDFormVec<v16i8>;
+ def v8i16: StoreDFormVec<v8i16>;
+ def v4i32: StoreDFormVec<v4i32>;
+ def v2i64: StoreDFormVec<v2i64>;
+ def v4f32: StoreDFormVec<v4f32>;
+ def v2f64: StoreDFormVec<v2f64>;
+
+ def r128: StoreDForm<GPRC>;
+ def r64: StoreDForm<R64C>;
+ def r32: StoreDForm<R32C>;
+ def f32: StoreDForm<R32FP>;
+ def f64: StoreDForm<R64FP>;
+ def r16: StoreDForm<R16C>;
+ def r8: StoreDForm<R8C>;
+}
+
+class StoreAFormVec<ValueType vectype>
+ : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), aform_addr:$src)]>;
+
+class StoreAForm<RegisterClass rclass>
+ : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, aform_addr:$src)]>;
+
+multiclass StoreAForms
+{
+ def v16i8: StoreAFormVec<v16i8>;
+ def v8i16: StoreAFormVec<v8i16>;
+ def v4i32: StoreAFormVec<v4i32>;
+ def v2i64: StoreAFormVec<v2i64>;
+ def v4f32: StoreAFormVec<v4f32>;
+ def v2f64: StoreAFormVec<v2f64>;
+
+ def r128: StoreAForm<GPRC>;
+ def r64: StoreAForm<R64C>;
+ def r32: StoreAForm<R32C>;
+ def f32: StoreAForm<R32FP>;
+ def f64: StoreAForm<R64FP>;
+ def r16: StoreAForm<R16C>;
+ def r8: StoreAForm<R8C>;
+}
+
+class StoreXFormVec<ValueType vectype>
+ : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), xform_addr:$src)]>
+{ }
+
+class StoreXForm<RegisterClass rclass>
+ : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, xform_addr:$src)]>
+{ }
+
+multiclass StoreXForms
+{
+ def v16i8: StoreXFormVec<v16i8>;
+ def v8i16: StoreXFormVec<v8i16>;
+ def v4i32: StoreXFormVec<v4i32>;
+ def v2i64: StoreXFormVec<v2i64>;
+ def v4f32: StoreXFormVec<v4f32>;
+ def v2f64: StoreXFormVec<v2f64>;
+
+ def r128: StoreXForm<GPRC>;
+ def r64: StoreXForm<R64C>;
+ def r32: StoreXForm<R32C>;
+ def f32: StoreXForm<R32FP>;
+ def f64: StoreXForm<R64FP>;
+ def r16: StoreXForm<R16C>;
+ def r8: StoreXForm<R8C>;
+}
+
+defm STQD : StoreDForms;
+defm STQA : StoreAForms;
+defm STQX : StoreXForms;
+
+/* Store quadword, PC relative: Not much use at this point in time. Might
+ be useful for relocatable code.
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
+ "stqr\t$rT, $disp", LoadStore,
+ [(store VECREG:$rT, iaddr:$disp)]>;
+*/
+
+//===----------------------------------------------------------------------===//
+// Generate Controls for Insertion:
+//===----------------------------------------------------------------------===//
+
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cbd\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cbx\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "chd\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "chx\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Constant formation:
+//===----------------------------------------------------------------------===//
+
+def ILHv8i16:
+ RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
+
+def ILHr16:
+ RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R16C:$rT, immSExt16:$val)]>;
+
+// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
+// the right constant")
+def ILHr8:
+ RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R8C:$rT, immSExt8:$val)]>;
+
+// IL does sign extension!
+
+class ILInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmediateLoad
+{
+ def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
+ def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
+
+ // TODO: Need v2f64, v4f32
+
+ def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
+ def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
+ def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
+ def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
+}
+
+defm IL : ImmediateLoad;
+
+class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadHalfwordUpper
+{
+ def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
+ def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
+
+ def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
+ def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
+
+ // Loads the high portion of an address
+ def hi: ILHURegInst<R32C, symbolHi, hi16>;
+
+ // Used in custom lowering constant SFP loads:
+ def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
+}
+
+defm ILHU : ImmLoadHalfwordUpper;
+
+// Immediate load address (can also be used to load 18-bit unsigned constants,
+// see the zext 16->32 pattern)
+
+class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
+ LoadNOP, pattern>;
+
+class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadAddress
+{
+ def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
+ def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
+
+ def r64: ILARegInst<R64C, u18imm_i64, imm18>;
+ def r32: ILARegInst<R32C, u18imm, imm18>;
+ def f32: ILARegInst<R32FP, f18imm, fpimm18>;
+ def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
+ def lo: ILARegInst<R32C, symbolLo, imm18>;
+
+ def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
+ [(set R32C:$rT, imm18:$val)]>;
+}
+
+defm ILA : ImmLoadAddress;
+
+// Immediate OR, Halfword Lower: The "other" part of loading large constants
+// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
+// Note that these are really two-operand instructions, but they're encoded
+// as three operands with the first two arguments tied to each other.
+
+class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
+ ImmLoad, pattern>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
+class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+multiclass ImmOrHalfwordLower
+{
+ def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
+ def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
+
+ def r32: IOHLRegInst<R32C, i32imm>;
+ def f32: IOHLRegInst<R32FP, f32imm>;
+
+ def lo: IOHLRegInst<R32C, symbolLo>;
+}
+
+defm IOHL: ImmOrHalfwordLower;
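+
+// Illustrative note (not taken from this file; register numbers are
+// hypothetical): a full 32-bit constant such as 0x12345678 is typically
+// materialized as an ILHU/IOHL pair, e.g.
+//
+//   ilhu  $3, 0x1234      ; $3 = 0x12340000 (upper halfword, lower zeroed)
+//   iohl  $3, 0x5678      ; $3 = 0x12345678 (OR in the lower halfword)
+//
+// The anonymous Pat<(i32 imm:$imm), ...> mentioned above is what ties the
+// two instructions together during selection.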
+
+// Form select mask for bytes using immediate, used in conjunction with the
+// SELB instruction:
+
+class FSMBIVec<ValueType vectype>:
+ RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
+ "fsmbi\t$rT, $val",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
+
+multiclass FormSelectMaskBytesImm
+{
+ def v16i8: FSMBIVec<v16i8>;
+ def v8i16: FSMBIVec<v8i16>;
+ def v4i32: FSMBIVec<v4i32>;
+ def v2i64: FSMBIVec<v2i64>;
+}
+
+defm FSMBI : FormSelectMaskBytesImm;
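+
+// Illustrative note (assumed semantics; register number hypothetical): each of
+// the 16 immediate bits expands into one mask byte (1 -> 0xFF, 0 -> 0x00), so
+// "fsmbi $3, 0xFF00" produces a mask of eight 0xFF bytes and eight 0x00 bytes.
+// Such masks feed SELB (vide infra), which takes bits from $rB where the mask
+// is 1 and from $rA where it is 0.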
+
+// fsmb: Form select mask for bytes. N.B.: the input operand, $rA, is 16 bits wide
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
+
+// fsmh: Form select mask for halfwords. N.B.: the input operand, $rA, is
+// only 8 bits wide (even though it's input as 16 bits here)
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
+
+// fsm: Form select mask for words. Like the other fsm* instructions,
+// only the lower 4 bits of $rA are significant.
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskWord {
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
+
+//===----------------------------------------------------------------------===//
+// Integer and Logical Operations:
+//===----------------------------------------------------------------------===//
+
+def AHv8i16:
+ RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
+
+def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (AHv8i16 VECREG:$rA, VECREG:$rB)>;
+
+def AHr16:
+ RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
+
+def AHIvec:
+ RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def AHIr16:
+ RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
+
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+ def r32: ARegInst<R32C>;
+}
+
+defm A : AddInstruction;
+
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
+
+def SFHvec:
+ RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def SFHr16:
+ RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
+
+def SFHIvec:
+ RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
+ (v8i16 VECREG:$rA)))]>;
+
+def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
+
+def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
+
+def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
+
+def SFIvec:
+ RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
+ (v4i32 VECREG:$rA)))]>;
+
+def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
+ (ins R32C:$rA, s10imm_i32:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
+
+// ADDX: add extended; doesn't match a pattern (both vector and register forms
+// are provided below).
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
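+
+// Rough sketch (an assumption, not the authoritative lowering): wide adds are
+// built from CG/ADDX pairs.  CG produces a per-word carry vector; once the
+// carries are shuffled into the slots of the next-higher words, ADDX adds
+// $rA, $rB and the carry tied in through $rCarry.  The exact 64-bit sequences
+// are defined elsewhere (presumably SPU64InstrInfo.td, referenced below).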
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: borrow generate; doesn't match a pattern (both vector and register forms
+// are provided below).
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
+def BGXvec:
+ RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
+ VECREG:$rCarry),
+ "bgx\t$rT, $rA, $rB", IntegerOp,
+ []>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+// Halfword multiply variants:
+// N.B.: These can be used to build up larger quantities (16x16 -> 32)
+
+def MPYv8i16:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYr16:
+ RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYUv4i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYUr16:
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
+
+def MPYUr32:
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
+ "mpyi\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+def MPYIr16:
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+
+// mpyui: same issues as the other multiplies; in addition, this doesn't match
+// a pattern, but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYUIvec:
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
+
+def MPYUIr16:
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
+
+// mpya: 16 x 16 + 32 -> 32 bit result
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
+def MPYAv4i32:
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
+
+def MPYAr32:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sextinreg:
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
+
+// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYHv4i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYHr32:
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
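+// Illustrative note (assumed idiom, not taken from this file; t0-t3 are
+// hypothetical scratch registers): since mpy/mpyu only multiply 16-bit halves,
+// a full 32-bit multiply a*b is usually synthesized as
+//
+//   mpyh  t0, a, b        ; (a_hi * b_lo) << 16
+//   mpyh  t1, b, a        ; (b_hi * a_lo) << 16
+//   mpyu  t2, a, b        ; a_lo * b_lo
+//   a     t3, t0, t1
+//   a     rt, t3, t2      ; rt = a*b (mod 2^32)
+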
+// mpys: multiply high and shift right (returns the upper 16 bits of a
+// 16 x 16 multiply, sign extended to 32 bits.)
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
+ "mpys\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYSv4i32:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYSr16:
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
+
+// mpyhh: multiply high-high (returns the 32-bit result from multiplying
+// the top 16 bits of $rA and $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHv8i16:
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHr32:
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhha: Multiply high-high, add to $rT:
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
+ "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAr32:
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
+ "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHUv4i32:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHUr32:
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhau: Multiply high-high and add, unsigned
+
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
+ "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
+
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
+
+// cntb: Count ones in bytes (aka "population count")
+//
+// NOTE: This instruction is really a vector instruction, but the custom
+// lowering code uses it in unorthodox ways to support CTPOP for other
+// data types!
+
+def CNTBv16i8:
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
+
+def CNTBv8i16 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
+
+def CNTBv4i32 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
+
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
+
+// gbh: Gather all low order bits from each halfword in $rA into a single
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
+
+// gb: Gather all low order bits from each word in $rA into a single
+// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
+// as well as slots 1-3.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
+
+// avgb: average bytes
+def AVGB:
+ RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "avgb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// absdb: absolute difference of bytes
+def ABSDB:
+ RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "absdb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// sumb: sum bytes into halfwords
+def SUMB:
+ RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sumb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// Sign extension operations:
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ pattern>;
+
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [
+ /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+
+  // 32-bit form for XSBH: the first step when sign extending 8-bit quantities
+  // all the way to 32-bit quantities via a 32-bit register (see the sext 8->32
+ // pattern below). Intentionally doesn't match a pattern because we want the
+ // sext 8->32 pattern to do the work for us, namely because we need the extra
+ // XSHWr32.
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v8i16, v4i32>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
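+
+// Illustrative note (an assumption about how the patterns compose): an i8 ->
+// i32 sign extension is presumably selected as XSBH followed by XSHW, roughly
+//   (XSHWr32 (XSBHr32 R32C:$src))
+// which is why XSBHr32 above deliberately carries no pattern of its own.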
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [/*(set (out_vectype VECREG:$rDst),
+ (sext (out_vectype VECREG:$rSrc)))*/]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+ XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+ [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+ def v2i64: XSWDVecInst<v4i32, v2i64>;
+ def r64: XSWDRegInst<R32C, R64C>;
+
+ def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+ [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+ ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+ ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+ def v16i8: ANDVecInst<v16i8>;
+ def v8i16: ANDVecInst<v8i16>;
+ def v4i32: ANDVecInst<v4i32>;
+ def v2i64: ANDVecInst<v2i64>;
+
+ def r128: ANDRegInst<GPRC>;
+ def r64: ANDRegInst<R64C>;
+ def r32: ANDRegInst<R32C>;
+ def r16: ANDRegInst<R16C>;
+ def r8: ANDRegInst<R8C>;
+
+ //===---------------------------------------------
+ // Special instructions to perform the fabs instruction
+ def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ //===---------------------------------------------
+
+ // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+ // quantities -- see 16->32 zext pattern.
+ //
+  // This pattern is somewhat artificial: it could match some compiler-generated
+  // code, but it is unlikely to do so in practice.
+
+ def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
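+
+// Illustrative note (an assumption about how the fabs forms are used): the
+// ANDfabs* instructions clear the IEEE sign bit, so an f32 fabs amounts to
+// ANDing the value with a 0x7fffffff mask (materialized separately, e.g. via
+// ILHU/IOHL); no pattern is attached, presumably because the mask setup is
+// done during custom lowering.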
+
+
+def vnot_cell_conv : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
+
+// N.B.: vnot_cell_conv is one of those special target selection pattern
+// fragments in which we expect there to be a bit_convert on the constant.
+// Bear in mind that LLVM translates "not <reg>" to "xor <reg>, -1" (or, in
+// this case, a constant -1 vector.)
+
+class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
+
+class ANDCRegInst<RegisterClass rclass>:
+ ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass AndComplement
+{
+ def v16i8: ANDCVecInst<v16i8>;
+ def v8i16: ANDCVecInst<v8i16>;
+ def v4i32: ANDCVecInst<v4i32>;
+ def v2i64: ANDCVecInst<v2i64>;
+
+ def r128: ANDCRegInst<GPRC>;
+ def r64: ANDCRegInst<R64C>;
+ def r32: ANDCRegInst<R32C>;
+ def r16: ANDCRegInst<R16C>;
+ def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
+}
+
+defm ANDC : AndComplement;
+
+class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndByteImm
+{
+ def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT),
+ (and (v16i8 VECREG:$rA),
+ (v16i8 v16i8U8Imm:$val)))]>;
+
+ def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
+}
+
+defm ANDBI : AndByteImm;
+
+class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndHalfwordImm
+{
+ def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+ def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Zero-extend i8 to i16:
+ def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
+}
+
+defm ANDHI : AndHalfwordImm;
+
+class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass AndWordImm
+{
+ def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
+
+ def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
+ // pattern below.
+ def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
+ // zext 16->32 pattern below.
+ //
+ // Note that this pattern is somewhat artificial, since it might match
+ // something the compiler generates but is unlikely to occur in practice.
+ def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
+}
+
+defm ANDI : AndWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Bitwise OR group:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Bitwise "or" (N.B.: These are also register-register copy instructions...)
+class ORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORVecInst<ValueType vectype>:
+ ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ORRegInst<RegisterClass rclass>:
+ ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+
+
+multiclass BitwiseOr
+{
+ def v16i8: ORVecInst<v16i8>;
+ def v8i16: ORVecInst<v8i16>;
+ def v4i32: ORVecInst<v4i32>;
+ def v2i64: ORVecInst<v2i64>;
+
+ def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4f32 VECREG:$rT),
+ (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))))]>;
+
+ def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v2f64 VECREG:$rT),
+ (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)))))]>;
+
+ def r128: ORRegInst<GPRC>;
+ def r64: ORRegInst<R64C>;
+ def r32: ORRegInst<R32C>;
+ def r16: ORRegInst<R16C>;
+ def r8: ORRegInst<R8C>;
+
+ // OR instructions used to copy f32 and f64 registers.
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [/* no pattern */]>;
+
+ def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ [/* no pattern */]>;
+}
+
+defm OR : BitwiseOr;
+
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
+
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
+
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
+
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
+
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
+
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
+
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
+
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
+
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
+
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
+
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
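+
+// For example (register numbers hypothetical), "lr $3, $4" assembles to the
+// same bits as "or $3, $4, $4": RA fills both source fields in the encoding
+// above, so the alias costs nothing over a plain OR copy.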
+
+// ORC: Bitwise "or" with complement (c = a | ~b)
+
+class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORCVecInst<ValueType vectype>:
+ ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vnot (vectype VECREG:$rB))))]>;
+
+class ORCRegInst<RegisterClass rclass>:
+ ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass BitwiseOrComplement
+{
+ def v16i8: ORCVecInst<v16i8>;
+ def v8i16: ORCVecInst<v8i16>;
+ def v4i32: ORCVecInst<v4i32>;
+ def v2i64: ORCVecInst<v2i64>;
+
+ def r128: ORCRegInst<GPRC>;
+ def r64: ORCRegInst<R64C>;
+ def r32: ORCRegInst<R32C>;
+ def r16: ORCRegInst<R16C>;
+ def r8: ORCRegInst<R8C>;
+}
+
+defm ORC : BitwiseOrComplement;
+
+// OR byte immediate
+class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype immpred:$val)))]>;
+
+multiclass BitwiseOrByteImm
+{
+ def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
+
+ def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
+}
+
+defm ORBI : BitwiseOrByteImm;
+
+// OR halfword immediate
+class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+multiclass BitwiseOrHalfwordImm
+{
+ def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
+
+ def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Specialized ORHI form used to promote 8-bit registers to 16-bit
+ def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
+ [(set R16C:$rT, (or (anyext R8C:$rA),
+ i16ImmSExt10:$val))]>;
+}
+
+defm ORHI : BitwiseOrHalfwordImm;
+
+class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+// Bitwise "or" with immediate
+multiclass BitwiseOrImm
+{
+ def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
+
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>;
+
+  // i16i32: Hacked version of the ORI instruction to extend 16-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+  // infra "anyext 16->32" pattern.)
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R16C:$rA),
+ i32ImmSExt10:$val))]>;
+
+  // i8i32: Hacked version of the ORI instruction to extend 8-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+  // infra the "anyext" patterns.)
+ def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R8C:$rA),
+ i32ImmSExt10:$val))]>;
+}
+
+defm ORI : BitwiseOrImm;
+
+// ORX: "or" across the vector: ORs $rA's word slots together, leaving the
+// result in $rT[0]; slots 1-3 are zeroed.
+//
+// FIXME: Needs to match an intrinsic pattern.
+def ORXv4i32:
+ RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orx\t$rT, $rA, $rB", IntegerOp,
+ []>;
+
+// XOR:
+
+class XORInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class XORVecInst<ValueType vectype>:
+ XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class XORRegInst<RegisterClass rclass>:
+ XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseExclusiveOr
+{
+ def v16i8: XORVecInst<v16i8>;
+ def v8i16: XORVecInst<v8i16>;
+ def v4i32: XORVecInst<v4i32>;
+ def v2i64: XORVecInst<v2i64>;
+
+ def r128: XORRegInst<GPRC>;
+ def r64: XORRegInst<R64C>;
+ def r32: XORRegInst<R32C>;
+ def r16: XORRegInst<R16C>;
+ def r8: XORRegInst<R8C>;
+
+ // XOR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+}
+
+defm XOR : BitwiseExclusiveOr;
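+
+// Illustrative note (an assumption about how the fneg forms are used): the
+// XORfneg* instructions flip the IEEE sign bit, so an f32 negate is an XOR
+// with a 0x80000000 mask and an f64 negate an XOR with 0x8000000000000000;
+// as with the AND fabs forms, the masks are expected to come from custom
+// lowering code.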
+
+//==----------------------------------------------------------
+
+class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass XorByteImm
+{
+ def v16i8:
+ XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
+
+ def r8:
+ XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
+}
+
+defm XORBI : XorByteImm;
+
+def XORHIv8i16:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def XORHIr16:
+ RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
+
+def XORIv4i32:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
+ v4i32SExt10Imm:$val))]>;
+
+def XORIr32:
+ RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
+
+// NAND:
+
+class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NANDVecInst<ValueType vectype>:
+ NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NANDRegInst<RegisterClass rclass>:
+ NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNand
+{
+ def v16i8: NANDVecInst<v16i8>;
+ def v8i16: NANDVecInst<v8i16>;
+ def v4i32: NANDVecInst<v4i32>;
+ def v2i64: NANDVecInst<v2i64>;
+
+ def r128: NANDRegInst<GPRC>;
+ def r64: NANDRegInst<R64C>;
+ def r32: NANDRegInst<R32C>;
+ def r16: NANDRegInst<R16C>;
+ def r8: NANDRegInst<R8C>;
+}
+
+defm NAND : BitwiseNand;
+
+// NOR:
+
+class NORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NORVecInst<ValueType vectype>:
+ NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NORRegInst<RegisterClass rclass>:
+ NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNor
+{
+ def v16i8: NORVecInst<v16i8>;
+ def v8i16: NORVecInst<v8i16>;
+ def v4i32: NORVecInst<v4i32>;
+ def v2i64: NORVecInst<v2i64>;
+
+ def r128: NORRegInst<GPRC>;
+ def r64: NORRegInst<R64C>;
+ def r32: NORRegInst<R32C>;
+ def r16: NORRegInst<R16C>;
+ def r8: NORRegInst<R8C>;
+}
+
+defm NOR : BitwiseNor;
+
+// Select bits:
+class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
+ (and (vnot_frag (vectype VECREG:$rC)),
+ (vectype VECREG:$rA))))]>;
+
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBRegInst<RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
+ [(set rclass:$rT,
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
+
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
+multiclass SelectBits
+{
+ def v16i8: SELBVecInst<v16i8>;
+ def v8i16: SELBVecInst<v8i16>;
+ def v4i32: SELBVecInst<v4i32>;
+ def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
+
+ def r128: SELBRegInst<GPRC>;
+ def r64: SELBRegInst<R64C>;
+ def r32: SELBRegInst<R32C>;
+ def r16: SELBRegInst<R16C>;
+ def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+  def v16i8_vcond: SELBVecVCondInst<v16i8>;
+  def v8i16_vcond: SELBVecVCondInst<v8i16>;
+  def v4i32_vcond: SELBVecVCondInst<v4i32>;
+  def v2i64_vcond: SELBVecVCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
+ // SELBr64_cond is defined in SPU64InstrInfo.td
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
+}
+
+defm SELB : SelectBits;
+
+class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
+ Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
+ (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+
+def : SPUselbPatVec<v16i8, SELBv16i8>;
+def : SPUselbPatVec<v8i16, SELBv8i16>;
+def : SPUselbPatVec<v4i32, SELBv4i32>;
+def : SPUselbPatVec<v2i64, SELBv2i64>;
+
+class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
+ Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
+ (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
+
+def : SPUselbPatReg<R8C, SELBr8>;
+def : SPUselbPatReg<R16C, SELBr16>;
+def : SPUselbPatReg<R32C, SELBr32>;
+def : SPUselbPatReg<R64C, SELBr64>;
+
+// EQV: Equivalence (1 for each same bit, otherwise 0)
+//
+// Note: There are a lot of ways to match this bit operator and these patterns
+// attempt to be as exhaustive as possible.
+
+class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class EQVVecInst<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (and (vnot (vectype VECREG:$rA)),
+ (vnot (vectype VECREG:$rB)))))]>;
+
+class EQVRegInst<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
+ (and (not rclass:$rA), (not rclass:$rB))))]>;
+
+class EQVVecPattern1<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern1<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
+
+class EQVVecPattern2<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
+
+class EQVRegPattern2<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (or (and rclass:$rA, rclass:$rB),
+ (not (or rclass:$rA, rclass:$rB))))]>;
+
+class EQVVecPattern3<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern3<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitEquivalence
+{
+ def v16i8: EQVVecInst<v16i8>;
+ def v8i16: EQVVecInst<v8i16>;
+ def v4i32: EQVVecInst<v4i32>;
+ def v2i64: EQVVecInst<v2i64>;
+
+ def v16i8_1: EQVVecPattern1<v16i8>;
+ def v8i16_1: EQVVecPattern1<v8i16>;
+ def v4i32_1: EQVVecPattern1<v4i32>;
+ def v2i64_1: EQVVecPattern1<v2i64>;
+
+ def v16i8_2: EQVVecPattern2<v16i8>;
+ def v8i16_2: EQVVecPattern2<v8i16>;
+ def v4i32_2: EQVVecPattern2<v4i32>;
+ def v2i64_2: EQVVecPattern2<v2i64>;
+
+ def v16i8_3: EQVVecPattern3<v16i8>;
+ def v8i16_3: EQVVecPattern3<v8i16>;
+ def v4i32_3: EQVVecPattern3<v4i32>;
+ def v2i64_3: EQVVecPattern3<v2i64>;
+
+ def r128: EQVRegInst<GPRC>;
+ def r64: EQVRegInst<R64C>;
+ def r32: EQVRegInst<R32C>;
+ def r16: EQVRegInst<R16C>;
+ def r8: EQVRegInst<R8C>;
+
+ def r128_1: EQVRegPattern1<GPRC>;
+ def r64_1: EQVRegPattern1<R64C>;
+ def r32_1: EQVRegPattern1<R32C>;
+ def r16_1: EQVRegPattern1<R16C>;
+ def r8_1: EQVRegPattern1<R8C>;
+
+ def r128_2: EQVRegPattern2<GPRC>;
+ def r64_2: EQVRegPattern2<R64C>;
+ def r32_2: EQVRegPattern2<R32C>;
+ def r16_2: EQVRegPattern2<R16C>;
+ def r8_2: EQVRegPattern2<R8C>;
+
+ def r128_3: EQVRegPattern3<GPRC>;
+ def r64_3: EQVRegPattern3<R64C>;
+ def r32_3: EQVRegPattern3<R32C>;
+ def r16_3: EQVRegPattern3<R16C>;
+ def r8_3: EQVRegPattern3<R8C>;
+}
+
+defm EQV: BitEquivalence;
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle...
+//===----------------------------------------------------------------------===//
+// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
+// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
+// matcher to emit SHUFB when LowerVECTOR_SHUFFLE generates a node with the
+// SPUISD::SHUFB opcode.
+//===----------------------------------------------------------------------===//
+
+class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
+ ShuffleOp, pattern>;
+
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
+ SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (resultvec VECREG:$rT),
+ (SPUshuffle (resultvec VECREG:$rA),
+ (resultvec VECREG:$rB),
+ (maskvec VECREG:$rC)))]>;
+
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+multiclass ShuffleBytes
+{
+ def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+ def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+ def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+ def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+ def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+ def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+ def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
+
+ def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+ def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+ def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+ def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
+}
+
+defm SHUFB : ShuffleBytes;
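+
+// Illustrative note (assumed shufb semantics): each control byte in $rC picks
+// one byte of the result from the 32-byte concatenation of $rA and $rB
+// (0x00-0x0f select from $rA, 0x10-0x1f from $rB), while control bytes with
+// the high bit set produce constant 0x00, 0xFF or 0x80 bytes.  This is why a
+// single SHUFB plus the right control vector covers every SPUshuffle pattern
+// above.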
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate group:
+//===----------------------------------------------------------------------===//
+
+class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class SHLHVecInst<ValueType vectype>:
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass ShiftLeftHalfword
+{
+ def v8i16: SHLHVecInst<v8i16>;
+ def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
+ def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
+}
+
+defm SHLH : ShiftLeftHalfword;
+
+//===----------------------------------------------------------------------===//
+
+class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class SHLHIVecInst<ValueType vectype>:
+ SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass ShiftLeftHalfwordImm
+{
+ def v8i16: SHLHIVecInst<v8i16>;
+ def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
+}
+
+defm SHLHI : ShiftLeftHalfwordImm;
+
+def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
+
+def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
+ (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
+
+//===----------------------------------------------------------------------===//
+
+class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWord
+{
+ def v4i32:
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ def r32:
+ SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
+}
+
+defm SHL: ShiftLeftWord;
+
+//===----------------------------------------------------------------------===//
+
+class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWordImm
+{
+ def v4i32:
+ SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+ def r32:
+ SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
+ [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
+// of at most 7 bits is actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128. SHLQBI is the residual left over after shifting by
+// bytes with SHLQBY.
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+ SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+ SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+ def v16i8: SHLQBIVecInst<v16i8>;
+ def v8i16: SHLQBIVecInst<v8i16>;
+ def v4i32: SHLQBIVecInst<v4i32>;
+ def v4f32: SHLQBIVecInst<v4f32>;
+ def v2i64: SHLQBIVecInst<v2i64>;
+ def v2f64: SHLQBIVecInst<v2f64>;
+
+ def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
+
+// See note above on SHLQBI. In this case, the predicate actually does the
+// enforcement, whereas with SHLQBI, we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+ SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+ def v16i8 : SHLQBIIVecInst<v16i8>;
+ def v8i16 : SHLQBIIVecInst<v8i16>;
+ def v4i32 : SHLQBIIVecInst<v4i32>;
+ def v4f32 : SHLQBIIVecInst<v4f32>;
+ def v2i64 : SHLQBIIVecInst<v2i64>;
+ def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
+
+class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYVecInst<ValueType vectype>:
+ SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass ShiftLeftQuadBytes
+{
+ def v16i8: SHLQBYVecInst<v16i8>;
+ def v8i16: SHLQBYVecInst<v8i16>;
+ def v4i32: SHLQBYVecInst<v4i32>;
+ def v4f32: SHLQBYVecInst<v4f32>;
+ def v2i64: SHLQBYVecInst<v2i64>;
+ def v2f64: SHLQBYVecInst<v2f64>;
+ def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
+}
+
+defm SHLQBY: ShiftLeftQuadBytes;
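+
+// Illustrative note (assumed composition, cf. the SHLQBI comments above): a
+// variable left shift of a quadword by n bits is built from both forms,
+// roughly shlqby by (n >> 3) bytes followed by shlqbi by (n & 7) bits;
+// SHLQBYBI below folds the byte portion of a bit count into one instruction.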
+
+class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBYIVecInst<ValueType vectype>:
+ SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+
+multiclass ShiftLeftQuadBytesImm
+{
+ def v16i8: SHLQBYIVecInst<v16i8>;
+ def v8i16: SHLQBYIVecInst<v8i16>;
+ def v4i32: SHLQBYIVecInst<v4i32>;
+ def v4f32: SHLQBYIVecInst<v4f32>;
+ def v2i64: SHLQBYIVecInst<v2i64>;
+ def v2f64: SHLQBYIVecInst<v2f64>;
+ def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
+ [(set GPRC:$rT,
+ (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLQBYI : ShiftLeftQuadBytesImm;
+
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+ SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+ SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+ def v16i8: SHLQBYBIVecInst<v16i8>;
+ def v8i16: SHLQBYBIVecInst<v8i16>;
+ def v4i32: SHLQBYBIVecInst<v4i32>;
+ def v4f32: SHLQBYBIVecInst<v4f32>;
+ def v2i64: SHLQBYBIVecInst<v2i64>;
+ def v2f64: SHLQBYBIVecInst<v2f64>;
+
+ def r128: SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTHVecInst<ValueType vectype>:
+ ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
+
+class ROTHRegInst<RegisterClass rclass>:
+ ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
+
+multiclass RotateLeftHalfword
+{
+ def v8i16: ROTHVecInst<v8i16>;
+ def r16: ROTHRegInst<R16C>;
+}
+
+defm ROTH: RotateLeftHalfword;
+
+def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword, immediate:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTHIVecInst<ValueType vectype>:
+ ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
+
+multiclass RotateLeftHalfwordImm
+{
+ def v8i16: ROTHIVecInst<v8i16>;
+ def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
+ def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm ROTHI: RotateLeftHalfwordImm;
+
+def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTVecInst<ValueType vectype>:
+ ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
+
+class ROTRegInst<RegisterClass rclass>:
+ ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [(set rclass:$rT,
+ (rotl rclass:$rA, R32C:$rB))]>;
+
+multiclass RotateLeftWord
+{
+ def v4i32: ROTVecInst<v4i32>;
+ def r32: ROTRegInst<R32C>;
+}
+
+defm ROT: RotateLeftWord;
+
+// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit, or
+// 32-bit register.
+def ROTr32_r16_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def ROTr32_r8_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
+
+class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
+
+multiclass RotateLeftWordImm
+{
+ def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
+ def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
+
+ def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
+ def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
+ def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
+}
+
+defm ROTI : RotateLeftWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+ ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+ [(set (type rc:$rT),
+ (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+ ROTQBYGenInst<type, VECREG>;
+
+multiclass RotateQuadLeftByBytes
+{
+ def v16i8: ROTQBYVecInst<v16i8>;
+ def v8i16: ROTQBYVecInst<v8i16>;
+ def v4i32: ROTQBYVecInst<v4i32>;
+ def v4f32: ROTQBYVecInst<v4f32>;
+ def v2i64: ROTQBYVecInst<v2i64>;
+ def v2f64: ROTQBYVecInst<v2f64>;
+ def i128: ROTQBYGenInst<i128, GPRC>;
+}
+
+defm ROTQBY: RotateQuadLeftByBytes;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count), immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+ ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+ [(set (type rclass:$rT),
+ (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
+
+class ROTQBYIVecInst<ValueType vectype>:
+ ROTQBYIGenInst<vectype, VECREG>;
+
+multiclass RotateQuadByBytesImm
+{
+ def v16i8: ROTQBYIVecInst<v16i8>;
+ def v8i16: ROTQBYIVecInst<v8i16>;
+ def v4i32: ROTQBYIVecInst<v4i32>;
+ def v4f32: ROTQBYIVecInst<v4f32>;
+ def v2i64: ROTQBYIVecInst<v2i64>;
+ def vfi64: ROTQBYIVecInst<v2f64>;
+ def i128: ROTQBYIGenInst<i128, GPRC>;
+}
+
+defm ROTQBYI: RotateQuadByBytesImm;
+
+// See ROTQBY note above.
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotShiftQuad, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// See ROTQBY note above.
+//
+// Assume that the user of this instruction knows to shift the rotate count
+// into bit 29
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBIVecInst<ValueType vectype>:
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+class ROTQBIRegInst<RegisterClass rclass>:
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCount
+{
+ def v16i8: ROTQBIVecInst<v16i8>;
+ def v8i16: ROTQBIVecInst<v8i16>;
+ def v4i32: ROTQBIVecInst<v4i32>;
+ def v2i64: ROTQBIVecInst<v2i64>;
+
+ def r128: ROTQBIRegInst<GPRC>;
+ def r64: ROTQBIRegInst<R64C>;
+}
+
+defm ROTQBI: RotateQuadByBitCount;
+
+class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCountImm
+{
+ def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
+ def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
+ def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
+
+ def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
+ def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
+}
+
+defm ROTQBII : RotateQuadByBitCountImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTHM v8i16 form:
+// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
+// so this only matches a synthetically generated/lowered code
+// fragment.
+// NOTE(2): $rB must be negated before the right rotate!
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
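+// (Re NOTE(2): the patterns below synthesize that negation with SFHIvec /
+// SFIr32 -- a subtract-from with an immediate of 0, which computes 0 - $rB.)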
+
+class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTHMv8i16:
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+// ROTHM r16 form: Rotate 16-bit quantity to the right, zero fill at the left.
+// Note: This instruction doesn't match a pattern because rB must be negated
+// for the instruction to work; hence the patterns below the instruction.
+
+def ROTHMr16:
+ ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated! */]>;
+
+def : Pat<(srl R16C:$rA, R32C:$rB),
+ (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R16C:$rA, R16C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R16C:$rA, R8C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
+// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
+// that the immediate can be complemented, so that the user doesn't have to
+// worry about it.
+
+class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+def ROTHMIv8i16:
+ ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def ROTHMIr16:
+ ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTM v4i32 form: See the ROTHM v8i16 comments.
+class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTMv4i32:
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
+
+def ROTMr32:
+ ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(srl R32C:$rA, R32C:$rB),
+ (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R32C:$rA, R16C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R32C:$rA, R8C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+// ROTMI v4i32 form: See the comment for ROTHM v8i16.
+def ROTMIv4i32:
+ RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTMI r32 form: here we know how to complement the immediate value, so the
+// instruction carries a pattern directly.
+def ROTMIr32:
+ RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(srl R32C:$rA, (i16 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(srl R32C:$rA, (i8 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTQMBY: This is a vector form merely so that when used in an
+// instruction pattern, type checking will succeed. This instruction assumes
+// that the user knows to negate $rB.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYVecInst<ValueType vectype>:
+ ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, $rB must be negated */]>;
+
+class ROTQMBYRegInst<RegisterClass rclass>:
+ ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytes
+{
+ def v16i8: ROTQMBYVecInst<v16i8>;
+ def v8i16: ROTQMBYVecInst<v8i16>;
+ def v4i32: ROTQMBYVecInst<v4i32>;
+ def v2i64: ROTQMBYVecInst<v2i64>;
+
+ def r128: ROTQMBYRegInst<GPRC>;
+ def r64: ROTQMBYRegInst<R64C>;
+}
+
+defm ROTQMBY : RotateQuadBytes;
+
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+ (ROTQMBYr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0))>;
+
+class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYIVecInst<ValueType vectype>:
+ ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytesImm
+{
+ def v16i8: ROTQMBYIVecInst<v16i8>;
+ def v8i16: ROTQMBYIVecInst<v8i16>;
+ def v4i32: ROTQMBYIVecInst<v4i32>;
+ def v2i64: ROTQMBYIVecInst<v2i64>;
+
+ def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
+ def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
+}
+
+defm ROTQMBYI : RotateQuadBytesImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad right and mask by bytes from bit count
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYBIVecInst<ValueType vectype>:
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, */]>;
+
+multiclass RotateMaskQuadByBitCount
+{
+ def v16i8: ROTQMBYBIVecInst<v16i8>;
+ def v8i16: ROTQMBYBIVecInst<v8i16>;
+ def v4i32: ROTQMBYBIVecInst<v4i32>;
+ def v2i64: ROTQMBYBIVecInst<v2i64>;
+ def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [/*no pattern*/]>;
+}
+
+defm ROTQMBYBI: RotateMaskQuadByBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits
+// Note that the rotate amount has to be negated
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIVecInst<ValueType vectype>:
+ ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class ROTQMBIRegInst<RegisterClass rclass>:
+ ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBits
+{
+ def v16i8: ROTQMBIVecInst<v16i8>;
+ def v8i16: ROTQMBIVecInst<v8i16>;
+ def v4i32: ROTQMBIVecInst<v4i32>;
+ def v2i64: ROTQMBIVecInst<v2i64>;
+
+ def r128: ROTQMBIRegInst<GPRC>;
+ def r64: ROTQMBIRegInst<R64C>;
+}
+
+defm ROTQMBI: RotateMaskQuadByBits;
+
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+ (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0)),
+ (SFIr32 R32C:$rB, 0))>;
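+// i.e. a full 128-bit logical right shift combines a bit-granular ROTQMBI
+// (for the residual bits) with a byte-granular ROTQMBYBI, both fed the
+// negated shift amount (SFIr32 $rB, 0 computes 0 - $rB).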
+
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIIVecInst<ValueType vectype>:
+ ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBIIRegInst<RegisterClass rclass>:
+ ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitsImm
+{
+ def v16i8: ROTQMBIIVecInst<v16i8>;
+ def v8i16: ROTQMBIIVecInst<v8i16>;
+ def v4i32: ROTQMBIIVecInst<v4i32>;
+ def v2i64: ROTQMBIIVecInst<v2i64>;
+
+ def r128: ROTQMBIIRegInst<GPRC>;
+ def r64: ROTQMBIIRegInst<R64C>;
+}
+
+defm ROTQMBII: RotateMaskQuadByBitsImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate and mask algebraic (arithmetic shift right), halfword and word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def ROTMAHv8i16:
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+def ROTMAHr16:
+ RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R16C:$rA, R32C:$rB),
+ (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R16C:$rA, R16C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R16C:$rA, R8C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHIv8i16:
+ RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v8i16 VECREG:$rT),
+ (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def ROTMAHIr16:
+ RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
+
+def : Pat<(sra R16C:$rA, (i32 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(sra R16C:$rA, (i8 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def ROTMAv4i32:
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
+
+def ROTMAr32:
+ RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R32C:$rA, R32C:$rB),
+ (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R32C:$rA, R16C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R32C:$rA, R8C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
+
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
+
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
+
+defm ROTMAI : RotateMaskAlgebraicImm;
+
+//===----------------------------------------------------------------------===//
+// Branch and conditionals:
+//===----------------------------------------------------------------------===//
+
+let isTerminator = 1, isBarrier = 1 in {
+ // Halt If Equal (r32 preferred slot only, no vector form)
+ def HEQr32:
+ RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
+ "heq\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HEQIr32 :
+ RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
+ "heqi\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ // HGT/HGTI: These instructions use signed arithmetic for the comparison,
+ // contrasting with HLGT/HLGTI, which use unsigned comparison:
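+  // For example, with $rA = 0xffffffff and $rB = 1, the signed compare
+  // (-1 > 1) is false, while the unsigned compare (0xffffffff > 1) is true.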
+ def HGTr32:
+ RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HGTIr32:
+ RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTr32:
+ RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hlgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTIr32:
+ RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hlgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+}
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByte
+{
+ def v16i8 :
+ CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
+}
+
+class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByteImm
+{
+ def v16i8 :
+ CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
+}
+
+class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfword
+{
+ def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
+}
+
+class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfwordImm
+{
+ def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (seteq (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWord
+{
+ def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
+}
+
+class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWordImm
+{
+ def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByte
+{
+ def v16i8 :
+ CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
+}
+
+class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByteImm
+{
+ def v16i8 :
+ CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfword
+{
+ def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
+}
+
+class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfwordImm
+{
+ def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setgt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWord
+{
+ def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
+}
+
+class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWordImm
+{
+ def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+}
+
+class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByte
+{
+ def v16i8 :
+ CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
+}
+
+class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByteImm
+{
+ def v16i8 :
+ CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfword
+{
+ def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
+}
+
+class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfwordImm
+{
+ def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setugt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWord
+{
+ def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
+}
+
+class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWordImm
+{
+ def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+defm CEQB : CmpEqualByte;
+defm CEQBI : CmpEqualByteImm;
+defm CEQH : CmpEqualHalfword;
+defm CEQHI : CmpEqualHalfwordImm;
+defm CEQ : CmpEqualWord;
+defm CEQI : CmpEqualWordImm;
+defm CGTB : CmpGtrByte;
+defm CGTBI : CmpGtrByteImm;
+defm CGTH : CmpGtrHalfword;
+defm CGTHI : CmpGtrHalfwordImm;
+defm CGT : CmpGtrWord;
+defm CGTI : CmpGtrWordImm;
+defm CLGTB : CmpLGtrByte;
+defm CLGTBI : CmpLGtrByteImm;
+defm CLGTH : CmpLGtrHalfword;
+defm CLGTHI : CmpLGtrHalfwordImm;
+defm CLGT : CmpLGtrWord;
+defm CLGTI : CmpLGtrWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// For SETCC primitives not supported above (setlt, setle, setge, etc.)
+// define a pattern to generate the right code, as a binary operator
+// (in a manner of speaking); an example follows this comment block.
+//
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes are almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
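+// For example, the defs below synthesize (setge a, b) as (or (cgt a, b),
+// (ceq a, b)) and (setlt a, b) as (nor (cgt a, b), (ceq a, b)); setle is a
+// cgt whose result is inverted with an xor against all ones.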
+
+class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
+
+class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, (inttype immpred:$imm)),
+ (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
+
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
+
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
+
+def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
+def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
+
+class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB))>;
+
+class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType immtype,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, (immtype immpred:$imm)),
+ (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
+
+def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
+def : Pat<(setle R8C:$rA, R8C:$rB),
+ (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setle R8C:$rA, immU8:$imm),
+ (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ ORr16, CGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
+def : Pat<(setle R16C:$rA, R16C:$rB),
+ (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ ORr32, CGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
+def : Pat<(setle R32C:$rA, R32C:$rB),
+ (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
+def : Pat<(setule R8C:$rA, R8C:$rB),
+ (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setule R8C:$rA, immU8:$imm),
+ (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
+ ORr16, CLGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
+ CLGTHIr16, CEQHIr16>;
+def : Pat<(setule R16C:$rA, R16C:$rB),
+ (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
+ ORr32, CLGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
+def : Pat<(setule R32C:$rA, R32C:$rB),
+ (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// select conditional patterns:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, rclass:$rB))>;
+
+class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, immpred:$imm))>;
+
+def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
+def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
+def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
+def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBr8>;
+def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
+def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
+
+def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
+def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
+def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
+def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
+def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
+def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
+
+def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
+def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
+def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
+def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
+def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
+def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
+
+class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB)))>;
+
+class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
+
+def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ SELBr16, ORr16, CGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
+def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ SELBr32, ORr32, CGTIr32, CEQIr32>;
+
+def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
+ SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
+def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
+ SELBr32, ORr32, CLGTIr32, CEQIr32>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+let isCall = 1,
+ // All calls clobber the non-callee-saved registers:
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
+ R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
+ R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
+ R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
+ R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
+ R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
+ R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
+ R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
+ // All of these instructions use $lr (aka $0)
+ Uses = [R0] in {
+  // Branch relative and set link: Used if we actually know that the target
+  // is within [-32768, 32767] bytes of the call site
+ def BRSL:
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ "brsl\t$$lr, $func",
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
+
+ // Branch absolute and set link: Used if we actually know that the target
+ // is an absolute address
+ def BRASL:
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ "brasl\t$$lr, $func",
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
+
+  // Branch indirect and set link if external data. These instructions are not
+  // generated directly; they are matched via an intrinsic:
+ def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
+
+ // Branch indirect and set link. This is the "X-form" address version of a
+ // function call
+ def BISL:
+ BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
+}
+
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
+
+// Unconditional branches:
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ let isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
+
+ // Indirect branch
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
+
+ // Conditional branches:
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
+
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+  // Note: LLVM doesn't generate conditional indirect branches. Otherwise,
+  // these would be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
+}
+
+//===----------------------------------------------------------------------===//
+// setcc and brcond patterns:
+//===----------------------------------------------------------------------===//
+
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
+
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
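+// For other comparisons, the multiclasses below first materialize the
+// condition with ceq/cgt/clgt (or-ing in a ceq for the >=/<= cases) and then
+// branch on the nonzero (brnz/brhnz) or zero (brz/brhz) result.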
+
+multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
+
+multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
+
+multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
+
+let isTerminator = 1, isBarrier = 1 in {
+ let isReturn = 1 in {
+ def RET:
+ RETForm<"bi\t$$lr", [(retflag)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Single precision floating point instructions
+//===----------------------------------------------------------------------===//
+
+class FAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FAVecInst<ValueType vectype>:
+ FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPAdd
+{
+ def v4f32: FAVecInst<v4f32>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FA : SFPAdd;
+
+class FSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FSVecInst<ValueType vectype>:
+ FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPSub
+{
+ def v4f32: FSVecInst<v4f32>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FS : SFPSub;
+
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+// Floating point reciprocal estimate
+
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
+
+// Floating point interpolate (used in conjunction with reciprocal estimate)
+def FIv4f32 :
+ RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+def FIf32 :
+ RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
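+//
+// The SPU provides only fceq/fcmeq and fcgt/fcmgt for single precision; the
+// remaining predicates (ge, lt, le, ne) are synthesized further below from
+// these primitives with or/nor/xori-with-all-ones sequences.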
+//--------------------------------------------------------------------------
+
+def FCEQf32 :
+ RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fceq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMEQf32 :
+ RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmeq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCGTf32 :
+ RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMGTf32 :
+ RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+
+// FP Status and Control Register Write
+// Why isn't rT a don't care in the ISA?
+// Should we create a special RRForm_3 for this guy and zero out the rT?
+def FSCRWf32 :
+ RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
+ "fscrwr\t$rA", SPrecFP,
+ [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
+
+// FP Status and Control Register Read
+def FSCRRf32 :
+ RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
+ "fscrrd\t$rT", SPrecFP,
+ [/* This instruction requires an intrinsic */]>;
+
+// LLVM instruction space
+// How do these map onto Cell instructions?
+// fdiv rA rB
+// frest rC rB # c = 1/b (both lines)
+// fi rC rB rC
+// fm rD rA rC # d = a * 1/b
+// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world
+// fma rB rB rC rD # b = b * c + d
+//                 = -(d*b - a)*c + d
+//                 = (a - d*b)*c + d
+//                 = a*c - c*(a*b*c - a)   (substituting d = a*c)
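+// (The frest/fi/fm/fnms/fma division sequence sketched above is what the
+// fdiv patterns in SPUMathInstr.td, included at the end of this file,
+// actually emit.)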
+
+// fcopysign (???)
+
+// Library calls:
+// These llvm instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+//         (Note: fmod(double, double), fmodf(float, float))
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+ CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+ CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned int to float
+def CUiFv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+ CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
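+// Note on the conversion patterns above: the trailing ", 0" in the csflt,
+// cuflt, cflts and cfltu assembly strings is the instructions' scale
+// immediate; these patterns always emit a scale of zero (a plain
+// int<->float conversion), which is what "Assume that scale = 0" refers to.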
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+ RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
+
+def FESDf32 :
+ RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+// "frds\t$rT, $rA,", SPrecFP,
+// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+ RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+ "frds\t$rT, $rA", SPrecFP,
+ [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+// TODO: include anyextend?
+
+//===----------------------------------------------------------------------==//
+// Double precision floating point instructions
+//===----------------------------------------------------------------------==//
+def FAf64 :
+ RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
+
+def FAv2f64 :
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FSf64 :
+ RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
+
+def FSv2f64 :
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMf64 :
+ RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
+
+def FMv2f64:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMAf64:
+ RRForm<0b00111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMAv2f64:
+ RRForm<0b00111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSf64 :
+ RRForm<0b10111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSv2f64 :
+ RRForm<0b10111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
+ (v2f64 VECREG:$rC)))]>;
+
+// DFNMS: - (a * b - c)
+// - (a * b) + c => c - (a * b)
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
+
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
+
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
+
+// - (a * b + c)
+// - (a * b) - c
+def FNMAf64 :
+ RRForm<0b11111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FNMAv2f64 :
+ RRForm<0b11111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+//===----------------------------------------------------------------------==//
+// Floating point negation and absolute value
+//===----------------------------------------------------------------------==//
+
+def : Pat<(fneg (v4f32 VECREG:$rA)),
+ (XORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
+
+def : Pat<(fneg R32FP:$rA),
+ (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
+
+// Floating point absolute value
+// Note: f64 fabs is custom-selected.
+
+def : Pat<(fabs R32FP:$rA),
+ (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
+
+def : Pat<(fabs (v4f32 VECREG:$rA)),
+ (ANDfabsvec (v4f32 VECREG:$rA),
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
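+// In other words, fneg flips the sign bit (the 0x80000000 mask comes from
+// "ilhu 0x8000"), and fabs clears it (the 0x7fffffff mask comes from
+// "ilhu 0x7fff" followed by "iohl 0xffff"), as the patterns above encode.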
+
+//===----------------------------------------------------------------------===//
+// Hint for branch instructions:
+//===----------------------------------------------------------------------===//
+def HBRA :
+ HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
+
+//===----------------------------------------------------------------------===//
+// Execution, Load NOP (execute NOPs belong in the even pipeline, load NOPs
+// belong in the odd pipeline)
+//===----------------------------------------------------------------------===//
+
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000010;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000000;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit conversions (type conversions between vector/packed types)
+// NOTE: Promotions are handled using the XS* instructions.
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
+
+def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+
+def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
+ (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
+ (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
+ (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
+ (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
+ (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
+ (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction patterns:
+//===----------------------------------------------------------------------===//
+
+// General 32-bit constants:
+def : Pat<(i32 imm:$imm),
+ (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
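+// For example, materializing 0x12345678 becomes "ilhu $rT, 0x1234" followed
+// by "iohl $rT, 0x5678": ilhu loads the upper halfword and zeroes the lower,
+// iohl then ORs in the lower halfword.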
+
+// Single precision float constants:
+def : Pat<(f32 fpimm:$imm),
+ (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
+
+// General constant 32-bit vectors
+def : Pat<(v4i32 v4i32Imm:$imm),
+ (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
+ (LO16_vec v4i32Imm:$imm))>;
+
+// 8-bit constants
+def : Pat<(i8 imm:$imm),
+ (ILHr8 imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Zero/Any/Sign extensions
+//===----------------------------------------------------------------------===//
+
+// sext 8->32: Sign extend bytes to words
+def : Pat<(sext_inreg R32C:$rSrc, i8),
+ (XSHWr32 (XSBHr32 R32C:$rSrc))>;
+
+def : Pat<(i32 (sext R8C:$rSrc)),
+ (XSHWr16 (XSBHr8 R8C:$rSrc))>;
+
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
+// zext 8->16: Zero extend bytes to halfwords
+def : Pat<(i16 (zext R8C:$rSrc)),
+ (ANDHIi8i16 R8C:$rSrc, 0xff)>;
+
+// zext 8->32: Zero extend bytes to words
+def : Pat<(i32 (zext R8C:$rSrc)),
+ (ANDIi8i32 R8C:$rSrc, 0xff)>;
+
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+ (COPY_TO_REGCLASS
+ (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)), R64C)>;
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
+def : Pat<(i16 (anyext R8C:$rSrc)),
+ (ORHIi8i16 R8C:$rSrc, 0)>;
+
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
+def : Pat<(i32 (anyext R8C:$rSrc)),
+ (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
+
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(sext R16C:$rSrc),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
+// zext 16->32: Zero extend halfwords to words
+def : Pat<(i32 (zext R16C:$rSrc)),
+ (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
+ (ANDIi16i32 R16C:$rSrc, 0xf)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
+ (ANDIi16i32 R16C:$rSrc, 0xff)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
+ (ANDIi16i32 R16C:$rSrc, 0xfff)>;
+
+// anyext 16->32: Extend 16->32 bits, irrespective of sign
+def : Pat<(i32 (anyext R16C:$rSrc)),
+ (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
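+// The shufb control words built below (ilhu/iohl immediates such as 0x0203,
+// 0x0607 or 0x0f0f) select the low-order bytes of the wider source value and
+// deposit them in the narrower type's preferred slot; only those bytes of
+// the shuffled result are meaningful.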
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Address generation: SPU, like PPC, has to split addresses into high and
+// low parts in order to load them into a register.
+//===----------------------------------------------------------------------===//
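+// Addresses that fit ila's 18-bit A-form immediate are loaded with a single
+// ILAlsa; everything else is split into high and low halves and rebuilt with
+// an ilhu/iohl pair, as the patterns below show.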
+
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
+ (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
+ (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
+ (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
+ (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+// Intrinsics:
+include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
new file mode 100644
index 000000000000..3ef3ccbcaaee
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,49 @@
+//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+private:
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ SPUFunctionInfo(MachineFunction& MF)
+ : UsesLR(false),
+ VarArgsFrameIndex(0)
+ {}
+
+ void setUsesLR(bool U) { UsesLR = U; }
+ bool usesLR() { return UsesLR; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 000000000000..ed7129e33291
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, and i8 types and the corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
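+// The SPU has no byte multiply, so the v16i8 product above is synthesized
+// from v8i16 halfword multiplies of shifted copies of the operands, merged
+// with SELB under FSMBI-generated byte masks.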
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
+ (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
+ (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
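+// The SPU multiplier is 16x16->32, so the full 32-bit product is assembled
+// from three partial products: mpyh($rA,$rB) = (aHi*bLo) << 16,
+// mpyh($rB,$rA) = (bHi*aLo) << 16, and mpyu($rA,$rB) = aLo*bLo. The
+// aHi*bHi term lies entirely outside the low 32 bits and is dropped.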
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
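+// Reading of the scalar sequence above: frest/fi produce an estimate
+// e ~= 1/b, q0 = a*e is the initial quotient, and the fnms/fma pair is one
+// Newton-Raphson refinement, q1 = q0 + e*(a - q0*b). The AIf32 "epsilon"
+// step bumps q1 by one ulp (an integer add of 1 to the float bits), and the
+// final selb/cgti picks between q1 and the bumped value as a last rounding
+// correction. The v4f32 sequence below is the same, lane by lane.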
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
new file mode 100644
index 000000000000..a6e621f36b35
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
@@ -0,0 +1,159 @@
+//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Type profiles and SelectionDAG nodes used by CellSPU
+//
+//===----------------------------------------------------------------------===//
+
+// Type profile for a call sequence
+def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+// SPU_GenControl: Type profile for generating control words for insertions
+def SPU_GenControl : SDTypeProfile<1, 1, []>;
+def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+//===----------------------------------------------------------------------===//
+// Operand constraints:
+//===----------------------------------------------------------------------===//
+
+def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+// Operand type constraints for vector shuffle/permute operations
+def SDT_SPUshuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Vector binary operator type constraints (needs a further constraint to
+// ensure that operand 0 is a vector...):
+
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
+// vector types (this avoids having to bit_convert all over the place.)
+def SPUselmask_type: SDTypeProfile<1, 1, [
+ SDTCisInt<1>
+]>;
+
+// SELB type constraints:
+def SPUselb_type: SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
+
+// SPU Vector shift pseudo-instruction type constraints
+def SPUvecshift_type: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
+//===----------------------------------------------------------------------===//
+// Synthetic/pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+// SPU CNTB:
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
+
+// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
+// SPUISelLowering.h):
+def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
+
+// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
+def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
+
+def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
+def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
+
+// Vector rotate left, bits shifted out of the left are rotated in on the right
+def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
+ SPUvecshift_type, []>;
+
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
+// Shift entire quad left by bytes/bits. Zeros are shifted in on the right.
+// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128.
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
+// SPU form select mask for bytes, immediate
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
+
+// SPU select bits instruction
+def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
+
+def SPU_vec_demote : SDTypeProfile<1, 1, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
+
+// Address high and low components, used for [r+r] type addressing
+def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
+def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
+
+// PC-relative address
+def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
+// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
+def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
+//===----------------------------------------------------------------------===//
+// Constraints: (taken from PPCInstrInfo.td)
+//===----------------------------------------------------------------------===//
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+//===----------------------------------------------------------------------===//
+// Return (flag isn't quite what it means: the operations are flagged so that
+// instruction scheduling doesn't disassociate them.)
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 000000000000..e2bd2d7f4100
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual issue requirements. The actual dual issue scheduling is
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct SPUNopFiller : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData *IID;
+ bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+
+ static char ID;
+ SPUNopFiller(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+ IID(tm.getInstrItineraryData())
+ {
+ DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+ }
+
+ virtual const char *getPassName() const {
+ return "SPU nop/lnop Filler";
+ }
+
+ void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ isEvenPlace = true; //all functions get an .align 3 directive at start
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ runOnMachineBasicBlock(*FI);
+ return true; //never-ever do any more modifications, just print it!
+ }
+
+ typedef enum { none = 0, // no more instructions in this function / BB
+ pseudo = 1, // this does not get executed
+ even = 2,
+ odd = 3 } SPUOpPlace;
+ SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+ };
+ char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  assert( isEvenPlace && "basic block starts at an odd address");
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ {
+ SPUOpPlace this_optype, next_optype;
+ MachineBasicBlock::iterator J = I;
+ J++;
+
+ this_optype = getOpPlacement( *I );
+ next_optype = none;
+ while (J!=MBB.end()){
+ next_optype = getOpPlacement( *J );
+ ++J;
+ if (next_optype != pseudo )
+ break;
+ }
+
+    // pad: odd(wrong), even(wrong), ...
+ // to: nop(corr), odd(corr), even(corr)...
+ if( isEvenPlace && this_optype == odd && next_optype == even ) {
+ DEBUG( dbgs() <<"Adding NOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+ isEvenPlace=false;
+ }
+
+    // pad: even(wrong), odd(wrong), ...
+ // to: lnop(corr), even(corr), odd(corr)...
+ else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+ DEBUG( dbgs() <<"Adding LNOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+ isEvenPlace=true;
+ }
+
+ // now go to next mem slot
+ if( this_optype != pseudo )
+ isEvenPlace = !isEvenPlace;
+
+ }
+
+  // pad the end of the basic block
+ if( !isEvenPlace ){
+ MachineBasicBlock::iterator J = MBB.end();
+ J--;
+ if (getOpPlacement( *J ) == odd) {
+ DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+ BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+ }
+ else {
+ J++;
+ DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+ BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+ }
+ isEvenPlace=true;
+ }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+ int sc = instr.getDesc().getSchedClass();
+ const InstrStage *stage = IID->beginStage(sc);
+ unsigned FUs = stage->getUnits();
+ SPUOpPlace retval;
+
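+  // The itinerary data encodes the issue pipeline as a functional-unit mask:
+  // no units means a pseudo instruction, unit mask 1 is the odd pipeline,
+  // unit mask 2 is the even pipeline; anything else asserts below.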
+ switch( FUs ) {
+ case 0: retval = pseudo; break;
+ case 1: retval = odd; break;
+ case 2: retval = even; break;
+    default: retval = pseudo;
+             assert( false && "got unknown FuncUnit\n");
+             break;
+  }
+ return retval;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 000000000000..96cde51709ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,664 @@
+//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+// TO_IMM32 - Convert an i8/i16 to i32.
+def TO_IMM32 : SDNodeXForm<imm, [{
+ return getI32Imm(N->getZExtValue());
+}]>;
+
+// TO_IMM16 - Convert an i8/i32 to i16.
+def TO_IMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
+}]>;
+
+
+def LO16 : SDNodeXForm<imm, [{
+ unsigned val = N->getZExtValue();
+ // Transformation function: get the low 16 bits.
+ return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ // Transformation function: get the low 16 bit immediate from a build_vector
+ // node.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "LO16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "HI16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in a 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+ int sextVal = int(N->getSExtValue());
+ return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+ int Value = int(N->getSExtValue());
+ return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+ return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'ldq'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+ return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+ return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+ return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ short Ignored;
+ return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+ // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
+ return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+ // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getZExtValue();
+ return isUInt<18>(Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+  // lo16 predicate - returns true if the immediate has all zeros in the
+  // high order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+ // hi16 predicate - returns true if the immediate has all zeros in the
+ // low order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = uint32_t(N->getZExtValue());
+ return ((val & 0xffff0000) == val);
+ } else if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((val & 0xffff0000ULL) == val);
+ }
+
+ return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+ // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+ // (shift left quadword by bits immediate)
+ int64_t Val = N->getZExtValue();
+ return (Val > 0 && Val <= 7);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
+}]>;
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+ short Ignored;
+ return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have the upper 16 bits set?
+def hi16_f32 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((val & 0xffff0000) == val);
+ }
+
+ return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+ return isUInt<18>(Value);
+ }
+
+ return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies on the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies on the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16Uns16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for an ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec_i64: Predicate test for an ILHU constant vector.
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i64 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+ let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
+def rotNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+ let PrintMethod = "printPCRelativeOperand";
+ let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+ let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
+ let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
+ let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects this operand to have two
+// components, an offset and a pointer; of these, only the immediate offset
+// is actually used.
+def addr256k : Operand<iPTR> {
+ let PrintMethod = "printAddr256K";
+ let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+ let PrintMethod = "printMemRegImmS18";
+ let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in three basic
+// flavors:
+//
+// D-form : [r+I10] (10-bit signed offset + reg)
+// X-form : [r+r] (reg+reg)
+// A-form : abs (256K LSA offset)
+// D-form(2): [r+I7] (7-bit signed offset + reg)
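+//
+// For example (illustrative SPU assembly; offset ranges as described above):
+//   lqd $3, 16($sp)      D-form:  signed 10-bit offset from a base register
+//   lqx $3, $4, $5       X-form:  base register plus index register
+//   lqa $3, some_label   A-form:  absolute local store address (256K LSA)
+// The D-form(2) variant uses the 7-bit offset operands defined earlier (for
+// example, the shuffle-address operand shufaddr).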
+
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+ [], [SDNPWantRoot]>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+ [], [SDNPWantRoot]>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+ [], [SDNPWantRoot]>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+ [], [SDNPWantRoot]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 000000000000..19896c0b4be9
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,373 @@
+//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace SPU;
+ switch (RegEnum) {
+ case SPU::R0: return 0;
+ case SPU::R1: return 1;
+ case SPU::R2: return 2;
+ case SPU::R3: return 3;
+ case SPU::R4: return 4;
+ case SPU::R5: return 5;
+ case SPU::R6: return 6;
+ case SPU::R7: return 7;
+ case SPU::R8: return 8;
+ case SPU::R9: return 9;
+ case SPU::R10: return 10;
+ case SPU::R11: return 11;
+ case SPU::R12: return 12;
+ case SPU::R13: return 13;
+ case SPU::R14: return 14;
+ case SPU::R15: return 15;
+ case SPU::R16: return 16;
+ case SPU::R17: return 17;
+ case SPU::R18: return 18;
+ case SPU::R19: return 19;
+ case SPU::R20: return 20;
+ case SPU::R21: return 21;
+ case SPU::R22: return 22;
+ case SPU::R23: return 23;
+ case SPU::R24: return 24;
+ case SPU::R25: return 25;
+ case SPU::R26: return 26;
+ case SPU::R27: return 27;
+ case SPU::R28: return 28;
+ case SPU::R29: return 29;
+ case SPU::R30: return 30;
+ case SPU::R31: return 31;
+ case SPU::R32: return 32;
+ case SPU::R33: return 33;
+ case SPU::R34: return 34;
+ case SPU::R35: return 35;
+ case SPU::R36: return 36;
+ case SPU::R37: return 37;
+ case SPU::R38: return 38;
+ case SPU::R39: return 39;
+ case SPU::R40: return 40;
+ case SPU::R41: return 41;
+ case SPU::R42: return 42;
+ case SPU::R43: return 43;
+ case SPU::R44: return 44;
+ case SPU::R45: return 45;
+ case SPU::R46: return 46;
+ case SPU::R47: return 47;
+ case SPU::R48: return 48;
+ case SPU::R49: return 49;
+ case SPU::R50: return 50;
+ case SPU::R51: return 51;
+ case SPU::R52: return 52;
+ case SPU::R53: return 53;
+ case SPU::R54: return 54;
+ case SPU::R55: return 55;
+ case SPU::R56: return 56;
+ case SPU::R57: return 57;
+ case SPU::R58: return 58;
+ case SPU::R59: return 59;
+ case SPU::R60: return 60;
+ case SPU::R61: return 61;
+ case SPU::R62: return 62;
+ case SPU::R63: return 63;
+ case SPU::R64: return 64;
+ case SPU::R65: return 65;
+ case SPU::R66: return 66;
+ case SPU::R67: return 67;
+ case SPU::R68: return 68;
+ case SPU::R69: return 69;
+ case SPU::R70: return 70;
+ case SPU::R71: return 71;
+ case SPU::R72: return 72;
+ case SPU::R73: return 73;
+ case SPU::R74: return 74;
+ case SPU::R75: return 75;
+ case SPU::R76: return 76;
+ case SPU::R77: return 77;
+ case SPU::R78: return 78;
+ case SPU::R79: return 79;
+ case SPU::R80: return 80;
+ case SPU::R81: return 81;
+ case SPU::R82: return 82;
+ case SPU::R83: return 83;
+ case SPU::R84: return 84;
+ case SPU::R85: return 85;
+ case SPU::R86: return 86;
+ case SPU::R87: return 87;
+ case SPU::R88: return 88;
+ case SPU::R89: return 89;
+ case SPU::R90: return 90;
+ case SPU::R91: return 91;
+ case SPU::R92: return 92;
+ case SPU::R93: return 93;
+ case SPU::R94: return 94;
+ case SPU::R95: return 95;
+ case SPU::R96: return 96;
+ case SPU::R97: return 97;
+ case SPU::R98: return 98;
+ case SPU::R99: return 99;
+ case SPU::R100: return 100;
+ case SPU::R101: return 101;
+ case SPU::R102: return 102;
+ case SPU::R103: return 103;
+ case SPU::R104: return 104;
+ case SPU::R105: return 105;
+ case SPU::R106: return 106;
+ case SPU::R107: return 107;
+ case SPU::R108: return 108;
+ case SPU::R109: return 109;
+ case SPU::R110: return 110;
+ case SPU::R111: return 111;
+ case SPU::R112: return 112;
+ case SPU::R113: return 113;
+ case SPU::R114: return 114;
+ case SPU::R115: return 115;
+ case SPU::R116: return 116;
+ case SPU::R117: return 117;
+ case SPU::R118: return 118;
+ case SPU::R119: return 119;
+ case SPU::R120: return 120;
+ case SPU::R121: return 121;
+ case SPU::R122: return 122;
+ case SPU::R123: return 123;
+ case SPU::R124: return 124;
+ case SPU::R125: return 125;
+ case SPU::R126: return 126;
+ case SPU::R127: return 127;
+ default:
+ report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
+ }
+}
+
+SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
+ const TargetInstrInfo &tii) :
+ SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii)
+{
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &SPU::R32CRegClass;
+}
+
+const unsigned *
+SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Cell ABI calling convention
+ static const unsigned SPU_CalleeSaveRegs[] = {
+ SPU::R80, SPU::R81, SPU::R82, SPU::R83,
+ SPU::R84, SPU::R85, SPU::R86, SPU::R87,
+ SPU::R88, SPU::R89, SPU::R90, SPU::R91,
+ SPU::R92, SPU::R93, SPU::R94, SPU::R95,
+ SPU::R96, SPU::R97, SPU::R98, SPU::R99,
+ SPU::R100, SPU::R101, SPU::R102, SPU::R103,
+ SPU::R104, SPU::R105, SPU::R106, SPU::R107,
+ SPU::R108, SPU::R109, SPU::R110, SPU::R111,
+ SPU::R112, SPU::R113, SPU::R114, SPU::R115,
+ SPU::R116, SPU::R117, SPU::R118, SPU::R119,
+ SPU::R120, SPU::R121, SPU::R122, SPU::R123,
+ SPU::R124, SPU::R125, SPU::R126, SPU::R127,
+ SPU::R2, /* environment pointer */
+ SPU::R1, /* stack pointer */
+ SPU::R0, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegs;
+}
+
+/*!
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
+ generally unused) are the Cell's reserved registers
+ */
+BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SPU::R0); // LR
+ Reserved.set(SPU::R1); // SP
+ Reserved.set(SPU::R2); // environment pointer
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+ const
+{
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = II->getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &SPOp = MI.getOperand(i);
+ int FrameIndex = SPOp.getIndex();
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
+ MachineOperand &MO = MI.getOperand(OpNo);
+
+ // Offset is biased by $lr's slot at the bottom.
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
+ assert((Offset & 0xf) == 0
+ && "16-byte alignment violated in eliminateFrameIndex");
+
+  // Replace the FrameIndex operand with the base register $sp (aka $r1).
+ SPOp.ChangeToRegister(SPU::R1, false);
+
+  // If 'Offset' doesn't fit into the D-form instruction's immediate field,
+  // convert the instruction to X-form. If the instruction is not an AI
+  // (which takes an s10 immediate), assume it is a load/store that can take
+  // an s14 immediate.
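+  // For illustration, with a hypothetical 20000-byte frame offset a D-form
+  // access such as
+  //   lqd $3, 20000($sp)
+  // is rewritten into the X-form sequence
+  //   il  $tmp, 20000
+  //   lqx $3, $tmp, $sp
+  // where $tmp is the scratch register scavenged below.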
+ if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
+ || !isInt<14>(Offset)) {
+ int newOpcode = convertDFormToXForm(MI.getOpcode());
+ unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+ BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
+ .addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+ .addReg(tmpReg, RegState::Kill)
+ .addReg(SPU::R1);
+ // remove the replaced D-form instruction
+ MBB.erase(II);
+ } else {
+ MO.ChangeToImmediate(Offset);
+ }
+}
+
+unsigned
+SPURegisterInfo::getRARegister() const
+{
+ return SPU::R0;
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+ return SPU::R1;
+}
+
+int
+SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  // FIXME: Most probably the DWARF numbers differ between Linux and Darwin.
+ return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int SPURegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ return SPUGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
+}
+
+int
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+ switch(dFormOpcode)
+ {
+ case SPU::AIr32: return SPU::Ar32;
+ case SPU::LQDr32: return SPU::LQXr32;
+ case SPU::LQDr128: return SPU::LQXr128;
+ case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
+ case SPU::LQDv4f32: return SPU::LQXv4f32;
+ case SPU::STQDr32: return SPU::STQXr32;
+ case SPU::STQDr128: return SPU::STQXr128;
+ case SPU::STQDv16i8: return SPU::STQXv16i8;
+ case SPU::STQDv4i32: return SPU::STQXv4i32;
+ case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+ default: assert( false && "Unhandled D to X-form conversion");
+ }
+ // default will assert, but need to return something to keep the
+ // compiler happy.
+ return dFormOpcode;
+}
+
+// TODO: This was copied from the PPC backend. Could this convenience
+// function be moved to the RegScavenger class?
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const
+{
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ assert( Reg && "Register scavenger failed");
+ return Reg;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 000000000000..5e014f8adbfc
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,107 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+
+#define GET_REGINFO_HEADER
+#include "SPUGenRegisterInfo.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+      This method translates a register's enum value to its register number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const;
+
+ /// After allocating this many registers, the allocator should feel
+ /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non-callee-saved registers in the C calling convention.
+ virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+ MachineFunction &MF) const{
+ return 50;
+ }
+
+ //! Return the array of callee-saved registers
+ virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Allow for scavenging, so we can get scratch registers when needed.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ { return true; }
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+    //! Convert frame indices into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ //! Get return address register (LR, aka R0)
+ unsigned getRARegister() const;
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ //! Convert D-form load/store to X-form load/store
+ /*!
+      Converts a register-displacement load/store into a register-indexed
+      load/store for large stack frames, when the stack frame exceeds the
+      range of an s10 displacement.
+ */
+ int convertDFormToXForm(int dFormOpcode) const;
+
+ //! Acquire an unused register in an emergency.
+ unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const;
+
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 000000000000..e16f51ff0e02
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,183 @@
+//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+ let Namespace = "SPU";
+}
+
+// The SPU's registers are all 128 bits wide, which makes specifying the
+// registers relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+ field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
+def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
+def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
+def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
+def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
+def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
+def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
+def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
+def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
+def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
+def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
+def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
+def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
+def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
+def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
+def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
+def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
+def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
+def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
+def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
+def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
+def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
+def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
+def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
+def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
+def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
+def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
+def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
+def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
+def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
+def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
+def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
+def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
+def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
+def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
+def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
+def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
+def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
+def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
+def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
+def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
+def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
+def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
+def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
+def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
+def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
+def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
+def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
+def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
+def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
+def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
+def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
+def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
+def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
+def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
+def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
+def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
+def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
+def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
+def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
+def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
+def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
+def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
+def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
+def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
+def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
+def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
+def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
+def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
+def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
+def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
+def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
+def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
+def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
+def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
+def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
+def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
+def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
+def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
+def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
+def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
+def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
+def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
+def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
+def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
+def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
+def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
+def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
+
+/* Need floating point status register here: */
+/* def FPCSR : ... */
+
+// The SPU's registers are 128-bit wide entities that can function as
+// general-purpose registers, with scalar operands living in the "preferred
+// slot". The non-volatile registers are allocated in reverse order, as the
+// PPC backend does.
+def GPRC : RegisterClass<"SPU", [i128], 128,
+ (add (sequence "R%u", 0, 79),
+ (sequence "R%u", 127, 80))>;
+
+// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
+def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
+
+// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
+def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
+
+// The SPU's registers as 32-bit wide (word) "preferred slot":
+def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
+
+// The SPU's registers as single precision floating point "preferred slot":
+def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
+
+// The SPU's registers as 16-bit wide (halfword) "preferred slot":
+def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
+
+// The SPU's registers as 8-bit wide (byte) "preferred slot":
+def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
+
+// The SPU's registers as vector registers:
+def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
+ (add GPRC)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
new file mode 100644
index 000000000000..e557ed340a28
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
@@ -0,0 +1,19 @@
+//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTER_NAMES_H
+#define SPU_REGISTER_NAMES_H
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 000000000000..9cd3c2327df0
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,59 @@
+//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units: the SPU's even and odd execution pipelines.
+
+def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7) == 0b000
+def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7) == 0b100
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore : InstrItinClass; // ODD_UNIT
+def BranchHints : InstrItinClass; // ODD_UNIT
+def BranchResolv : InstrItinClass; // ODD_UNIT
+def ChanOpSPR : InstrItinClass; // ODD_UNIT
+def ShuffleOp : InstrItinClass; // ODD_UNIT
+def SelectOp : InstrItinClass; // ODD_UNIT
+def GatherOp : InstrItinClass; // ODD_UNIT
+def LoadNOP : InstrItinClass; // ODD_UNIT
+def ExecNOP : InstrItinClass; // EVEN_UNIT
+def SPrecFP : InstrItinClass; // EVEN_UNIT
+def DPrecFP : InstrItinClass; // EVEN_UNIT
+def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
+def ByteOp : InstrItinClass; // EVEN_UNIT
+def IntegerOp : InstrItinClass; // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
+def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
+def ImmLoad : InstrItinClass; // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
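+// Each InstrItinData below binds an itinerary class to a single
+// InstrStage<N, [unit]>: the instruction reserves the named pipeline for N
+// cycles, and that cycle count doubles as a rough latency estimate.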
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
+ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
+ InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
+ InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
+ InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
+ InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
+ InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
+ ]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..5732fd43cdc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cellspu-selectiondag-info"
+#include "SPUTargetMachine.h"
+using namespace llvm;
+
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
new file mode 100644
index 000000000000..39257d92c400
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSPUSELECTIONDAGINFO_H
+#define CELLSPUSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SPUTargetMachine;
+
+class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
+ ~SPUSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 000000000000..856dc82f786b
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,66 @@
+//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS) :
+ SPUGenSubtargetInfo(TT, CPU, FS),
+ StackAlignment(16),
+ ProcDirective(SPU::DEFAULT_PROC),
+ UseLargeMem(false)
+{
+ // Should be the target SPU processor type. For now, since there's only
+ // one, simply default to the current "v0" default:
+ std::string default_cpu("v0");
+
+ // Parse features string.
+ ParseSubtargetFeatures(default_cpu, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  // CriticalPathRCs is the set of register classes that anti-dependency
+  // breaking is performed for. Do it for all register classes.
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&SPU::R8CRegClass);
+ CriticalPathRCs.push_back(&SPU::R16CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+ CriticalPathRCs.push_back(&SPU::R64CRegClass);
+ CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 000000000000..7c4aa1430217
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,97 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
+namespace llvm {
+ class GlobalValue;
+ class StringRef;
+
+ namespace SPU {
+ enum {
+ PROC_NONE,
+ DEFAULT_PROC
+ };
+ }
+
+ class SPUSubtarget : public SPUGenSubtargetInfo {
+ protected:
+ /// stackAlignment - The minimum alignment known to hold of the stack frame
+ /// on entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which SPU processor (this isn't really used, but it's there to keep
+ /// the C compiler happy)
+ unsigned ProcDirective;
+
+ /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions that assume a 256K local store.
+ bool UseLargeMem;
+
+ public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+    /// subtarget options. The definition of this function is auto-generated
+    /// by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by
+ /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries selected
+    /// for this subtarget.
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ /// Use large memory addressing predicate
+ bool usingLargeMem() const {
+ return UseLargeMem;
+ }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
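+    /// Roughly decoded (see the TargetData documentation for the full
+    /// grammar): 'E' selects big-endian, 'p:32:32:128' gives 32-bit pointers
+    /// a 128-bit preferred alignment, the i*/f*/v* entries give scalar and
+    /// vector types 128-bit preferred alignment to match the 128-bit
+    /// registers, and 'n32:64' lists the native integer widths.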
+ const char *getTargetDataString() const {
+ return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
+ "-s:128:128-n32:64";
+ }
+
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 000000000000..3542a2b87e43
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,67 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeCellSPUTarget() {
+ // Register the target.
+ RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
+}
+
+const std::pair<unsigned, int> *
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 1;
+ return &LR[0];
+}
+
+SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getTargetDataString()),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ setRelocationModel(Reloc::Static);
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSPUISelDag(*this));
+ return false;
+}
+
+// passes to run just before printing the assembly
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+  // Align instructions with nops/lnops for dual issue.
+ PM.add(createSPUNopFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 000000000000..d96f86dcaeb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,90 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+class TargetFrameLowering;
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+ SPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ SPUInstrInfo InstrInfo;
+ SPUFrameLowering FrameLowering;
+ SPUTargetLowering TLInfo;
+ SPUSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+public:
+ SPUTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ /// Return the subtarget implementation object
+ virtual const SPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const SPUInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const SPUFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ /*!
+ \note Cell SPU does not support JIT today. It could support JIT at some
+ point.
+ */
+ virtual TargetJITInfo *getJITInfo() {
+ return NULL;
+ }
+
+ virtual const SPUTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const SPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const {
+ return &DataLayout;
+ }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
new file mode 100644
index 000000000000..049ea236e992
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCellSPUTarget;
+
+extern "C" void LLVMInitializeCellSPUTargetInfo() {
+ RegisterTarget<Triple::cellspu>
+ X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
new file mode 100644
index 000000000000..10d18f61c7e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -0,0 +1,1971 @@
+//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the writing of the LLVM IR as a set of C++ calls to the
+// LLVM IR interface. The input module is assumed to be verified.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <set>
+#include <map>
+using namespace llvm;
+
+static cl::opt<std::string>
+FuncName("cppfname", cl::desc("Specify the name of the generated function"),
+ cl::value_desc("function name"));
+
+enum WhatToGenerate {
+ GenProgram,
+ GenModule,
+ GenContents,
+ GenFunction,
+ GenFunctions,
+ GenInline,
+ GenVariable,
+ GenType
+};
+
+static cl::opt<WhatToGenerate> GenerationType("cppgen", cl::Optional,
+ cl::desc("Choose what kind of output to generate"),
+ cl::init(GenProgram),
+ cl::values(
+ clEnumValN(GenProgram, "program", "Generate a complete program"),
+ clEnumValN(GenModule, "module", "Generate a module definition"),
+ clEnumValN(GenContents, "contents", "Generate contents of a module"),
+ clEnumValN(GenFunction, "function", "Generate a function definition"),
+ clEnumValN(GenFunctions,"functions", "Generate all function definitions"),
+ clEnumValN(GenInline, "inline", "Generate an inline function"),
+ clEnumValN(GenVariable, "variable", "Generate a variable definition"),
+ clEnumValN(GenType, "type", "Generate a type definition"),
+ clEnumValEnd
+ )
+);
+
+static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
+ cl::desc("Specify the name of the thing to generate"),
+ cl::init("!bad!"));
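+
+// These options take effect when the C++ backend is selected on the llc
+// command line; an illustrative (hypothetical) invocation:
+//   llc -march=cpp -cppgen=function -cppfor=main input.bc -o input.cpp
+// emits C++ API calls that reconstruct only the function named "main".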
+
+extern "C" void LLVMInitializeCppBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
+}
+
+extern "C" void LLVMInitializeCppBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCppBackendMCInstrInfo() {
+ RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget);
+}
+
+extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() {
+ RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget);
+}
+
+namespace {
+ typedef std::vector<const Type*> TypeList;
+ typedef std::map<const Type*,std::string> TypeMap;
+ typedef std::map<const Value*,std::string> ValueMap;
+ typedef std::set<std::string> NameSet;
+ typedef std::set<const Type*> TypeSet;
+ typedef std::set<const Value*> ValueSet;
+ typedef std::map<const Value*,std::string> ForwardRefMap;
+
+ /// CppWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C++ translation unit.
+ class CppWriter : public ModulePass {
+ formatted_raw_ostream &Out;
+ const Module *TheModule;
+ uint64_t uniqueNum;
+ TypeMap TypeNames;
+ ValueMap ValueNames;
+ NameSet UsedNames;
+ TypeSet DefinedTypes;
+ ValueSet DefinedValues;
+ ForwardRefMap ForwardRefs;
+ bool is_inline;
+ unsigned indent_level;
+
+ public:
+ static char ID;
+ explicit CppWriter(formatted_raw_ostream &o) :
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+
+ virtual const char *getPassName() const { return "C++ backend"; }
+
+ bool runOnModule(Module &M);
+
+ void printProgram(const std::string& fname, const std::string& modName );
+ void printModule(const std::string& fname, const std::string& modName );
+ void printContents(const std::string& fname, const std::string& modName );
+ void printFunction(const std::string& fname, const std::string& funcName );
+ void printFunctions();
+ void printInline(const std::string& fname, const std::string& funcName );
+ void printVariable(const std::string& fname, const std::string& varName );
+ void printType(const std::string& fname, const std::string& typeName );
+
+ void error(const std::string& msg);
+
+
+ formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0);
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
+ private:
+ void printLinkageType(GlobalValue::LinkageTypes LT);
+ void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printCallingConv(CallingConv::ID cc);
+ void printEscapedString(const std::string& str);
+ void printCFP(const ConstantFP* CFP);
+
+ std::string getCppName(const Type* val);
+ inline void printCppName(const Type* val);
+
+ std::string getCppName(const Value* val);
+ inline void printCppName(const Value* val);
+
+ void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ void printType(const Type* Ty);
+ void printTypes(const Module* M);
+
+ void printConstant(const Constant *CPV);
+ void printConstants(const Module* M);
+
+ void printVariableUses(const GlobalVariable *GV);
+ void printVariableHead(const GlobalVariable *GV);
+ void printVariableBody(const GlobalVariable *GV);
+
+ void printFunctionUses(const Function *F);
+ void printFunctionHead(const Function *F);
+ void printFunctionBody(const Function *F);
+ void printInstruction(const Instruction *I, const std::string& bbname);
+ std::string getOpName(Value*);
+
+ void printModuleBody();
+ };
+} // end anonymous namespace.
+
+formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
+ Out << '\n';
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ Out.indent(indent_level);
+ return Out;
+}
+
+static inline void sanitize(std::string &str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+}
+
+static std::string getTypePrefix(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ default: return "other_";
+ }
+ return "unknown_";
+}
+
+void CppWriter::error(const std::string& msg) {
+ report_fatal_error(msg);
+}
+
+// printCFP - Print a floating point constant .. very carefully :)
+// This makes sure that the printed form, converted back to floating point,
+// yields the same binary result so that we don't lose precision.
+void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(mod->getContext(), ";
+ Out << "APFloat(";
+#if HAVE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
+#endif
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << StrVal;
+ else
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
+#if HAVE_PRINTF_A
+ }
+#endif
+ Out << ")";
+}
+
+void CppWriter::printCallingConv(CallingConv::ID cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
+ }
+}
+
+void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
+ }
+}
+
+void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: llvm_unreachable("Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
+ }
+}
+
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+}
+
+std::string CppWriter::getCppName(const Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())";
+ default:
+ error("Invalid primitive type");
+ break;
+ }
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(mod->getContext())";
+ }
+
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
+
+ // See if the type has a name in the symbol table and build accordingly
+ std::string name;
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (STy->hasName())
+ name = STy->getName();
+
+ if (name.empty())
+ name = utostr(uniqueNum++);
+
+ name = std::string(prefix) + name;
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+}
+
+void CppWriter::printCppName(const Type* Ty) {
+ printEscapedString(getCppName(Ty));
+}
+
+std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+}
+
+void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+}
+
+void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+#define HANDLE_ATTR(X) \
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
+#undef HANDLE_ATTR
+ if (attrs & Attribute::StackAlignment)
+ Out << " | Attribute::constructStackAlignmentFromInt("
+ << Attribute::getStackAlignmentFromAttrs(attrs)
+ << ")";
+ attrs &= ~Attribute::StackAlignment;
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
+ nl(Out);
+ }
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
+ }
+}
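As a concrete illustration of what printAttributes emits, a function carrying only the nounwind attribute would come out roughly as below. The func_foo name is a placeholder for whatever getCppName assigned, and 4294967295U is simply ~0U printed in decimal, the slot AttrListPtr uses for function-level attributes.

    AttrListPtr func_foo_PAL;
    {
      SmallVector<AttributeWithIndex, 4> Attrs;
      AttributeWithIndex PAWI;
      PAWI.Index = 4294967295U; PAWI.Attrs = 0 | Attribute::NoUnwind;
      Attrs.push_back(PAWI);
      func_foo_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
    }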
+
+void CppWriter::printType(const Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return;
+
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ const Type* argTy = static_cast<const Type*>(*PI);
+ printType(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
+ Out << ");";
+ nl(Out);
+ }
+ printType(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType* ST = cast<StructType>(Ty);
+ if (!ST->isAnonymous()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::createNamed(mod->getContext(), \"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+ }
+
+ Out << "std::vector<Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ const Type* fieldTy = static_cast<const Type*>(*EI);
+ printType(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ Out << ");";
+ nl(Out);
+ }
+
+ if (ST->isAnonymous()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::get(" << "mod->getContext(), ";
+ } else {
+ Out << typeName << "->setBody(";
+ }
+
+ Out << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType* AT = cast<ArrayType>(Ty);
+ const Type* ET = AT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType* PT = cast<PointerType>(Ty);
+ const Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType* PT = cast<VectorType>(Ty);
+ const Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
+
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+
+ // Finally, separate the type definition from others with a newline.
+ nl(Out);
+}
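To make the recursion above concrete: for a named, non-packed struct such as %foo = type { i32, i8* }, printType emits code along these lines. This is a sketch; the names are what the naming scheme above would plausibly produce, and the numeric suffix on PointerTy_0 depends on the unique counter.

    StructType *StructTy_foo = StructType::createNamed(mod->getContext(), "foo");
    std::vector<Type*> StructTy_foo_fields;
    StructTy_foo_fields.push_back(IntegerType::get(mod->getContext(), 32));
    PointerType* PointerTy_0 = PointerType::get(IntegerType::get(mod->getContext(), 8), 0);
    StructTy_foo_fields.push_back(PointerTy_0);
    StructTy_foo->setBody(StructTy_foo_fields, /*isPacked=*/false);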
+
+void CppWriter::printTypes(const Module* M) {
+ // Add all of the global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
+
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
+ }
+ }
+ }
+}
+
+
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or it's already in the constant list, then we've already printed it and
+ // can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
+
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
+
+ if (isa<GlobalValue>(CV)) {
+ // Skip variables and functions, we emit them elsewhere
+ return;
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(mod->getContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() &&
+ CA->getType()->getElementType() ==
+ Type::getInt8Ty(CA->getContext())) {
+ Out << "Constant* " << constName <<
+ " = ConstantArray::get(mod->getContext(), \"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
+ }
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true"; // Indicate that the null terminator should be
+ // added.
+ else
+ Out << "\", false";// No null terminator
+ Out << ");";
+ } else {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << "&" << constName << "_indices[0], "
+ << constName << "_indices.size()"
+ << ");";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
+ }
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
+ }
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
+ }
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "Constant* " << constName << " = ";
+ Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
+ }
+ nl(Out);
+}
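Two small samples of what printConstant emits, again as an illustrative sketch: the PointerTy_0 type name and the numeric suffixes are placeholders for names produced earlier by the unique counter.

    ConstantInt* const_int32_7 = ConstantInt::get(mod->getContext(),
      APInt(32, StringRef("42"), 10));
    ConstantPointerNull* const_ptr_9 = ConstantPointerNull::get(PointerTy_0);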
+
+void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
+ }
+ }
+ }
+ }
+ }
+}
+
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ const Constant *Init = GV->getInitializer();
+ printType(Init->getType());
+ if (const Function *F = dyn_cast<Function>(Init)) {
+ nl(Out)<< "/ Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
+ }
+ }
+}
+
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
+ printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\");";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
+ Out << "\");";
+ nl(Out);
+ }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
+ }
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+}
+
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
+ nl(Out);
+ }
+}
+
+std::string CppWriter::getOpName(Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
+
+ // See if it's already in the map of forward references; if so, just return
+ // the name we already set up for it.
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
+
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
+ }
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst *SI = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << SI->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
+ Out << iName << "->addCase("
+ << opNames[i] << ", "
+ << opNames[i+1] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << getOpName(inv->getArgOperand(i)) << ");";
+ nl(Out);
+ }
+ // FIXME: This shouldn't use magic numbers -3, -2, and -1.
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getNormalDest()) << ", "
+ << getOpName(inv->getUnwindDest()) << ", "
+ << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable: {
+ Out << "new UnreachableInst("
+ << "mod->getContext(), "
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load: {
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices.begin(), "
+ << iName << "_indices.end()";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", "
+ << phi->getNumIncomingValues() << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
+ Out << iName << "->addIncoming("
+ << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+ << getOpName(phi->getIncomingBlock(i)) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(!"Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call: {
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
+ nl(Out);
+ }
+ if (call->getNumArgOperands() > 1) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ } else if (call->getNumArgOperands() == 1) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", \"";
+ }
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true" : "false");
+ Out << ");";
+ nl(Out);
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ }
+ DefinedValues.insert(I);
+ nl(Out);
+ delete [] opNames;
+}
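For reference, an add feeding a ret inside a block that getCppName called label_entry would be printed roughly as follows; the int32_a and int32_b operand names are placeholders for values defined earlier in the generated code.

    BinaryOperator* int32_sum = BinaryOperator::Create(Instruction::Add,
      int32_a, int32_b, "sum", label_entry);
    ReturnInst::Create(mod->getContext(), int32_sum, label_entry);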
+
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
+
+ // Print the function's function type
+ printType(F->getFunctionType());
+
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+ }
+
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
+
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand)) {
+ consts.insert(C);
+ for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+ // If the operand references a GVal or Constant, make a note of it
+ Value* operand = C->getOperand(j);
+ printType(operand->getType());
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
+ }
+ }
+
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
+
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ if (GenerationType != GenFunction) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
+ }
+}
+
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(F);
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (F->hasGC()) {
+ printCppName(F);
+ Out << "->setGC(\"" << F->getGC() << "\");";
+ nl(Out);
+ }
+ if (is_inline) {
+ Out << "}";
+ nl(Out);
+ }
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+}
+
+void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
+
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
+
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
+ nl(Out);
+ }
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
+ nl(Out);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
+ }
+ }
+ }
+
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(mod->getContext(), \"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
+
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
+ }
+ }
+
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
+ nl(Out);
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
+ nl(Out);
+ ForwardRefs.erase(I);
+ }
+}
+
+void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ // Increment so each parameter in the generated signature gets a distinct name.
+ Out << ", Value* arg_" << arg_count++;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+}
+
+void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them, so
+ // declare all the functions first before emitting their bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variable declarations. We can't initialize them until
+ // after the constants are printed, so just print a header for each global.
+ nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
+
+ // Print out all the constant definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
+ }
+
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+ }
+}
+
+void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/LLVMContext.h>\n";
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+}
+
+void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"";
+ printEscapedString(mName);
+ Out << "\", getGlobalContext());";
+ // Only emit setDataLayout() when the source module actually carries a layout.
+ if (!TheModule->getDataLayout().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
+ nl(Out);
+ ++LI;
+ }
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+}
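Putting printModule and printModuleBody together, the generated translation unit is a single builder function with the shape sketched below. The module name and target triple are placeholders, and each commented section corresponds to one of the print* passes above.

    Module* makeLLVMModule() {
      // Module Construction
      Module* mod = new Module("example.bc", getGlobalContext());
      mod->setTargetTriple("x86_64-unknown-freebsd");  // only if the input had one
      // Type Definitions             (printTypes)
      // Function Declarations        (printFunctionHead for every function)
      // Global Variable Declarations (printVariableHead for every global)
      // Constant Definitions         (printConstants)
      // Global Variable Definitions  (printVariableBody wires up initializers)
      // Function Definitions         (printFunctionBody for every defined function)
      return mod;
    }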
+
+void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"";
+ printEscapedString(mName);
+ Out << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+}
+
+void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
+ }
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
+ }
+ }
+}
+
+void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printType(const std::string &fname,
+ const std::string &typeName) {
+ const Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
+ }
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
+}
+
+char CppWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+ PM.add(new CppWriter(o));
+ return false;
+}
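With this pass hooked in through addPassesToEmitFile, the backend behaves like any other llc target: requesting the "cpp" target registered below (for example, running llc with -march=cpp on a bitcode file) produces a C++ source file built from exactly the calls emitted above, rather than assembly. The generation modes handled in runOnModule (whole program, module contents, a single function, and so on) are selected through the writer's command-line options, which are declared earlier in this file and not shown in this hunk.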
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
new file mode 100644
index 000000000000..7322e3e34f00
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -0,0 +1,43 @@
+//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C++ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CPPTARGETMACHINE_H
+#define CPPTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+struct CPPTargetMachine : public TargetMachine {
+ CPPTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : TargetMachine(T, TT, CPU, FS) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
+
+ virtual const TargetData *getTargetData() const { return 0; }
+};
+
+extern Target TheCppBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
new file mode 100644
index 000000000000..d0aeb12499c5
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- CppBackendTargetInfo.cpp - CppBackend Target Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCppBackendTarget;
+
+static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
+  // The C++ backend accepts any triple, but it should not be the default in
+  // most cases.
+ return 1;
+}
+
+extern "C" void LLVMInitializeCppBackendTargetInfo() {
+ TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
+ "C++ backend",
+ &CppBackend_TripleMatchQuality);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..87e7cb5da561
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmParser
+ MBlazeAsmLexer.cpp
+ MBlazeAsmParser.cpp
+ )
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
new file mode 100644
index 000000000000..15965964452a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -0,0 +1,128 @@
+//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+ class MBlazeBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+ protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
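+    // Populate the name-to-number map from the target's register info so that
+    // MatchRegisterName below can resolve identifiers to register numbers.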
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+ public:
+ MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+ };
+
+ class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
+ public:
+ MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : MBlazeBaseAsmLexer(T, MAI) {
+ std::string tripleString("mblaze-unknown-unknown");
+ std::string featureString;
+ std::string CPU;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+ };
+}
+
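+// Lex one token and, when it is an identifier that names a register (matched
+// case-insensitively), re-tag it as a Register token carrying the register number.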
+AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return AsmToken(lexedToken);
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return AsmToken(lexedToken);
+ case AsmToken::Identifier:
+ {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID) {
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ } else {
+ return AsmToken(lexedToken);
+ }
+ }
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer() {
+ RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
new file mode 100644
index 000000000000..eebd9d878943
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -0,0 +1,567 @@
+//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeISelLowering.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+namespace {
+struct MBlazeOperand;
+
+class MBlazeAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseImmediate();
+ MBlazeOperand *ParseFsl();
+ MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MBlazeGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : TargetAsmParser(), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine
+/// instruction.
+struct MBlazeOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Register,
+ Memory,
+ Fsl
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
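+  // Only the union member selected by Kind is meaningful at any time.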
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ } Mem;
+
+ struct {
+ const MCExpr *Val;
+ } FslImm;
+ };
+
+ MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case Fsl:
+ FslImm = o.FslImm;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getFslImm() const {
+ assert(Kind == Fsl && "Invalid access!");
+ return FslImm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr* getMemOff() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Off;
+ }
+
+ unsigned getMemOffReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.OffReg;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { return Kind == Memory; }
+ bool isFsl() const { return Kind == Fsl; }
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as an immediate when possible; treat a null MCExpr as 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFslOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getFslImm());
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ Inst.addOperand(MCOperand::CreateReg(RegOff));
+ else
+ addExpr(Inst, getMemOff());
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
+ MBlazeOperand *Op = new MBlazeOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Fsl);
+    Op->FslImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->Mem.OffReg = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.OffReg = Off;
+ Op->Mem.Off = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void MBlazeOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Memory: {
+ OS << "<memory R";
+ OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase());
+ OS << ", ";
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff);
+ else
+ OS << getMemOff();
+ OS << ">";
+ }
+ break;
+ case Fsl:
+ getFslImm()->print(OS);
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool MBlazeAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ SMLoc ErrorLoc;
+ unsigned ErrorInfo;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
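+// Fold the two trailing operands (a base register and a register-or-immediate
+// offset) into a single memory operand, replacing them in the operand list.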
+MBlazeOperand *MBlazeAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Operands.size() != 4)
+ return 0;
+
+ MBlazeOperand &Base = *(MBlazeOperand*)Operands[2];
+ MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3];
+
+ SMLoc S = Base.getStartLoc();
+ SMLoc O = Offset.getStartLoc();
+ SMLoc E = Offset.getEndLoc();
+
+ if (!Base.isReg()) {
+ Error(S, "base address must be a register");
+ return 0;
+ }
+
+ if (!Offset.isReg() && !Offset.isImm()) {
+ Error(O, "offset must be a register or immediate");
+ return 0;
+ }
+
+ MBlazeOperand *Op;
+ if (Offset.isReg())
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E);
+ else
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E);
+
+ delete Operands.pop_back_val();
+ delete Operands.pop_back_val();
+ Operands.push_back(Op);
+
+ return Op;
+}
+
+bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ return (ParseRegister(RegNo) == 0);
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, S, E);
+ }
+}
+
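+// Map an identifier of the form "rfslN" to its FSL port number. Anything else
+// yields a value of 16 or more, which the caller rejects.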
+static unsigned MatchFslRegister(StringRef String) {
+ if (!String.startswith("rfsl"))
+ return -1;
+
+ unsigned regNum;
+ if (String.substr(4).getAsInteger(10,regNum))
+ return -1;
+
+ return regNum;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseFsl() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+ if (reg >= 16)
+ return 0;
+
+ getLexer().Lex();
+ const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+ return MBlazeOperand::CreateFslImm(EVal,S,E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ const MCExpr *EVal;
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ if (getParser().ParseExpression(EVal))
+ return 0;
+
+ return MBlazeOperand::CreateImm(EVal, S, E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MBlazeOperand *Op;
+
+ // Attempt to parse the next token as a register name
+ unsigned RegNo;
+ Op = ParseRegister(RegNo);
+
+ // Attempt to parse the next token as an FSL immediate
+ if (!Op)
+ Op = ParseFsl();
+
+ // Attempt to parse the next token as an immediate
+ if (!Op)
+ Op = ParseImmediate();
+
+ // If the token could not be parsed then fail
+ if (!Op) {
+ Error(Parser.getTok().getLoc(), "unknown operand");
+ return 0;
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(Op);
+ return Op;
+}
+
+/// Parse an MBlaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // The first operand is the token for the instruction name.
+ size_t dotLoc = Name.find('.');
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+ if (dotLoc < Name.size())
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (!ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (!ParseOperand(Operands))
+ return true;
+ }
+
+ // If the instruction requires a memory operand then we need to
+ // replace the last two operands (base+offset) with a single
+ // memory operand.
+ if (Name.startswith("lw") || Name.startswith("sw") ||
+ Name.startswith("lh") || Name.startswith("sh") ||
+ Name.startswith("lb") || Name.startswith("sb"))
+ return (ParseMemory(Operands) == NULL);
+
+ return false;
+}
+
+/// ParseDirective parses the MBlaze-specific directives.
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+ RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+ LLVMInitializeMBlazeAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
new file mode 100644
index 000000000000..611a0f473f73
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MBlaze/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmParser
+
+# Hack: we need to include 'main' MBlaze target directory for private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..9376e68a35cf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeDisassembler
+ MBlazeDisassembler.cpp
+ )
+
+# workaround for hanging compilation on MSVC9 and 10
+if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+set_property(
+ SOURCE MBlazeDisassembler.cpp
+ PROPERTY COMPILE_FLAGS "/Od"
+ )
+endif()
+
+add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
new file mode 100644
index 000000000000..88d80a12eb3a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -0,0 +1,707 @@
+//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It contains code to translate
+// the data produced by the decoder into MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeDisassembler.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+// #include "MBlazeGenDecoderTables.inc"
+// #include "MBlazeGenRegisterNames.inc"
+#include "MBlazeGenEDInfo.inc"
+
+namespace llvm {
+extern MCInstrDesc MBlazeInsts[];
+}
+
+using namespace llvm;
+
+const unsigned UNSUPPORTED = -1;
+
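+// Primary opcode table, indexed by the major opcode field ((insn >> 26) & 0x3F).
+// Entries that cover several instructions are refined further by the decode*
+// helpers via getOPCODE() below.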
+static unsigned mblazeBinary2Opcode[] = {
+ MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
+ MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
+ MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
+ MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F
+
+ MBlaze::MUL, MBlaze::BSRL, MBlaze::IDIV, MBlaze::GETD, //10,11,12,13
+ UNSUPPORTED, UNSUPPORTED, MBlaze::FADD, UNSUPPORTED, //14,15,16,17
+ MBlaze::MULI, MBlaze::BSRLI, UNSUPPORTED, MBlaze::GET, //18,19,1A,1B
+ UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, //1C,1D,1E,1F
+
+ MBlaze::OR, MBlaze::AND, MBlaze::XOR, MBlaze::ANDN, //20,21,22,23
+ MBlaze::SEXT8, MBlaze::MFS, MBlaze::BR, MBlaze::BEQ, //24,25,26,27
+ MBlaze::ORI, MBlaze::ANDI, MBlaze::XORI, MBlaze::ANDNI, //28,29,2A,2B
+ MBlaze::IMM, MBlaze::RTSD, MBlaze::BRI, MBlaze::BEQI, //2C,2D,2E,2F
+
+ MBlaze::LBU, MBlaze::LHU, MBlaze::LW, UNSUPPORTED, //30,31,32,33
+ MBlaze::SB, MBlaze::SH, MBlaze::SW, UNSUPPORTED, //34,35,36,37
+ MBlaze::LBUI, MBlaze::LHUI, MBlaze::LWI, UNSUPPORTED, //38,39,3A,3B
+ MBlaze::SBI, MBlaze::SHI, MBlaze::SWI, UNSUPPORTED, //3C,3D,3E,3F
+};
+
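+// Field extraction helpers: rD, rA and rB are 5-bit register fields at bit
+// offsets 21, 16 and 11; each helper returns UNSUPPORTED when the field cannot
+// be mapped to a register.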
+static unsigned getRD(uint32_t insn) {
+ if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
+}
+
+static unsigned getRA(uint32_t insn) {
+ if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
+}
+
+static unsigned getRB(uint32_t insn) {
+ if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
+}
+
+static int64_t getRS(uint32_t insn) {
+ if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
+}
+
+static int64_t getIMM(uint32_t insn) {
+ int16_t val = (insn & 0xFFFF);
+ return val;
+}
+
+static int64_t getSHT(uint32_t insn) {
+ int16_t val = (insn & 0x1F);
+ return val;
+}
+
+static unsigned getFLAGS(int32_t insn) {
+ return (insn & 0x7FF);
+}
+
+static int64_t getFSL(uint32_t insn) {
+ int16_t val = (insn & 0xF);
+ return val;
+}
+
+static unsigned decodeMUL(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0: return MBlaze::MUL;
+ case 1: return MBlaze::MULH;
+ case 2: return MBlaze::MULHSU;
+ case 3: return MBlaze::MULHU;
+ }
+}
+
+static unsigned decodeSEXT(uint32_t insn) {
+ switch (insn&0x7FF) {
+ default: return UNSUPPORTED;
+ case 0x60: return MBlaze::SEXT8;
+ case 0x68: return MBlaze::WIC;
+ case 0x64: return MBlaze::WDC;
+ case 0x66: return MBlaze::WDCC;
+ case 0x74: return MBlaze::WDCF;
+ case 0x61: return MBlaze::SEXT16;
+ case 0x41: return MBlaze::SRL;
+ case 0x21: return MBlaze::SRC;
+ case 0x01: return MBlaze::SRA;
+ }
+}
+
+static unsigned decodeBEQ(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQ;
+ case 0x10: return MBlaze::BEQD;
+ case 0x05: return MBlaze::BGE;
+ case 0x15: return MBlaze::BGED;
+ case 0x04: return MBlaze::BGT;
+ case 0x14: return MBlaze::BGTD;
+ case 0x03: return MBlaze::BLE;
+ case 0x13: return MBlaze::BLED;
+ case 0x02: return MBlaze::BLT;
+ case 0x12: return MBlaze::BLTD;
+ case 0x01: return MBlaze::BNE;
+ case 0x11: return MBlaze::BNED;
+ }
+}
+
+static unsigned decodeBEQI(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQI;
+ case 0x10: return MBlaze::BEQID;
+ case 0x05: return MBlaze::BGEI;
+ case 0x15: return MBlaze::BGEID;
+ case 0x04: return MBlaze::BGTI;
+ case 0x14: return MBlaze::BGTID;
+ case 0x03: return MBlaze::BLEI;
+ case 0x13: return MBlaze::BLEID;
+ case 0x02: return MBlaze::BLTI;
+ case 0x12: return MBlaze::BLTID;
+ case 0x01: return MBlaze::BNEI;
+ case 0x11: return MBlaze::BNEID;
+ }
+}
+
+static unsigned decodeBR(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BR;
+ case 0x08: return MBlaze::BRA;
+ case 0x0C: return MBlaze::BRK;
+ case 0x10: return MBlaze::BRD;
+ case 0x14: return MBlaze::BRLD;
+ case 0x18: return MBlaze::BRAD;
+ case 0x1C: return MBlaze::BRALD;
+ }
+}
+
+static unsigned decodeBRI(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BRI;
+ case 0x08: return MBlaze::BRAI;
+ case 0x0C: return MBlaze::BRKI;
+ case 0x10: return MBlaze::BRID;
+ case 0x14: return MBlaze::BRLID;
+ case 0x18: return MBlaze::BRAID;
+ case 0x1C: return MBlaze::BRALID;
+ }
+}
+
+static unsigned decodeBSRL(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLL;
+ case 0x1: return MBlaze::BSRA;
+ case 0x0: return MBlaze::BSRL;
+ }
+}
+
+static unsigned decodeBSRLI(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLLI;
+ case 0x1: return MBlaze::BSRAI;
+ case 0x0: return MBlaze::BSRLI;
+ }
+}
+
+static unsigned decodeRSUBK(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::RSUBK;
+ case 0x1: return MBlaze::CMP;
+ case 0x3: return MBlaze::CMPU;
+ }
+}
+
+static unsigned decodeFADD(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::FADD;
+ case 0x080: return MBlaze::FRSUB;
+ case 0x100: return MBlaze::FMUL;
+ case 0x180: return MBlaze::FDIV;
+ case 0x200: return MBlaze::FCMP_UN;
+ case 0x210: return MBlaze::FCMP_LT;
+ case 0x220: return MBlaze::FCMP_EQ;
+ case 0x230: return MBlaze::FCMP_LE;
+ case 0x240: return MBlaze::FCMP_GT;
+ case 0x250: return MBlaze::FCMP_NE;
+ case 0x260: return MBlaze::FCMP_GE;
+ case 0x280: return MBlaze::FLT;
+ case 0x300: return MBlaze::FINT;
+ case 0x380: return MBlaze::FSQRT;
+ }
+}
+
+static unsigned decodeGET(uint32_t insn) {
+ switch ((insn>>10)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GET;
+ case 0x01: return MBlaze::EGET;
+ case 0x02: return MBlaze::AGET;
+ case 0x03: return MBlaze::EAGET;
+ case 0x04: return MBlaze::TGET;
+ case 0x05: return MBlaze::TEGET;
+ case 0x06: return MBlaze::TAGET;
+ case 0x07: return MBlaze::TEAGET;
+ case 0x08: return MBlaze::CGET;
+ case 0x09: return MBlaze::ECGET;
+ case 0x0A: return MBlaze::CAGET;
+ case 0x0B: return MBlaze::ECAGET;
+ case 0x0C: return MBlaze::TCGET;
+ case 0x0D: return MBlaze::TECGET;
+ case 0x0E: return MBlaze::TCAGET;
+ case 0x0F: return MBlaze::TECAGET;
+ case 0x10: return MBlaze::NGET;
+ case 0x11: return MBlaze::NEGET;
+ case 0x12: return MBlaze::NAGET;
+ case 0x13: return MBlaze::NEAGET;
+ case 0x14: return MBlaze::TNGET;
+ case 0x15: return MBlaze::TNEGET;
+ case 0x16: return MBlaze::TNAGET;
+ case 0x17: return MBlaze::TNEAGET;
+ case 0x18: return MBlaze::NCGET;
+ case 0x19: return MBlaze::NECGET;
+ case 0x1A: return MBlaze::NCAGET;
+ case 0x1B: return MBlaze::NECAGET;
+ case 0x1C: return MBlaze::TNCGET;
+ case 0x1D: return MBlaze::TNECGET;
+ case 0x1E: return MBlaze::TNCAGET;
+ case 0x1F: return MBlaze::TNECAGET;
+ case 0x20: return MBlaze::PUT;
+ case 0x22: return MBlaze::APUT;
+ case 0x24: return MBlaze::TPUT;
+ case 0x26: return MBlaze::TAPUT;
+ case 0x28: return MBlaze::CPUT;
+ case 0x2A: return MBlaze::CAPUT;
+ case 0x2C: return MBlaze::TCPUT;
+ case 0x2E: return MBlaze::TCAPUT;
+ case 0x30: return MBlaze::NPUT;
+ case 0x32: return MBlaze::NAPUT;
+ case 0x34: return MBlaze::TNPUT;
+ case 0x36: return MBlaze::TNAPUT;
+ case 0x38: return MBlaze::NCPUT;
+ case 0x3A: return MBlaze::NCAPUT;
+ case 0x3C: return MBlaze::TNCPUT;
+ case 0x3E: return MBlaze::TNCAPUT;
+ }
+}
+
+static unsigned decodeGETD(uint32_t insn) {
+ switch ((insn>>5)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GETD;
+ case 0x01: return MBlaze::EGETD;
+ case 0x02: return MBlaze::AGETD;
+ case 0x03: return MBlaze::EAGETD;
+ case 0x04: return MBlaze::TGETD;
+ case 0x05: return MBlaze::TEGETD;
+ case 0x06: return MBlaze::TAGETD;
+ case 0x07: return MBlaze::TEAGETD;
+ case 0x08: return MBlaze::CGETD;
+ case 0x09: return MBlaze::ECGETD;
+ case 0x0A: return MBlaze::CAGETD;
+ case 0x0B: return MBlaze::ECAGETD;
+ case 0x0C: return MBlaze::TCGETD;
+ case 0x0D: return MBlaze::TECGETD;
+ case 0x0E: return MBlaze::TCAGETD;
+ case 0x0F: return MBlaze::TECAGETD;
+ case 0x10: return MBlaze::NGETD;
+ case 0x11: return MBlaze::NEGETD;
+ case 0x12: return MBlaze::NAGETD;
+ case 0x13: return MBlaze::NEAGETD;
+ case 0x14: return MBlaze::TNGETD;
+ case 0x15: return MBlaze::TNEGETD;
+ case 0x16: return MBlaze::TNAGETD;
+ case 0x17: return MBlaze::TNEAGETD;
+ case 0x18: return MBlaze::NCGETD;
+ case 0x19: return MBlaze::NECGETD;
+ case 0x1A: return MBlaze::NCAGETD;
+ case 0x1B: return MBlaze::NECAGETD;
+ case 0x1C: return MBlaze::TNCGETD;
+ case 0x1D: return MBlaze::TNECGETD;
+ case 0x1E: return MBlaze::TNCAGETD;
+ case 0x1F: return MBlaze::TNECAGETD;
+ case 0x20: return MBlaze::PUTD;
+ case 0x22: return MBlaze::APUTD;
+ case 0x24: return MBlaze::TPUTD;
+ case 0x26: return MBlaze::TAPUTD;
+ case 0x28: return MBlaze::CPUTD;
+ case 0x2A: return MBlaze::CAPUTD;
+ case 0x2C: return MBlaze::TCPUTD;
+ case 0x2E: return MBlaze::TCAPUTD;
+ case 0x30: return MBlaze::NPUTD;
+ case 0x32: return MBlaze::NAPUTD;
+ case 0x34: return MBlaze::TNPUTD;
+ case 0x36: return MBlaze::TNAPUTD;
+ case 0x38: return MBlaze::NCPUTD;
+ case 0x3A: return MBlaze::NCAPUTD;
+ case 0x3C: return MBlaze::TNCPUTD;
+ case 0x3E: return MBlaze::TNCAPUTD;
+ }
+}
+
+static unsigned decodeIDIV(uint32_t insn) {
+ switch (insn&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::IDIV;
+ case 0x2: return MBlaze::IDIVU;
+ }
+}
+
+static unsigned decodeLBU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LBU;
+ case 0x1: return MBlaze::LBUR;
+ }
+}
+
+static unsigned decodeLHU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LHU;
+ case 0x1: return MBlaze::LHUR;
+ }
+}
+
+static unsigned decodeLW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LW;
+ case 0x1: return MBlaze::LWR;
+ case 0x2: return MBlaze::LWX;
+ }
+}
+
+static unsigned decodeSB(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SB;
+ case 0x1: return MBlaze::SBR;
+ }
+}
+
+static unsigned decodeSH(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SH;
+ case 0x1: return MBlaze::SHR;
+ }
+}
+
+static unsigned decodeSW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SW;
+ case 0x1: return MBlaze::SWR;
+ case 0x2: return MBlaze::SWX;
+ }
+}
+
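+// MFS, MTS, MSRSET and MSRCLR share a major opcode; bits 14..16 of the
+// encoding select among them.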
+static unsigned decodeMFS(uint32_t insn) {
+ switch ((insn>>15)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0:
+ switch ((insn>>16)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MSRSET;
+ case 0x1: return MBlaze::MSRCLR;
+ }
+ case 0x1:
+ switch ((insn>>14)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MFS;
+ case 0x1: return MBlaze::MTS;
+ }
+ }
+}
+
+static unsigned decodeOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::OR;
+ case 0x400: return MBlaze::PCMPBF;
+ }
+}
+
+static unsigned decodeXOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::XOR;
+ case 0x400: return MBlaze::PCMPEQ;
+ }
+}
+
+static unsigned decodeANDN(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::ANDN;
+ case 0x400: return MBlaze::PCMPNE;
+ }
+}
+
+static unsigned decodeRTSD(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x10: return MBlaze::RTSD;
+ case 0x11: return MBlaze::RTID;
+ case 0x12: return MBlaze::RTBD;
+ case 0x14: return MBlaze::RTED;
+ }
+}
+
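+// Resolve the final MCInst opcode: look up the major opcode and, where the
+// table entry covers several instructions, dispatch to the matching decode*
+// helper.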
+static unsigned getOPCODE(uint32_t insn) {
+ unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ];
+ switch (opcode) {
+ case MBlaze::MUL: return decodeMUL(insn);
+ case MBlaze::SEXT8: return decodeSEXT(insn);
+ case MBlaze::BEQ: return decodeBEQ(insn);
+ case MBlaze::BEQI: return decodeBEQI(insn);
+ case MBlaze::BR: return decodeBR(insn);
+ case MBlaze::BRI: return decodeBRI(insn);
+ case MBlaze::BSRL: return decodeBSRL(insn);
+ case MBlaze::BSRLI: return decodeBSRLI(insn);
+ case MBlaze::RSUBK: return decodeRSUBK(insn);
+ case MBlaze::FADD: return decodeFADD(insn);
+ case MBlaze::GET: return decodeGET(insn);
+ case MBlaze::GETD: return decodeGETD(insn);
+ case MBlaze::IDIV: return decodeIDIV(insn);
+ case MBlaze::LBU: return decodeLBU(insn);
+ case MBlaze::LHU: return decodeLHU(insn);
+ case MBlaze::LW: return decodeLW(insn);
+ case MBlaze::SB: return decodeSB(insn);
+ case MBlaze::SH: return decodeSH(insn);
+ case MBlaze::SW: return decodeSW(insn);
+ case MBlaze::MFS: return decodeMFS(insn);
+ case MBlaze::OR: return decodeOR(insn);
+ case MBlaze::XOR: return decodeXOR(insn);
+ case MBlaze::ANDN: return decodeANDN(insn);
+ case MBlaze::RTSD: return decodeRTSD(insn);
+ default: return opcode;
+ }
+}
+
+EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+ return instInfoMBlaze;
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool MBlazeDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint64_t read;
+ uint8_t bytes[4];
+
+ // By default we consume 1 byte on failure
+ size = 1;
+
+ // We want to read exactly 4 bytes of data.
+ if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
+ return false;
+
+ // Encoded as a big-endian 32-bit word in the stream.
+ insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
+
+ // Get the MCInst opcode from the binary instruction and make sure
+ // that it is a valid instruction.
+ unsigned opcode = getOPCODE(insn);
+ if (opcode == UNSUPPORTED)
+ return false;
+
+ instr.setOpcode(opcode);
+
+ unsigned RD = getRD(insn);
+ unsigned RA = getRA(insn);
+ unsigned RB = getRB(insn);
+ unsigned RS = getRS(insn);
+
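+  // The instruction format (taken from TSFlags) determines which fields become
+  // MCInst operands and in what order.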
+ uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
+ switch ((tsFlags & MBlazeII::FormMask)) {
+ default:
+ return false;
+
+ case MBlazeII::FRRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRI:
+ switch (opcode) {
+ default:
+ return false;
+ case MBlaze::MFS:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ break;
+ case MBlaze::MTS:
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ case MBlaze::MSRSET:
+ case MBlaze::MSRCLR:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
+ break;
+ }
+ break;
+
+ case MBlazeII::FRRI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ switch (opcode) {
+ default:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+ case MBlaze::BSRLI:
+ case MBlaze::BSRAI:
+ case MBlaze::BSLLI:
+ instr.addOperand(MCOperand::CreateImm(insn&0x1F));
+ break;
+ }
+ break;
+
+ case MBlazeII::FCRR:
+ if (RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCRI:
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRCR:
+ if (RD == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRCI:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FCCR:
+ if (RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCCI:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRRCI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
+ break;
+
+ case MBlazeII::FRRC:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRCX:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FRCS:
+ if (RD == UNSUPPORTED || RS == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RS));
+ break;
+
+ case MBlazeII::FCRCS:
+ if (RS == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RS));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FCRCX:
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCX:
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCR:
+ if (RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRIR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ }
+
+ // We always consume 4 bytes of data on success
+ size = 4;
+
+ return true;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T) {
+ return new MBlazeDisassembler;
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+ createMBlazeDisassembler);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 000000000000..d05eced0bacf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,55 @@
+//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It is the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MBlazeDisassembler() :
+ MCDisassembler() {
+ }
+
+ ~MBlazeDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
new file mode 100644
index 000000000000..0530b3286bc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDisassembler
+
+# Hack: we need to include 'main' MBlaze target directory to grab headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..242a573036e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeInstPrinter.cpp
+ )
+
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 000000000000..a7fd287990b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,69 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlaze.h"
+#include "MBlazeInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << (int32_t)Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << "rfsl" << MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, OpNo, O, NULL);
+ O << ", ";
+ printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 000000000000..eacca410b986
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MBlazeInstPrinter : public MCInstPrinter {
+ public:
+ MBlazeInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+ const char *Modifier = 0);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
new file mode 100644
index 000000000000..9fb6e869d945
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/InstPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+# private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
new file mode 100644
index 000000000000..3390794c9375
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
@@ -0,0 +1,42 @@
+//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MBlaze back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MBLAZE_H
+#define TARGET_MBLAZE_H
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MBlazeTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class MCCodeEmitter;
+ class MCInstrInfo;
+ class MCSubtargetInfo;
+ class TargetAsmBackend;
+ class formatted_raw_ostream;
+
+ MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+ TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
+
+ FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
+ FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.td b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
new file mode 100644
index 000000000000..1245658d29ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
@@ -0,0 +1,73 @@
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MBlaze target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MBlazeRegisterInfo.td"
+include "MBlazeSchedule.td"
+include "MBlazeIntrinsics.td"
+include "MBlazeInstrInfo.td"
+include "MBlazeCallingConv.td"
+
+def MBlazeInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Microblaze Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
+ "Implements barrel shifter">;
+def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
+ "Implements hardware divider">;
+def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
+ "Implements hardware multiplier">;
+def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
+ "Implements pattern compare instruction">;
+def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
+ "Implements floating point unit">;
+def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
+ "Implements multiplier with 64-bit result">;
+def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
+ "Implements sqrt and floating point convert">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze processors supported.
+//===----------------------------------------------------------------------===//
+
+def : Processor<"mblaze", MBlazeGenericItineraries, []>;
+def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
+def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MBlaze : Target {
+ let InstructionSet = MBlazeInstrInfo;
+ let AssemblyWriters = [MBlazeAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
new file mode 100644
index 000000000000..08f14c365957
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -0,0 +1,162 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "MBlaze.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_Data_1: return 1;
+ case FK_PCRel_2:
+ case FK_Data_2: return 2;
+ case FK_PCRel_4:
+ case FK_Data_4: return 4;
+ case FK_Data_8: return 8;
+ }
+}
+
+
+namespace {
+class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MBlazeELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ true) {}
+};
+
+class MBlazeAsmBackend : public TargetAsmBackend {
+public:
+ MBlazeAsmBackend(const Target &T)
+ : TargetAsmBackend() {
+ }
+
+ unsigned getNumFixupKinds() const {
+ return 2;
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ unsigned getPointerSize() const {
+ return 4;
+ }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case MBlaze::ADDIK: return MBlaze::ADDIK32;
+ case MBlaze::ORI: return MBlaze::ORI32;
+ case MBlaze::BRLID: return MBlaze::BRLID32;
+ }
+}
+
+bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+ bool hasExprOrImm = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+ return hasExprOrImm;
+}
+
+void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ Res = Inst;
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
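+
+// Relaxation note: MayNeedRelaxation above only returns true for
+// ADDIK/ORI/BRLID whose operands are still unresolved symbolic expressions.
+// The *32 opcodes they relax to are presumably the forms that carry a full
+// 32-bit immediate via an IMM prefix (MBlazeDelaySlotFiller.cpp notes that
+// immediates wider than 16 bits need an implicit IMM instruction).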
+
+bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if ((Count % 4) != 0)
+ return false;
+
+ for (uint64_t i = 0; i < Count; i += 4)
+ OW->Write32(0x00000000);
+
+ return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
+ : MBlazeAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ false);
+ }
+};
+
+void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned Size = getFixupKindSize(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ char *data = Data + Fixup.getOffset();
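+
+ // Note on the offsets below: the 2- and 4-byte cases do not patch the bytes
+ // at the start of the fixup range. The layout appears to assume big-endian
+ // words, with a 4-byte value split between the 16-bit immediate fields of
+ // an IMM prefix and the following instruction; this is inferred from the
+ // offsets rather than documented here.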
+ switch (Size) {
+ default: llvm_unreachable("Cannot fixup unknown value.");
+ case 1: llvm_unreachable("Cannot fixup 1 byte value.");
+ case 8: llvm_unreachable("Cannot fixup 8 byte value.");
+
+ case 4:
+ *(data+7) = uint8_t(Value);
+ *(data+6) = uint8_t(Value >> 8);
+ *(data+3) = uint8_t(Value >> 16);
+ *(data+2) = uint8_t(Value >> 24);
+ break;
+
+ case 2:
+ *(data+3) = uint8_t(Value >> 0);
+ *(data+2) = uint8_t(Value >> 8);
+ }
+}
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ assert(0 && "Mac not supported on MBlaze");
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on MBlaze");
+
+ return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
new file mode 100644
index 000000000000..0016df569b93
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -0,0 +1,335 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeMCInstLower.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+ class MBlazeAsmPrinter : public AsmPrinter {
+ const MBlazeSubtarget *Subtarget;
+ public:
+ explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Assembly Printer";
+ }
+
+ void printSavedRegsBitmask();
+ void emitFrameDirective();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+// #include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+// MBlaze Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little-endian bitset indicating which registers are
+// saved in the function prologue (e.g. with a 0x00008000 mask, the
+// assembler knows that register 15 (R15, the return address register)
+// is saved in the prologue).
+// offset - the position before the stack pointer subtraction indicating
+// where the first saved register of the prologue is located
+// (e.g. with a -8 offset and a 48-byte frame, the first register
+// in the mask is stored at 48-8=40).
+//
+// Consider the following function prologue:
+//
+//   .frame R19,48,R15
+//   .mask  0x00088000,-8
+//   addik  R1, R1, -48
+//   swi    R15, R1, 40
+//   swi    R19, R1, 36
+//
+// With a 0x00088000 mask, the assembler knows that registers 15 (R15) and
+// 19 (R19) are saved in the prologue. As the save order in the prologue is
+// from left to right, R15 is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (R15) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+// Print a 32 bit hex number, zero-padded to eight digits.
+static void printHex32(unsigned int Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+// Create a bitmask with all callee-saved CPU registers. The return address
+// and frame pointer registers are also added to the mask when necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ // CPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+
+ // Set the CPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ if (MBlaze::GPRRegisterClass->contains(Reg))
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (TFI->hasFP(*MF))
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getFrameRegister(*MF)));
+
+ if (MFI->adjustsStack())
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
+}
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned stkReg = RI.getFrameRegister(*MF);
+ unsigned retReg = RI.getRARegister();
+ unsigned stkSze = MF->getFrameInfo()->getStackSize();
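+
+ // With the values from the example in the directive comment block above,
+ // this emits something like "\t.frame\tr19,48,r15" (the exact register
+ // spelling comes from MBlazeInstPrinter::getRegisterName; illustrative only).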
+
+ OutStreamer.EmitRawText("\t.frame\t" +
+ Twine(MBlazeInstPrinter::getRegisterName(stkReg)) +
+ "," + Twine(stkSze) + "," +
+ Twine(MBlazeInstPrinter::getRegisterName(retReg)));
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ AsmPrinter::EmitFunctionEntryLabel();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ emitFrameDirective();
+ printSavedRegsBitmask();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+//===----------------------------------------------------------------------===//
+void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << MBlazeInstPrinter::getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int32_t)MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate: {
+ const ConstantFP *fp = MO.getFPImm();
+ printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
+ O << ";\t# immediate = " << *fp;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << "rfsl" << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+ RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
+
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
new file mode 100644
index 000000000000..4962573f96ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -0,0 +1,28 @@
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MBlaze architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze ABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MBlaze : CallingConv<[
+ // i32 are returned in registers R3, R4
+ CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>>
+]>;
+
+def CC_MBlaze : CallingConv<[
+ CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>,
+ CCIfType<[i32,f32], CCAssignToStack<4, 4>>
+]>;
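+
+// Note: CC_MBlaze_AssignReg is a custom assigner implemented in C++ (it is
+// declared in MBlazeISelLowering.cpp). Arguments it declines fall through to
+// the 4-byte aligned stack slot rule above. Judging by the frame lowering
+// code in this patch, the argument registers it hands out are R5-R10, but
+// that detail lives on the C++ side rather than in this file.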
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
new file mode 100644
index 000000000000..c07570a487b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -0,0 +1,258 @@
+//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that attempts to fill delay slots with useful instructions. If no
+// instruction can be moved into the delay slot, then a NOP is placed there.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace llvm {
+cl::opt<bool> DisableDelaySlotFiller(
+ "disable-mblaze-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the MBlaze delay slot filter."),
+ cl::Hidden);
+}
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
+ // Any instruction with an immediate operand that cannot be represented in
+ // 16 bits requires an implicit IMM instruction.
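+ // For example, something like "addik r5, r0, 100000" (hypothetical
+ // operands) cannot encode 100000 in the signed 16-bit range of
+ // -0x8000..0x7fff, so an IMM would have to be emitted first.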
+ unsigned numOper = candidate->getNumOperands();
+ for (unsigned op = 0; op < numOper; ++op) {
+ MachineOperand &mop = candidate->getOperand(op);
+
+ // The operand requires more than 16-bits to represent.
+ if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+ return true;
+
+ // We must assume that unknown immediate values require more than
+ // 16-bits to represent.
+ if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI())
+ return true;
+
+ // FIXME: we could probably check to see if the FP value happens
+ // to not need an IMM instruction. For now we just always
+ // assume that FP values do.
+ if (mop.isFPImm())
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+ switch (instr->getOpcode()) {
+ default: return instr->getNumOperands();
+
+ // These instructions have a variable number of operands but the first two
+ // are the "real" operands that we care about during hazard detection.
+ case MBlaze::BRLID:
+ case MBlaze::BRALID:
+ case MBlaze::BRLD:
+ case MBlaze::BRALD:
+ return 2;
+ }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+ MachineBasicBlock::iterator &slot) {
+ // Hazard check
+ MachineBasicBlock::iterator a = candidate;
+ MachineBasicBlock::iterator b = slot;
+ MCInstrDesc desc = candidate->getDesc();
+
+ // MBB layout:-
+ // candidate := a0 = operation(a1, a2)
+ // ...middle bit...
+ // slot := b0 = operation(b1, b2)
+
+ // Possible hazards:-
+ // 1. a1 or a2 was written during the middle bit
+ // 2. a0 was read or written during the middle bit
+ // 3. a0 is one or more of {b0, b1, b2}
+ // 4. b0 is one or more of {a1, a2}
+ // 5. a accesses memory, and the middle bit
+ // contains a store operation.
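+ //
+ // A concrete (hypothetical) type 3 case: if the candidate were
+ //   add r3, r4, r5     (a0 = r3)
+ // and the slot instruction were
+ //   brld r15, r3       (b1 = r3)
+ // then the branch would read r3 before the relocated add writes it, so the
+ // candidate must be rejected.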
+ bool a_is_memory = desc.mayLoad() || desc.mayStore();
+
+ // Determine the number of operands in the slot instruction and in the
+ // candidate instruction.
+ const unsigned aend = getLastRealOperand(a);
+ const unsigned bend = getLastRealOperand(b);
+
+ // Check hazards type 1, 2 and 5 by scanning the middle bit
+ MachineBasicBlock::iterator m = a;
+ for (++m; m != b; ++m) {
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ bool aop_is_reg = a->getOperand(aop).isReg();
+ if (!aop_is_reg) continue;
+
+ bool aop_is_def = a->getOperand(aop).isDef();
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ const unsigned mend = getLastRealOperand(m);
+ for (unsigned mop = 0; mop<mend; ++mop) {
+ bool mop_is_reg = m->getOperand(mop).isReg();
+ if (!mop_is_reg) continue;
+
+ bool mop_is_def = m->getOperand(mop).isDef();
+ unsigned mop_reg = m->getOperand(mop).getReg();
+
+ if (aop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 2, because aop = a0
+ else if (mop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 1, because aop in {a1, a2}
+ }
+ }
+
+ // Check hazard type 5
+ if (a_is_memory && m->getDesc().mayStore())
+ return true;
+ }
+
+ // Check hazard type 3 & 4
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ if (a->getOperand(aop).isReg()) {
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ for (unsigned bop = 0; bop<bend; ++bop) {
+ if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) {
+ unsigned bop_reg = b->getOperand(bop).getReg();
+ if (aop_reg == bop_reg)
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate) {
+ if (candidate == MBB.begin())
+ return false;
+
+ MCInstrDesc brdesc = (--candidate)->getDesc();
+ return (brdesc.hasDelaySlot());
+}
+
+static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
+ if (!I->hasUnmodeledSideEffects())
+ return false;
+
+ unsigned op = I->getOpcode();
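+
+ // The arithmetic opcodes below are exempted even though they are flagged
+ // with unmodeled side effects; the side effect in question is presumably
+ // their interaction with the carry bit in MSR, which this pass treats as
+ // safe for delay slot purposes.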
+ if (op == MBlaze::ADDK || op == MBlaze::ADDIK ||
+ op == MBlaze::ADDC || op == MBlaze::ADDIC ||
+ op == MBlaze::ADDKC || op == MBlaze::ADDIKC ||
+ op == MBlaze::RSUBK || op == MBlaze::RSUBIK ||
+ op == MBlaze::RSUBC || op == MBlaze::RSUBIC ||
+ op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
+ return false;
+
+ return true;
+}
+
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
+ MachineBasicBlock::iterator I = slot;
+ while (true) {
+ if (I == MBB.begin())
+ break;
+
+ --I;
+ MCInstrDesc desc = I->getDesc();
+ if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
+ desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ hasUnknownSideEffects(I))
+ break;
+
+ if (hasImmInstruction(I) || delayHasHazard(I,slot))
+ continue;
+
+ return I;
+ }
+
+ return MBB.end();
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB,I);
+
+ ++FilledSlots;
+ Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ }
+ return Changed;
+}
+
+/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in MBlaze MachineFunctions
+FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
new file mode 100644
index 000000000000..3f26ed15b284
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeELFWriterInfo.h"
+#include "MBlazeRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the MBlazeELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {}
+
+unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case MBlaze::reloc_pcrel_word:
+ return ELF::R_MICROBLAZE_64_PCREL;
+ case MBlaze::reloc_absolute_word:
+ return ELF::R_MICROBLAZE_NONE;
+ default:
+ llvm_unreachable("unknown mblaze machine relocation type");
+ }
+ return 0;
+}
+
+long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ return Modifier - 4;
+ case ELF::R_MICROBLAZE_32:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown mblaze relocation type");
+ }
+ return 0;
+}
+
+unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ // FIXME: Most of these sizes are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32:
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_32_LO:
+ case ELF::R_MICROBLAZE_SRO32:
+ case ELF::R_MICROBLAZE_SRW32:
+ case ELF::R_MICROBLAZE_32_SYM_OP_SYM:
+ case ELF::R_MICROBLAZE_GOTOFF_32:
+ return 32;
+
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_64:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ case ELF::R_MICROBLAZE_GOT_64:
+ case ELF::R_MICROBLAZE_PLT_64:
+ case ELF::R_MICROBLAZE_GOTOFF_64:
+ return 64;
+ }
+
+ return 0;
+}
+
+bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ // FIXME: Most of these are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ return true;
+ }
+
+ return false;
+}
+
+unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return MBlaze::reloc_absolute_word;
+}
+
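+// Illustration of the PC-relative case below: for a symbol at offset 0x100
+// and a relocation site at offset 0x10, the computed value is
+// 0x100 - (0x10 + 4) = 0xEC, i.e. the displacement is measured from the end
+// of the 4-byte word at the relocation offset. (The offsets are made up.)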
+long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ if (RelTy == ELF::R_MICROBLAZE_32_PCREL ||
+     RelTy == ELF::R_MICROBLAZE_64_PCREL)
+   return SymOffset - (RelOffset + 4);
+ else
+   assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
new file mode 100644
index 000000000000..63bfc0da745a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_ELF_WRITER_INFO_H
+#define MBLAZE_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class MBlazeELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ MBlazeELFWriterInfo(TargetMachine &TM);
+ virtual ~MBlazeELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+ /// to reference an absolute label.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // MBLAZE_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 000000000000..e7639025cf1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,450 @@
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+ cl::opt<bool> DisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
+}
+
+static void replaceFrameIndexes(MachineFunction &MF,
+ SmallVector<std::pair<int,int64_t>, 16> &FR) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+ SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+ for (; FRI != FRE; ++FRI) {
+ MFI->RemoveStackObject(FRI->first);
+ int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+ MBlazeFI->recordReplacement(FRI->first, NFI);
+
+ for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+ MachineBasicBlock::iterator MBB = MB->begin();
+ const MachineBasicBlock::iterator MBE = MB->end();
+
+ for (; MBB != MBE; ++MBB) {
+ MachineInstr::mop_iterator MIB = MBB->operands_begin();
+ const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+ for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+ if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+ DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+ MII->setIndex(NFI);
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer in
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+ if (DisableStackAdjust) return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+ MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+ const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+ SmallVector<MachineInstr*, 16> EraseInstr;
+ SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+ MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+ MachineBasicBlock::iterator MIB = MBB->begin();
+ MachineBasicBlock::iterator MIE = MBB->end();
+
+ int StackAdjust = 0;
+ int StackOffset = -28;
+
+ // In this loop we are searching for frame indexes that correspond to incoming
+ // arguments that are already in the stack. We look for instruction sequences
+ // like the following:
+ //
+ // LWI REG, FI1, 0
+ // ...
+ // SWI REG, FI2, 0
+ //
+ // As long as there are no defs of REG in the ... part, we can eliminate
+ // the SWI instruction because the value has already been stored to the
+ // stack by the caller. All we need to do is locate FI at the correct
+ // stack location according to the calling conventions.
+ //
+ // Additionally, if the SWI operation kills the def of REG then we don't
+ // need the LWI operation so we can erase it as well.
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+ unsigned FIReg = I->getOperand(0).getReg();
+ MachineBasicBlock::iterator SI = I;
+ for (SI++; SI != MIE; ++SI) {
+ if (!SI->getOperand(0).isReg() ||
+ !SI->getOperand(1).isFI() ||
+ SI->getOpcode() != MBlaze::SWI) continue;
+
+ int FI = SI->getOperand(1).getIndex();
+ if (SI->getOperand(0).getReg() != FIReg ||
+ MFI->isFixedObjectIndex(FI) ||
+ MFI->getObjectSize(FI) != 4) continue;
+
+ if (SI->getOperand(0).isDef()) break;
+
+ if (SI->getOperand(0).isKill()) {
+ DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
+ << " removed\n");
+ EraseInstr.push_back(I);
+ }
+
+ EraseInstr.push_back(SI);
+ DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+ FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+ StackOffset -= 4;
+ StackAdjust += 4;
+ break;
+ }
+ }
+ }
+
+ // In this loop we are searching for frame indexes that correspond to
+ // incoming arguments that are in registers. We look for instruction
+ // sequences like the following:
+ //
+ // ... SWI REG, FI, 0
+ //
+ // As long as the ... part does not define REG and if REG is an incoming
+ // parameter register then we know that, according to ABI conventions, the
+ // caller has allocated stack space for it already. Instead of allocating
+ // stack space on our frame, we record the correct location in the caller's
+ // frame.
+ for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->definesRegister(LI->first))
+ break;
+
+ if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() < 0) continue;
+
+ if (I->getOperand(0).getReg() == LI->first) {
+ int FI = I->getOperand(1).getIndex();
+ MBlazeFI->recordLiveIn(FI);
+
+ int FILoc = 0;
+ switch (LI->first) {
+ default: llvm_unreachable("invalid incoming parameter!");
+ case MBlaze::R5: FILoc = -4; break;
+ case MBlaze::R6: FILoc = -8; break;
+ case MBlaze::R7: FILoc = -12; break;
+ case MBlaze::R8: FILoc = -16; break;
+ case MBlaze::R9: FILoc = -20; break;
+ case MBlaze::R10: FILoc = -24; break;
+ }
+
+ StackAdjust += 4;
+ FrameRelocate.push_back(std::make_pair(FI,FILoc));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+ break;
+ }
+ }
+ }
+
+ // Go ahead and erase all of the instructions that we determined were
+ // no longer needed.
+ for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+ MBB->erase(EraseInstr[i]);
+
+ // Replace all of the frame indexes that we have relocated with new
+ // fixed object frame indexes.
+ replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ llvm::CallingConv::ID CallConv = F->getCallingConv();
+
+ // If this function is not using either the interrupt_handler
+ // calling convention or the save_volatiles calling convention
+ // then we don't need to do any additional frame layout.
+ if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
+ CallConv != llvm::CallingConv::MBLAZE_SVOL)
+ return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ // Determine if the calling convention is the interrupt_handler
+ // calling convention. Some pieces of the prologue and epilogue
+ // only need to be emitted if we are lowering an interrupt handler.
+ bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine where to put prologue and epilogue additions
+ MachineBasicBlock &MENT = MF.front();
+ MachineBasicBlock &MEXT = MF.back();
+
+ MachineBasicBlock::iterator MENTI = MENT.begin();
+ MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+ DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+ DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+ // Store the frame indexes generated during prologue additions for use
+ // when we are generating the epilogue additions.
+ SmallVector<int, 10> VFI;
+
+ // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+ // always have a SWI because it is used when processing RMSR.
+ for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+ if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+
+ int FI = MFI->CreateStackObject(4,4,false,false);
+ VFI.push_back(FI);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+ .addFrameIndex(FI).addImm(0);
+ }
+
+ // Build the prologue SWI for R17, R18
+ int R17FI = MFI->CreateStackObject(4,4,false,false);
+ int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ // Build the prologue SWI and the epilogue LWI for RMSR if needed
+ if (isIntr) {
+ int MSRFI = MFI->CreateStackObject(4,4,false,false);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+ .addReg(MBlaze::RMSR);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+ .addReg(MBlaze::R11);
+ }
+
+ // Build the epilogue LWI for R17, R18
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ // Build the epilogue LWI for R3 - R12 if needed
+ for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+ if (!MRI.isPhysRegUsed(r)) continue;
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+ .addFrameIndex(VFI[--i]).addImm(0);
+ }
+}
+
+static void determineFrameLayout(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ // Replace the dummy '0' SPOffset by the negative offsets, as explained in
+ // LowerFORMAL_ARGUMENTS. Leaving '0' in place until now is necessary to
+ // keep calculateFrameObjectOffsets from laying these objects out itself.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+ DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ // unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // Make sure the frame is aligned.
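+ // (For example, with the default 4-byte alignment a 13-byte frame is
+ // rounded up to 16: (13 + 3) & ~3 == 16.)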
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ MFI->setStackSize(FrameSize);
+ DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
+ const {
+ const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->hasReplacement(FI))
+ FI = MBlazeFI->getReplacement(FI);
+ return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+ // swi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addReg(MBlaze::R1).addImm(FPOffset);
+ }
+
+ // lwi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ // Get the number of bytes from FrameInfo
+ int StackSize = (int) MFI->getStackSize();
+
+ // addi R1, R1, imm
+ if (StackSize) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(StackSize);
+ }
+}
+
+void MBlazeFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ if (MFI->adjustsStack() || requiresRA) {
+ MBlazeFI->setRAStackOffset(0);
+ MFI->CreateFixedObject(4,0,true);
+ }
+
+ if (hasFP(MF)) {
+ MBlazeFI->setFPStackOffset(4);
+ MFI->CreateFixedObject(4,4,true);
+ }
+
+ interruptFrameLayout(MF);
+ analyzeFrameIndexes(MF);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
new file mode 100644
index 000000000000..8be15bfb857d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -0,0 +1,53 @@
+//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze-specific subclass of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_FRAMEINFO_H
+#define MBLAZE_FRAMEINFO_H
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MBlazeSubtarget;
+
+class MBlazeFrameLowering : public TargetFrameLowering {
+protected:
+ const MBlazeSubtarget &STI;
+
+public:
+ explicit MBlazeFrameLowering(const MBlazeSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) {
+ }
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
new file mode 100644
index 000000000000..6b4349766f37
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -0,0 +1,277 @@
+//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MBlaze target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-isel"
+#include "MBlaze.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MBlazeDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MBlazeTargetMachine.
+ MBlazeTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MBlazeSubtarget &Subtarget;
+
+public:
+ explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MBlaze DAG->DAG Pattern Instruction Selection";
+ }
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MBlazeGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MBlazeTargetMachine &getTargetMachine() {
+ return static_cast<const MBlazeTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MBlazeInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Address Selection
+ bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
+
+ // getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+};
+
+}
+
+/// isIntS32Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 32-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::Constant)
+ return false;
+
+ Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
+ return isIntS32Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddrRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool MBlazeDAGToDAGISel::
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
+ if (N.getOpcode() == ISD::FrameIndex) return false;
+ if (N.getOpcode() == ISD::TargetExternalSymbol ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ int32_t imm = 0;
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ if (isIntS32Immediate(N.getOperand(1), imm))
+ return false; // r+i
+
+ if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable ||
+ N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
+ return false; // jump tables.
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 32-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
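+/// For illustration (operands hypothetical): an address of the form
+/// (add r5, 16) yields Base = r5 and Disp = 16, while a bare FrameIndex
+/// falls through to the final [r+0] case with Disp = 0 and Base rewritten
+/// to the corresponding TargetFrameIndex.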
+bool MBlazeDAGToDAGISel::
+SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddrRegReg(N, Base, Disp))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int32_t imm = 0;
+ if (isIntS32Immediate(N.getOperand(1), imm)) {
+ Disp = CurDAG->getTargetConstant(imm, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+ uint32_t Imm = CN->getZExtValue();
+ Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
+ Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
+ return true;
+ }
+
+ Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// Select - Select instructions not handled by custom lowering. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // If we have a custom node, it has already been selected.
+ if (Node->isMachineOpcode())
+ return NULL;
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch (Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ SDValue imm = CurDAG->getTargetConstant(0, MVT::i32);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = MBlaze::ADDIK;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
+ return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
+ }
+
+
+ /// Handle direct and indirect calls when using PIC. On PIC, when
+ /// GOT is smaller than about 64k (small code) the GA target is
+ /// loaded with only one instruction. Otherwise GA's target must
+ /// be loaded with 3 instructions.
+ case MBlazeISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ((isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)))
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
+ MVT::i32, MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // The call target must be placed in R20
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
+ MVT::Glue, R20Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+/// createMBlazeISelDag - This pass converts a legalized DAG into a
+/// MBlaze-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) {
+ return new MBlazeDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 000000000000..62dfdcc2fd10
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,1147 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
+ case MBlazeISD::GPRel : return "MBlazeISD::GPRel";
+ case MBlazeISD::Wrap : return "MBlazeISD::Wrap";
+ case MBlazeISD::ICmp : return "MBlazeISD::ICmp";
+ case MBlazeISD::Ret : return "MBlazeISD::Ret";
+ case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC";
+ default : return NULL;
+ }
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+ : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+
+ // MBlaze does not have an i1 type, so use i32 for the results of
+ // setcc operations (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
+ if (Subtarget->hasFPU()) {
+ addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+
+ // Floating point operations which are not supported
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+
+ // Load extended operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Sign extended loads must be expanded
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // MBlaze has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // If the processor doesn't support multiply then expand it
+ if (!Subtarget->hasMul()) {
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+ }
+
+ // If the processor doesn't support 64-bit multiply then expand
+ if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ }
+
+ // If the processor doesn't support division then expand
+ if (!Subtarget->hasDiv()) {
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ }
+
+ // Expand unsupported conversions
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Expand SELECT_CC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // MBlaze doesn't have MUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with an AND/OR on the result;
+ // we don't want that, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+ // MBlaze Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Variable Argument support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+
+ // Operations not directly supported by MBlaze.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ // MBlaze doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
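+ // Functions are aligned on 4-byte boundaries (the argument here is the
+ // log2 of the alignment in bytes).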
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(MBlaze::R1);
+ computeRegisterProperties();
+}
+
+MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+MachineBasicBlock*
+MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB)
+ const {
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+
+ case MBlaze::ShiftRL:
+ case MBlaze::ShiftRA:
+ case MBlaze::ShiftL:
+ return EmitCustomShift(MI, MBB);
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC:
+ return EmitCustomSelect(MI, MBB);
+
+ case MBlaze::CAS32:
+ case MBlaze::SWP32:
+ case MBlaze::LAA32:
+ case MBlaze::LAS32:
+ case MBlaze::LAD32:
+ case MBlaze::LAO32:
+ case MBlaze::LAX32:
+ case MBlaze::LAN32:
+ return EmitCustomAtomic(MI, MBB);
+
+ case MBlaze::MEMBARRIER:
+ // The Microblaze does not need memory barriers. Just delete the pseudo
+ // instruction and finish.
+ MI->eraseFromParent();
+ return MBB;
+ }
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring all successors and the
+ // remaining instructions from the current block to the new block,
+ // which will contain the PHI node for the result.
+ finish->splice(finish->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the true and fallthrough blocks as its successors.
+ MBB->addSuccessor(loop);
+ MBB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(MBB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable("Cannot lower unknown shift instruction");
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return finish;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
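+ // Map the condition code held in operand 4 onto the matching conditional
+ // branch opcode.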
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable("Unknown branch condition");
+ case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+ }
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Transfer the remainder of MBB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ MBB->addSuccessor(flsBB);
+ MBB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+ BuildMI(MBB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+ // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return dneBB;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // All atomic instructions on the Microblaze are implemented using the
+ // load-linked / store-conditional style atomic instruction sequences.
+ // Thus, all operations will look something like the following:
+ //
+ // start:
+ // lwx RV, RP, 0
+ // <do stuff>
+ // swx RV, RP, 0
+ // addic RC, R0, 0
+ // bneid RC, start
+ //
+ // exit:
+ //
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+
+ // Create the start and exit basic blocks for the atomic operation
+ MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, start);
+ F->insert(It, exit);
+
+ // Update machine-CFG edges by transferring all successors and the
+ // remaining instructions from the current block to the new block,
+ // which will contain the PHI node for the result.
+ exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the start block as a successor of the current block.
+ MBB->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ MachineBasicBlock *final = start;
+ unsigned finalReg = 0;
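+ // 'final' is the block that ends with the store-conditional; for CAS32 a
+ // separate block is created so the compare can branch around the store.
+ // 'finalReg' holds the value that will be written back by SWX.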
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic instruction!");
+
+ case MBlaze::SWP32:
+ finalReg = MI->getOperand(2).getReg();
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+ break;
+
+ case MBlaze::LAN32:
+ case MBlaze::LAX32:
+ case MBlaze::LAO32:
+ case MBlaze::LAD32:
+ case MBlaze::LAS32:
+ case MBlaze::LAA32: {
+ unsigned opcode = 0;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic load!");
+ case MBlaze::LAA32: opcode = MBlaze::ADDIK; break;
+ case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break;
+ case MBlaze::LAD32: opcode = MBlaze::AND; break;
+ case MBlaze::LAO32: opcode = MBlaze::OR; break;
+ case MBlaze::LAX32: opcode = MBlaze::XOR; break;
+ case MBlaze::LAN32: opcode = MBlaze::AND; break;
+ }
+
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(opcode), finalReg)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ if (MI->getOpcode() == MBlaze::LAN32) {
+ unsigned tmp = finalReg;
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
+ .addReg(tmp)
+ .addImm(-1);
+ }
+ break;
+ }
+
+ case MBlaze::CAS32: {
+ finalReg = MI->getOperand(3).getReg();
+ final = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, final);
+ start->addSuccessor(exit);
+ start->addSuccessor(final);
+ final->addSuccessor(exit);
+ final->addSuccessor(start);
+
+ unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ BuildMI(start, dl, TII->get(MBlaze::BNEID))
+ .addReg(CMP)
+ .addMBB(exit);
+
+ final->moveAfter(start);
+ exit->moveAfter(final);
+ break;
+ }
+ }
+
+ unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(final, dl, TII->get(MBlaze::SWX))
+ .addReg(finalReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK)
+ .addReg(MBlaze::R0)
+ .addImm(0);
+
+ BuildMI(final, dl, TII->get(MBlaze::BNEID))
+ .addReg(CHK)
+ .addMBB(start);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return exit;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ Opc = MBlazeISD::Select_CC;
+ CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+ .getValue(1);
+ } else {
+ llvm_unreachable("Cannot lower select_cc with unknown type");
+ }
+
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ CompareFlag);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("TLS not implemented for MicroBlaze.");
+ return SDValue(); // Not reached
+}
+
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+}
+
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ MBlaze::R5, MBlaze::R6, MBlaze::R7,
+ MBlaze::R8, MBlaze::R9, MBlaze::R10
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
+ if (!Reg) return false;
+
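+ // In addition to the register, reserve stack space of the same size for
+ // this argument; the convention used here keeps a stack slot for every
+ // register argument.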
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - function arguments are copied from virtual regs to
+/// physical regs or the stack frame; CALLSEQ_START and CALLSEQ_END are
+/// emitted. TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MBlaze does not yet support tail call optimization
+ isTailCall = false;
+
+ // The MBlaze requires stack slots for arguments passed to var arg
+ // functions even if they are passed in registers.
+ bool needsRegArgSlots = isVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Variable argument function calls require a minimum of 24 bytes of stack
+ if (isVarArg && NumBytes < 24) NumBytes = 24;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Register can't get to this point...
+ assert(VA.isMemLoc());
+
+ // Since we are already passing values on the stack we don't
+ // need to worry about creating additional slots for the
+ // values passed via registers.
+ needsRegArgSlots = false;
+
+ // Create the frame index object for this incoming parameter
+ unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // Emit an ISD::STORE that stores the
+ // parameter value to a stack location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // If we need to reserve stack space for the arguments passed via registers
+ // then create a fixed stack object at the beginning of the stack.
+ if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+ MFI->CreateFixedObject(28,0,true);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, 0);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), 0);
+
+ // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
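+ // The glue result of CALLSEQ_END is only needed if there are return
+ // values to copy out of physical registers.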
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+ MBlazeFI->setVarArgsFrameIndex(0);
+
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Keep track of the last register used for arguments
+ unsigned ArgRegEnd = 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
+ SDValue StackPtr;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ ArgRegEnd = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = MBlaze::GPRRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = MBlaze::GPRRegisterClass;
+ else
+ llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size. If it is a floating point value
+ // then convert to the correct type.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // VA.isMemLoc()
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The last argument is not a register
+ ArgRegEnd = 0;
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+ // size gets known (on EliminateFrameIndex). A dummy SPOffset is
+ // used instead of a direct negative address (which is recorded to
+ // be used on emitPrologue) to avoid miscalculating the first stack
+ // offset in PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
+ MBlazeFI->recordLoadArgsFI(FI, -StackLoc);
+ MBlazeFI->recordLiveIn(FI);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ // To meet the ABI, when varargs are passed in registers, the registers
+ // must have their values written to the caller's stack frame. If the last
+ // argument was placed on the stack, there's no need to save any register.
+ if ((isVarArg) && ArgRegEnd) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // The last register argument that must be saved is MBlaze::R10
+ TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
+
+ unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
+ unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
+ unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
+ unsigned StackLoc = Start - Begin + 1;
+
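+ // Copy each remaining argument register (ArgRegEnd+1 .. R10) into its own
+ // fixed stack slot; StackLoc is the slot index in words.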
+ for (; Start <= End; ++Start, ++StackLoc) {
+ unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
+
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ if (!MBlazeFI->getVarArgsFrameIndex())
+ MBlazeFI->setVarArgsFrameIndex(FI);
+ }
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens on vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represents the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // so they are not scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+
+ if (Flag.getNode())
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // MBlaze-specific constraints
+ //
+ // 'd' : An address register. Equivalent to r.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MBlazeTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ // TODO: These can't possibly be right, but match what was in
+ // getRegClassForInlineAsmConstraint.
+ case 'd':
+ case 'y':
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The MBlaze target isn't yet aware of offsets.
+ return false;
+}
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return VT != MVT::f32;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
new file mode 100644
index 000000000000..bb128da3c7c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -0,0 +1,185 @@
+//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeISELLOWERING_H
+#define MBlazeISELLOWERING_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+
+namespace llvm {
+ namespace MBlazeCC {
+ enum CC {
+ FIRST = 0,
+ EQ,
+ NE,
+ GT,
+ LT,
+ GE,
+ LE
+ };
+
+ inline static CC getOppositeCondition(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case GT: return LE;
+ case LT: return GE;
+ case GE: return LT;
+ case LE: return GT;
+ }
+ }
+
+ inline static const char *MBlazeCCToString(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case GT: return "gt";
+ case LT: return "lt";
+ case GE: return "ge";
+ case LE: return "le";
+ }
+ }
+ }
+
+ namespace MBlazeISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Select CC Pseudo Instruction
+ Select_CC,
+
+ // Wrap up multiple types of instructions
+ Wrap,
+
+ // Integer Compare
+ ICmp,
+
+ // Return from subroutine
+ Ret,
+
+ // Return from interrupt
+ IRet
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MBlazeTargetLowering : public TargetLowering {
+ public:
+ explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ private:
+ // Subtarget Info
+ const MBlazeSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ };
+}
+
+#endif // MBlazeISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
new file mode 100644
index 000000000000..4acdcfdd772c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -0,0 +1,219 @@
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>;
+
+class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>;
+
+class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class LogicFI<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+let rb=0 in {
+ class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPU Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+let Predicates=[HasFPU] in {
+ def FORI : LogicFI<0x28, "ori ">;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>;
+}
+
+let Predicates=[HasFPU], isCodeGenOnly=1 in {
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x3A, "lwi ", load>;
+
+ def SWF : StoreFM<0x36, "sw ", store>;
+ def SWFI : StoreFMI<0x3E, "swi ", store>;
+}
+
+let Predicates=[HasFPU,HasSqrt] in {
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>;
+ def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>;
+}
+
+
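+// Select_FCC is expanded by EmitCustomSelect (MBlazeISelLowering.cpp) into a
+// diamond of basic blocks, just like Select_CC.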
+let usesCustomInserter = 1 in {
+ def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
+ "; SELECT_FCC PSEUDO!",
+ []>;
+}
+
+// Floating point conversions
+let Predicates=[HasFPU] in {
+ def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+ def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+ def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
+}
+
+// SET_CC operations
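+// Each floating point setcc is matched to a Select_CC that picks 1 or 0
+// (materialized with ADDIK from R0) based on the corresponding fcmp result.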
+let Predicates=[HasFPU] in {
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (XOR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_NE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 2)>;
+}
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+ (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+//===----------------------------------------------------------------------===//
+// Patterns for Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
new file mode 100644
index 000000000000..3082a7e227f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -0,0 +1,229 @@
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FSL Instruction Formats
+//===----------------------------------------------------------------------===//
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg>
+{
+ bits<5> rd;
+ bits<4> fslno;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x0;
+ let Inst{17-21} = flags; // NCTAE
+ let Inst{22-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg>
+{
+ bits<5> rd;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x0;
+ let Inst{22-26} = flags; // NCTAE
+ let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, GPR:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIC_FSLp>
+{
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCR, (outs), (ins GPR:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode GPR:$b)], IIC_FSLp>
+{
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Get Instructions
+//===----------------------------------------------------------------------===//
+def GET : FSLGet<0x1B, 0x00, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, 0x02, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, 0x08, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, 0x0A, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, 0x01, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, 0x03, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, 0x09, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, 0x0B, "ecaget ", int_mblaze_fsl_ecaget>;
+def TGET : FSLGet<0x1B, 0x04, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, 0x06, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, 0x0C, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, 0x0E, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, 0x05, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, 0x07, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, 0x0D, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, 0x0F, "tecaget ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGET : FSLGet<0x1B, 0x10, "nget ", int_mblaze_fsl_nget>;
+ def NAGET : FSLGet<0x1B, 0x12, "naget ", int_mblaze_fsl_naget>;
+ def NCGET : FSLGet<0x1B, 0x18, "ncget ", int_mblaze_fsl_ncget>;
+ def NCAGET : FSLGet<0x1B, 0x1A, "ncaget ", int_mblaze_fsl_ncaget>;
+ def NEGET : FSLGet<0x1B, 0x11, "neget ", int_mblaze_fsl_neget>;
+ def NEAGET : FSLGet<0x1B, 0x13, "neaget ", int_mblaze_fsl_neaget>;
+ def NECGET : FSLGet<0x1B, 0x19, "necget ", int_mblaze_fsl_necget>;
+ def NECAGET : FSLGet<0x1B, 0x1B, "necaget ", int_mblaze_fsl_necaget>;
+ def TNGET : FSLGet<0x1B, 0x14, "tnget ", int_mblaze_fsl_tnget>;
+ def TNAGET : FSLGet<0x1B, 0x16, "tnaget ", int_mblaze_fsl_tnaget>;
+ def TNCGET : FSLGet<0x1B, 0x1C, "tncget ", int_mblaze_fsl_tncget>;
+ def TNCAGET : FSLGet<0x1B, 0x1E, "tncaget ", int_mblaze_fsl_tncaget>;
+ def TNEGET : FSLGet<0x1B, 0x15, "tneget ", int_mblaze_fsl_tneget>;
+ def TNEAGET : FSLGet<0x1B, 0x17, "tneaget ", int_mblaze_fsl_tneaget>;
+ def TNECGET : FSLGet<0x1B, 0x1D, "tnecget ", int_mblaze_fsl_tnecget>;
+ def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Get Instructions
+//===----------------------------------------------------------------------===//
+def GETD : FSLGetD<0x13, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x13, 0x02, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x13, 0x08, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x13, 0x0A, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x13, 0x01, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x13, 0x03, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x13, 0x09, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x13, 0x0B, "ecagetd ", int_mblaze_fsl_ecaget>;
+def TGETD : FSLGetD<0x13, 0x04, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x13, 0x06, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x13, 0x0C, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x13, 0x0E, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x13, 0x05, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x13, 0x07, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x13, 0x0D, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGETD : FSLGetD<0x13, 0x10, "ngetd ", int_mblaze_fsl_nget>;
+ def NAGETD : FSLGetD<0x13, 0x12, "nagetd ", int_mblaze_fsl_naget>;
+ def NCGETD : FSLGetD<0x13, 0x18, "ncgetd ", int_mblaze_fsl_ncget>;
+ def NCAGETD : FSLGetD<0x13, 0x1A, "ncagetd ", int_mblaze_fsl_ncaget>;
+ def NEGETD : FSLGetD<0x13, 0x11, "negetd ", int_mblaze_fsl_neget>;
+ def NEAGETD : FSLGetD<0x13, 0x13, "neagetd ", int_mblaze_fsl_neaget>;
+ def NECGETD : FSLGetD<0x13, 0x19, "necgetd ", int_mblaze_fsl_necget>;
+ def NECAGETD : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+ def TNGETD : FSLGetD<0x13, 0x14, "tngetd ", int_mblaze_fsl_tnget>;
+ def TNAGETD : FSLGetD<0x13, 0x16, "tnagetd ", int_mblaze_fsl_tnaget>;
+ def TNCGETD : FSLGetD<0x13, 0x1C, "tncgetd ", int_mblaze_fsl_tncget>;
+ def TNCAGETD : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+ def TNEGETD : FSLGetD<0x13, 0x15, "tnegetd ", int_mblaze_fsl_tneget>;
+ def TNEAGETD : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+ def TNECGETD : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+ def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Put Instructions
+//===----------------------------------------------------------------------===//
+def PUT : FSLPut<0x1B, 0x0, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, 0x1, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, 0x4, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, 0x5, "caput ", int_mblaze_fsl_caput>;
+def TPUT : FSLPutT<0x1B, 0x2, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, 0x3, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, 0x6, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, 0x7, "tcaput ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUT : FSLPut<0x1B, 0x8, "nput ", int_mblaze_fsl_nput>;
+ def NAPUT : FSLPut<0x1B, 0x9, "naput ", int_mblaze_fsl_naput>;
+ def NCPUT : FSLPut<0x1B, 0xC, "ncput ", int_mblaze_fsl_ncput>;
+ def NCAPUT : FSLPut<0x1B, 0xD, "ncaput ", int_mblaze_fsl_ncaput>;
+ def TNPUT : FSLPutT<0x1B, 0xA, "tnput ", int_mblaze_fsl_tnput>;
+ def TNAPUT : FSLPutT<0x1B, 0xB, "tnaput ", int_mblaze_fsl_tnaput>;
+ def TNCPUT : FSLPutT<0x1B, 0xE, "tncput ", int_mblaze_fsl_tncput>;
+ def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput ", int_mblaze_fsl_tncaput>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Put Instructions
+//===----------------------------------------------------------------------===//
+def PUTD : FSLPutD<0x13, 0x0, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x13, 0x1, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x13, 0x4, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x13, 0x5, "caputd ", int_mblaze_fsl_caput>;
+def TPUTD : FSLPutTD<0x13, 0x2, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x13, 0x3, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x13, 0x6, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x13, 0x7, "tcaputd ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUTD : FSLPutD<0x13, 0x8, "nputd ", int_mblaze_fsl_nput>;
+ def NAPUTD : FSLPutD<0x13, 0x9, "naputd ", int_mblaze_fsl_naput>;
+ def NCPUTD : FSLPutD<0x13, 0xC, "ncputd ", int_mblaze_fsl_ncput>;
+ def NCAPUTD : FSLPutD<0x13, 0xD, "ncaputd ", int_mblaze_fsl_ncaput>;
+ def TNPUTD : FSLPutTD<0x13, 0xA, "tnputd ", int_mblaze_fsl_tnput>;
+ def TNAPUTD : FSLPutTD<0x13, 0xB, "tnaputd ", int_mblaze_fsl_tnaput>;
+ def TNCPUTD : FSLPutTD<0x13, 0xE, "tncputd ", int_mblaze_fsl_tncput>;
+ def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
new file mode 100644
index 000000000000..54f605f989a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -0,0 +1,204 @@
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution our machine code emitter uses to emit machine instruction
+// encodings.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR : Format<1>; // ADD, OR, etc.
+def FRRI : Format<2>; // ADDI, ORI, etc.
+def FCRR : Format<3>; // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI : Format<4>; // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR : Format<5>; // BRLD, BRALD, GETD
+def FRCI : Format<6>; // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR : Format<7>; // BR, BRA, BRD, etc.
+def FCCI : Format<8>; // IMM, BRI, BRAI, BRID, etc.
+def FRRCI : Format<9>; // BSRLI, BSRAI, BSLLI
+def FRRC : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX : Format<11>; // GET
+def FRCS : Format<12>; // MFS
+def FCRCS : Format<13>; // MTS
+def FCRCX : Format<14>; // PUT
+def FCX : Format<15>; // TPUT
+def FCR : Format<16>; // TPUTD
+def FRIR : Format<17>; // RSUBI
+def FRRRR : Format<18>; // RSUB, FRSUB
+def FRI : Format<19>; // RSUB, FRSUB
+def FC : Format<20>; // NOP
+
+//===----------------------------------------------------------------------===//
+// Describe MBlaze instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rd - dst reg.
+// ra - first src. reg.
+// rb - second src. reg.
+// imm16 - 16-bit immediate value.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic MBlaze Format
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> : Instruction {
+ let Namespace = "MBlaze";
+ field bits<32> Inst;
+
+ bits<6> opcode = op;
+ Format Form = form;
+ bits<6> FormBits = Form.Value;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{0-5} = opcode;
+
+ // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+ // assembler and disassembler ignore it.
+ let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class
+//===----------------------------------------------------------------------===//
+class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>;
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+
+class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze but with the operands reversed
+// in the LLVM DAG : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ TA<op, flags, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rrd;
+ bits<5> rrb;
+ bits<5> rra;
+
+ let Form = FRRRR;
+
+ let rd = rrd;
+ let ra = rra;
+ let rb = rrb;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ TB<op, outs, ins, asmstr, pattern, itin> {
+ bits<5> rrd;
+ bits<16> rimm16;
+ bits<5> rra;
+
+ let Form = FRIR;
+
+ let rd = rrd;
+ let ra = rra;
+ let imm16 = rimm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> imm5;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0x0;
+ let Inst{21-22} = flags;
+ let Inst{23-26} = 0x0;
+ let Inst{27-31} = imm5;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instruction class in MBlaze : <|opcode|rd|imm14|>
+//===----------------------------------------------------------------------===//
+class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<14> imm14;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-17} = flags;
+ let Inst{18-31} = imm14;
+}
+
+//===----------------------------------------------------------------------===//
+// MSR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<15> imm15;
+
+ let Inst{6-10} = rd;
+ let Inst{11-16} = flags;
+ let Inst{17-31} = imm15;
+}
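Note: MBlazeInst stores the instruction format in TSFlags{5-0}, and the matching MBlazeII::FormMask (value 63) appears in MBlazeInstrInfo.h later in this diff. A minimal sketch, not part of the diff, of how a client would recover the format:

#include <cstdint>

// Minimal sketch: recover the instruction format from TSFlags, mirroring
// "let TSFlags{5-0} = FormBits" above; 0x3F corresponds to MBlazeII::FormMask
// from MBlazeInstrInfo.h further down in this diff.
static unsigned getInstFormat(uint64_t TSFlags) {
  return unsigned(TSFlags & 0x3F);   // yields FPseudo, FRRR, FRRI, ...
}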
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
new file mode 100644
index 000000000000..188f10a3972e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -0,0 +1,296 @@
+//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MBlazeGenInstrInfo.inc"
+
+using namespace llvm;
+
+MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
+ : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MBlazeInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::LWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MBlazeInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::SWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target
+/// nop instruction.
+void MBlazeInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(MBlaze::NOP));
+}
+
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
+}
+
+void MBlazeInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+void MBlazeInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (MBlaze::isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (MBlaze::isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with something like BEQID then BRID, handle it.
+ if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it.
+ // The second one is not executed, so remove it.
+ if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned MBlazeInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "MBlaze branch conditions have two components!");
+
+ unsigned Opc = MBlaze::BRID;
+ if (!Cond.empty())
+ Opc = (unsigned)Cond[0].getImm();
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+
+ if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) &&
+ !MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool MBlazeInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
+ switch (Cond[0].getImm()) {
+ default: return true;
+ case MBlaze::BEQ: Cond[0].setImm(MBlaze::BNE); return false;
+ case MBlaze::BNE: Cond[0].setImm(MBlaze::BEQ); return false;
+ case MBlaze::BGT: Cond[0].setImm(MBlaze::BLE); return false;
+ case MBlaze::BGE: Cond[0].setImm(MBlaze::BLT); return false;
+ case MBlaze::BLT: Cond[0].setImm(MBlaze::BGE); return false;
+ case MBlaze::BLE: Cond[0].setImm(MBlaze::BGT); return false;
+ case MBlaze::BEQI: Cond[0].setImm(MBlaze::BNEI); return false;
+ case MBlaze::BNEI: Cond[0].setImm(MBlaze::BEQI); return false;
+ case MBlaze::BGTI: Cond[0].setImm(MBlaze::BLEI); return false;
+ case MBlaze::BGEI: Cond[0].setImm(MBlaze::BLTI); return false;
+ case MBlaze::BLTI: Cond[0].setImm(MBlaze::BGEI); return false;
+ case MBlaze::BLEI: Cond[0].setImm(MBlaze::BGTI); return false;
+ case MBlaze::BEQD: Cond[0].setImm(MBlaze::BNED); return false;
+ case MBlaze::BNED: Cond[0].setImm(MBlaze::BEQD); return false;
+ case MBlaze::BGTD: Cond[0].setImm(MBlaze::BLED); return false;
+ case MBlaze::BGED: Cond[0].setImm(MBlaze::BLTD); return false;
+ case MBlaze::BLTD: Cond[0].setImm(MBlaze::BGED); return false;
+ case MBlaze::BLED: Cond[0].setImm(MBlaze::BGTD); return false;
+ case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false;
+ case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false;
+ case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false;
+ case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false;
+ case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false;
+ case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false;
+ }
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+ unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(MBlaze::R20);
+ RegInfo.addLiveIn(MBlaze::R20);
+
+ MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
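Note: the branch hooks above follow the usual AnalyzeBranch/RemoveBranch/InsertBranch contract. A hedged usage sketch, not taken from this diff (the helper name retargetBlock is invented, and the usual llvm/CodeGen includes are assumed), showing how a pass could rewrite a block's terminators with exactly the signatures defined above:

// Hedged usage sketch (helper name is hypothetical): rewrite MBB so it ends
// with a single unconditional branch to NewDest, using the hooks above.
static void retargetBlock(const MBlazeInstrInfo &TII, MachineBasicBlock &MBB,
                          MachineBasicBlock *NewDest) {
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 2> Cond;
  if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/ false))
    return;                      // terminator sequence was not understood
  TII.RemoveBranch(MBB);         // drop the old terminators
  Cond.clear();                  // empty Cond => unconditional BRID
  TII.InsertBranch(MBB, NewDest, 0, Cond, DebugLoc());
}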
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
new file mode 100644
index 000000000000..79f962b349bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -0,0 +1,296 @@
+//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTRUCTIONINFO_H
+#define MBLAZEINSTRUCTIONINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MBlazeRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MBlazeGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace MBlaze {
+
+ // MBlaze Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // MBlaze Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // These conditions have the same mnemonics as the
+ // ones above, but are used with a branch False.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_EQ,
+ COND_GT,
+ COND_GE,
+ COND_LT,
+ COND_LE,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ inline static unsigned GetCondBranchFromCond(CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case COND_EQ: return MBlaze::BEQID;
+ case COND_NE: return MBlaze::BNEID;
+ case COND_GT: return MBlaze::BGTID;
+ case COND_GE: return MBlaze::BGEID;
+ case COND_LT: return MBlaze::BLTID;
+ case COND_LE: return MBlaze::BLEID;
+ }
+ }
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_EQ into COND_NE.
+ // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+
+ /// MBlazeFCCToString - Map each FP condition code to its mnemonic string.
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+
+ inline static bool isUncondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BRI:
+ case MBlaze::BRAI:
+ case MBlaze::BRID:
+ case MBlaze::BRAID:
+ return true;
+ }
+ }
+
+ inline static bool isCondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BEQI: case MBlaze::BEQID:
+ case MBlaze::BNEI: case MBlaze::BNEID:
+ case MBlaze::BGTI: case MBlaze::BGTID:
+ case MBlaze::BGEI: case MBlaze::BGEID:
+ case MBlaze::BLTI: case MBlaze::BLTID:
+ case MBlaze::BLEI: case MBlaze::BLEID:
+ return true;
+ }
+ }
+}
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ enum {
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FormMask = 63
+
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ // MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ // MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ // MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ // MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ // MO_ABS_HILO
+
+ };
+}
+
+class MBlazeInstrInfo : public MBlazeGenInstrInfo {
+ MBlazeTargetMachine &TM;
+ const MBlazeRegisterInfo RI;
+public:
+ explicit MBlazeInstrInfo(MBlazeTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stack slot stored to. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Insert a nop instruction when a hazard condition is found.
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
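Note: AnalyzeBranch in the .cpp above encodes a condition as two operands, the branch opcode as an immediate followed by the tested register, and ReverseBranchCondition rewrites the opcode in place. A small hedged sketch of that convention (the register choice R5 and the helper name are purely illustrative, and the usual llvm/CodeGen includes are assumed):

// Hedged sketch: build the two-operand condition that AnalyzeBranch produces
// and InsertBranch/ReverseBranchCondition consume (register R5 is arbitrary).
static void buildExampleCond(SmallVectorImpl<MachineOperand> &Cond) {
  Cond.push_back(MachineOperand::CreateImm(MBlaze::BEQID)); // Cond[0]: opcode
  Cond.push_back(MachineOperand::CreateReg(MBlaze::R5,      // Cond[1]: register
                                           /*isDef=*/ false));
  // ReverseBranchCondition(Cond) would rewrite Cond[0] to MBlaze::BNEID.
}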
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
new file mode 100644
index 000000000000..950f2d77029b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -0,0 +1,884 @@
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze type profiles
+//===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeIRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
+
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
+ SDNPVariadic]>;
+
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
+// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+def HasDiv : Predicate<"Subtarget.hasDiv()">;
+def HasMul : Predicate<"Subtarget.hasMul()">;
+// def HasFSL : Predicate<"Subtarget.hasFSL()">;
+// def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+// def HasException : Predicate<"Subtarget.hasException()">;
+def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
+def HasFPU : Predicate<"Subtarget.hasFPU()">;
+// def HasESR : Predicate<"Subtarget.hasESR()">;
+// def HasPVR : Predicate<"Subtarget.hasPVR()">;
+def HasMul64 : Predicate<"Subtarget.hasMul64()">;
+def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
+// def HasMMU : Predicate<"Subtarget.hasMMU()">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+def MBlazeMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+ let Name = "Fsl";
+ let SuperClasses = [];
+}
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def uimm5 : Operand<i32>;
+def uimm15 : Operand<i32>;
+def fimm : Operand<f32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// FSL Operand
+def fslimm : Operand<i32> {
+ let PrintMethod = "printFSLImm";
+ let ParserMatchClass = MBlazeFslAsmOperand;
+}
+
+// Address operand
+def memri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, simm16);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+def memrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, GPR);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+// Node immediate fits as a 16-bit sign-extended target immediate.
+def immSExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// Node immediate fits as a 16-bit zero-extended target immediate; only the
+// lower 16 bits of the node immediate are used.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// FSL immediate field must fit in 4 bits.
+def immZExt4 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
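Note: the zero-extension predicates above all reduce to the same masking check on the node's zero-extended value. A plain-integer restatement, purely for illustration:

#include <cstdint>

// Plain-integer restatement of the PatLeaf checks above (illustrative only):
// immZExt4 accepts 0..15, immZExt5 accepts 0..31, immZExt16 accepts 0..65535.
static bool fitsZeroExtended(uint64_t V, unsigned Bits) {
  return V == (V & ((1ULL << Bits) - 1));
}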
+
+// MBlaze Address Mode. An SDNode frameindex could possibly be a match
+// since stack load and store instructions use it.
+def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
+def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack adjustment is always done with addik, we need a 16-bit immediate
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
+ "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MBlazePseudo<(outs),
+ (ins uimm16:$amt1, simm16:$amt2),
+ "#ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>;
+
+class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>;
+
+class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>;
+
+class ArithN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Misc Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+ IIC_ALU>;
+
+class LogicI32<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1 in {
+class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMl>;
+}
+
+class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+let mayStore = 1 in {
+class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMs>;
+}
+
+class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0x0;
+ let ra = br;
+ let Form = FCCR;
+}
+
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0;
+ let ra = br;
+ let Form = FCCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch and Link Instructions
+//===----------------------------------------------------------------------===//
+class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCR;
+}
+
+class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Branch Instructions
+//===----------------------------------------------------------------------===//
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs),
+ (ins GPR:$a, GPR:$b),
+ !strconcat(instr_asm, " $a, $b"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRR;
+}
+
+class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRI;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAsCheapAsAMove = 1 in {
+ def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+
+ let Predicates=[HasPatCmp] in {
+ def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+ def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+ def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+ }
+
+ let Defs = [CARRY] in {
+ def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>;
+ }
+}
+
+let isAsCheapAsAMove = 1 in {
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>;
+
+ let Defs = [CARRY] in {
+ def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>;
+ }
+}
+
+let isCommutable = 1, Predicates=[HasMul] in {
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>;
+}
+
+let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>;
+}
+
+let Predicates=[HasMul,HasMul64] in {
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>;
+}
+
+let Predicates=[HasBarrel] in {
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>;
+ def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>;
+}
+
+let Predicates=[HasDiv] in {
+ def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>;
+ def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze immediate mode arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isAsCheapAsAMove = 1 in {
+ def ADDIK : ArithI<0x0C, "addik ", add, simm16, immSExt16>;
+ def RSUBIK : ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+
+ let Defs = [CARRY] in {
+ def ADDI : ArithI<0x08, "addi ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", subc, simm16, immSExt16>;
+
+ let Uses = [CARRY] in {
+ def ADDIC : ArithI<0x0A, "addic ", adde, simm16, immSExt16>;
+ def RSUBIC : ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDIKC : ArithNI<0x0E, "addikc ", simm16, immSExt16>;
+ def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>;
+ }
+}
+
+let Predicates=[HasMul] in {
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze memory access instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
+
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
+
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
+
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
+
+ def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x39, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x3A, "lwi ", load>;
+}
+
+def SB : StoreM<0x34, 0x000, "sb ">;
+def SBR : StoreM<0x34, 0x200, "sbr ">;
+
+def SH : StoreM<0x35, 0x000, "sh ">;
+def SHR : StoreM<0x35, 0x200, "shr ">;
+
+def SW : StoreM<0x36, 0x000, "sw ">;
+def SWR : StoreM<0x36, 0x200, "swr ">;
+
+let Defs = [CARRY] in {
+ def SWX : StoreM<0x36, 0x400, "swx ">;
+}
+
+def SBI : StoreMI<0x3C, "sbi ", truncstorei8>;
+def SHI : StoreMI<0x3D, "shi ", truncstorei16>;
+def SWI : StoreMI<0x3E, "swi ", store>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze branch instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ">;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ">;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ">;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ">;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ">;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ">;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ">;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ">;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ">;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ">;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ">;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ">;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ">;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ">;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ">;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ">;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ">;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ">;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ">;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ">;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ">;
+}
+
+let isCall =1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x10, Form=FCRI in {
+ def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtsd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x11, Form=FCRI in {
+ def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtid $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x12, Form=FCRI in {
+ def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtbd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x14, Form=FCRI in {
+ def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rted $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze misc instructions
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select_CC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
+ "; SELECT_CC PSEUDO!",
+ []>;
+
+ def ShiftL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftL PSEUDO!",
+ []>;
+
+ def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRA PSEUDO!",
+ []>;
+
+ def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRL PSEUDO!",
+ []>;
+}
+
+let rb = 0 in {
+ def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+ "sext16 $dst, $src", [], IIC_ALU>;
+ def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+ "sext8 $dst, $src", [], IIC_ALU>;
+ let Defs = [CARRY] in {
+ def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+ "srl $dst, $src", [], IIC_ALU>;
+ def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+ "sra $dst, $src", [], IIC_ALU>;
+ let Uses = [CARRY] in {
+ def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+ "src $dst, $src", [], IIC_ALU>;
+ }
+ }
+}
+
+let isCodeGenOnly=1 in {
+ def ADDIK32 : ArithI32<0x08, "addik ", simm16, immSExt16>;
+ def ORI32 : LogicI32<0x28, "ori ">;
+ def BRLID32 : BranchLI<0x2E, 0x14, "brlid ">;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. instructions
+//===----------------------------------------------------------------------===//
+let Form=FRCS in {
+ def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
+ "mfs $dst, $src", [], IIC_ALU>;
+}
+
+let Form=FCRCS in {
+ def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
+ "mts $dst, $src", [], IIC_ALU>;
+}
+
+def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
+ "msrset $dst, $set", [], IIC_ALU>;
+
+def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
+ "msrclr $dst, $clr", [], IIC_ALU>;
+
+let rd=0x0, Form=FCRR in {
+ def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
+ "wdc $a, $b", [], IIC_WDC>;
+ def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.flush $a, $b", [], IIC_WDC>;
+ def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.clear $a, $b", [], IIC_WDC>;
+ def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
+ "wic $a, $b", [], IIC_WDC>;
+}
+
+def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">;
+def BRKI : BranchLI<0x2E, 0x0C, "brki ">;
+
+def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
+ "imm $imm", [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for atomic operations
+//===----------------------------------------------------------------------===//
+let usesCustomInserter=1 in {
+ def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp),
+ "# atomic compare and swap",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>;
+
+ def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp),
+ "# atomic swap",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>;
+
+ def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and add",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and sub",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and and",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and or",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and xor",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and nand",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
+
+ def MEMBARRIER : MBlazePseudo<(outs), (ins),
+ "# memory barrier",
+ [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>;
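+// These immediate patterns work because r0 always reads as zero, so adding or
+// or-ing a value into r0 simply materializes that constant. Values wider than
+// 16 bits presumably rely on the IMM prefix added at encoding time (see the
+// IMM instruction above and the MC code emitter).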
+
+// In register sign extension
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8), (SEXT8 GPR:$src)>;
+
+// Call
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)),
+ (BRLID (i32 R15), tglobaladdr:$dst)>;
+
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),
+ (BRLID (i32 R15), texternalsym:$dst)>;
+
+def : Pat<(MBlazeJmpLink GPR:$dst),
+ (BRALD (i32 R15), GPR:$dst)>;
+
+// Shift Instructions
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
+
+// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 6)>;
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+ (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
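+// A plain select on a boolean is thus lowered as "condition != 0" (CC code 2).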
+
+// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>;
+
+// Ret instructions
+def : Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>;
+def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>;
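+// rtsd returns to $target + 8, skipping the branch-and-link instruction and
+// its delay slot; rtid uses an offset of 0 for interrupt returns.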
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
+
+// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+ (BNEID GPR:$C, bb:$T)>;
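+// A bare i32 condition falls back to a branch-if-not-zero on the value itself.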
+
+// Jump tables, global addresses, and constant pools
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>;
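+// r0 reads as zero, so ORI against r0 materializes the lowered symbol address.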
+
+// Misc instructions
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8 xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
+
+// 32-bit load and store
+def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
+def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
+
+// 16-bit load and store
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
+
+// 8-bit load and store
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
+
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
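+// Storing a constant zero reuses r0 directly instead of materializing 0 first.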
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFSL.td"
+include "MBlazeInstrFPU.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
new file mode 100644
index 000000000000..32d67b264a20
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -0,0 +1,113 @@
+//===-- MBlazeIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace mblazeIntrinsic {
+
+ enum ID {
+ last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_mblaze_intrinsics
+ };
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+}
+
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return "";
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupGCCName(const char *Name) const {
+ return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name);
+}
+
+bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static const FunctionType *getType(LLVMContext &Context, unsigned id) {
+ const Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ const Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
new file mode 100644
index 000000000000..9804c7723bee
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -0,0 +1,33 @@
+//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MBLAZEINTRINSICS_H
+#define MBLAZEINTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ unsigned lookupGCCName(const char *Name) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
new file mode 100644
index 000000000000..278afbefc165
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -0,0 +1,131 @@
+//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the MicroBlaze-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all MBlaze intrinsics.
+//
+
+// MBlaze intrinsic classes.
+let TargetPrefix = "mblaze", isTarget = 1 in {
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Get Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">,
+ MBFSL_Get_Intrinsic;
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Put Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">,
+ MBFSL_PutT_Intrinsic;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
new file mode 100644
index 000000000000..ddc636d0ce64
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
@@ -0,0 +1,222 @@
+//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class MBlazeMCCodeEmitter : public MCCodeEmitter {
+ MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+
+public:
+ MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii) {
+ }
+
+ ~MBlazeMCCodeEmitter() {}
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const;
+ unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ static unsigned GetMBlazeRegNum(const MCOperand &MO) {
+ // FIXME: getMBlazeRegisterNumbering() is sufficient?
+ assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
+ return 0;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ // The MicroBlaze uses a bit reversed format so we need to reverse the
+ // order of the bits. Taken from:
+ // http://graphics.stanford.edu/~seander/bithacks.html
+ C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
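+ // (The multiply/mask sequence above reverses the eight bits of C.)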
+
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const;
+ void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const;
+
+ void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO) const {
+ if (MO.isReg())
+ return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable(0);
+ }
+ return 0;
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const {
+ int32_t val = (int32_t)imm.getImm();
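+ // Immediates outside the signed 16-bit range need an IMM prefix; the two
+ // raw bytes written below carry the upper halfword of the value, and the
+ // instruction that follows is expected to encode the lower halfword.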
+ if (val > 32767 || val < -32768) {
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte((val >> 24) & 0xFF, CurByte, OS);
+ EmitRawByte((val >> 16) & 0xFF, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const {
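+ // The 32-bit codegen-only forms always reserve space for an IMM prefix
+ // here; the real upper halfword is presumably patched in via the 4-byte
+ // fixups recorded in EmitImmediate.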
+ switch (MI.getOpcode()) {
+ default: break;
+
+ case MBlaze::ADDIK32:
+ case MBlaze::ORI32:
+ case MBlaze::BRLID32:
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+ raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getNumOperands()>opNo && "Not enough operands for instruction");
+
+ MCOperand oper = MI.getOperand(opNo);
+
+ if (oper.isImm()) {
+ EmitIMM(oper, CurByte, OS);
+ } else if (oper.isExpr()) {
+ MCFixupKind FixupKind;
+ switch (MI.getOpcode()) {
+ default:
+ FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ case MBlaze::ORI32:
+ case MBlaze::ADDIK32:
+ case MBlaze::BRLID32:
+ FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ }
+ }
+}
+
+
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Emit an IMM instruction if the instruction we are encoding requires it
+ EmitIMM(MI,CurByte,OS);
+
+ switch ((TSFlags & MBlazeII::FormMask)) {
+ default: break;
+ case MBlazeII::FPseudo:
+ // Pseudo instructions don't get encoded.
+ return;
+ case MBlazeII::FRRI:
+ EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRIR:
+ EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCRI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRCI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCCI:
+ EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+ break;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted
+ unsigned Value = getBinaryCodeForInstr(MI);
+ EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 000000000000..a7e400b1d1a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ bool ignored;
+ APFloat FVal = MO.getFPImm()->getValueAPF();
+ FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+ APInt IVal = FVal.bitcastToAPInt();
+ uint64_t Val = *IVal.getRawData();
+ MCOp = MCOperand::CreateImm(Val);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
new file mode 100644
index 000000000000..92196f220225
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+
+ /// MBlazeMCInstLower - This class is used to lower a MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
+ MCContext &Ctx;
+ Mangler &Mang;
+
+ AsmPrinter &Printer;
+public:
+ MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ : Ctx(ctx), Mang(mang), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
new file mode 100644
index 000000000000..df395094282f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -0,0 +1,170 @@
+//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
+#define MBLAZE_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MBlazeFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private MBlaze target-specific information for each
+/// MachineFunction.
+class MBlazeFunctionInfo : public MachineFunctionInfo {
+
+private:
+ /// Holds for each function where on the stack the Frame Pointer must be
+ /// saved. This is used on Prologue and Epilogue to emit FP save/restore
+ int FPStackOffset;
+
+ /// Holds for each function where on the stack the Return Address must be
+ /// saved. This is used on Prologue and Epilogue to emit RA save/restore
+ int RAStackOffset;
+
+ /// MBlazeFIHolder - Holds a FrameIndex and its Stack Pointer Offset
+ struct MBlazeFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MBlazeFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
+ MBlazeFIHolder GPHolder;
+
+ /// In LowerFormalArguments the stack size is unknown, so the Stack Pointer
+ /// Offset calculation for arguments not passed in registers must be
+ /// postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+ // When the function takes VarArgs, argument registers must be written back
+ // to the caller's stack, preserving the arguments already passed in
+ // registers. Since the stack size is unknown in LowerFormalArguments, the
+ // Stack Pointer Offset calculation must be postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ // When determining the final stack layout some of the frame indexes may
+ // be replaced by new frame indexes that reside in the caller's stack
+ // frame. The replacements are recorded in this structure.
+ DenseMap<int,int> FIReplacements;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// LiveInFI - keeps track of the frame indexes in a caller's stack
+ /// frame that are live into a function.
+ SmallVector<int, 16> LiveInFI;
+
+public:
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), LiveInFI()
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLiveIn(int FI) {
+ LiveInFI.push_back(FI);
+ }
+
+ bool isLiveIn(int FI) {
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+ if (FI == LiveInFI[i]) return true;
+
+ return false;
+ }
+
+ const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+ void recordReplacement(int OFI, int NFI) {
+ FIReplacements.insert(std::make_pair(OFI,NFI));
+ }
+
+ bool hasReplacement(int OFI) const {
+ return FIReplacements.find(OFI) != FIReplacements.end();
+ }
+
+ int getReplacement(int OFI) const {
+ return FIReplacements.lookup(OFI);
+ }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
+ }
+
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+#endif // MBLAZE_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
new file mode 100644
index 000000000000..f0b201a66170
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -0,0 +1,363 @@
+//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-info"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MBlazeRegisterInfo::
+MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
+ : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+/// getRegisterFromNumbering - Given a register number (e.g. 0), return the
+/// enum value that it corresponds to (e.g. MBlaze::R0).
+unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+bool MBlazeRegisterInfo::isRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+ return false; // Not reached
+}
+
+unsigned MBlazeRegisterInfo::getPICCallReg() {
+ return MBlaze::R20;
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// MBlaze Callee Saved Registers
+const unsigned* MBlazeRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ // MBlaze callee-save register range is R20 - R31
+ static const unsigned CalleeSavedRegs[] = {
+ MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
+ MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
+ MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector MBlazeRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
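+ // r0 is the constant zero register, r1 the stack pointer, r15 the return
+ // address and r19 the frame pointer (see getFrameRegister); the remaining
+ // registers here are assumed to be reserved for dedicated ABI uses.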
+ Reserved.set(MBlaze::R0);
+ Reserved.set(MBlaze::R1);
+ Reserved.set(MBlaze::R2);
+ Reserved.set(MBlaze::R13);
+ Reserved.set(MBlaze::R14);
+ Reserved.set(MBlaze::R15);
+ Reserved.set(MBlaze::R16);
+ Reserved.set(MBlaze::R17);
+ Reserved.set(MBlaze::R18);
+ Reserved.set(MBlaze::R19);
+ return Reserved;
+}
+
+// This function eliminates ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
+ // 'addi r1, r1, <amt>'
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// A FrameIndex represents an object inside an abstract stack. We must
+// replace each FrameIndex with a direct stack/frame pointer reference.
+void MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned oi = i == 2 ? 1 : 2;
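+ // The frame-index operand is paired with an immediate offset operand; oi
+ // selects whichever of the two slots holds that immediate.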
+
+ DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ dbgs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MFI->getStackSize();
+ int spOffset = MFI->getObjectOffset(FrameIndex);
+
+ DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ dbgs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n"
+ << "isFixed : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+ << "isLiveIn : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+ << "isSpill : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+ << "\n" );
+
+ // As explained in LowerFormalArguments, detect negative offsets and
+ // adjust SPOffsets taking the final stack size into account.
+ int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+ Offset += MI.getOperand(oi).getImm();
+
+ DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(oi).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MBlazeRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the stack offset where GP must be saved/loaded from.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->needGPSaveRestore())
+ MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
+}
+
+unsigned MBlazeRegisterInfo::getRARegister() const {
+ return MBlaze::R15;
+}
+
+unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+}
+
+unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0);
+}
+
+int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
new file mode 100644
index 000000000000..7ebce21d3a80
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -0,0 +1,85 @@
+//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEREGISTERINFO_H
+#define MBLAZEREGISTERINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MBlazeGenRegisterInfo.inc"
+
+namespace llvm {
+class MBlazeSubtarget;
+class TargetInstrInfo;
+class Type;
+
+namespace MBlaze {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// MBlazeRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+ };
+}
+
+struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
+ const MBlazeSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
+ const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// MBlaze::R15, return the number that it corresponds to (e.g. 15).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+ static unsigned getRegisterFromNumbering(unsigned RegEnum);
+ static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
+ static bool isRegister(unsigned RegEnum);
+ static bool isSpecialRegister(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
new file mode 100644
index 000000000000..13c46ba1ecba
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -0,0 +1,148 @@
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MicroBlaze register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MBlazeReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+// Special purpose registers are identified by 15-bit register numbers
+class MBlazeSReg<string n> : Register<n> {
+ field bits<15> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze general purpose registers
+class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
+ let Num = num;
+}
+
+// MBlaze special purpose registers
+class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "MBlaze" in {
+ // General Purpose Registers
+ def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>;
+ def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>;
+ def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>;
+ def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>;
+ def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>;
+ def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>;
+ def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>;
+ def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>;
+ def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>;
+ def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>;
+ def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>;
+ def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>;
+ def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>;
+ def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>;
+ def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>;
+ def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>;
+ def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>;
+ def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>;
+ def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>;
+ def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>;
+ def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
+ def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
+
+ // Special Purpose Registers
+ def RPC : MBlazeSPRReg<0x0000, "rpc">, DwarfRegNum<[32]>;
+ def RMSR : MBlazeSPRReg<0x0001, "rmsr">, DwarfRegNum<[33]>;
+ def REAR : MBlazeSPRReg<0x0003, "rear">, DwarfRegNum<[34]>;
+ def RESR : MBlazeSPRReg<0x0005, "resr">, DwarfRegNum<[35]>;
+ def RFSR : MBlazeSPRReg<0x0007, "rfsr">, DwarfRegNum<[36]>;
+ def RBTR : MBlazeSPRReg<0x000B, "rbtr">, DwarfRegNum<[37]>;
+ def REDR : MBlazeSPRReg<0x000D, "redr">, DwarfRegNum<[38]>;
+ def RPID : MBlazeSPRReg<0x1000, "rpid">, DwarfRegNum<[39]>;
+ def RZPR : MBlazeSPRReg<0x1001, "rzpr">, DwarfRegNum<[40]>;
+ def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>;
+ def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+ def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+ def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>;
+ def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>;
+ def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>;
+ def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>;
+ def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>;
+ def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>;
+ def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>;
+ def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>;
+ def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>;
+ def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>;
+ def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>;
+ def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>;
+ def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>;
+
+ // The carry bit. On the MicroBlaze this is really bit 29 of the
+ // MSR register, but it is the only bit of that register that we
+ // are interested in modeling.
+ def CARRY : MBlazeSPRReg<0x0000, "rmsr[c]">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>;
+
+def SPR : RegisterClass<"MBlaze", [i32], 32, (add
+ // Reserved
+ RPC,
+ RMSR,
+ REAR,
+ RESR,
+ RFSR,
+ RBTR,
+ REDR,
+ RPID,
+ RZPR,
+ RTLBX,
+ RTLBLO,
+ RTLBHI,
+ RPVR0,
+ RPVR1,
+ RPVR2,
+ RPVR3,
+ RPVR4,
+ RPVR5,
+ RPVR6,
+ RPVR7,
+ RPVR8,
+ RPVR9,
+ RPVR10,
+ RPVR11
+ )>
+{
+ // None of the special purpose registers are allocatable.
+ let isAllocatable = 0;
+}
+
+def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
new file mode 100644
index 000000000000..c298eda2195f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
@@ -0,0 +1,47 @@
+//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZERELOCATIONS_H
+#define MBLAZERELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace MBlaze {
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
new file mode 100644
index 000000000000..4662f25ceb12
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
@@ -0,0 +1,55 @@
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze functional units.
+//===----------------------------------------------------------------------===//
+def IF : FuncUnit;
+def ID : FuncUnit;
+def EX : FuncUnit;
+def MA : FuncUnit;
+def WB : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for MBlaze
+//===----------------------------------------------------------------------===//
+def IIC_ALU : InstrItinClass;
+def IIC_ALUm : InstrItinClass;
+def IIC_ALUd : InstrItinClass;
+def IIC_SHT : InstrItinClass;
+def IIC_FSLg : InstrItinClass;
+def IIC_FSLp : InstrItinClass;
+def IIC_MEMs : InstrItinClass;
+def IIC_MEMl : InstrItinClass;
+def IIC_FPU : InstrItinClass;
+def IIC_FPUd : InstrItinClass;
+def IIC_FPUf : InstrItinClass;
+def IIC_FPUi : InstrItinClass;
+def IIC_FPUs : InstrItinClass;
+def IIC_FPUc : InstrItinClass;
+def IIC_BR : InstrItinClass;
+def IIC_BRc : InstrItinClass;
+def IIC_BRl : InstrItinClass;
+def IIC_WDC : InstrItinClass;
+def IIC_Pseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// MBlaze generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for three stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule3.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for five stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule5.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
new file mode 100644
index 000000000000..ccbf99dbd3a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -0,0 +1,236 @@
+//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the three stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe3Itineraries : ProcessorItineraries<
+ [IF,ID,EX], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes three cycles. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register and two register
+ // source operands. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes 34 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the execute stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<34,[EX]>], // 34 cycles in execute stage
+ [ 35 // result ready after 35 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The two source operands
+ // are read during the decode stage and the result is ready after the execute
+ // stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the execute stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the execute stage, which takes six cycles. The
+ // source operands are read during the decode stage and the results are ready
+ // after the execute stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 30
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<30,[EX]>], // 30 cycles in execute stage
+ [ 31 // result ready after 31 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes seven cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<7,[EX]>], // seven cycles in execute stage
+ [ 8 // result ready after eight cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes six cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 29
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<29,[EX]>], // 29 cycles in execute stage
+ [ 30 // result ready after 30 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages except the execute stage, which takes three
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages except the execute stage, which takes two cycles.
+ // The one source operand is read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. The two source operands are read during the decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. All of the source operands are read during the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes two cycles. All of
+ // the source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
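
The latency lists in these itineraries pair a result-ready cycle with one read cycle per source operand. As a rough, standalone illustration of how such numbers are consumed, the sketch below computes the stall a dependent instruction sees after a three-stage IIC_ALUm multiply (result ready after four cycles, operands read after one cycle, as written above); the helper is purely illustrative and not an LLVM API.

#include <algorithm>
#include <cstdio>

// Given the producer's result-ready cycle and the consumer's operand-read
// cycle (both relative to issue), return how many cycles the consumer must
// wait if it would otherwise issue in the very next cycle.
static int stallCycles(int defCycle, int useCycle) {
  return std::max(defCycle - useCycle, 0);
}

int main() {
  // IIC_ALUm in MBlazePipe3Itineraries: result at cycle 4, operand read at cycle 1.
  std::printf("mul -> dependent add stall: %d cycles\n", stallCycles(4, 1)); // prints 3
  return 0;
}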
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
new file mode 100644
index 000000000000..fa88766fdb18
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -0,0 +1,267 @@
+//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the five stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe5Itineraries : ProcessorItineraries<
+ [IF,ID,EX,MA,WB], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. The two source operands are read during the decode stage
+ // and the result is ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register and two register
+ // source operands. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the memory access stage, which takes 31 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the memory access stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<31,[MA]> // 31 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 33 // result ready after 33 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages.
+ // The two source operands are read during the decode stage and the result is
+ // ready after the memory access stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the writeback stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the memory access stage, which takes two
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 26
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<26,[MA]> // 26 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 29 // result ready after 29 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes three cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<3,[MA]> // three cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 6 // result ready after six cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes two cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 25
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<25,[MA]> // 25 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 28 // result ready after 28 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages. The source operands are read during the
+ // decode stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages. The one source operand is read during the decode
+ // stage and the result is ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. The two source operands are read during the
+ // decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. All of the source operands are read during
+ // the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. All of the source operands are read during the decode
+ // stage and the result is ready after the writeback stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6a115b22bd4a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-selectiondag-info"
+#include "MBlazeTargetMachine.h"
+using namespace llvm;
+
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
new file mode 100644
index 000000000000..9f8e2aad4b20
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESELECTIONDAGINFO_H
+#define MBLAZESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MBlazeTargetMachine;
+
+class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
+ ~MBlazeSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
new file mode 100644
index 000000000000..eda141daf2b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -0,0 +1,63 @@
+//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeSubtarget.h"
+#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MBlazeGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ MBlazeGenSubtargetInfo(TT, CPU, FS),
+ HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
+ HasFPU(false), HasMul64(false), HasSqrt(false)
+{
+ // Parse features string.
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mblaze";
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Only use instruction scheduling if the selected CPU has an instruction
+ // itinerary (the default CPU is the only one that doesn't).
+ HasItin = CPUName != "mblaze";
+ DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n");
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Compute the issue width of the MBlaze itineraries
+ computeIssueWidth();
+}
+
+void MBlazeSubtarget::computeIssueWidth() {
+ InstrItins.IssueWidth = 1;
+}
+
+bool MBlazeSubtarget::
+enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
+ return HasItin && OptLevel >= CodeGenOpt::Default;
+}
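
The return expression above is a simple conjunction: the selected CPU must have an itinerary (anything other than the generic "mblaze" CPU) and the optimization level must be at least the default. A throwaway sketch of that decision with a stand-in enum (illustrative only, not LLVM's CodeGenOpt definition):

#include <cstdio>

// Stand-in for CodeGenOpt::Level; the ordering None < Less < Default <
// Aggressive is all this sketch relies on.
enum class OptLevel { None = 0, Less = 1, Default = 2, Aggressive = 3 };

// Mirrors the return expression of MBlazeSubtarget::enablePostRAScheduler.
static bool enablePostRASched(bool hasItin, OptLevel opt) {
  return hasItin && opt >= OptLevel::Default;
}

int main() {
  std::printf("%d\n", (int)enablePostRASched(true,  OptLevel::Aggressive)); // 1
  std::printf("%d\n", (int)enablePostRASched(true,  OptLevel::Less));       // 0
  std::printf("%d\n", (int)enablePostRASched(false, OptLevel::Default));    // 0 (generic CPU)
  return 0;
}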
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
new file mode 100644
index 000000000000..43b0197ad5aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -0,0 +1,75 @@
+//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESUBTARGET_H
+#define MBLAZESUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MBlazeGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MBlazeSubtarget : public MBlazeGenSubtargetInfo {
+
+protected:
+ bool HasBarrel;
+ bool HasDiv;
+ bool HasMul;
+ bool HasPatCmp;
+ bool HasFPU;
+ bool HasMul64;
+ bool HasSqrt;
+ bool HasItin;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ MBlazeSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// Compute the maximum number of issues per cycle for the
+ /// MBlaze scheduling itineraries.
+ void computeIssueWidth();
+
+ /// enablePostRAScheduler - True at the default optimization level or above
+ /// when the selected CPU has an instruction itinerary.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItineraryData - Return the instruction itineraries for the subtarget.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ bool hasItin() const { return HasItin; }
+ bool hasPCMP() const { return HasPatCmp; }
+ bool hasFPU() const { return HasFPU; }
+ bool hasSqrt() const { return HasSqrt; }
+ bool hasMul() const { return HasMul; }
+ bool hasMul64() const { return HasMul64; }
+ bool hasDiv() const { return HasDiv; }
+ bool hasBarrel() const { return HasBarrel; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
new file mode 100644
index 000000000000..7208874aef1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -0,0 +1,102 @@
+//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about MBlaze target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ return NULL;
+ }
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+
+extern "C" void LLVMInitializeMBlazeTarget() {
+ // Register the target.
+ RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
+}
+
+// DataLayout: big-endian, 32-bit pointer/ABI/alignment.
+// The stack is always 8-byte aligned.
+// In the function prologue the stack is created by decrementing the stack
+// pointer. Once decremented, all references use positive offsets from the
+// stack/frame pointer, so using StackGrowsUp makes the handling easier.
+MBlazeTargetMachine::
+MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS):
+ LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ if (getRelocationModel() == Reloc::Default) {
+ setRelocationModel(Reloc::Static);
+ }
+
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen MBlaze code.
+bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeDelaySlotFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
new file mode 100644
index 000000000000..cd6caafbf309
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -0,0 +1,84 @@
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_TARGETMACHINE_H
+#define MBLAZE_TARGETMACHINE_H
+
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeIntrinsicInfo.h"
+#include "MBlazeFrameLowering.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MBlazeTargetMachine : public LLVMTargetMachine {
+ MBlazeSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ MBlazeFrameLowering FrameLowering;
+ MBlazeTargetLowering TLInfo;
+ MBlazeSelectionDAGInfo TSInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ MBlazeELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
+
+ public:
+ MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const MBlazeInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const
+ { return &InstrItins; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+
+ virtual const MBlazeSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MBlazeRegisterInfo *getRegisterInfo() const
+ { return &InstrInfo.getRegisterInfo(); }
+
+ virtual const MBlazeTargetLowering *getTargetLowering() const
+ { return &TLInfo; }
+
+ virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+ { return &TSInfo; }
+
+ const TargetIntrinsicInfo *getIntrinsicInfo() const
+ { return &IntrinsicInfo; }
+
+ virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
+ return &ELFWriterInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
+ virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
new file mode 100644
index 000000000000..abd1b0b62c7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -0,0 +1,90 @@
+//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+void MBlazeTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= 8;
+}
+
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is an internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+const MCSection *MBlazeTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
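
The classification above ultimately hinges on the 8-byte threshold in IsInSmallSection. A standalone sketch that restates that predicate with a few illustrative sizes (the helper is a copy for demonstration, not part of the MBlaze sources):

#include <cstdint>
#include <cstdio>

// Restates IsInSmallSection: only non-empty objects of at most 8 bytes are
// placed in .sdata/.sbss and addressed gp-relative.
static bool isInSmallSection(uint64_t Size) {
  return Size > 0 && Size <= 8;
}

int main() {
  std::printf("4-byte int:     %d\n", (int)isInSmallSection(4));  // 1 -> small section
  std::printf("8-byte double:  %d\n", (int)isInSmallSection(8));  // 1 -> small section
  std::printf("16-byte struct: %d\n", (int)isInSmallSection(16)); // 0 -> regular section
  return 0;
}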
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
new file mode 100644
index 000000000000..c313722427db
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..3d15708c35b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMBlazeDesc
+ MBlazeMCTargetDesc.cpp
+ MBlazeMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
new file mode 100644
index 000000000000..0d88466bb300
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -0,0 +1,24 @@
+//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MBlazeMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCAsmInfo.h"
+using namespace llvm;
+
+MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+ IsLittleEndian = false;
+ StackGrowsUp = false;
+ SupportsDebugInformation = true;
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = "\t.gpword\t";
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
new file mode 100644
index 000000000000..e68dd58b016b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MBlazeMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZETARGETASMINFO_H
+#define MBLAZETARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MBlazeMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MBlazeMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
new file mode 100644
index 000000000000..20d6c0bd2156
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -0,0 +1,65 @@
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCTargetDesc.h"
+#include "MBlazeMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MBlazeGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMBlazeMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMBlazeMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+}
+
+static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMBlazeMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return new MBlazeMCAsmInfo();
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeMCAsmInfo() {
+ RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
new file mode 100644
index 000000000000..b14772ef060b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEMCTARGETDESC_H
+#define MBLAZEMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMBlazeTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MBlazeGenRegisterInfo.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#define GET_INSTRINFO_ENUM
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MBlazeGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..71075ffbf47c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/TargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
new file mode 100644
index 000000000000..16e01dbfdeed
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMBlazeTarget;
+
+extern "C" void LLVMInitializeMBlazeTargetInfo() {
+ RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze");
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..f5458d59a821
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430AsmPrinter
+ MSP430InstPrinter.cpp
+ )
+add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
new file mode 100644
index 000000000000..e10d4fe7ca16
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -0,0 +1,113 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '#' << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ const MCOperand &Disp = MI->getOperand(OpNo+1);
+
+ // Print displacement first
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Base.getReg())
+ O << '&';
+
+ if (Disp.isExpr())
+ O << *Disp.getExpr();
+ else {
+ assert(Disp.isImm() && "Expected immediate in displacement field");
+ O << Disp.getImm();
+ }
+
+ // Print register base field
+ if (Base.getReg())
+ O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CC = MI->getOperand(OpNo).getImm();
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CC code");
+ break;
+ case MSP430CC::COND_E:
+ O << "eq";
+ break;
+ case MSP430CC::COND_NE:
+ O << "ne";
+ break;
+ case MSP430CC::COND_HS:
+ O << "hs";
+ break;
+ case MSP430CC::COND_LO:
+ O << "lo";
+ break;
+ case MSP430CC::COND_GE:
+ O << "ge";
+ break;
+ case MSP430CC::COND_L:
+ O << 'l';
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
new file mode 100644
index 000000000000..50d98b7c41fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430INSTPRINTER_H
+#define MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MSP430InstPrinter : public MCInstPrinter {
+ public:
+ MSP430InstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
new file mode 100644
index 000000000000..a5293ab8a234
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430AsmPrinter
+
+# Hack: we need to include 'main' MSP430 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..0f3ebd303924
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMSP430Desc
+ MSP430MCTargetDesc.cpp
+ MSP430MCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
new file mode 100644
index 000000000000..ad7d380b5631
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -0,0 +1,28 @@
+//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCAsmInfo.h"
+using namespace llvm;
+
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
+ PointerSize = 2;
+
+ PrivateGlobalPrefix = ".L";
+  WeakRefDirective = "\t.weak\t";
+  PCSymbol = ".";
+ CommentString = ";";
+
+ AlignmentIsInBytes = false;
+ AllowNameToStartWithDigit = true;
+ UsesELFSectionDirectiveForBSS = true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
new file mode 100644
index 000000000000..f3138a22022d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct MSP430MCAsmInfo : public MCAsmInfo {
+ explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
new file mode 100644
index 000000000000..43a704d7a7df
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "MSP430MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MSP430GenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMSP430MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMSP430MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMSP430MCAsmInfo() {
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
new file mode 100644
index 000000000000..0d8a6bdb44f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMSP430Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#define GET_INSTRINFO_ENUM
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MSP430GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..bb857998eef9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MSP430/TargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
new file mode 100644
index 000000000000..4574ce5f98b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -0,0 +1,47 @@
+//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MSP430 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_H
+#define LLVM_TARGET_MSP430_H
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace MSP430CC {
+ // MSP430 specific condition code.
+ enum CondCodes {
+ COND_E = 0, // aka COND_Z
+ COND_NE = 1, // aka COND_NZ
+ COND_HS = 2, // aka COND_C
+ COND_LO = 3, // aka COND_NC
+ COND_GE = 4,
+ COND_L = 5,
+
+ COND_INVALID = -1
+ };
+}
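+// The instruction printer emits these as the suffixes "eq", "ne", "hs", "lo",
+// "ge" and "l", e.g. COND_E becomes a "jeq" and COND_GE a "jge" in the
+// generated assembly.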
+
+namespace llvm {
+ class MSP430TargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createMSP430BranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
new file mode 100644
index 000000000000..5cc5e6e3d7c9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -0,0 +1,66 @@
+//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MSP430 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+ "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo;
+
+def MSP430InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+ let InstructionSet = MSP430InstrInfo;
+ let AssemblyWriters = [MSP430InstPrinter];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..2042056617ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,179 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCInstLower.h"
+#include "MSP430TargetMachine.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default: assert(0 && "Not implemented yet!");
+ case MachineOperand::MO_Register:
+ O << MSP430InstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ uint64_t Offset = MO.getOffset();
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
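+    // With a nonzero offset the symbol is wrapped as e.g. "#(2+foo)" or
+    // "&(2+foo)"; the "nohash" modifier suppresses the prefix entirely.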
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << (isMemOp ? '&' : '#');
+ if (Offset)
+ O << '(' << Offset << '+';
+
+ O << *Mang->getSymbol(MO.getGlobal());
+
+ if (Offset)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ O << (isMemOp ? '&' : '#');
+ O << MAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ // Print displacement first
+
+  // Imm here is in fact a global address - print the extra modifier.
+ if (Disp.isImm() && !Base.getReg())
+ O << '&';
+ printOperand(MI, OpNum+1, O, "nohash");
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ return true; // Unknown modifier.
+ }
+ printSrcMemOperand(MI, OpNo, O);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+ RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
new file mode 100644
index 000000000000..bd644435c76f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -0,0 +1,180 @@
+//===-- MSP430BranchSelector.cpp - Emit long conditional branches--*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 10 bits of displacement to reach their
+// target basic block. It does this in two passes: a basic block position
+// calculation pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-branch-select"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct MSP430BSel : public MachineFunctionPass {
+ static char ID;
+ MSP430BSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "MSP430 Branch Selector";
+ }
+ };
+ char MSP430BSel::ID = 0;
+}
+
+/// createMSP430BranchSelectionPass - returns an instance of the Branch
+/// Selection Pass
+///
+FunctionPass *llvm::createMSP430BranchSelectionPass() {
+ return new MSP430BSel();
+}
+
+bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+  // If the entire function fits within the range of a branch displacement
+  // field, we know we don't need to expand any branches in this function.
+  // This is a common case.
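+  // (A conditional branch has a 10-bit signed displacement, so anything under
+  // 1 << 9 = 512 bytes is trivially in range.)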
+ if (FuncSize < (1 << 9)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+6
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
+ I->getOpcode() != MSP430::JMP) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(0).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<10>(BranchSize)) {
+ MBBStartOffset += 2;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ unsigned NewSize;
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == MSP430::JMP) {
+ NewSize = 4;
+ } else {
+ // The BCC operands are:
+ // 0. MSP430 branch predicate
+ // 1. Target MBB
+ SmallVector<MachineOperand, 1> Cond;
+ Cond.push_back(I->getOperand(1));
+
+ // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
+ TII->ReverseBranchCondition(Cond);
+ BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
+ .addImm(4).addOperand(Cond[0]);
+
+ NewSize = 6;
+ }
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is NewSize bytes, increase the size of the
+ // block by NewSize-2, remember to iterate.
+ BlockSizes[MBB.getNumber()] += NewSize-2;
+ MBBStartOffset += NewSize;
+
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 000000000000..ad27cc9122a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,37 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+ // i8 are returned in registers R15B, R14B, R13B, R12B
+ CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+ // i16 are returned in registers R15, R14, R13, R12
+ CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+ // Promote i8 arguments to i16.
+ CCIfType<[i8], CCPromoteToType<i16>>,
+
+ // The first 4 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+ // Integer values get stored in stack slots that are 2 bytes in
+ // size and 2-byte aligned.
+ CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
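+
+// For example, in a non-vararg call the first four i16 arguments are passed in
+// R15W, R14W, R13W and R12W; any further i16 argument goes on the stack in a
+// 2-byte slot with 2-byte alignment.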
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 000000000000..c99f4ab6c2f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,223 @@
+//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+    // Get the offset of the stack slot for the FPW register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ if (CSSize) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(CSSize);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+// FIXME: Can we eliminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
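+  // Each callee-saved register is 16 bits wide, so each spill occupies one
+  // 2-byte stack slot.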
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 000000000000..b636827da7b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,53 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+ const MSP430Subtarget &STI;
+
+public:
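+  // Stack grows down, the stack is 2-byte aligned, and the local area starts
+  // at offset -2 from the stack pointer on entry.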
+ explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..5430d433b650
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,492 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MSP430ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ int16_t Disp;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ MSP430ISelAddressMode()
+ : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
+ ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ void dump() {
+ errs() << "MSP430ISelAddressMode " << this << '\n';
+ if (BaseType == RegBase && Base.Reg.getNode() != 0) {
+ errs() << "Base.Reg ";
+ Base.Reg.getNode()->dump();
+ } else if (BaseType == FrameIndexBase) {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ errs() << " Disp " << Disp << '\n';
+ if (GV) {
+ errs() << "GV ";
+ GV->dump();
+ } else if (CP) {
+ errs() << " CP ";
+ CP->dump();
+ errs() << " Align" << Align << '\n';
+ } else if (ES) {
+ errs() << "ES ";
+ errs() << ES << '\n';
+ } else if (JT != -1)
+ errs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class MSP430DAGToDAGISel : public SelectionDAGISel {
+ const MSP430TargetLowering &Lowering;
+ const MSP430Subtarget &Subtarget;
+
+ public:
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "MSP430 DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
+
+ virtual bool
+ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Include the pieces autogenerated from the target description.
+ #include "MSP430GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectIndexedLoad(SDNode *Op);
+ SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16);
+
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
+ };
+} // end anonymous namespace
+
+/// createMSP430ISelDag - This pass converts a legalized DAG into a
+/// MSP430-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
+}
+
+
+/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode.
+/// These wrap things that will resolve down into a symbol reference. If no
+/// match is possible, this returns true, otherwise it returns false.
+bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ //AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ //AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ //AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ //AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = MSP430ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ AM.Disp += Val;
+ return false;
+ }
+
+ case MSP430ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::ADD: {
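+    // Try folding the operands into the addressing mode in either order; if
+    // neither order matches completely, fall back to a plain register base.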
+ MSP430ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM))
+ return false;
+ AM = Backup;
+
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ MSP430ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// SelectAddr - returns true if it can pattern-match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
+ SDValue &Base, SDValue &Disp) {
+ MSP430ISelAddressMode AM;
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
+ MVT::i16, AM.Disp,
+ 0/*AM.SymbolFlags*/);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
+ AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, 0/*AM.SymbolFlags*/);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
+
+ return true;
+}
+
+bool MSP430DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
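+/// isValidIndexedLoad - Return true if LD is a non-extending post-incremented
+/// load whose increment matches the width of the loaded value (1 for i8,
+/// 2 for i16).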
+static bool isValidIndexedLoad(const LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
+ return false;
+
+ break;
+ case MVT::i16:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
+ return false;
+
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+
+ unsigned Opcode = 0;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opcode = MSP430::MOV8rm_POST;
+ break;
+ case MVT::i16:
+ Opcode = MSP430::MOV16rm_POST;
+ break;
+ default:
+ return NULL;
+ }
+
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(),
+ VT, MVT::i16, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+}
+
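+/// SelectIndexedBinOp - Try to fold the post-incremented load N1 into the
+/// binary operation Op, selecting the 8- or 16-bit reg-mem form (Opc8/Opc16)
+/// and transferring the chain and writeback results to the new node.
+/// Returns the selected node, or NULL if the load cannot be folded.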
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
+ SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16) {
+ if (N1.getOpcode() == ISD::LOAD &&
+ N1.hasOneUse() &&
+ IsLegalToFold(N1, Op, Op, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
+ SDNode *ResNode =
+ CurDAG->SelectNodeTo(Op, Opc,
+ VT, MVT::i16, MVT::Other,
+ Ops0, 3);
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // Transfer chain.
+ ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
+ // Transfer writeback.
+ ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1));
+ return ResNode;
+ }
+
+ return NULL;
+}
+
+
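+/// Select - Main instruction selection hook. Frame indices, post-incremented
+/// loads, and ADD/SUB/AND/OR/XOR nodes that can fold such a load are handled
+/// here; everything else falls through to the TableGen-generated SelectCode.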
+SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ return NULL;
+ }
+
+  // A few nodes require custom selection.
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ assert(Node->getValueType(0) == MVT::i16);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ }
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectIndexedLoad(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ case ISD::ADD:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SUB:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::AND:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::OR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::XOR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
new file mode 100644
index 000000000000..0a3eab1f7a66
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -0,0 +1,1197 @@
+//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-lower"
+
+#include "MSP430ISelLowering.h"
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+typedef enum {
+ NoHWMult,
+ HWMultIntr,
+ HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+ cl::desc("Hardware multiplier use mode"),
+ cl::init(HWMultNoIntr),
+ cl::values(
+ clEnumValN(NoHWMult, "no",
+ "Do not use hardware multiplier"),
+ clEnumValN(HWMultIntr, "interrupts",
+ "Assume hardware multiplier can be used inside interrupts"),
+ clEnumValN(HWMultNoIntr, "use",
+ "Assume hardware multiplier cannot be used inside interrupts"),
+ clEnumValEnd));
+
+MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ TD = getTargetData();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
+ addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(Sched::Latency);
+
+ // We have post-incremented loads / stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // We don't have any truncstores
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+
+ setOperationAction(ISD::CTTZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // FIXME: Implement multiplication by a constant efficiently
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+  // Libcall names.
+ if (HWMultMode == HWMultIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+ } else if (HWMultMode == HWMultNoIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+ }
+
+ setMinFunctionAlignment(1);
+ setPrefFunctionAlignment(2);
+}
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::SHL: // FALLTHROUGH
+ case ISD::SRL:
+ case ISD::SRA: return LowerShifts(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+MSP430TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, MSP430::GR8RegisterClass);
+
+ return std::make_pair(0U, MSP430::GR16RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue
+MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ if (Ins.empty())
+ return Chain;
+ else {
+ report_fatal_error("ISRs cannot have arguments");
+ return SDValue();
+ }
+ }
+}
+
+SDValue
+MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MSP430 target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ report_fatal_error("ISRs cannot be called directly");
+ return SDValue();
+ }
+}
+
+/// LowerCCCArguments - Transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
+
+ assert(!isVarArg && "Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i16:
+ unsigned VReg =
+ RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+
+ // If this is an 8-bit value, it is really passed promoted to 16
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
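+/// LowerReturn - Copy the return values into the registers assigned by
+/// RetCC_MSP430 and emit a RET_FLAG node (RETI_FLAG for interrupt service
+/// routines) glued to those copies.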
+SDValue
+MSP430TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // ISRs cannot return any value.
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ report_fatal_error("ISRs cannot return any value");
+ return SDValue();
+ }
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+    // Guarantee that all emitted copies are stuck together by passing the
+    // glue value along, so nothing can be scheduled between them.
+ Flag = Chain.getValue(1);
+ }
+
+ unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
+ if (Flag.getNode())
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(Opc, dl, MVT::Other, Chain);
+}
+
+/// LowerCCCCallTo - Function arguments are copied from virtual registers to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue
+MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are kept in the RegsToPass
+    // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
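+/// LowerShifts - Lower SHL/SRA/SRL. Shifts by a non-constant amount become
+/// target shift nodes (expanded into loops later); shifts by a constant are
+/// expanded into a sequence of single-bit shift operations.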
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ SDNode* N = Op.getNode();
+ EVT VT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand non-constant shifts to loops:
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ switch (Opc) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(MSP430ISD::SHL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(MSP430ISD::SRA, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(MSP430ISD::SRL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ }
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+  // Expand the constant shift into a sequence of single-bit shifts.
+ // FIXME: for some shift amounts this might be done better!
+ // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+ SDValue Victim = N->getOperand(0);
+
+ if (Opc == ISD::SRL && ShiftAmount) {
+    // Special-case the first step of a logical right shift:
+    // srl A, 1 => clrc; rrc A
+ Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ ShiftAmount -= 1;
+ }
+
+ while (ShiftAmount--)
+ Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+ dl, VT, Victim);
+
+ return Victim;
+}
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+ getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
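+/// EmitCMP - Map the ISD condition code CC onto an MSP430 condition code,
+/// swapping or adjusting LHS/RHS where that lets a constant be folded into
+/// the comparison, and emit the MSP430ISD::CMP node that produces the flag.
+/// The chosen target condition code is returned through TargetCC.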
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
+ ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // FIXME: Handle bittests someday
+ assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+ // FIXME: Handle jump negative someday
+ MSP430CC::CondCodes TCC = MSP430CC::COND_INVALID;
+ switch (CC) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ TCC = MSP430CC::COND_E; // aka COND_Z
+ // Minor optimization: if LHS is a constant, swap operands, then the
+    // constant can be folded into the comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETNE:
+ TCC = MSP430CC::COND_NE; // aka COND_NZ
+ // Minor optimization: if LHS is a constant, swap operands, then the
+    // constant can be folded into the comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETULE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETUGE:
+    // Turn lhs u>= rhs with lhs constant into rhs u< lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_LO;
+ break;
+ }
+ TCC = MSP430CC::COND_HS; // aka COND_C
+ break;
+ case ISD::SETUGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETULT:
+    // Turn lhs u< rhs with lhs constant into rhs u>= lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_HS;
+ break;
+ }
+ TCC = MSP430CC::COND_LO; // aka COND_NC
+ break;
+ case ISD::SETLE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETGE:
+    // Turn lhs >= rhs with lhs constant into rhs < lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+ TCC = MSP430CC::COND_GE;
+ break;
+ case ISD::SETGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETLT:
+    // Turn lhs < rhs with lhs constant into rhs >= lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_GE;
+ break;
+ }
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+
+ TargetCC = DAG.getConstant(TCC, MVT::i8);
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
+}
+
+
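+/// LowerBR_CC - Lower a conditional branch: emit the comparison via EmitCMP
+/// and branch on the resulting target condition code and flag.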
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+ Chain, Dest, TargetCC, Flag);
+}
+
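+/// LowerSETCC - Materialize a boolean result. When the condition can be read
+/// straight from the status register the result is computed by shifting,
+/// masking and possibly inverting SRW; otherwise a SELECT_CC of 1 and 0 on
+/// the target condition code is emitted.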
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we are doing an AND and testing against zero, then the CMP
+ // will not be generated. The AND (or BIT) will generate the condition codes,
+ // but they are different from CMP.
+ // FIXME: since we're doing a post-processing, use a pseudoinstr here, so
+ // lowering & isel wouldn't diverge.
+ bool andCC = false;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND))) {
+ andCC = true;
+ }
+ }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  // Get the condition codes directly from the status register, if it's easy.
+ // Otherwise a branch will be generated. Note that the AND and BIT
+ // instructions generate different flags than CMP, the carry bit can be used
+ // for NE/EQ.
+ bool Invert = false;
+ bool Shift = false;
+ bool Convert = true;
+ switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+ default:
+ Convert = false;
+ break;
+ case MSP430CC::COND_HS:
+ // Res = SRW & 1, no processing is required
+ break;
+ case MSP430CC::COND_LO:
+ // Res = ~(SRW & 1)
+ Invert = true;
+ break;
+ case MSP430CC::COND_NE:
+ if (andCC) {
+ // C = ~Z, thus Res = SRW & 1, no processing is required
+ } else {
+ // Res = ~((SRW >> 1) & 1)
+ Shift = true;
+ Invert = true;
+ }
+ break;
+ case MSP430CC::COND_E:
+ Shift = true;
+ // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
+ // Res = (SRW >> 1) & 1 is 1 word shorter.
+ break;
+ }
+ EVT VT = Op.getValueType();
+ SDValue One = DAG.getConstant(1, VT);
+ if (Convert) {
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ MVT::i16, Flag);
+ if (Shift)
+      // FIXME: somewhere this is turned into a SRL; lower it in an MSP430-specific way?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+ SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+ if (Invert)
+ SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+ return SR;
+ } else {
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(One);
+ Ops.push_back(Zero);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ }
+}
+
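+/// LowerSELECT_CC - Lower a select into a target SELECT_CC node carrying the
+/// true/false values, the target condition code and the comparison flag.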
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Val = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(VT == MVT::i16 && "Only support i16 for now!");
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+ DAG.getValueType(Val.getValueType()));
+}
+
+SDValue
+MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ MSP430::FPW, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// getPostIndexedAddressParts - Returns true if this node can be combined
+/// with a load / store to form a post-indexed load / store, and if so sets
+/// the base pointer, offset and addressing mode by reference.
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i8 && VT != MVT::i16)
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ uint64_t RHSC = RHS->getZExtValue();
+ if ((VT == MVT::i16 && RHSC != 2) ||
+ (VT == MVT::i8 && RHSC != 1))
+ return false;
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, VT);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
+ return false;
+}
+
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
+ case MSP430ISD::RRA: return "MSP430ISD::RRA";
+ case MSP430ISD::RLA: return "MSP430ISD::RLA";
+ case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::CALL: return "MSP430ISD::CALL";
+ case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
+ case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
+ case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ case MSP430ISD::SHL: return "MSP430ISD::SHL";
+ case MSP430ISD::SRA: return "MSP430ISD::SRA";
+ }
+}
+
+bool MSP430TargetLowering::isTruncateFree(const Type *Ty1,
+ const Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits());
+}
+
+bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+
+ return (VT1.getSizeInBits() > VT2.getSizeInBits());
+}
+
+bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
+}
+
+bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
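+/// EmitShiftInstr - Expand a Shl/Sra/Srl pseudo instruction into a small CFG:
+/// a loop block that shifts by one bit and decrements the count, and a
+/// remainder block that joins the original and shifted values with a PHI.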
+MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ const TargetRegisterClass * RC;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case MSP430::Shl8:
+ Opc = MSP430::SHL8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Shl16:
+ Opc = MSP430::SHL16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Sra8:
+ Opc = MSP430::SAR8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Sra16:
+ Opc = MSP430::SAR16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Srl8:
+ Opc = MSP430::SAR8r1c;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Srl16:
+ Opc = MSP430::SAR16r1c;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+  // Create the loop and remainder blocks.
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the block containing the instructions after the shift.
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // BB:
+ // cmp 0, N
+ // je RemBB
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ri))
+ .addReg(ShiftAmtSrcReg).addImm(0);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(RemBB)
+ .addImm(MSP430CC::COND_E);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg).addMBB(BB)
+ .addReg(ShiftAmtReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+ .addReg(ShiftAmtReg).addImm(1);
+ BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+ .addMBB(LoopBB)
+ .addImm(MSP430CC::COND_NE);
+
+ // RemBB:
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return RemBB;
+}
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ return EmitShiftInstr(MI, BB);
+
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
new file mode 100644
index 000000000000..bd660a0bb039
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -0,0 +1,182 @@
+//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MSP430 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
+#define LLVM_TARGET_MSP430_ISELLOWERING_H
+
+#include "MSP430.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MSP430ISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// Same as RET_FLAG, but used for returning from ISRs.
+ RETI_FLAG,
+
+ /// Y = R{R,L}A X, rotate right (left) arithmetically
+ RRA, RLA,
+
+ /// Y = RRC X, rotate right via carry
+ RRC,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+ /// and TargetGlobalAddress.
+ Wrapper,
+
+ /// CMP - Compare instruction.
+ CMP,
+
+ /// SetCC - Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+      /// is the block to branch to if the condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// instruction.
+ BR_CC,
+
+      /// SELECT_CC - Operands 0 and 1 are the selection variables, operand 2
+      /// is the condition code and operand 3 is the flag operand.
+ SELECT_CC,
+
+ /// SHL, SRA, SRL - Non-constant shifts.
+ SHL, SRA, SRL
+ };
+ }
+
+ class MSP430Subtarget;
+ class MSP430TargetMachine;
+
+ class MSP430TargetLowering : public TargetLowering {
+ public:
+ explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of type
+    /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate an i16 value in
+ /// register R15W to i8 by referencing its sub-register R15B.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a value
+    /// of type Ty1 implicitly zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in unknown
+ /// ways, such as incoming arguments, or copies from unknown virtual
+ /// registers. Also, if isTruncateFree(Ty2, Ty1) is true, this does not
+ /// necessarily apply to truncate instructions. e.g. on msp430, all
+    /// instructions that define 8-bit values implicitly zero-extend the result
+ /// out to 16 bits.
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ const MSP430Subtarget &Subtarget;
+ const MSP430TargetMachine &TM;
+ const TargetData *TD;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 000000000000..73aef1facc0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,211 @@
+//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MSP430 instruction formats here.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def PseudoFrm : Format<0>;
+def SingleOpFrm : Format<1>;
+def DoubleOpFrm : Format<2>;
+def CondJumpFrm : Format<3>;
+
+class SourceMode<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def SrcReg : SourceMode<0>;
+def SrcMem : SourceMode<1>;
+def SrcIndReg : SourceMode<2>;
+def SrcPostInc : SourceMode<3>;
+def SrcImm : SourceMode<3>;
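+// Note: an immediate operand is encoded as a post-increment read through the
+// program counter (@PC+), which is why SrcImm shares its value with SrcPostInc.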
+
+class DestMode<bit val> {
+ bit Value = val;
+}
+
+def DstReg : DestMode<0>;
+def DstMem : DestMode<1>;
+
+class SizeVal<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def SizeUnknown : SizeVal<0>; // Unknown / unset size
+def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
+def Size2Bytes : SizeVal<2>;
+def Size4Bytes : SizeVal<3>;
+def Size6Bytes : SizeVal<4>;
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
+ string asmstr> : Instruction {
+ field bits<16> Inst;
+
+ let Namespace = "MSP430";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ Format Form = f;
+ SizeVal Sz = sz;
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
+ let TSFlags{1-0} = Form.Value;
+ let TSFlags{4-2} = Sz.Value;
+
+ let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ DestMode ad = dest;
+ SourceMode as = src;
+
+ let Inst{12-15} = opcode;
+ let Inst{7} = ad.Value;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IForm instructions
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class I8rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I8ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I8mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IForm instructions
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class I16rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I16ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I16mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ SourceMode as = src;
+
+ let Inst{7-15} = opcode;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IIForm instructions
+class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class II8r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II8m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II8i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IIForm instructions
+class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class II16r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II16m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II16i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Conditional Jumps Instructions
+class CJForm<bits<3> opcode, bits<3> cond,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{13-15} = opcode;
+ let Inst{10-12} = cond;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ let Pattern = pattern;
+ let Inst{15-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 000000000000..846d09361b33
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,336 @@
+//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MSP430GenInstrInfo.inc"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+ : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ RI(tm, *this), TM(tm) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot load this register from stack slot!");
+}
+
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV16rr;
+ else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV8rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != MSP430::JMP &&
+ I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Br &&
+ I->getOpcode() != MSP430::Bm)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+bool MSP430InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid Xbranch condition!");
+
+ MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
+
+ switch (CC) {
+ default:
+ assert(0 && "Invalid branch condition!");
+ break;
+ case MSP430CC::COND_E:
+ CC = MSP430CC::COND_NE;
+ break;
+ case MSP430CC::COND_NE:
+ CC = MSP430CC::COND_E;
+ break;
+ case MSP430CC::COND_L:
+ CC = MSP430CC::COND_GE;
+ break;
+ case MSP430CC::COND_GE:
+ CC = MSP430CC::COND_L;
+ break;
+ case MSP430CC::COND_HS:
+ CC = MSP430CC::COND_LO;
+ break;
+ case MSP430CC::COND_LO:
+ CC = MSP430CC::COND_HS;
+ break;
+ }
+
+ Cond[0].setImm(CC);
+ return false;
+}
+
+bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Cannot handle indirect branches.
+ if (I->getOpcode() == MSP430::Br ||
+ I->getOpcode() == MSP430::Bm)
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == MSP430::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch");
+ MSP430CC::CondCodes BranchCode =
+ static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm());
+ if (BranchCode == MSP430CC::COND_INVALID)
+ return true; // Can't handle weird stuff.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Bail out if this branch targets a different destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "MSP430 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ BuildMI(&MBB, DL, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm());
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// GetInstSizeInBytes - Return the maximum number of bytes of code the
+/// specified instruction may occupy.
+///
+unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+
+ switch (Desc.TSFlags & MSP430II::SizeMask) {
+ default:
+ switch (Desc.getOpcode()) {
+ default:
+ assert(0 && "Unknown instruction size!");
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
+ }
+ }
+ case MSP430II::SizeSpecial:
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Unknown instruction size!");
+ case MSP430::SAR8r1c:
+ case MSP430::SAR16r1c:
+ return 4;
+ }
+ case MSP430II::Size2Bytes:
+ return 2;
+ case MSP430II::Size4Bytes:
+ return 4;
+ case MSP430II::Size6Bytes:
+ return 6;
+ }
+
+ return 6;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 000000000000..90013f5c2e70
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,92 @@
+//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MSP430RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MSP430GenInstrInfo.inc"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+/// MSP430II - This namespace holds all of the target-specific flags that
+/// instruction info tracks.
+///
+namespace MSP430II {
+ enum {
+ SizeShift = 2,
+ SizeMask = 7 << SizeShift,
+
+ SizeUnknown = 0 << SizeShift,
+ SizeSpecial = 1 << SizeShift,
+ Size2Bytes = 2 << SizeShift,
+ Size4Bytes = 3 << SizeShift,
+ Size6Bytes = 4 << SizeShift
+ };
+}
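+// For example, MSP430InstrInfo::GetInstSizeInBytes() recovers the size class
+// of an instruction with (Desc.TSFlags & MSP430II::SizeMask) and switches on
+// the result.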
+
+class MSP430InstrInfo : public MSP430GenInstrInfo {
+ const MSP430RegisterInfo RI;
+ MSP430TargetMachine &TM;
+public:
+ explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of register info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ // Branch folding goodness
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 000000000000..59cb59873ab7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,1211 @@
+//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisI8<2>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def MSP430callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def MSP430callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+ [SDNPHasChain, SDNPInGlue]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+ [SDNPInGlue]>;
+def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Short jump targets have OtherVT type and are printed as pcrel imm values.
+def jmptarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+ let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
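+// SelectAddr is implemented by the target's DAG-to-DAG instruction selector
+// (presumably MSP430ISelDAGToDAG.cpp in this import) and matches the
+// (base register, 16-bit displacement) pair used by memsrc/memdst above.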
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
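+// The single-use restriction lets the AND be folded into the BIT patterns
+// further below: BIT only sets SRW and discards the result, which is only
+// safe when no other user still needs the AND's value.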
+//===----------------------------------------------------------------------===//
+// Instruction list...
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(MSP430callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
+ "# Select8 PSEUDO",
+ [(set GR8:$dst,
+ (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
+ "# Select16 PSEUDO",
+ [(set GR16:$dst,
+ (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
+ let Defs = [SRW] in {
+ def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Shl8 PSEUDO",
+ [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Shl16 PSEUDO",
+ [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Sra8 PSEUDO",
+ [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Sra16 PSEUDO",
+ [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Srl8 PSEUDO",
+ [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Srl16 PSEUDO",
+ [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+ }
+}
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "nop", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs), (ins), "ret", [(MSP430retflag)]>;
+ def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+
+// FIXME: expand opcode & cond field for branches!
+
+// Direct branch
+let isBarrier = 1 in {
+ // Short branch
+ def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ "jmp\t$dst",
+ [(br bb:$dst)]>;
+ let isIndirectBranch = 1 in {
+ // Long branches
+ def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
+ "br\t$brdst",
+ [(brind tblockaddress:$brdst)]>;
+ def Br : I16rr<0, (outs), (ins GR16:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind GR16:$brdst)]>;
+ def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind (load addr:$brdst))]>;
+ }
+}
+
+// Conditional branches
+let Uses = [SRW] in
+ def JCC : CJForm<0, 0,
+ (outs), (ins jmptarget:$dst, cc:$cc),
+ "j$cc\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cc)]>;
+} // isBranch, isTerminator
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. SPW is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [R12W, R13W, R14W, R15W, SRW],
+ Uses = [SPW] in {
+ def CALLi : II16i<0x0,
+ (outs), (ins i16imm:$dst, variable_ops),
+ "call\t$dst", [(MSP430call imm:$dst)]>;
+ def CALLr : II16r<0x0,
+ (outs), (ins GR16:$dst, variable_ops),
+ "call\t$dst", [(MSP430call GR16:$dst)]>;
+ def CALLm : II16m<0x0,
+ (outs), (ins memsrc:$dst, variable_ops),
+ "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH16r : II16r<0x0,
+ (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// FIXME: Provide proper encoding!
+let neverHasSideEffects = 1 in {
+def MOV8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ []>;
+def MOV16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.w\t{$src, $dst}",
+ []>;
+}
+
+// FIXME: Provide proper encoding!
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, imm:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, (load addr:$src))]>;
+}
+
+def MOVZX16rr8 : I8rr<0x0,
+ (outs GR16:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext GR8:$src))]>;
+def MOVZX16rm8 : I8rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
+def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.b\t{@$base+, $dst}", []>;
+def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.w\t{@$base+, $dst}", []>;
+}
+
+// Any instruction that defines an 8-bit result zero-extends the value into
+// the full 16-bit register. Truncate can be lowered to EXTRACT_SUBREG, and
+// CopyFromReg may be copying from a truncate, so neither guarantees this, but
+// any other 8-bit operation will zero-extend up to 16 bits.
+def def8 : PatLeaf<(i8 GR8:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of an 8-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i16 (zext def8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
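+// SUBREG_TO_REG asserts that the rest of the 16-bit register is already zero,
+// so no explicit zero-extension instruction needs to be emitted here.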
+
+def MOV8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 imm:$src), addr:$dst)]>;
+
+def MOV8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "mov.w\t{$src, $dst}",
+ [(store GR16:$src, addr:$dst)]>;
+
+def MOV8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 (load addr:$src)), addr:$dst)]>;
+def MOV16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 (load addr:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Constraints = "$src = $dst" in {
+
+let Defs = [SRW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+
+def ADD8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADD16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def ADD8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADD16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "add.b\t{@$base+, $dst}", []>;
+def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "add.w\t{@$base+, $dst}", []>;
+}
+
+
+def ADD8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADD16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADD8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+
+let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
+def ADC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+} // isCommutable
+
+def ADC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def ADC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def AND16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def AND8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def AND16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def AND8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def AND16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "and.b\t{@$base+, $dst}", []>;
+def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "and.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def AND8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def AND16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def AND16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def AND16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
+def OR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
+}
+
+def OR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
+def OR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
+
+def OR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
+def OR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "bis.b\t{@$base+, $dst}", []>;
+def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "bis.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def OR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+def OR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
+
+def OR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def OR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
+
+def OR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
+def OR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
+}
+
+// bic does not modify condition codes
+def BIC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
+def BIC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
+
+def BIC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
+def BIC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
+
+let Constraints = "" in {
+def BIC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
+def BIC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
+
+def BIC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i8 (load addr:$src)))), addr:$dst)]>;
+def BIC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i16 (load addr:$src)))), addr:$dst)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def XOR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def XOR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def XOR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def XOR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def XOR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "xor.b\t{@$base+, $dst}", []>;
+def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "xor.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def XOR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+def SUB8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SUB16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SUB8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SUB16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SUB8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SUB16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "sub.b\t{@$base+, $dst}", []>;
+def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "sub.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def SUB8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+def SBC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SBC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SBC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SBC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SBC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SBC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def SBC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+// FIXME: memory variant!
+def SAR8r1 : II8r<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rra.b\t$dst",
+ [(set GR8:$dst, (MSP430rra GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1 : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rra.w\t$dst",
+ [(set GR16:$dst, (MSP430rra GR16:$src)),
+ (implicit SRW)]>;
+
+def SHL8r1 : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rla.b\t$dst",
+ [(set GR8:$dst, (MSP430rla GR8:$src)),
+ (implicit SRW)]>;
+def SHL16r1 : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rla.w\t$dst",
+ [(set GR16:$dst, (MSP430rla GR16:$src)),
+ (implicit SRW)]>;
+
+def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "clrc\n\t"
+ "rrc.b\t$dst",
+ [(set GR8:$dst, (MSP430rrc GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "clrc\n\t"
+ "rrc.w\t$dst",
+ [(set GR16:$dst, (MSP430rrc GR16:$src)),
+ (implicit SRW)]>;
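+// "clrc" followed by "rrc" yields a logical shift right by one: the cleared
+// carry bit is rotated into the MSB while the old LSB drops into carry.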
+
+// FIXME: Memory sext's ?
+def SEXT16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "sxt\t$dst",
+ [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+ (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+def ZEXT16r : I8rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
+
+// FIXME: Memory byte swaps?
+def SWPB16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "swpb\t$dst",
+ [(set GR16:$dst, (bswap GR16:$src))]>;
+
+} // Constraints = "$src = $dst"
+
+// Integer comparisons
+let Defs = [SRW] in {
+def CMP8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
+def CMP16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
+
+def CMP8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
+def CMP16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
+
+def CMP8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i8 imm:$src2)), (implicit SRW)]>;
+def CMP16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i16 imm:$src2)), (implicit SRW)]>;
+
+def CMP8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memsrc:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+def CMP16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memsrc:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+
+def CMP8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR8:$src2),
+ (implicit SRW)]>;
+def CMP16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR16:$src2),
+ (implicit SRW)]>;
+
+
+// BIT TESTS: these only set the condition codes.
+// Note that the C condition is set differently from when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
+ (implicit SRW)]>;
+}
+def BIT8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+def BIT16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memdst:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memdst:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mm : I8mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (i8 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+def BIT16mm : I16mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (i16 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+} // Defs = [SRW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+
+// extload
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
+
+// truncs
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, subreg_8bit)>;
+
+// GlobalAddress, ExternalSymbol
+def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
+def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
+
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src, tblockaddress:$src2)>;
+
+def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
+ (MOV16mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tblockaddress:$src)>;
+
+// calls
+def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
+ (CALLi tglobaladdr:$dst)>;
+def : Pat<(MSP430call (i16 texternalsym:$dst)),
+ (CALLi texternalsym:$dst)>;
+
+// add and sub always produce carry
+def : Pat<(addc GR16:$src, GR16:$src2),
+ (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+ (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+ (ADD16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
+ (ADD16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (ADD16mm addr:$dst, addr:$src)>;
+
+def : Pat<(addc GR8:$src, GR8:$src2),
+ (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+ (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+ (ADD8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
+ (ADD8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (ADD8mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR16:$src, GR16:$src2),
+ (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+ (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+ (SUB16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
+ (SUB16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (SUB16mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR8:$src, GR8:$src2),
+ (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+ (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+ (SUB8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
+ (SUB8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (SUB8mm addr:$dst, addr:$src)>;
+
+// peephole patterns
+def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
+ (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
new file mode 100644
index 000000000000..d1d9a1158635
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -0,0 +1,150 @@
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MSP430 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MSP430MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MSP430MCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
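+
+// For example, a global-address operand referring to @foo with an offset of 4
+// lowers to the expression "foo+4"; jump-table operands have no offset field,
+// hence the !MO.isJTI() guard above.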
+
+void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
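+
+// Typically the MSP430 asm printer drives this: it calls Lower() once per
+// MachineInstr and hands the resulting MCInst to the MC streamer for
+// printing or encoding.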
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
new file mode 100644
index 000000000000..e937696406fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCINSTLOWER_H
+#define MSP430_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+
+ /// MSP430MCInstLower - This class is used to lower a MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
+ MCContext &Ctx;
+ Mangler &Mang;
+
+ AsmPrinter &Printer;
+public:
+ MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ : Ctx(ctx), Mang(mang), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
new file mode 100644
index 000000000000..383fd2e9821c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -0,0 +1,46 @@
+//===- MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares MSP430-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MACHINEFUNCTIONINFO_H
+#define MSP430MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MSP430MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private MSP430 target-specific information for each
+/// MachineFunction.
+class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+public:
+ MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+
+ explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
new file mode 100644
index 000000000000..1cc60bba3a55
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -0,0 +1,254 @@
+//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-reg-info"
+
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+// FIXME: Provide proper call frame setup / destroy opcodes.
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : MSP430GenRegisterInfo(), TM(tm), TII(tii) {
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
+}
+
+const unsigned*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const Function* F = MF->getFunction();
+ static const unsigned CalleeSavedRegs[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsIntr[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsIntrFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+
+ if (TFI->hasFP(*MF))
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
+ else
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntr : CalleeSavedRegs);
+
+}
+
+BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Mark 4 special registers with subregisters as reserved.
+ Reserved.set(MSP430::PCB);
+ Reserved.set(MSP430::SPB);
+ Reserved.set(MSP430::SRB);
+ Reserved.set(MSP430::CGB);
+ Reserved.set(MSP430::PCW);
+ Reserved.set(MSP430::SPW);
+ Reserved.set(MSP430::SRW);
+ Reserved.set(MSP430::CGW);
+
+ // Mark frame pointer as reserved if needed.
+ if (TFI->hasFP(MF))
+ Reserved.set(MSP430::FPW);
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &MSP430::GR16RegClass;
+}
+
+void MSP430RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
+ // adjcallstackup instruction into an 'add SPW, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
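+ // (E.g. with a 2-byte stack alignment, an Amount of 5 rounds up to 6.)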
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // Skip the saved PC
+ Offset += 2;
+
+ if (!TFI->hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 2; // Skip the saved FPW
+
+ // Fold imm into offset
+ Offset += MI.getOperand(i+1).getImm();
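+ // For instance, with a frame pointer, an object whose MachineFrameInfo
+ // offset is -6 ends up at -6 + 2 (saved PC) + 2 (saved FPW) = -2, i.e. it
+ // is addressed as -2(r4).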
+
+ if (MI.getOpcode() == MSP430::ADD16ri) {
+ // This is actually "load effective address" of the stack slot
+ // instruction. We have only two-address instructions, thus we need to
+ // expand it into mov + add
+
+ MI.setDesc(TII.get(MSP430::MOV16rr));
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ if (Offset == 0)
+ return;
+
+ // We need to materialize the offset via add instruction.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (Offset < 0)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ .addReg(DstReg).addImm(-Offset);
+ else
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ .addReg(DstReg).addImm(Offset);
+
+ return;
+ }
+
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+void
+MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the FPW register that must be saved.
+ if (TFI->hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ }
+}
+
+unsigned MSP430RegisterInfo::getRARegister() const {
+ return MSP430::PCW;
+}
+
+unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+}
+
+int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("Not implemented yet!");
+ return 0;
+}
+
+int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("Not implemented yet!");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
new file mode 100644
index 000000000000..fb70594ab37c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -0,0 +1,71 @@
+//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
+#define LLVM_TARGET_MSP430REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MSP430GenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class MSP430TargetMachine;
+
+struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+private:
+ MSP430TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+public:
+ MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
+
+ const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const {
+ // No sub-classes makes this really easy.
+ return A;
+ }
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
new file mode 100644
index 000000000000..d1c2e3f7915c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -0,0 +1,85 @@
+//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+}
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+ // Volatile registers
+ (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ // Frame pointer, sometimes allocable
+ FPB,
+ // Volatile, but not allocable
+ PCB, SPB, SRB, CGB)>;
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+ // Volatile registers
+ (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ // Frame pointer, sometimes allocable
+ FPW,
+ // Volatile, but not allocable
+ PCW, SPW, SRW, CGW)>
+{
+ let SubRegClasses = [(GR8 subreg_8bit)];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..24f45fa3881b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-selectiondag-info"
+#include "MSP430TargetMachine.h"
+using namespace llvm;
+
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
new file mode 100644
index 000000000000..fa8194830ff8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MSP430 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430SELECTIONDAGINFO_H
+#define MSP430SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
+ ~MSP430SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
new file mode 100644
index 000000000000..b58c50afb982
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -0,0 +1,32 @@
+//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430Subtarget.h"
+#include "MSP430.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MSP430GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MSP430Subtarget::MSP430Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS) :
+ MSP430GenSubtargetInfo(TT, CPU, FS) {
+ std::string CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
new file mode 100644
index 000000000000..1ce5f11fe1bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -0,0 +1,42 @@
+//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
+#define LLVM_TARGET_MSP430_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MSP430GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+
+class MSP430Subtarget : public MSP430GenSubtargetInfo {
+ bool ExtendedInsts;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the feature string, setting the specified
+ /// subtarget options. The definition of this function is auto-generated by
+ /// tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
new file mode 100644
index 000000000000..971f512141e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -0,0 +1,51 @@
+//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMSP430Target() {
+ // Register the target.
+ RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
+}
+
+MSP430TargetMachine::MSP430TargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ // FIXME: Check TargetData string.
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) { }
+
+
+bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createMSP430ISelDag(*this, OptLevel));
+ return false;
+}
+
+bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createMSP430BranchSelectionPass());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
new file mode 100644
index 000000000000..2a9eea0bcd82
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -0,0 +1,69 @@
+//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
+#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430FrameLowering.h"
+#include "MSP430SelectionDAGInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// MSP430TargetMachine
+///
+class MSP430TargetMachine : public LLVMTargetMachine {
+ MSP430Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
+ MSP430FrameLowering FrameLowering;
+
+public:
+ MSP430TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MSP430TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // MSP430TargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
new file mode 100644
index 000000000000..f9ca5c49c979
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMSP430Target;
+
+extern "C" void LLVMInitializeMSP430TargetInfo() {
+ RegisterTarget<Triple::msp430>
+ X(TheMSP430Target, "msp430", "MSP430 [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
new file mode 100644
index 000000000000..46c687b64001
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -0,0 +1,235 @@
+//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unified name mangler for assembly backends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/Mangler.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+static bool isAcceptableChar(char C, bool AllowPeriod) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '@' &&
+ !(AllowPeriod && C == '.'))
+ return false;
+ return true;
+}
+
+static char HexDigit(int V) {
+ return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
+ OutName.push_back('_');
+ OutName.push_back(HexDigit(C >> 4));
+ OutName.push_back(HexDigit(C & 15));
+ OutName.push_back('_');
+}
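+
+// For example, MangleLetter(OutName, '"') appends "_22_", since '"' is 0x22.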
+
+/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
+/// for this assembler.
+static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If the first character is a number and the target does not allow this, we
+ // need quotes.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
+ return true;
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i], AllowPeriod))
+ return true;
+ return false;
+}
+
+/// appendMangledName - Add the specified string in mangled form if it uses
+/// any unusual characters.
+static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
+ const MCAsmInfo &MAI) {
+ // The first character is not allowed to be a number unless the target
+ // explicitly allows it.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
+ MangleLetter(OutName, Str[0]);
+ Str = Str.substr(1);
+ }
+
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (!isAcceptableChar(Str[i], AllowPeriod))
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// appendMangledQuotedName - On systems that support quoted symbols, we still
+/// have to escape some (obscure) characters like " and \n which would break the
+/// assembler's lexing.
+static void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
+ StringRef Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] == '"' || Str[i] == '\n')
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified name as the global variable name. GVName must not be
+/// empty.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const Twine &GVName, ManglerPrefixTy PrefixTy) {
+ SmallString<256> TmpData;
+ StringRef Name = GVName.toStringRef(TmpData);
+ assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
+
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+
+ // If the global name is not prefixed with \1, add the appropriate prefixes.
+ if (Name[0] == '\1') {
+ Name = Name.substr(1);
+ } else {
+ if (PrefixTy == Mangler::Private) {
+ const char *Prefix = MAI.getPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ } else if (PrefixTy == Mangler::LinkerPrivate) {
+ const char *Prefix = MAI.getLinkerPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ }
+
+ const char *Prefix = MAI.getGlobalPrefix();
+ if (Prefix[0] == 0)
+ ; // Common noop, no prefix.
+ else if (Prefix[1] == 0)
+ OutName.push_back(Prefix[0]); // Common, one character prefix.
+ else
+ OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary length prefix.
+ }
+
+ // If this is a simple string that doesn't need escaping, just append it.
+ if (!NameNeedsEscaping(Name, MAI) ||
+ // If quotes are supported, they can be used unless the string contains
+ // a quote or newline.
+ (MAI.doesAllowQuotesInName() &&
+ Name.find_first_of("\n\"") == StringRef::npos)) {
+ OutName.append(Name.begin(), Name.end());
+ return;
+ }
+
+ // On systems that do not allow quoted names, we need to mangle most
+ // strange characters.
+ if (!MAI.doesAllowQuotesInName())
+ return appendMangledName(OutName, Name, MAI);
+
+ // Okay, the system allows quoted strings. We can quote most anything, the
+ // only characters that need escaping are " and \n.
+ assert(Name.find_first_of("\n\"") != StringRef::npos);
+ return appendMangledQuotedName(OutName, Name);
+}
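+
+// For example, on a target whose global prefix is '_', a global named "foo"
+// becomes "_foo". A name containing a space is either left for the
+// assembler's quoting support or has the space mangled to "_20_", depending
+// on MAI.doesAllowQuotesInName().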
+
+/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
+/// a suffix on their name indicating the number of words of arguments they
+/// take.
+static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
+ const Function *F, const TargetData &TD) {
+ // Calculate arguments size total.
+ unsigned ArgWords = 0;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ const Type *Ty = AI->getType();
+ // 'Dereference' type in case of byval parameter attribute
+ if (AI->hasByValAttr())
+ Ty = cast<PointerType>(Ty)->getElementType();
+ // Size should be aligned to DWORD boundary
+ ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
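+ // For example, a fastcall/stdcall function taking two 32-bit ints ends up
+ // with ArgWords == 8 and hence the suffix "@8".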
+
+ raw_svector_ostream(OutName) << '@' << ArgWords;
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified global variable's name. If the global variable doesn't
+/// have a name, this fills in a unique name for the global.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const GlobalValue *GV,
+ bool isImplicitlyPrivate) {
+ ManglerPrefixTy PrefixTy = Mangler::Default;
+ if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ PrefixTy = Mangler::Private;
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+ GV->hasLinkerPrivateWeakDefAutoLinkage())
+ PrefixTy = Mangler::LinkerPrivate;
+
+ // If this global has a name, handle it simply.
+ if (GV->hasName()) {
+ getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+ } else {
+ // Get the ID for the global, assigning a new one if we haven't got one
+ // already.
+ unsigned &ID = AnonGlobalIDs[GV];
+ if (ID == 0) ID = NextAnonGlobalID++;
+
+ // Must mangle the global into a unique ID.
+ getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
+ }
+
+ // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
+ // add it.
+ if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ CallingConv::ID CC = F->getCallingConv();
+
+ // fastcall functions need to start with @.
+ // FIXME: This logic seems unlikely to be right.
+ if (CC == CallingConv::X86_FastCall) {
+ if (OutName[0] == '_')
+ OutName[0] = '@';
+ else
+ OutName.insert(OutName.begin(), '@');
+ }
+
+ // fastcall and stdcall functions usually need @42 at the end to specify
+ // the argument info.
+ const FunctionType *FT = F->getFunctionType();
+ if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ // "Pure" variadic functions do not receive @0 suffix.
+ (!FT->isVarArg() || FT->getNumParams() == 0 ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr())))
+ AddFastCallStdCallSuffix(OutName, F, TD);
+ }
+ }
+}
+
+/// getSymbol - Return the MCSymbol for the specified global value. This
+/// symbol is the main label that is the address of the global.
+MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
+ SmallString<60> NameStr;
+ getNameWithPrefix(NameStr, GV, false);
+ return Context.GetOrCreateSymbol(NameStr.str());
+}
+
+
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..8852fd4126e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsAsmPrinter
+ MipsInstPrinter.cpp
+ )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile
new file mode 100644
index 000000000000..63e38ef3e6aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmPrinter
+
+# Hack: we need to include the 'main' Mips target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
new file mode 100644
index 000000000000..41c1dd3919b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -0,0 +1,127 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MipsInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "MipsGenAsmWriter.inc"
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+ switch (CC) {
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_OEQ:
+ case FCOND_UNE: return "eq";
+ case FCOND_UEQ:
+ case FCOND_ONE: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "nge";
+ case FCOND_LE:
+ case FCOND_NLE: return "le";
+ case FCOND_NGT:
+ case FCOND_GT: return "ngt";
+ }
+ llvm_unreachable("Impossible condition code!");
+}
+
+StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << '$' << LowercaseString(getRegisterName(RegNo));
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // For PIC targets, the call target is loaded with a pattern such as
+ //   lw $25, %call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
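+
+// E.g. an operand pair (register $sp, immediate 16) is printed as "16($sp)".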
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+ // When a stack location is used by a non-load/store instruction, print it
+ // the same way as a normal three-operand instruction.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ const MCOperand& MO = MI->getOperand(opNum);
+ O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
new file mode 100644
index 000000000000..680208eb819b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -0,0 +1,100 @@
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTPRINTER_H
+#define MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were originally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_OEQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // These conditions have the same mnemonics as the ones above,
+ // but are used with a branch-on-false.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_UNE,
+ FCOND_ONE,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class TargetMachine;
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+ MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getInstructionName(unsigned Opcode);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..97de75db5347
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMipsDesc
+ MipsMCTargetDesc.cpp
+ MipsMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..7fe2086a6e00
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
new file mode 100644
index 000000000000..5d9242500f6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -0,0 +1,38 @@
+//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::mips)
+ IsLittleEndian = false;
+
+ AlignmentIsInBytes = false;
+ Data16bitsDirective = "\t.2byte\t";
+ Data32bitsDirective = "\t.4byte\t";
+ Data64bitsDirective = 0;
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ GPRel32Directive = "\t.gpword\t";
+ WeakRefDirective = "\t.weak\t";
+
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ HasLEB128 = true;
+ DwarfRegNumForCFI = true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
new file mode 100644
index 000000000000..41b719207b7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MipsMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MipsMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
new file mode 100644
index 000000000000..06f0d0bfb6b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCTargetDesc.h"
+#include "MipsMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MipsGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMipsMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMipsMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMipsMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMipsMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMipsMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
+ createMipsMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMipsMCAsmInfo() {
+ RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget);
+ RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
new file mode 100644
index 000000000000..3d18f114c8bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMipsTarget;
+extern Target TheMipselTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MipsGenRegisterInfo.inc"
+
+// Defines symbolic names for the Mips instructions.
+#define GET_INSTRINFO_ENUM
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MipsGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
new file mode 100644
index 000000000000..984b5adfc5f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -0,0 +1,34 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
new file mode 100644
index 000000000000..433cd57f34e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -0,0 +1,101 @@
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the Mips target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+
+def MipsInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Mips Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
+ "General Purpose Registers are 64-bit wide.">;
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+ "Support 64-bit FP registers.">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+ "true", "Only supports single precision float">;
+def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+ "Enable o32 ABI">;
+def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+ "Enable eabi ABI">;
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+ "true", "Enable vector FPU instructions.">;
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+ "Enable 'signext in register' instructions.">;
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+ "Enable 'conditional move' instructions.">;
+def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
+ "Enable 'multiply add/sub' instructions.">;
+def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
+ "Enable 'min/max' instructions.">;
+def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
+ "Enable 'byte/half swap' instructions.">;
+def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
+ "Enable 'count leading bits' instructions.">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+ "Mips1 ISA Support">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+ "Mips2 ISA Support">;
+def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
+ "Mips32 ISA Support",
+ [FeatureCondMov, FeatureBitCount]>;
+def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
+ "Mips32r2", "Mips32r2 ISA Support",
+ [FeatureMips32, FeatureSEInReg]>;
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips1", [FeatureMips1]>;
+def : Proc<"r2000", [FeatureMips1]>;
+def : Proc<"r3000", [FeatureMips1]>;
+
+def : Proc<"mips2", [FeatureMips2]>;
+def : Proc<"r6000", [FeatureMips2]>;
+
+def : Proc<"4ke", [FeatureMips32r2]>;
+
+// Allegrex is a 32-bit subset of r4000, both for integer and fp registers,
+// but much more similar to Mips2 than Mips3. It also contains some of the
+// Mips32/Mips32r2 instructions and a custom vector FPU processor.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
+ FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
+ FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+
+def MipsAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def Mips : Target {
+ let InstructionSet = MipsInstrInfo;
+
+ let AssemblyWriters = [MipsAsmWriter];
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
new file mode 100644
index 000000000000..69e03bd29724
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -0,0 +1,440 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+#include "MipsAsmPrinter.h"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+using namespace llvm;
+
+void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ if (MI->isDebugValue()) {
+ PrintDebugValueComment(MI, OS);
+ return;
+ }
+
+ MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+ MCInst TmpInst0;
+ MCInstLowering.Lower(MI, TmpInst0);
+ OutStreamer.EmitInstruction(TmpInst0);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Mips Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "(f)mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows register 31 (RA) is saved at prologue).
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located (e.g. with a -8
+// offset, see the example below).
+// Consider the following function prologue:
+//
+// .frame $fp,48,$ra
+// .mask 0xc0000000,-8
+// addiu $sp, $sp, -48
+// sw $ra, 40($sp)
+// sw $fp, 36($sp)
+//
+// With a 0xc0000000 mask, the assembler knows registers 31 (RA) and
+// 30 (FP) are saved at prologue. As the save order on prologue is from
+// left to right, RA is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (RA) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
+ // CPU and FPU Saved Registers Bitmasks
+ unsigned CPUBitmask = 0, FPUBitmask = 0;
+ int CPUTopSavedRegOff, FPUTopSavedRegOff;
+
+ // Set the CPU and FPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ // size of stack area to which FP callee-saved regs are saved.
+ unsigned CPURegSize = Mips::CPURegsRegisterClass->getSize();
+ unsigned FGR32RegSize = Mips::FGR32RegisterClass->getSize();
+ unsigned AFGR64RegSize = Mips::AFGR64RegisterClass->getSize();
+ bool HasAFGR64Reg = false;
+ unsigned CSFPRegsSize = 0;
+ unsigned i, e = CSI.size();
+
+ // Set FPU Bitmask.
+ for (i = 0; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegisterClass->contains(Reg))
+ break;
+
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ FPUBitmask |= (3 << RegNum);
+ CSFPRegsSize += AFGR64RegSize;
+ HasAFGR64Reg = true;
+ continue;
+ }
+
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ }
+
+ // Set CPU Bitmask.
+ for (; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // FP Regs are saved right below where the virtual frame pointer points to.
+ FPUTopSavedRegOff = FPUBitmask ?
+ (HasAFGR64Reg ? -AFGR64RegSize : -FGR32RegSize) : 0;
+
+ // CPU Regs are saved below FP Regs.
+ CPUTopSavedRegOff = CPUBitmask ? -CSFPRegsSize - CPURegSize : 0;
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask, O);
+ O << ',' << CPUTopSavedRegOff << '\n';
+
+ // Print FPUBitmask
+ O << "\t.fmask\t"; printHex32(FPUBitmask, O);
+ O << "," << FPUTopSavedRegOff << '\n';
+}
+
+// Print a 32-bit hex number, padded with leading zeros to eight digits.
+void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
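+
+// A brief illustration of the zero padding above: printHex32(5, O) emits
+// "0x00000005" and printHex32(0x80000001, O) emits "0x80000001".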
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::emitFrameDirective() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(*MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+ OutStreamer.EmitRawText("\t.frame\t$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+ "," + Twine(stackSize) + ",$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
+}
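+
+// As a sketch of the output: for a function with a 40-byte frame and no
+// dedicated frame pointer, the directive emitted above would read
+// .frame $sp,40,$ra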
+
+/// getCurrentABIString - Return the ABI string used in the .mdebug section name.
+const char *MipsAsmPrinter::getCurrentABIString() const {
+ switch (Subtarget->getTargetABI()) {
+ case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::O64: return "abiO64";
+ case MipsSubtarget::N32: return "abiN32";
+ case MipsSubtarget::N64: return "abi64";
+ case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ default: break;
+ }
+
+ llvm_unreachable("Unknown Mips ABI");
+ return NULL;
+}
+
+void MipsAsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyStart() {
+ emitFrameDirective();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printSavedRegsBitmask(OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyEnd() {
+ // There are instructions for these macros, but they must
+ // always be at the function end, and we can't emit them
+ // inside the basic block logic.
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const {
+ // If this is a landing pad, it isn't a fall through.  If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // Only dereference the single predecessor after the emptiness check above.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+
+ return !I->getDesc().isBarrier();
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ return false;
+}
+
+void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ bool closeP = false;
+
+ if (MO.getTargetFlags())
+ closeP = true;
+
+ switch(MO.getTargetFlags()) {
+ case MipsII::MO_GPREL: O << "%gp_rel("; break;
+ case MipsII::MO_GOT_CALL: O << "%call16("; break;
+ case MipsII::MO_GOT: O << "%got("; break;
+ case MipsII::MO_ABS_HI: O << "%hi("; break;
+ case MipsII::MO_ABS_LO: O << "%lo("; break;
+ case MipsII::MO_TLSGD: O << "%tlsgd("; break;
+ case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
+ case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
+ case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break;
+ }
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << '$'
+ << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ O << BA->getName();
+ break;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ if (closeP) O << ")";
+}
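+
+// A small example of the flag handling above: a global-address operand for a
+// symbol foo carrying the MipsII::MO_ABS_HI flag is printed as "%hi(foo)"
+// (assuming the mangler adds no prefix), while a plain immediate 16 is
+// printed simply as "16".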
+
+void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // On PIC targets the address is loaded with a
+ // pattern like lw $25,%call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
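+
+// For example, a load whose memory operands are an offset of 8 and a base
+// register $sp is printed as "8($sp)".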
+
+void MipsAsmPrinter::
+printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // When using stack locations for non-load/store instructions,
+ // print them the same way as normal 3-operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ // FIXME: Use SwitchSection.
+
+ // Tell the assembler which ABI we are using
+ OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+
+ // TODO: handle O64 ABI
+ if (Subtarget->isABI_EABI()) {
+ if (Subtarget->isGP32bit())
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ }
+
+ // return to previous section
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+}
+
+MachineLocation
+MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ // TODO: implement
+}
+
+// Force static initialization.
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ return new MipsInstPrinter(MAI);
+}
+
+extern "C" void LLVMInitializeMipsAsmPrinter() {
+ RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
new file mode 100644
index 000000000000..16461ff1fbb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -0,0 +1,71 @@
+//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mips Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSASMPRINTER_H
+#define MIPSASMPRINTER_H
+
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineInstr;
+class raw_ostream;
+class MachineBasicBlock;
+class Module;
+
+class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+ const MipsSubtarget *Subtarget;
+
+public:
+ explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ void EmitInstruction(const MachineInstr *MI);
+ void printSavedRegsBitmask(raw_ostream &O);
+ void printHex32(unsigned int Value, raw_ostream &O);
+ void emitFrameDirective();
+ const char *getCurrentABIString() const;
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void EmitStartOfAsmFile(Module &M);
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 000000000000..876f0fcc83ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,86 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+ // i32 are returned in registers V0, V1, A0, A1
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in registers D0, D1
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ // Single fp arguments are passed in pairs within 32-bit mode
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+ // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+ // i32 and f32 values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // f64 values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
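+
+// To illustrate the integer rules above: the first eight i32 arguments are
+// passed in A0-A3 and T0-T3, and any further i32 argument is placed in a
+// 4-byte aligned stack slot.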
+
+def RetCC_MipsEABI : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+]>;
+
+def RetCC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCDelegateTo<RetCC_MipsO32>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 000000000000..c3a6211399cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,82 @@
+//===-- MipsDelaySlotFiller.cpp - Mips delay slot filler ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fill delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ const MCInstrDesc& MCid = I->getDesc();
+ if (MCid.hasDelaySlot() &&
+ (TM.getSubtarget<MipsSubtarget>().isMips1() ||
+ MCid.isCall() || MCid.isBranch() || MCid.isReturn())) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
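+
+// As an illustration of the transformation above: a block ending in
+// jal foo
+// is rewritten so its delay slot is explicitly filled:
+// jal foo
+// nop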
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp b/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp
new file mode 100644
index 000000000000..03d922fe7cd6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -0,0 +1,94 @@
+//===-- MipsEmitGPRestore.cpp - Emit GP restore instruction ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass emits instructions that restore $gp right
+// after jalr instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "emit-gp-restore"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct Inserter : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Inserter(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Emit GP Restore";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char Inserter::ID = 0;
+} // end of anonymous namespace
+
+bool Inserter::runOnMachineFunction(MachineFunction &F) {
+ if (TM.getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ bool Changed = false;
+ int FI = F.getInfo<MipsFunctionInfo>()->getGPFI();
+
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI) {
+ MachineBasicBlock& MBB = *MFI;
+ MachineBasicBlock::iterator I = MFI->begin();
+
+ // If MBB is a landing pad, insert an instruction that restores $gp after
+ // the EH_LABEL.
+ if (MBB.isLandingPad()) {
+ // Find EH_LABEL first.
+ for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ;
+
+ // Insert lw.
+ ++I;
+ DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
+ Changed = true;
+ }
+
+ while (I != MFI->end()) {
+ if (I->getOpcode() != Mips::JALR) {
+ ++I;
+ continue;
+ }
+
+ DebugLoc dl = I->getDebugLoc();
+ // emit lw $gp, ($gp save slot on stack) after jalr
+ BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// createMipsEmitGPRestorePass - Returns a pass that emits instructions that
+/// restore $gp clobbered by jalr instructions.
+FunctionPass *llvm::createMipsEmitGPRestorePass(MipsTargetMachine &tm) {
+ return new Inserter(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
new file mode 100644
index 000000000000..a622258a4dcb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,117 @@
+//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands pseudo instructions into target instructions after register
+// allocation but before post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-expand-pseudo"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct MipsExpandPseudo : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ MipsExpandPseudo(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips PseudoInstrs Expansion";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ private:
+ void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);
+ void ExpandExtractElementF64(MachineBasicBlock&,
+ MachineBasicBlock::iterator);
+ };
+ char MipsExpandPseudo::ID = 0;
+} // end of anonymous namespace
+
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)
+ Changed |= runOnMachineBasicBlock(*I);
+
+ return Changed;
+}
+
+bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ const MCInstrDesc& MCid = I->getDesc();
+
+ switch(MCid.getOpcode()) {
+ default:
+ ++I;
+ continue;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, I);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, I);
+ break;
+ }
+
+ // delete original instr
+ MBB.erase(I++);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg =
+ TM.getRegisterInfo()->getSubRegisters(DstReg);
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);
+}
+
+void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
+}
+
+/// createMipsExpandPseudoPass - Returns a pass that expands pseudo
+/// instructions into real instructions.
+FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
+ return new MipsExpandPseudo(tm);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
new file mode 100644
index 000000000000..a0f90a0d487a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -0,0 +1,331 @@
+//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow upwards! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer subtracted/added from the stack size
+// at the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: (stacksize+(4*(num_arg-1)))
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area variable there is something like: FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load of the 5th argument: FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown in LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, in eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
+ || MFI->isFrameAddressTaken();
+}
+
+bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
+ return true;
+}
+
+static unsigned AlignOffset(unsigned Offset, unsigned Align) {
+ return (Offset + Align - 1) / Align * Align;
+}
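+
+// For example, AlignOffset(13, 8) and AlignOffset(16, 8) both return 16.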
+
+// Expand a pair of register and immediate if the immediate doesn't fit in the
+// 16-bit offset field.
+// e.g.
+// if OrigImm = 0x10000, OrigReg = $sp:
+// generate the following sequence of instructions:
+// lui $at, hi(0x10000)
+// addu $at, $sp, $at
+//
+// (NewReg, NewImm) = ($at, lo(0x10000))
+// return true
+static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
+ unsigned& NewReg, int& NewImm,
+ MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ // OrigImm fits in the 16-bit field
+ if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+ NewReg = OrigReg;
+ NewImm = OrigImm;
+ return false;
+ }
+
+ MachineFunction* MF = MBB.getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ DebugLoc DL = I->getDebugLoc();
+ int ImmLo = (short)(OrigImm & 0xffff);
+ int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+ ((OrigImm & 0x8000) != 0);
+
+ // FIXME: change this when mips goes MC.
+ BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+ .addReg(Mips::AT);
+ NewReg = Mips::AT;
+ NewImm = ImmLo;
+
+ return true;
+}
+
+void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed;
+
+ // First, compute final stack size.
+ unsigned RegSize = STI.isGP32bit() ? 4 : 8;
+ unsigned StackAlign = getStackAlignment();
+ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
+ (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
+ MipsFI->getMaxCallFrameSize();
+ unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) +
+ AlignOffset(MFI->getStackSize(), StackAlign);
+
+ // Update stack size
+ MFI->setStackSize(StackSize);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+ // TODO: check need from GP here.
+ if (isPIC && STI.isABI_O32())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
+ .addReg(RegInfo->getPICCallReg());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack: addiu sp, sp, (-imm)
+ ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
+ MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(NewReg).addImm(NewImm);
+
+ // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(*SubRegs);
+ MachineLocation SrcML1(*(SubRegs + 1));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ }
+ else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+ // If the frame pointer is enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(Mips::FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+
+ // Restore GP from the saved stack location
+ if (MipsFI->needGPSaveRestore())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+ .addImm(MFI->getObjectOffset(MipsFI->getGPFI()));
+}
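+
+// A rough sketch of the prologue emitted above for an O32 PIC function with
+// a 32-byte frame and no frame pointer (exact directive spelling comes from
+// the asm printer):
+// .set noreorder
+// .cpload $25
+// .set nomacro
+// addiu $sp, $sp, -32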
+
+void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ unsigned StackSize = MFI->getStackSize();
+
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed = false;
+
+ // If the frame pointer is enabled, restore the stack pointer from it.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+ }
+
+ // Adjust stack: insert addiu sp, sp, (imm)
+ if (StackSize) {
+ ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB,
+ MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(NewReg).addImm(NewImm);
+
+ // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+ }
+}
+
+void
+MipsFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Mips::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void MipsFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo& MRI = MF.getRegInfo();
+
+ // FIXME: remove this code if register allocator can correctly mark
+ // $fp and $ra used or unused.
+
+ // Mark $fp and $ra as used or unused.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(Mips::FP);
+
+ // The register allocator might determine $ra is used after seeing
+ // instruction "jr $ra", but we do not want PrologEpilogInserter to insert
+ // instructions to save/restore $ra unless there is a function call.
+ // To correct this, $ra is explicitly marked unused if there is no
+ // function call.
+ if (MF.getFrameInfo()->hasCalls())
+ MRI.setPhysRegUsed(Mips::RA);
+ else
+ MRI.setPhysRegUnused(Mips::RA);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
new file mode 100644
index 000000000000..78c78eea5b9a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -0,0 +1,50 @@
+//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MipsSubtarget;
+
+class MipsFrameLowering : public TargetFrameLowering {
+protected:
+ const MipsSubtarget &STI;
+
+public:
+ explicit MipsFrameLowering(const MipsSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ bool targetHandlesStackFrameRounding() const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 000000000000..90aaeb60d06f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,482 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MipsTargetMachine.
+ MipsTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MipsTargetMachine &getTargetMachine() {
+ return static_cast<const MipsTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MipsInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoadFp64(SDNode *N);
+ SDNode *SelectStoreFp64(SDNode *N);
+
+ // getI32Imm - Return a target constant with the specified
+ // value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsDAGToDAGISel::
+SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // On PIC code, load the GA (global address).
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Addr.getOpcode() == MipsISD::WrapperPIC) {
+ Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
+ Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Offset = Addr;
+ return true;
+ }
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. For example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
+ Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
+ Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ SDValue LoVal = Addr.getOperand(1);
+ if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
+ isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
+ MVT::SimpleValueType NVT =
+ N->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (!Subtarget.isMips1() || NVT != MVT::f64)
+ return NULL;
+
+ LoadSDNode *LN = cast<LoadSDNode>(N);
+ if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
+ LN->getAddressingMode() != ISD::UNINDEXED)
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Offset0, Offset1, Base;
+
+ if (!SelectAddr(N1, Base, Offset0) ||
+ N1.getValueType() != MVT::i32)
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ DebugLoc dl = N->getDebugLoc();
+
+ // The second load should start 4 bytes after the first.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
+ Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
+ else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
+ Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
+ MVT::i32,
+ CP->getAlignment(),
+ CP->getOffset()+4,
+ CP->getTargetFlags());
+ else
+ return NULL;
+
+ // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
+ // Instead of:
+ // ldc $f0, X($3)
+ // Generate:
+ // lwc $f0, X($3)
+ // lwc $f1, X+4($3)
+ SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+ MVT::Other, Base, Offset0, Chain);
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
+ MVT::f64, Undef, SDValue(LD0, 0));
+
+ SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+ MVT::Other, Base, Offset1, SDValue(LD0, 1));
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
+ MVT::f64, I0, SDValue(LD1, 0));
+
+ ReplaceUses(SDValue(N, 0), I1);
+ ReplaceUses(SDValue(N, 1), Chain);
+ cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ return I1.getNode();
+}
+
+SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
+
+ if (!Subtarget.isMips1() ||
+ N->getOperand(1).getValueType() != MVT::f64)
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
+ return NULL;
+
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue Offset0, Offset1, Base;
+
+ if (!SelectAddr(N2, Base, Offset0) ||
+ N1.getValueType() != MVT::f64 ||
+ N2.getValueType() != MVT::i32)
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the even and odd part from the f64 register
+ SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
+ dl, MVT::f32, N1);
+ SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
+ dl, MVT::f32, N1);
+
+  // The second store starts 4 bytes after the first.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
+ Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
+ else
+ return NULL;
+
+  // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
+ // Instead of:
+ // sdc $f0, X($3)
+ // Generate:
+ // swc $f0, X($3)
+ // swc $f1, X+4($3)
+ SDValue Ops0[] = { FPEven, Base, Offset0, Chain };
+ Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
+ MVT::Other, Ops0, 4), 0);
+ cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ SDValue Ops1[] = { FPOdd, Base, Offset1, Chain };
+ Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
+ MVT::Other, Ops1, 4), 0);
+ cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ ReplaceUses(SDValue(N, 0), Chain);
+ return Chain.getNode();
+}
+
+/// Select instructions that are not handled by custom lowering. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL;
+ }
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
+
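+    // MIPS has no add/subtract-with-carry instruction. Materialize the carry
+    // (or borrow) of the first ADDC/SUBC with SLTu, add it to the right-hand
+    // operand, and feed that sum into the final ADDu/SUBu.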
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
+
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
+ LHS, SDValue(AddCarry,0));
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op;
+ Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+
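+    // MULT/MULTu write the full 64-bit product into the HI/LO register pair;
+    // read the two halves back with MFLO and MFHI, glued to the multiply.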
+ SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+
+ SDValue InFlag = SDValue(Mul, 0);
+ SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
+ MVT::Glue, InFlag);
+ InFlag = SDValue(Lo,1);
+ SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
+
+ return NULL;
+ }
+
+ /// Special Muls
+ case ISD::MUL:
+ if (Subtarget.isMips32())
+ break;
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ SDValue MulOp1 = Node->getOperand(0);
+ SDValue MulOp2 = Node->getOperand(1);
+
+ unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
+ MVT::Glue, MulOp1, MulOp2);
+
+ SDValue InFlag = SDValue(MulNode, 0);
+
+ if (Opcode == ISD::MUL)
+ return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ else
+ return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ }
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
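+      // Materialize an f64 +0.0 by moving $zero into an FPU register with MTC1
+      // and using that 32-bit value for both the even and odd halves.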
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO, MVT::i32);
+ SDValue Undef = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
+ SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
+ MVT::f64, Undef, SDValue(MTC, 0));
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
+ MVT::f64, I0, SDValue(MTC, 0));
+ ReplaceUses(SDValue(Node, 0), I1);
+ return I1.getNode();
+ }
+ break;
+ }
+
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectLoadFp64(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+
+ case ISD::STORE:
+ if (SDNode *ResNode = SelectStoreFp64(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+
+ case MipsISD::ThreadPointer: {
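+    // Read the thread pointer with RDHWR from hardware register $29
+    // (UserLocal) and route the result through register $v1.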
+ unsigned SrcReg = Mips::HWR29;
+ unsigned DestReg = Mips::V1;
+ SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
+ Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return ResNode.getNode();
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ return new MipsDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 000000000000..b4f4b1b4bf04
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,2371 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MipsISD::JmpLink: return "MipsISD::JmpLink";
+ case MipsISD::Hi: return "MipsISD::Hi";
+ case MipsISD::Lo: return "MipsISD::Lo";
+ case MipsISD::GPRel: return "MipsISD::GPRel";
+ case MipsISD::TlsGd: return "MipsISD::TlsGd";
+ case MipsISD::TprelHi: return "MipsISD::TprelHi";
+ case MipsISD::TprelLo: return "MipsISD::TprelLo";
+ case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
+ case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
+ case MipsISD::FPCmp: return "MipsISD::FPCmp";
+ case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
+ case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
+ case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::MAdd: return "MipsISD::MAdd";
+ case MipsISD::MAddu: return "MipsISD::MAddu";
+ case MipsISD::MSub: return "MipsISD::MSub";
+ case MipsISD::MSubu: return "MipsISD::MSubu";
+ case MipsISD::DivRem: return "MipsISD::DivRem";
+ case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
+ case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
+ case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
+ default: return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM)
+ : TargetLowering(TM, new MipsTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+ // Mips does not have i1 type, so use i32 for
+  // setcc operation results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat())
+ if (!Subtarget->isFP64bit())
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+
+  // Load extended operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // MIPS doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with an AND/OR of the result,
+  // which we don't want, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+ // Mips Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ // Operations not directly supported by Mips.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+
+ if (!Subtarget->isMips32r2())
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ if (Subtarget->isSingleFloat())
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ if (!Subtarget->hasSEInReg()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
+
+ if (!Subtarget->hasBitCount())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ if (!Subtarget->hasSwap())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+ setTargetDAGCombine(ISD::SDIVREM);
+ setTargetDAGCombine(ISD::UDIVREM);
+ setTargetDAGCombine(ISD::SETCC);
+
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(Mips::SP);
+ computeRegisterProperties();
+
+ setExceptionPointerRegister(Mips::A0);
+ setExceptionSelectorRegister(Mips::A1);
+}
+
+MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+// SelectMadd -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+  // If there exist users other than ADDENode or ADDCNode, this function
+  // returns false here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = ADDENode->getDebugLoc();
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ADDCNode->getOperand(1),// Lo0
+ ADDENode->getOperand(1));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MAdd);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
+
+ if (!SDValue(ADDENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
+
+ return true;
+}
+
+// SelectMsub -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//   (subc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+  // SUBENode's second operand must be a flag output of a SUBC node in order
+ // for the matching to be successful.
+ SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+  // If there exist users other than SUBENode or SUBCNode, this function
+  // returns false here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = SUBENode->getDebugLoc();
+
+ // create MipsSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ SUBCNode->getOperand(0),// Lo0
+ SUBENode->getOperand(0));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MSub);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
+
+ if (!SDValue(SUBENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
+
+ return true;
+}
+
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->isMips32() && SelectMadd(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->isMips32() && SelectMsub(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
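+// Combines (S|U)DIVREM into a MipsISD::DivRem(U) node that writes the HI/LO
+// register pair, then reads the quotient from LO and the remainder from HI,
+// but only for the values that are actually used.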
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
+ MipsISD::DivRemU;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ N->getOperand(0), N->getOperand(1));
+ SDValue InChain = DAG.getEntryNode();
+ SDValue InGlue = DivRem;
+
+ // insert MFLO
+ if (N->hasAnyUseOfValue(0)) {
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32,
+ InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
+ InChain = CopyFromLo.getValue(1);
+ InGlue = CopyFromLo.getValue(2);
+ }
+
+ // insert MFHI
+ if (N->hasAnyUseOfValue(1)) {
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ Mips::HI, MVT::i32, InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
+ }
+
+ return SDValue();
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_OEQ;
+ case ISD::SETUNE: return Mips::FCOND_UNE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_ONE;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
+
+
+// Returns true if condition code has to be inverted.
+static bool InvertFPCondCode(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return false;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return true;
+
+ assert(false && "Illegal Condition Code");
+ return false;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+ // must be a SETCC node
+ if (Op.getOpcode() != ISD::SETCC)
+ return Op;
+
+ SDValue LHS = Op.getOperand(0);
+
+ if (!LHS.getValueType().isFloatingPoint())
+ return Op;
+
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+ // node if necessary.
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+// Creates and returns a CMovFPT/F node.
+static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+ SDValue False, DebugLoc DL) {
+ bool invert = InvertFPCondCode((Mips::CondCode)
+ cast<ConstantSDNode>(Cond.getOperand(2))
+ ->getSExtValue());
+
+ return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
+ True.getValueType(), True, False, Cond);
+}
+
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return SDValue();
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+}
+
+SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned opc = N->getOpcode();
+
+ switch (opc) {
+ default: break;
+ case ISD::ADDE:
+ return PerformADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return PerformSUBECombine(N, DAG, DCI, Subtarget);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ case ISD::SETCC:
+ return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ }
+
+ return SDValue();
+}
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+// Get fp branch code (not opcode) from condition code.
+static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return Mips::BRANCH_T;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return Mips::BRANCH_F;
+
+ return Mips::BRANCH_INVALID;
+}
+
+static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
+ DebugLoc dl,
+ const MipsSubtarget* Subtarget,
+ const TargetInstrInfo *TII,
+ bool isFPCmp, unsigned Opc) {
+  // There is no need to expand CMov instructions if the target has
+ // conditional moves.
+ if (Subtarget->hasCondMov())
+ return BB;
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Emit the right instruction according to the type of the operands compared
+ if (isFPCmp)
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ else
+ BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
+ .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ if (isFPCmp)
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+ else
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+ default:
+ assert(false && "Unexpected instr type to insert");
+ return NULL;
+ case Mips::MOVT:
+ case Mips::MOVT_S:
+ case Mips::MOVT_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1F);
+ case Mips::MOVF:
+ case Mips::MOVF_S:
+ case Mips::MOVF_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1T);
+ case Mips::MOVZ_I:
+ case Mips::MOVZ_S:
+ case Mips::MOVZ_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BNE);
+ case Mips::MOVN_I:
+ case Mips::MOVN_S:
+ case Mips::MOVN_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BEQ);
+
+ case Mips::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+
+ case Mips::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+
+ case Mips::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+
+ case Mips::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+
+ case Mips::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, 0, true);
+
+ case Mips::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+
+ case Mips::ATOMIC_SWAP_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+ case Mips::ATOMIC_SWAP_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+ case Mips::ATOMIC_SWAP_I32:
+ return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case Mips::ATOMIC_CMP_SWAP_I8:
+ return EmitAtomicCmpSwapPartword(MI, BB, 1);
+ case Mips::ATOMIC_CMP_SWAP_I16:
+ return EmitAtomicCmpSwapPartword(MI, BB, 2);
+ case Mips::ATOMIC_CMP_SWAP_I32:
+ return EmitAtomicCmpSwap(MI, BB, 4);
+ }
+}
+
+// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
+// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned Oldval = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0)
+ // fallthrough --> loopMBB
+
+ // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before
+  // the loop and then loading it from the stack in block loopMBB is necessary
+  // to prevent the MachineLICM pass from hoisting the "or" instruction out of
+  // the block loopMBB.
+
+ int fi = 0;
+ if (BinOpcode == 0 && !Nand) {
+ // Get or create a temporary stack location.
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ fi = MipsFI->getAtomicFrameIndex();
+ if (fi == -1) {
+ fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+ MipsFI->setAtomicFrameIndex(fi);
+ }
+
+ BuildMI(BB, dl, TII->get(Mips::SW))
+ .addReg(Incr).addFrameIndex(fi).addImm(0);
+ }
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ll oldval, 0(ptr)
+ // or dest, $0, oldval
+ // <binop> tmp1, oldval, incr
+ // sc tmp1, 0(ptr)
+ // beq tmp1, $0, loopMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
+ if (Nand) {
+ // and tmp2, oldval, incr
+ // nor tmp1, $0, tmp2
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ } else if (BinOpcode) {
+ // <binop> tmp1, oldval, incr
+ BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
+ } else {
+ // lw tmp2, fi(sp) // load incr from stack
+ // or tmp1, $zero, tmp2
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ }
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicBinaryPartial.");
+
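+  // Byte and halfword atomics are emulated on the aligned word that contains
+  // them: compute the word-aligned address, a shift amount and a mask for the
+  // subword, apply the operation to the shifted operand, and merge the result
+  // back under the mask before the SC.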
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned Addr = RegInfo.createVirtualRegister(RC);
+ unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned Newval = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp10 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp11 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp12 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // addiu tmp1,$0,-4 # 0xfffffffc
+ // and addr,ptr,tmp1
+ // andi tmp2,ptr,3
+ // sll shift,tmp2,3
+ // ori tmp3,$0,255 # 0xff
+ // sll mask,tmp3,shift
+ // nor mask2,$0,mask
+ // andi tmp4,incr,255
+ // sll incr2,tmp4,shift
+ // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0)
+
+ // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before
+  // the loop and then loading it from the stack in block loopMBB is necessary
+  // to prevent the MachineLICM pass from hoisting the "or" instruction out of
+  // the block loopMBB.
+
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ if (BinOpcode != Mips::SUBu) {
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift);
+ } else {
+ BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
+ }
+
+ int fi = 0;
+ if (BinOpcode == 0 && !Nand) {
+ // Get or create a temporary stack location.
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ fi = MipsFI->getAtomicFrameIndex();
+ if (fi == -1) {
+ fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+ MipsFI->setAtomicFrameIndex(fi);
+ }
+
+ BuildMI(BB, dl, TII->get(Mips::SW))
+ .addReg(Incr2).addFrameIndex(fi).addImm(0);
+ }
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ll oldval,0(addr)
+ // binop tmp7,oldval,incr2
+ // and newval,tmp7,mask
+ // and tmp8,oldval,mask2
+ // or tmp9,tmp8,newval
+ // sc tmp9,0(addr)
+ // beq tmp9,$0,loopMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
+ if (Nand) {
+ // and tmp6, oldval, incr2
+ // nor tmp7, $0, tmp6
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+ } else if (BinOpcode == Mips::SUBu) {
+ // addu tmp7, oldval, incr2
+ BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2);
+ } else if (BinOpcode) {
+ // <binop> tmp7, oldval, incr2
+ BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
+ } else {
+ // lw tmp6, fi(sp) // load incr2 from stack
+ // or tmp7, $zero, tmp6
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+ }
+ BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // and tmp10,oldval,mask
+ // srl tmp11,tmp10,shift
+ // sll tmp12,tmp11,24
+ // sra dest,tmp12,24
+ BB = exitMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+  // The BuildMI calls below appear in reverse order because each one inserts
+  // at the beginning of exitMBB.
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
+ .addReg(Tmp12).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12)
+ .addReg(Tmp11).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11)
+ .addReg(Tmp10).addReg(Shift);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10)
+ .addReg(Oldval).addReg(Mask);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Oldval = MI->getOperand(2).getReg();
+ unsigned Newval = MI->getOperand(3).getReg();
+
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Get or create a temporary stack location.
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ int fi = MipsFI->getAtomicFrameIndex();
+ if (fi == -1) {
+ fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+ MipsFI->setAtomicFrameIndex(fi);
+ }
+
+ // thisMBB:
+ // ...
+ // sw newval, fi(sp) // store newval to stack
+ // fallthrough --> loop1MBB
+
+ // Note: storing newval to stack before the loop and then loading it from
+  // the stack in block loop2MBB is necessary to prevent the MachineLICM pass
+  // from hoisting the "or" instruction out of the block loop2MBB.
+
+ BuildMI(BB, dl, TII->get(Mips::SW))
+ .addReg(Newval).addFrameIndex(fi).addImm(0);
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ll dest, 0(ptr)
+ // bne dest, oldval, exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BNE))
+ .addReg(Dest).addReg(Oldval).addMBB(exitMBB);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(loop2MBB);
+
+ // loop2MBB:
+ // lw tmp2, fi(sp) // load newval from stack
+ // or tmp1, $0, tmp2
+ // sc tmp1, 0(ptr)
+ // beq tmp1, $0, loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicCmpSwapPartial.");
+
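+  // As with the partword binary atomics, the compare-and-swap is performed on
+  // the aligned word containing the byte or halfword: only the masked subword
+  // is compared and replaced.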
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Oldval = MI->getOperand(2).getReg();
+ unsigned Newval = MI->getOperand(3).getReg();
+
+ unsigned Addr = RegInfo.createVirtualRegister(RC);
+ unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval2 = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval3 = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval4 = RegInfo.createVirtualRegister(RC);
+ unsigned Newval2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // addiu tmp1,$0,-4 # 0xfffffffc
+ // and addr,ptr,tmp1
+ // andi tmp2,ptr,3
+ // sll shift,tmp2,3
+ // ori tmp3,$0,255 # 0xff
+ // sll mask,tmp3,shift
+ // nor mask2,$0,mask
+ // andi tmp4,oldval,255
+ // sll oldval2,tmp4,shift
+ // andi tmp5,newval,255
+ // sll newval2,tmp5,shift
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift);
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ll oldval3,0(addr)
+ // and oldval4,oldval3,mask
+ // bne oldval4,oldval2,exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::BNE))
+ .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(loop2MBB);
+
+ // loop2MBB:
+ // and tmp6,oldval3,mask2
+ // or tmp7,tmp6,newval2
+ // sc tmp7,0(addr)
+ // beq tmp7,$0,loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
+ .addReg(Tmp7).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // srl tmp8,oldval4,shift
+ // sll tmp9,tmp8,24
+ // sra dest,tmp9,24
+ BB = exitMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+  // The BuildMI calls below appear in reverse order because each one inserts
+  // at the beginning of exitMBB.
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
+ .addReg(Tmp9).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9)
+ .addReg(Tmp8).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8)
+ .addReg(Oldval4).addReg(Shift);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+SDValue MipsTargetLowering::
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
+ cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
+ "Cannot lower if the alignment of the allocated space is larger than \
+ that of the stack.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // Get a reference to the Mips stack pointer
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
+ SDValue());
+
+ // This node always has two return values: a new stack pointer
+ // value and a chain
+ SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+ SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+ return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
+}
+
+SDValue MipsTargetLowering::
+LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+
+ // Return if flag is not set by a floating point comparison.
+ if (CondRes.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue CCNode = CondRes.getOperand(2);
+ Mips::CondCode CC =
+ (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+
+ return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
+
+ // Return if flag is not set by a floating point comparison.
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ Op.getDebugLoc());
+}
+
+SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+
+ MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
+
+ // %gp_rel relocation
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GPREL);
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ }
+ // %hi/%lo relocation
+ SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_HI);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ }
+
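+  // In PIC mode the address is loaded from the GOT. Depending on the symbol's
+  // linkage, the %lo part of the address may still need to be added to the
+  // loaded value (see below).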
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOT);
+ GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA);
+ SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), GA, MachinePointerInfo(),
+ false, false, 0);
+  // For functions and for global targets that are not internally linked, only
+  // a load from the GOT/GP is necessary for PIC to work.
+ if (!GV->hasInternalLinkage() &&
+ (!GV->hasLocalLinkage() || isa<Function>(GV)))
+ return ResNode;
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+}
+
+SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ // %hi/%lo relocation
+ SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_HI);
+ SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_GOT);
+ BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Load = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), BAGOTOffset,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // If the relocation model is PIC, use the General Dynamic TLS Model,
+ // otherwise use the Initial Exec or Local Exec TLS Model.
+ // TODO: implement Local Dynamic TLS model
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // General Dynamic TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
+ 0, MipsII::MO_TLSGD);
+ SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
+ SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
+ SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(DAG.getEntryNode(),
+ (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false, true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+ dl);
+
+ return CallResult.first;
+ }
+
+ SDValue Offset;
+ if (GV->isDeclaration()) {
+ // Initial Exec TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOTTPREL);
+ Offset = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), TGA, MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ // Local Exec TLS Model
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_HI);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_LO);
+ SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue MipsTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+
+ if (!IsPIC) {
+ SDValue Ops[] = { JTI };
+ HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ } else {// Emit Load from Global Pointer
+ JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ // gp_rel relocation
+ // FIXME: we should reference the constant pool using small data sections,
+ // but the asm printer currently doesn't support this feature without
+ // hacking it. This feature should come soon so we can uncomment the
+ // stuff below.
+ //if (IsInSmallSection(C->getType())) {
+ // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+ // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_HI);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ } else {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_GOT);
+ CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP);
+ SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ CP, MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ }
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
+ // FIXME: Use ext/ins instructions if target architecture is Mips32r2.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
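+ // copysign(x, y) on the i32 bit patterns: keep the magnitude bits of x
+ // (mask 0x7fffffff) and take the sign bit of y (mask 0x80000000).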
+ SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
+ DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
+ DAG.getConstant(0x80000000, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+}
+
+static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
+ // FIXME:
+ // Use ext/ins instructions if target architecture is Mips32r2.
+ // Eliminate redundant mfc1 and mtc1 instructions.
+ unsigned LoIdx = 0, HiIdx = 1;
+
+ if (!isLittle)
+ std::swap(LoIdx, HiIdx);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getConstant(LoIdx, MVT::i32));
+ SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32));
+ SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0,
+ DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1,
+ DAG.getConstant(0x80000000, MVT::i32));
+ SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
+
+ if (!isLittle)
+ std::swap(Word0, Word1);
+
+ return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1);
+}
+
+SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
+ const {
+ EVT Ty = Op.getValueType();
+
+ assert(Ty == MVT::f32 || Ty == MVT::f64);
+
+ if (Ty == MVT::f32)
+ return LowerFCOPYSIGN32(Op, DAG);
+ else
+ return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+}
+
+SDValue MipsTargetLowering::
+LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Frame address can only be determined for current frame.");
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT);
+ return FrameAddr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement a generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+// i32 - Passed in A0, A1, A2, A3 and stack
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// an argument. Otherwise, passed in A1, A2, A3 and stack.
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+// not used, it must be shadowed. If only A3 is available, shadow it and
+// go to stack.
+//
+// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
+//===----------------------------------------------------------------------===//
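+//
+// For example, under these rules "double f(int a, double b, float c)" is
+// lowered as: a -> A0; b -> A2/A3 (A1 is shadowed); c, the 3rd argument,
+// would take an int reg, but A0-A3 are all used, so it goes to the stack.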
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+ static const unsigned IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned F32Regs[] = {
+ Mips::F12, Mips::F14
+ };
+ static const unsigned F64Regs[] = {
+ Mips::D6, Mips::D7
+ };
+
+ // ByVal Args
+ if (ArgFlags.isByVal()) {
+ State.HandleByVal(ValNo, ValVT, LocVT, LocInfo,
+ 1 /*MinSize*/, 4 /*MinAlign*/, ArgFlags);
+ unsigned NextReg = (State.getNextStackOffset() + 3) / 4;
+ for (unsigned r = State.getFirstUnallocated(IntRegs, IntRegsSize);
+ r < std::min(IntRegsSize, NextReg); ++r)
+ State.AllocateReg(IntRegs[r]);
+ return false;
+ }
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ unsigned Reg;
+
+ // f32 and f64 are allocated in A0, A1, A2, A3 when any of the following
+ // is true: the function is vararg, the argument is the 3rd or higher, or
+ // a previous argument is not f32 or f64.
+ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1
+ || State.getFirstUnallocated(F32Regs, FloatRegsSize) != ValNo;
+ unsigned OrigAlign = ArgFlags.getOrigAlign();
+ bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+
+ if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ // If this is the first part of an i64 arg,
+ // the allocated register must be either A0 or A2.
+ if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3))
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
+ // Allocate int register and shadow next int register. If first
+ // available register is Mips::A1 or Mips::A3, shadow it too.
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1 || Reg == Mips::A3)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) {
+ // we are guaranteed to find an available float register
+ if (ValVT == MVT::f32) {
+ Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ // Shadow int register
+ State.AllocateReg(IntRegs, IntRegsSize);
+ } else {
+ Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ // Shadow int registers
+ unsigned Reg2 = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg2 == Mips::A1 || Reg2 == Mips::A3)
+ State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ }
+ } else
+ llvm_unreachable("Cannot handle this ValVT.");
+
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, OrigAlign);
+
+ if (!Reg)
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static const unsigned O32IntRegsSize = 4;
+
+static const unsigned O32IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+};
+
+// Write ByVal Arg to arg registers and stack.
+static void
+WriteByValArg(SDValue& Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
+ SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ MVT PtrType) {
+ unsigned FirstWord = VA.getLocMemOffset() / 4;
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ unsigned LastWord = FirstWord + NumWords;
+ unsigned CurWord;
+
+ // copy the first 4 words of byval arg to registers A0 - A3
+ for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
+ ++CurWord) {
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant((CurWord - FirstWord) * 4,
+ MVT::i32));
+ SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(LoadVal.getValue(1));
+ unsigned DstReg = O32IntRegs[CurWord];
+ RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
+ }
+
+ // copy remaining part of byval arg to stack.
+ if (CurWord < LastWord) {
+ unsigned SizeInBytes = (LastWord - CurWord) * 4;
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant((CurWord - FirstWord) * 4,
+ MVT::i32));
+ LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(SizeInBytes, MVT::i32),
+ /*Align*/4,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
+ }
+}
+
+/// LowerCall - function arguments are copied from virtual regs to physical
+/// regs or the stack frame; CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isTailCall.
+SDValue
+MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // The MIPS target does not yet support tail call optimization.
+ isTailCall = false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset,
+ true));
+
+ // If this is the first call, create a stack frame object that points to
+ // a location to which .cprestore saves $gp.
+ if (IsPIC && !MipsFI->getGPFI())
+ MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
+
+ // Get the frame index of the stack frame object that points to the location
+ // of dynamically allocated area on the stack.
+ int DynAllocFI = MipsFI->getDynAllocFI();
+
+ // Update size of the maximum argument space.
+ // For O32, a minimum of four words (16 bytes) of argument space is
+ // allocated.
+ if (Subtarget->isABI_O32())
+ NextStackOffset = std::max(NextStackOffset, (unsigned)16);
+
+ unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
+
+ if (MaxCallFrameSize < NextStackOffset) {
+ MipsFI->setMaxCallFrameSize(NextStackOffset);
+
+ // Set the offsets relative to $sp of the $gp restore slot and dynamically
+ // allocated stack space. These offsets must be aligned to a boundary
+ // determined by the stack alignment of the ABI.
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+ StackAlignment * StackAlignment;
+
+ if (IsPIC)
+ MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+ MFI->setObjectOffset(DynAllocFI, NextStackOffset);
+ }
+
+ // With EABI it is possible to have 16 args in registers.
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue Arg = OutVals[i];
+ CCValAssign &VA = ArgLocs[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ if (Subtarget->isABI_O32() && VA.isRegLoc()) {
+ if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(1, MVT::i32));
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+ continue;
+ }
+ }
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register are kept in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ // Register arguments can't reach this point...
+ assert(VA.isMemLoc());
+
+ // ByVal Arg.
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (Flags.isByVal()) {
+ assert(Subtarget->isABI_O32() &&
+ "No support for ByVal args by ABIs other than O32 yet.");
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
+ VA, Flags, getPointerTy());
+ continue;
+ }
+
+ // Create the frame index object for this incoming parameter
+ LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
+
+ // Emit an ISD::STORE which stores the
+ // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ // Extend range of indices of frame objects for outgoing arguments that were
+ // created during this function call. Skip this step if no such objects were
+ // created.
+ if (LastFI)
+ MipsFI->extendOutArgFIRange(FirstFI, LastFI);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
+ bool LoadSymAddr = false;
+ SDValue CalleeLo;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, MipsII::MO_GOT);
+ CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
+ 0, MipsII::MO_ABS_LO);
+ } else {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, OpFlag);
+ }
+
+ LoadSymAddr = true;
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), OpFlag);
+ LoadSymAddr = true;
+ }
+
+ SDValue InFlag;
+
+ // Create nodes that load address of callee and copy it to T9
+ if (IsPIC) {
+ if (LoadSymAddr) {
+ // Load callee address
+ Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
+ SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ // Use GOT+LO if callee has internal linkage.
+ if (CalleeLo.getNode()) {
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ } else
+ Callee = LoadValue;
+ }
+
+ // copy to T9
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ InFlag = Chain.getValue(1);
+ Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NextStackOffset, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
+ std::vector<SDValue>& OutChains,
+ SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) {
+ unsigned LocMem = VA.getLocMemOffset();
+ unsigned FirstWord = LocMem / 4;
+
+ // copy register A0 - A3 to frame object
+ for (unsigned i = 0; i < NumWords; ++i) {
+ unsigned CurWord = FirstWord + i;
+ if (CurWord >= O32IntRegsSize)
+ break;
+
+ unsigned SrcReg = O32IntRegs[CurWord];
+ unsigned Reg = AddLiveIn(MF, SrcReg, Mips::CPURegsRegisterClass);
+ SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN,
+ DAG.getConstant(i * 4, MVT::i32));
+ SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32),
+ StorePtr, MachinePointerInfo(), false,
+ false, 0);
+ OutChains.push_back(Store);
+ }
+}
+
+/// LowerFormalArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+SDValue
+MipsTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setVarArgsFrameIndex(0);
+
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
+ else
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
+
+ int LastFI = 0; // MipsFI->LastInArgFI is 0 at the entry of this function.
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ unsigned ArgReg = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = Mips::FGR32RegisterClass;
+ else if (RegVT == MVT::f64) {
+ if (!Subtarget->isSingleFloat())
+ RC = Mips::AFGR64RegisterClass;
+ } else
+ llvm_unreachable("RegVT not supported by FormalArguments Lowering");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+ if (Subtarget->isABI_O32()) {
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ VA.getLocReg()+1, RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue, ArgValue2);
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // !VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ assert(Subtarget->isABI_O32() &&
+ "No support for ByVal args by ABIs other than O32 yet.");
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
+ true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(FIN);
+ ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags);
+
+ continue;
+ }
+
+ // The stack pointer offset is relative to the caller stack frame.
+ LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(LastFI),
+ false, false, 0));
+ }
+ }
+
+ // The MIPS ABIs for returning structs by value require that we copy
+ // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ if (isVarArg && Subtarget->isABI_O32()) {
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+ assert(NextStackOffset % 4 == 0 &&
+ "NextStackOffset must be aligned to 4-byte boundaries.");
+ LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ MipsFI->setVarArgsFrameIndex(LastFI);
+
+ // If NextStackOffset is smaller than o32's 16-byte reserved argument area,
+ // copy the integer registers that have not been used for argument passing
+ // to the caller's stack frame.
+ for (; NextStackOffset < 16; NextStackOffset += 4) {
+ TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
+ unsigned Idx = NextStackOffset / 4;
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
+ LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ MipsFI->setLastInArgFI(LastFI);
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are
+ // stuck together, avoiding them being scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ // The MIPS ABIs for returning structs by value require that we copy
+ // the sret argument into $v0 for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into $v0.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned Reg = MipsFI->getSRetReturnReg();
+
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on Mips is always a "jr $ra"
+ if (Flag.getNode())
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // Mips-specific constraints
+ // GCC config/mips/constraints.md
+ //
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
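+ //
+ // e.g. asm("mtc1 %1, %0" : "=f"(f) : "d"(i)) uses 'f' for the FP
+ // destination and 'd' for the integer source.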
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MipsTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
+ case 'y': // Same as 'r'. Exists for compatibility.
+ case 'r':
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, Mips::FGR32RegisterClass);
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Mips target isn't yet aware of offsets.
+ return false;
+}
+
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ if (Imm.isNegZero())
+ return false;
+ return Imm.isZero();
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 000000000000..bda26a229e72
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,191 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+ // No relation with Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+ // No relation with Mips Lo register
+ Lo,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // General Dynamic TLS
+ TlsGd,
+
+ // Local Exec TLS
+ TprelHi,
+ TprelLo,
+
+ // Thread Pointer
+ ThreadPointer,
+
+ // Floating Point Branch Conditional
+ FPBrcond,
+
+ // Floating Point Compare
+ FPCmp,
+
+ // Floating Point Conditional Moves
+ CMovFP_T,
+ CMovFP_F,
+
+ // Floating Point Rounding
+ FPRound,
+
+ // Return
+ Ret,
+
+ // MAdd/Sub nodes
+ MAdd,
+ MAddu,
+ MSub,
+ MSubu,
+
+ // DivRem(u)
+ DivRem,
+ DivRemU,
+
+ BuildPairF64,
+ ExtractElementF64,
+
+ WrapperPIC,
+
+ DynAlloc
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MipsTargetLowering : public TargetLowering {
+ public:
+ explicit MipsTargetLowering(MipsTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ private:
+ // Subtarget Info
+ const MipsSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode, bool Nand = false) const;
+ MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+ bool Nand = false) const;
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ };
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
new file mode 100644
index 000000000000..021c16799779
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -0,0 +1,374 @@
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+// ------------------------
+// * 64bit fp:
+// - 32 64-bit registers (default mode)
+// - 16 even 32-bit registers (32-bit compatible mode) for
+// single and double access.
+// * 32bit fp:
+// - 16 even 32-bit registers - single and double (aliased)
+// - 32 32-bit registers (within single-only mode)
+//===----------------------------------------------------------------------===//
+
+// Floating Point Compare and Branch
+def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+ SDTCisInt<2>]>;
+def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, f64>,
+ SDTCisVT<2, i32>]>;
+
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
+def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
+ SDT_MipsExtractElementF64>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printFCCOperand" in
+ def condcode : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//
+// A set of multiclasses is used to address the register usage.
+//
+// S32 - single precision in 16 32bit even fp registers
+// single precision in 32 32bit fp registers in SingleOnly mode
+// S64 - single precision in 32 64bit fp registers (In64BitMode)
+// D32 - double precision in 16 32bit even fp registers
+// D64 - double precision in 32 64bit fp registers (In64BitMode)
+//
+// Only S32 and D32 are supported right now.
+//===----------------------------------------------------------------------===//
+
+multiclass FFR1_1<bits<6> funct, string asmstr>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s\t$fd, $fs"), []>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d\t$fd, $fs"), []>, Requires<[In32BitMode]>;
+}
+
+multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s\t$fd, $fs"),
+ [(set FGR32:$fd, (FOp FGR32:$fs))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d\t$fd, $fs"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
+}
+
+class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
+ RegisterClass RcDst, string asmstr>:
+ FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
+ !strconcat(asmstr, "\t$fd, $fs"), []>;
+
+
+multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp, bit isComm = 0> {
+ let isCommutable = isComm in {
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
+ (ins FGR32:$fs, FGR32:$ft),
+ !strconcat(asmstr, ".s\t$fd, $fs, $ft"),
+ [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
+ (ins AFGR64:$fs, AFGR64:$ft),
+ !strconcat(asmstr, ".d\t$fd, $fs, $ft"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
+ Requires<[In32BitMode]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+//===----------------------------------------------------------------------===//
+
+let ft = 0 in {
+ defm FLOOR_W : FFR1_1<0b001111, "floor.w">;
+ defm CEIL_W : FFR1_1<0b001110, "ceil.w">;
+ defm ROUND_W : FFR1_1<0b001100, "round.w">;
+ defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
+ defm CVTW : FFR1_1<0b100100, "cvt.w">;
+
+ defm FABS : FFR1_2<0b000101, "abs", fabs>;
+ defm FNEG : FFR1_2<0b000111, "neg", fneg>;
+ defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
+
+ /// Convert to Single Precision
+ def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">;
+
+ let Predicates = [IsNotSingleFloat] in {
+ /// Ceil to long signed integer
+ def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">;
+ def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">;
+
+ /// Round to long signed integer
+ def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">;
+ def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">;
+
+ /// Floor to long signed integer
+ def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">;
+ def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">;
+
+ /// Trunc to long signed integer
+ def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">;
+ def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
+
+ /// Convert to long signed integer
+ def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
+ def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
+
+ /// Convert to Double Precision
+ def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
+ def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
+ def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
+
+ /// Convert to Single Precision
+ def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
+ def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
+ }
+}
+
+// The odd-numbered registers are only referenced when doing loads,
+// stores, and moves between floating-point and integer registers.
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+let fd = 0 in {
+ /// Move Control Registers From/To CPU Registers
+ def CFC1 : FFR<0x11, 0x0, 0x2, (outs CPURegs:$rt), (ins CCR:$fs),
+ "cfc1\t$rt, $fs", []>;
+
+ def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
+ "ctc1\t$fs, $rt", []>;
+
+ def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
+ "mfc1\t$rt, $fs", []>;
+
+ def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
+ "mtc1\t$rt, $fs", []>;
+}
+
+def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ "mov.s\t$fd, $fs", []>;
+def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
+ "mov.d\t$fd, $fs", []>;
+
+/// Floating Point Memory Instructions
+let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
+ def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
+ "ldc1\t$ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
+
+ def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
+ "sdc1\t$ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+}
+
+// LWC1 and SWC1 can always be emitted with odd registers.
+def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1\t$ft, $addr",
+ [(set FGR32:$ft, (load addr:$addr))]>;
+def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr),
+ "swc1\t$ft, $addr", [(store FGR32:$ft, addr:$addr)]>;
+
+/// Floating-point Arithmetic
+defm FADD : FFR1_4<0x10, "add", fadd, 1>;
+defm FDIV : FFR1_4<0x03, "div", fdiv>;
+defm FMUL : FFR1_4<0x02, "mul", fmul, 1>;
+defm FSUB : FFR1_4<0x01, "sub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
+def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
+def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
+
+/// Floating Point Branch of False/True (Likely)
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
+ class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+ (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"),
+ [(MipsFPBrcond op, bb:$dst)]>;
+
+def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
+def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
+def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
+def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_FCOND_F : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
+def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+let hasDelaySlot = 1, Defs=[FCR31] in {
+ def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
+ "c.$cc.s\t$fs, $ft",
+ [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
+
+ def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
+ "c.$cc.d\t$fs, $ft",
+ [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
+ Requires<[In32BitMode]>;
+}
+
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// flag:int, data:float
+let usesCustomInserter = 1, Constraints = "$F = $dst" in
+class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
+ string instr_asm> :
+ FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
+
+def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
+def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
+
+let Predicates = [In32BitMode] in {
+ def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
+ def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
+}
+
+defm : MovzPats<FGR32, MOVZ_S>;
+defm : MovnPats<FGR32, MOVN_S>;
+
+let Predicates = [In32BitMode] in {
+ defm : MovzPats<AFGR64, MOVZ_D>;
+ defm : MovnPats<AFGR64, MOVN_D>;
+}
+
+let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in {
+// flag:float, data:int
+class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
+ FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
+
+// flag:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+ string instr_asm> :
+ FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
+}
+
+def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
+def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+
+let Predicates = [In32BitMode] in {
+ def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
+
+// This pseudo instr gets expanded into 2 mtc1 instrs after register
+// allocation.
+def BuildPairF64 :
+ MipsPseudo<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+
+// This pseudo instr gets expanded into 2 mfc1 instrs after register
+// allocation.
+// if n is 0, lower part of src is extracted.
+// if n is 1, higher part of src is extracted.
+def ExtractElementF64 :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst,
+ (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>;
+
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
+
+def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
+def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
+
+let Predicates = [In32BitMode] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+}
+
+// MipsFPRound is only emitted for MipsI targets.
+def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 000000000000..9f55fb38b959
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,227 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2-register instr) or src reg (on a 3-register instr).
+// rd - dst reg, only used on 3-register instrs.
+// shamt - only used on shift instructions, contains the shift amount.
+// funct - combined with the opcode field, gives the operation code.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "Mips";
+
+ bits<6> opcode;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{31-26} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
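+
+// For example, "addu $1, $2, $3" (op = 0, funct = 0x21) fills the FR fields
+// as rs = 2, rt = 3, rd = 1, shamt = 0, giving the encoding
+//   000000 00010 00011 00001 00000 100001 = 0x00430821
+// (the register numbers are chosen only for illustration).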
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
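+
+// For example, "addiu $1, $2, 4" (op = 0x09) fills the FI fields as rs = 2,
+// rt = 1, imm16 = 4, giving 001001 00010 00001 0000000000000100 = 0x24410004
+// (registers chosen only for illustration).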
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{25-0} = addr;
+}
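+
+// For example, "j 0x0040000c" (op = 0x02) stores bits 27-2 of the target
+// address in the addr field (0x100003), giving the encoding 0x08100003.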
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// fs - src reg.
+// ft - dst reg (on a 2-register instr) or src reg (on a 3-register instr).
+// fd - dst reg, only used on 3-register instrs.
+// fmt - double or single precision.
+// funct - combined with the opcode field, gives the operation code.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
+//===----------------------------------------------------------------------===//
+
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+ bits<5> fmt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
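+
+// For example, "add.s $f0, $f1, $f2" (op = 0x11, fmt = 16, funct = 0) fills
+// the FFR fields as ft = 2, fs = 1, fd = 0, giving the encoding 0x46020800
+// (registers chosen only for illustration).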
+
+//===----------------------------------------------------------------------===//
+// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> ft;
+ bits<5> base;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
+//===----------------------------------------------------------------------===//
+
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cc;
+ bits<5> fmt;
+
+ let opcode = 0x11;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-4} = 0b11;
+ let Inst{3-0} = cc;
+}
+
+
+class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<3> N;
+ bits<1> tf;
+
+ let opcode = 0;
+ let tf = _tf;
+
+ let Inst{25-21} = rs;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<3> N;
+ bits<5> fmt;
+ bits<1> tf;
+
+ let opcode = 17;
+ let fmt = _fmt;
+ let tf = _tf;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 17;
+}
\ No newline at end of file
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 000000000000..0a7a7f2dfe4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,461 @@
+//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+ : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
+ (MI->getOpcode() == Mips::LDC1)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored-to stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
+ (MI->getOpcode() == Mips::SDC1)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target nop
+/// instruction.
+void MipsInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+{
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Mips::NOP));
+}
+
+void MipsInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
+ bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg);
+
+ // CPU-CPU is the most common.
+ if (DestCPU && SrcCPU) {
+ BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Copy to CPU from other registers.
+ if (DestCPU) {
+ if (Mips::CCRRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SrcReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+ else if (SrcReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
+ else
+ llvm_unreachable("Copy to CPU from invalid register");
+ return;
+ }
+
+ // Copy to other registers from CPU.
+ if (SrcCPU) {
+ if (Mips::CCRRegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MTHI))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MTLO))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Copy from CPU to invalid register");
+ return;
+ }
+
+ if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Cannot copy registers");
+}
+
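+// On MIPS I there is no 64-bit FP load/store, so spills and reloads of
+// AFGR64 registers are split into two 32-bit swc1/lwc1 accesses at offsets
+// 0 and 4 (see the isMips1() paths below).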
+void MipsInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::SDC1))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[0], getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[1], getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(4);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
+}
+
+void MipsInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(DestReg);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
+ .addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
+ .addFrameIndex(FI).addImm(4);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
+}
+
+MachineInstr*
+MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static unsigned GetAnalyzableBrOpc(unsigned Opc) {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
+{
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ : return Mips::BNE;
+ case Mips::BNE : return Mips::BEQ;
+ case Mips::BGTZ : return Mips::BLEZ;
+ case Mips::BGEZ : return Mips::BLTZ;
+ case Mips::BLTZ : return Mips::BGEZ;
+ case Mips::BLEZ : return Mips::BGTZ;
+ case Mips::BC1T : return Mips::BC1F;
+ case Mips::BC1F : return Mips::BC1T;
+ }
+}
+
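+// For example, for a terminator "bne $2, $3, $BB0_2" this fills Cond with
+// { BNE, $2, $3 } and sets BB to BB0_2 (the operands shown are illustrative).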
+static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand>& Cond) {
+ assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+ int NumOp = Inst->getNumExplicitOperands();
+
+ // for both int and fp branches, the last explicit operand is the
+ // MBB.
+ BB = Inst->getOperand(NumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Opc));
+
+ for (int i=0; i<NumOp-1; i++)
+ Cond.push_back(Inst->getOperand(i));
+}
+
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // If this block ends with no branches (it just falls through to its succ)
+ // just return false, leaving TBB/FBB null.
+ TBB = FBB = NULL;
+ return false;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return true;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return true;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == Mips::J) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return false;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return true;
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == Mips::J) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return true;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ return false;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != Mips::J)
+ return true;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return false;
+}
+
+void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond)
+ const {
+ unsigned Opc = Cond[0].getImm();
+ const MCInstrDesc &MCID = get(Opc);
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
+
+ for (unsigned i = 1; i < Cond.size(); ++i)
+ MIB.addReg(Cond[i].getReg());
+
+ MIB.addMBB(TBB);
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ // # of condition operands:
+ // Unconditional branches: 0
+ // Floating point branches: 1 (opc)
+ // Int BranchZero: 2 (opc, reg)
+ // Int Branch: 3 (opc, reg0, reg1)
+ assert((Cond.size() <= 3) &&
+ "# of Mips branch conditions must be <= 3!");
+
+ // Two-way Conditional branch.
+ if (FBB) {
+ BuildCondBr(MBB, TBB, DL, Cond);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ return 2;
+ }
+
+ // One way branch.
+ // Unconditional branch.
+ if (Cond.empty())
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ else // Conditional branch.
+ BuildCondBr(MBB, TBB, DL, Cond);
+ return 1;
+}
+
+unsigned MipsInstrInfo::
+RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+ MachineBasicBlock::reverse_iterator FirstBr;
+ unsigned removed;
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ FirstBr = I;
+
+ // Up to 2 branches are removed.
+ // Note that indirect branches are not removed.
+ for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+ if (!GetAnalyzableBrOpc(I->getOpcode()))
+ break;
+
+ MBB.erase(I.base(), FirstBr.base());
+
+ return removed;
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool MipsInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert( (Cond.size() && Cond.size() <= 3) &&
+ "Invalid Mips branch condition!");
+ Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ unsigned GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Mips::GP);
+ RegInfo.addLiveIn(Mips::GP);
+
+ MipsFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 000000000000..4421c4862fa0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,160 @@
+//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MipsRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MipsGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace Mips {
+ /// GetOppositeBranchOpc - Return the inverse of the specified
+ /// opcode, e.g. turning BEQ to BNE.
+ unsigned GetOppositeBranchOpc(unsigned Opc);
+}
+
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address of the relocation entry symbol resides during execution.
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO,
+
+ /// MO_TLSGD - Represents the offset into the global offset table at which
+ // the module ID and TLS block offset reside during execution (General
+ // Dynamic TLS).
+ MO_TLSGD,
+
+ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
+ // Exec TLS).
+ MO_GOTTPREL,
+
+ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
+ // the thread pointer (Local Exec TLS).
+ MO_TPREL_HI,
+ MO_TPREL_LO
+ };
+}
+
+class MipsInstrInfo : public MipsGenInstrInfo {
+ MipsTargetMachine &TM;
+ const MipsRegisterInfo RI;
+public:
+ explicit MipsInstrInfo(MipsTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored-to stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+private:
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
+
+public:
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 000000000000..d1a058712459
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,881 @@
+//===- MipsInstrInfo.td - Target Description for Mips Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<2, 3>]>;
+def SDT_MipsDivRem : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>,
+ SDTCisSameAs<0, 1>]>;
+
+def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, iPTR>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// Hi and Lo nodes are used to handle global addresses. They are used in
+// MipsISelLowering to lower nodes such as GlobalAddress and ExternalSymbol
+// in the static model. (Nothing to do with the Mips HI and LO registers.)
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// TlsGd node is used to handle General Dynamic TLS
+def MipsTlsGd : SDNode<"MipsISD::TlsGd", SDTIntUnaryOp>;
+
+// TprelHi and TprelLo nodes are used to handle Local Exec TLS
+def MipsTprelHi : SDNode<"MipsISD::TprelHi", SDTIntUnaryOp>;
+def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
+
+// Thread pointer
+def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// MAdd*/MSub* nodes
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+
+// DivRem(u) nodes
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+
+// Target constant nodes that are not part of any isel patterns and remain
+// unchanged can cause instructions with illegal operands to be emitted.
+// Wrapper node patterns give the instruction selector a chance to replace
+// target constant nodes that would otherwise remain unchanged with ADDiu
+// nodes. Without these wrapper node patterns, the following conditional move
+// instruction is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
+// compiled:
+// movn %got(d)($gp), %got(c)($gp), $4
+// This instruction is illegal since movn can take only register operands.
+
+def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+ [SDNPHasChain, SDNPInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+def IsMips32 : Predicate<"Subtarget.isMips32()">;
+def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+def mem_ea : Operand<i32> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, slti
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are caught.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+ else
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
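+
+// For example, addiu and slti take immediates in [-32768, 32767] via
+// immSExt16, while andi, ori and xori take immediates in [0, 65535] via
+// immZExt16.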
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// Mips Address Mode. The SDNode frameindex can possibly be a match,
+// since load and store instructions that access the stack use it.
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic 3 register operands
+class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin, bit isComm = 0>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin> {
+ let isCommutable = isComm;
+}
+
+class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
+ bit isComm = 0>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu> {
+ let isCommutable = isComm;
+}
+
+// Arithmetic 2 register operands
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
+
+// Arithmetic Multiply ADD/SUB
+let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
+class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
+ FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
+ let isCommutable = isComm;
+}
+
+// Logical
+let isCommutable = 1 in
+class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+
+let isCommutable = 1 in
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+
+// Shifts
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+ let rs = _rs;
+}
+
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
+ let shamt = _shamt;
+}
+
+// Load Upper Immediate
+class LoadUpper<bits<6> op, string instr_asm>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$dst, $imm"),
+ [], IIAlu>;
+
+// Memory Load/Store
+let canFoldAsLoad = 1, hasDelaySlot = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI<op, (outs CPURegs:$dst), (ins mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+
+// Conditional Branch
+let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, "\t$a, $b, $offset"),
+ [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ IIBranch>;
+
+class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
+ !strconcat(instr_asm, "\t$src, $offset"),
+ [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+ IIBranch>;
+}
+
+// SetCC
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
+ PatFrag cond_op>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+ IIAlu>;
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
+ Operand Od, PatLeaf imm_type>:
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+ IIAlu>;
+
+// Unconditional branch
+let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+ FJ<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>;
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1,
+ // All calls clobber the non-callee saved registers...
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
+ class JumpLink<bits<6> op, string instr_asm>:
+ FJ<op, (outs), (ins calltarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
+ IIBranch>;
+
+ let rd=31 in
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+
+ class BranchLink<string instr_asm>:
+ FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>;
+}
+
+// Mul, Div
+let Defs = [HI, LO] in {
+ let isCommutable = 1 in
+ class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+ class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$$zero, $a, $b"),
+ [(op CPURegs:$a, CPURegs:$b)], itin>;
+}
+
+// Move from Hi/Lo
+class MoveFromLOHI<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins),
+ !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+
+class MoveToLOHI<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+
+class EffectiveAddress<string instr_asm> :
+ FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr),
+ instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+// Sign Extend in Register.
+class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
+ FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+
+// Byte Swap
+class ByteSwap<bits<6> func, string instr_asm>:
+ FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+
+// Conditional Move
+class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+ [], NoItinerary>;
+
+// Read Hardware
+class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$dst), (ins HWRegs:$src),
+ "rdhwr\t$dst, $src", [], IIAlu> {
+ let rs = 0;
+ let shamt = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudo-instructions and automatic
+// assembler reordering, so we should reorder ourselves.
+def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
+def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
+def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
+def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
+
+// These macros are inserted to prevent GAS from complaining
+// when using the AT register.
+def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
+def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
+
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
+
+let usesCustomInserter = 1 in {
+ def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_add_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_add_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_add_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_SUB_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_sub_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_sub_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_sub_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_AND_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_and_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_and_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_and_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_OR_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_or_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_or_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_or_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_XOR_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_xor_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_xor_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_xor_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_NAND_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_nand_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_nand_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_nand_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_SWAP_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+ "atomic_swap_8\t$dst, $ptr, $val",
+ [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>;
+ def ATOMIC_SWAP_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+ "atomic_swap_16\t$dst, $ptr, $val",
+ [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>;
+ def ATOMIC_SWAP_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+ "atomic_swap_32\t$dst, $ptr, $val",
+ [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+ "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval",
+ [(set CPURegs:$dst,
+ (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+ def ATOMIC_CMP_SWAP_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+ "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval",
+ [(set CPURegs:$dst,
+ (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+ def ATOMIC_CMP_SWAP_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+ "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval",
+ [(set CPURegs:$dst,
+ (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+}
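+
+// The custom inserter expands these pseudos into ll/sc retry loops.  For the
+// 32-bit add case the expansion is roughly (register names and the label are
+// illustrative placeholders):
+//   $loop: ll    $tmp, 0($ptr)
+//          addu  $new, $tmp, $incr
+//          sc    $new, 0($ptr)
+//          beq   $new, $zero, $loop
+// $dst receives the value loaded by ll, i.e. the value before the update.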
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
+def ANDi : LogicI<0x0c, "andi", and>;
+def ORi : LogicI<0x0d, "ori", or>;
+def XORi : LogicI<0x0e, "xori", xor>;
+def LUi : LoadUpper<0x0f, "lui">;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu, 1>;
+def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
+def ADD : ArithOverflowR<0x00, 0x20, "add", 1>;
+def SUB : ArithOverflowR<0x00, 0x22, "sub">;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
+def AND : LogicR<0x24, "and", and>;
+def OR : LogicR<0x25, "or", or>;
+def XOR : LogicR<0x26, "xor", xor>;
+def NOR : LogicNOR<0x00, 0x27, "nor">;
+
+/// Shift Instructions
+def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
+def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
+def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
+def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
+def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
+def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+
+// Rotate Instructions
+let Predicates = [IsMips32r2] in {
+ def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
+ def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+}
+
+/// Load and Store Instructions
+def LB : LoadM<0x20, "lb", sextloadi8>;
+def LBu : LoadM<0x24, "lbu", zextloadi8>;
+def LH : LoadM<0x21, "lh", sextloadi16>;
+def LHu : LoadM<0x25, "lhu", zextloadi16>;
+def LW : LoadM<0x23, "lw", load>;
+def SB : StoreM<0x28, "sb", truncstorei8>;
+def SH : StoreM<0x29, "sh", truncstorei16>;
+def SW : StoreM<0x2b, "sw", store>;
+
+/// Load-linked, Store-conditional
+let hasDelaySlot = 1 in
+ def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
+ "ll\t$dst, $addr", [], IILoad>;
+let Constraints = "$src = $dst" in
+ def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
+ "sc\t$src, $addr", [], IIStore>;
+
+/// Jump and Branch Instructions
+def J : JumpFJ<0x02, "j">;
+def JR : JumpFR<0x00, 0x08, "jr">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def BEQ : CBranch<0x04, "beq", seteq>;
+def BNE : CBranch<0x05, "bne", setne>;
+
+let rt=1 in
+ def BGEZ : CBranchZero<0x01, "bgez", setge>;
+
+let rt=0 in {
+ def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
+ def BLEZ : CBranchZero<0x07, "blez", setle>;
+ def BLTZ : CBranchZero<0x01, "bltz", setlt>;
+}
+
+def BGEZAL : BranchLink<"bgezal">;
+def BLTZAL : BranchLink<"bltzal">;
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, rs=0, rt=0, shamt=0 in
+ def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
+ "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+
+/// Multiply and Divide Instructions.
+def MULT : Mul<0x18, "mult", IIImul>;
+def MULTu : Mul<0x19, "multu", IIImul>;
+def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+
+let Defs = [HI] in
+ def MTHI : MoveToLOHI<0x11, "mthi">;
+let Defs = [LO] in
+ def MTLO : MoveToLOHI<0x13, "mtlo">;
+
+let Uses = [HI] in
+ def MFHI : MoveFromLOHI<0x10, "mfhi">;
+let Uses = [LO] in
+ def MFLO : MoveFromLOHI<0x12, "mflo">;
+
+/// Sign Ext In Register Instructions.
+let Predicates = [HasSEInReg] in {
+ let shamt = 0x10, rs = 0 in
+ def SEB : SignExtInReg<0x21, "seb", i8>;
+
+ let shamt = 0x18, rs = 0 in
+ def SEH : SignExtInReg<0x20, "seh", i16>;
+}
+
+/// Count Leading
+def CLZ : CountLeading<0b100000, "clz",
+ [(set CPURegs:$dst, (ctlz CPURegs:$src))]>;
+def CLO : CountLeading<0b100001, "clo",
+ [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>;
+
+/// Byte Swap
+let Predicates = [HasSwap] in {
+ let shamt = 0x3, rs = 0 in
+ def WSBW : ByteSwap<0x20, "wsbw">;
+}
+
+/// Conditional Move
+def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
+def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if the target does not have
+// conditional move instructions.
+// flag:int, data:int
+let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
+ class CondMovIntInt<bits<6> funct, string instr_asm> :
+ FR<0, funct, (outs CPURegs:$dst),
+ (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+
+def MOVZ_I : CondMovIntInt<0x0a, "movz">;
+def MOVN_I : CondMovIntInt<0x0b, "movn">;
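+
+// movz copies $T into $dst when $cond is zero and movn when $cond is
+// non-zero; otherwise $dst keeps the tied-in $F operand.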
+
+/// No operation
+let addr=0 in
+ def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+
+// FrameIndexes are legalized when they are operands of load/store
+// instructions. The same does not happen for stack address copies, so an
+// add op with a mem ComplexPattern is used so that the stack address copy
+// can be matched. It's similar to Sparc LEA_ADDRi.
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
+
+// MADD*/MSUB*
+def MADD : MArithR<0, "madd", MipsMAdd, 1>;
+def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
+def MSUB : MArithR<4, "msub", MipsMSub>;
+def MSUBU : MArithR<5, "msubu", MipsMSubu>;
+
+// MUL is an assembly macro in the currently used ISAs. In recent ISAs
+// it is a real instruction.
+def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul, 1>, Requires<[IsMips32]>;
+
+def RDHWR : ReadHardware;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
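+
+// For example, materializing the constant 0x12345678 selects to
+//   lui $1, 0x1234
+//   ori $1, $1, 0x5678
+// (the register shown is illustrative).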
+
+// Carry patterns
+def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+//def : Pat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
+
+// hi/lo relocs
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
+
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+
+// gp_rel relocs
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// tlsgd
+def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
+ (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
+
+// tprel hi/lo
+def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
+
+// wrapper_pic
+class WrapperPICPat<SDNode node>:
+ Pat<(MipsWrapperPIC node:$in),
+ (ADDiu GP, node:$in)>;
+
+def : WrapperPICPat<tglobaladdr>;
+def : WrapperPICPat<tconstpool>;
+def : WrapperPICPat<texternalsym>;
+def : WrapperPICPat<tblockaddress>;
+def : WrapperPICPat<tjumptable>;
+
+// Mips does not have a "not" instruction, so we expand it using NOR with $zero.
+def : Pat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
+
+// Extended loads and stores
+def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+
+// peepholes
+def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+
+// brcond patterns
+def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
+ (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
+def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
+ (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond CPURegs:$cond, bb:$dst),
+ (BNE CPURegs:$cond, ZERO, bb:$dst)>;
+
+// select patterns
+multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
+ def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
+ def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
+ def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F),
+ (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
+ def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
+ def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+defm : MovzPats<CPURegs, MOVZ_I>;
+defm : MovnPats<CPURegs, MOVN_I>;
+
+// setcc patterns
+def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
+def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
+def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
+
+def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
+ (SLT CPURegs:$rhs, CPURegs:$lhs)>;
+def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
+
+def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
+
+def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
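The setcc patterns above compute a boolean in a register with SLT/SLTu and XOR tricks; for example, equality becomes "is (a xor b) unsigned-less-than 1". A small standalone C++ sketch, not LLVM code, that emulates a few of these lowerings and checks them against the plain comparisons:

#include <cassert>
#include <cstdint>

// Emulations of the setcc lowerings above (illustrative only).
static uint32_t slt (int32_t a, int32_t b)   { return a < b; }            // SLT
static uint32_t sltu(uint32_t a, uint32_t b) { return a < b; }            // SLTu

static uint32_t seteq(uint32_t a, uint32_t b) { return sltu(a ^ b, 1); }  // SLTu (XOR a,b), 1
static uint32_t setne(uint32_t a, uint32_t b) { return sltu(0, a ^ b); }  // SLTu ZERO, (XOR a,b)
static uint32_t setle(int32_t a, int32_t b)   { return slt(b, a) ^ 1; }   // XORi (SLT b,a), 1
static uint32_t setge(int32_t a, int32_t b)   { return slt(a, b) ^ 1; }   // XORi (SLT a,b), 1

int main() {
  assert(seteq(5, 5) == 1 && seteq(5, 6) == 0);
  assert(setne(5, 6) == 1 && setne(5, 5) == 0);
  assert(setle(-3, 7) == 1 && setle(8, 7) == 0);
  assert(setge(7, 7) == 1 && setge(6, 7) == 0);
  return 0;
}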
+
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
new file mode 100644
index 000000000000..f5cc3aa25f1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -0,0 +1,118 @@
+//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCInstLower.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
+ MipsAsmPrinter &asmprinter)
+ : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+
+MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy) const {
+ MipsMCSymbolRefExpr::VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset = 0;
+
+ switch(MO.getTargetFlags()) {
+ default: assert(0 && "Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
+ case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ }
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset = MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
+ Ctx));
+}
+
+void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, MOTy);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
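For context, a sketch of how an asm printer would typically drive this lowering; the member names used here (MCInstLowering, OutStreamer) are assumptions based on the usual LLVM AsmPrinter pattern and are not taken from this patch:

// Hypothetical caller, shown only to illustrate the intended use of Lower().
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);    // assumed MipsMCInstLower member
  OutStreamer.EmitInstruction(TmpInst); // hand the lowered MCInst to the streamer
}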
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
new file mode 100644
index 000000000000..ec5201be7f6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -0,0 +1,43 @@
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCINSTLOWER_H
+#define MIPSMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class MipsAsmPrinter;
+
+/// MipsMCInstLower - This class is used to lower a MachineInstr into an
+/// MCInst.
+class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext &Ctx;
+ Mangler *Mang;
+ MipsAsmPrinter &AsmPrinter;
+public:
+ MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
+ MipsAsmPrinter &asmprinter);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy) const;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
new file mode 100644
index 000000000000..9a2bdae0e339
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -0,0 +1,63 @@
+//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mipsmcsymbolrefexpr"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+const MipsMCSymbolRefExpr*
+MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
+ int Offset, MCContext &Ctx) {
+ return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
+}
+
+void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_Mips_None: break;
+ case VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case VK_Mips_GOT: OS << "%got("; break;
+ case VK_Mips_ABS_HI: OS << "%hi("; break;
+ case VK_Mips_ABS_LO: OS << "%lo("; break;
+ case VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ }
+
+ OS << *Symbol;
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if (Kind != VK_Mips_None)
+ OS << ')';
+}
+
+bool
+MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
+ Asm->getOrCreateSymbolData(*Symbol);
+}
+
+const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
+ return Symbol->isDefined() ? &Symbol->getSection() : NULL;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
new file mode 100644
index 000000000000..3e695963709e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -0,0 +1,62 @@
+//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCSYMBOLREFEXPR_H
+#define MIPSMCSYMBOLREFEXPR_H
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class MipsMCSymbolRefExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_Mips_None,
+ VK_Mips_GPREL,
+ VK_Mips_GOT_CALL,
+ VK_Mips_GOT,
+ VK_Mips_ABS_HI,
+ VK_Mips_ABS_LO,
+ VK_Mips_TLSGD,
+ VK_Mips_GOTTPREL,
+ VK_Mips_TPREL_HI,
+ VK_Mips_TPREL_LO
+ };
+
+private:
+ const VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset;
+
+ explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
+ int _Offset)
+ : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
+
+public:
+ static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
+ const MCSymbol *Symbol, int Offset,
+ MCContext &Ctx);
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const MipsMCSymbolRefExpr *) { return true; }
+
+ int getOffset() const { return Offset; }
+ void setOffset(int O) { Offset = O; }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 000000000000..dbb7a6744224
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,114 @@
+//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include <utility>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+ MachineFunction& MF;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ // Range of frame object indices.
+ // InArgFIRange: Range of indices of all frame objects created during call to
+ // LowerFormalArguments.
+ // OutArgFIRange: Range of indices of all frame objects created during call to
+ // LowerCall except for the frame object for restoring $gp.
+ std::pair<int, int> InArgFIRange, OutArgFIRange;
+ int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
+ unsigned MaxCallFrameSize;
+
+ /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
+ /// intrinsics, it is necessary to use a temporary stack location.
+ /// This field holds the frame index of this location.
+ int AtomicFrameIndex;
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
+ OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+ MaxCallFrameSize(0), AtomicFrameIndex(-1)
+ {}
+
+ bool isInArgFI(int FI) const {
+ return FI <= InArgFIRange.first && FI >= InArgFIRange.second;
+ }
+ void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
+
+ bool isOutArgFI(int FI) const {
+ return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
+ }
+ void extendOutArgFIRange(int FirstFI, int LastFI) {
+ if (!OutArgFIRange.second)
+ // this must be the first time this function was called.
+ OutArgFIRange.first = FirstFI;
+ OutArgFIRange.second = LastFI;
+ }
+
+ int getGPFI() const { return GPFI; }
+ void setGPFI(int FI) { GPFI = FI; }
+ bool needGPSaveRestore() const { return getGPFI(); }
+ bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
+
+ // The first call to this function creates a frame object for the
+ // dynamically allocated stack area.
+ int getDynAllocFI() const {
+ if (!DynAllocFI)
+ DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+ return DynAllocFI;
+ }
+ bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
+ void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+
+ int getAtomicFrameIndex() const { return AtomicFrameIndex; }
+ void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 000000000000..24390daff75c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,278 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// Mips::RA, return the number that it corresponds to (e.g. 31).
+unsigned MipsRegisterInfo::
+getRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
+ case Mips::AT : case Mips::F1 : return 1;
+ case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2;
+ case Mips::V1 : case Mips::F3 : return 3;
+ case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4;
+ case Mips::A1 : case Mips::F5 : return 5;
+ case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6;
+ case Mips::A3 : case Mips::F7 : return 7;
+ case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8;
+ case Mips::T1 : case Mips::F9 : return 9;
+ case Mips::T2 : case Mips::F10: case Mips::D5: return 10;
+ case Mips::T3 : case Mips::F11: return 11;
+ case Mips::T4 : case Mips::F12: case Mips::D6: return 12;
+ case Mips::T5 : case Mips::F13: return 13;
+ case Mips::T6 : case Mips::F14: case Mips::D7: return 14;
+ case Mips::T7 : case Mips::F15: return 15;
+ case Mips::S0 : case Mips::F16: case Mips::D8: return 16;
+ case Mips::S1 : case Mips::F17: return 17;
+ case Mips::S2 : case Mips::F18: case Mips::D9: return 18;
+ case Mips::S3 : case Mips::F19: return 19;
+ case Mips::S4 : case Mips::F20: case Mips::D10: return 20;
+ case Mips::S5 : case Mips::F21: return 21;
+ case Mips::S6 : case Mips::F22: case Mips::D11: return 22;
+ case Mips::S7 : case Mips::F23: return 23;
+ case Mips::T8 : case Mips::F24: case Mips::D12: return 24;
+ case Mips::T9 : case Mips::F25: return 25;
+ case Mips::K0 : case Mips::F26: case Mips::D13: return 26;
+ case Mips::K1 : case Mips::F27: return 27;
+ case Mips::GP : case Mips::F28: case Mips::D14: return 28;
+ case Mips::SP : case Mips::F29: return 29;
+ case Mips::FP : case Mips::F30: case Mips::D15: return 30;
+ case Mips::RA : case Mips::F31: return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
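A brief usage note: this mapping collapses aliased FPU registers onto their encoding slot, so, for example (illustrative; the Mips::* enums come from the generated MipsGenRegisterInfo.inc):

unsigned SPNum = MipsRegisterInfo::getRegisterNumbering(Mips::SP); // 29
unsigned D5Num = MipsRegisterInfo::getRegisterNumbering(Mips::D5); // 10, same slot as F10/F11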
+
+unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Mips callee-save register range is $16-$23, $f20-$f30
+ static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
+ Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
+ Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
+ Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
+ Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
+ };
+
+ static const unsigned Mips32CalleeSavedRegs[] = {
+ Mips::D15, Mips::D14, Mips::D13, Mips::D12, Mips::D11, Mips::D10,
+ Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
+ Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRegs;
+ else
+ return Mips32CalleeSavedRegs;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Mips::ZERO);
+ Reserved.set(Mips::AT);
+ Reserved.set(Mips::K0);
+ Reserved.set(Mips::K1);
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::SP);
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::RA);
+ Reserved.set(Mips::F31);
+ Reserved.set(Mips::D15);
+
+ // SVR4 requires that odd-numbered floating-point registers not be used.
+ if (!Subtarget.isSingleFloat() && !Subtarget.isMips32())
+ for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
+ Reserved.set(FReg);
+
+ return Reserved;
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex operands represent objects inside an abstract stack.
+// We must replace each FrameIndex with a direct stack/frame pointer
+// reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ errs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, a pointer to dynamically allocated
+ // stack space, or a $gp restore location.
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+ MipsFI->isDynAllocFI(FrameIndex))
+ Offset = spOffset;
+ else
+ Offset = spOffset + stackSize;
+
+ Offset += MI.getOperand(i+1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
+ ((Offset & 0x8000) != 0);
+
+ // FIXME: change this when Mips goes MC.
+ BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
+ BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
+ .addReg(Mips::AT);
+ FrameReg = Mips::AT;
+ Offset = (short)(Offset & 0xffff);
+
+ BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+ }
+
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
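When the final offset does not fit in the signed 16-bit immediate field, the code above rebuilds the address in $at: LUi loads ImmHi, ADDu adds the frame register, and the memory instruction keeps the sign-extended low half. The "+ ((Offset & 0x8000) != 0)" term compensates for that sign extension. A standalone C++ sketch of the split, with a check that it reassembles the original offset (illustrative only):

#include <cassert>
#include <cstdint>

// Mirrors the large-offset split above: the effective address is rebuilt as
//   FrameReg + (ImmHi << 16) + signext16(ImmLo)   (mod 2^32)
static void splitOffset(int32_t Offset, uint32_t &ImmHi, int16_t &ImmLo) {
  ImmHi = (((uint32_t)Offset & 0xffff0000u) >> 16) +
          ((Offset & 0x8000) != 0);      // carry fix for the signed low half
  ImmLo = (int16_t)(Offset & 0xffff);
}

static bool roundTrips(int32_t Offset) {
  uint32_t Hi; int16_t Lo;
  splitOffset(Offset, Hi, Lo);
  uint32_t Rebuilt = (Hi << 16) + (uint32_t)(int32_t)Lo; // wraps mod 2^32
  return Rebuilt == (uint32_t)Offset;
}

int main() {
  assert(roundTrips(0x12345678));
  assert(roundTrips(0x00018000));
  assert(roundTrips(-0x12345678));
  return 0;
}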
+
+unsigned MipsRegisterInfo::
+getRARegister() const {
+ return Mips::RA;
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int MipsRegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return MipsGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 000000000000..646369b5966f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,73 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MipsGenRegisterInfo.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+ const MipsSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// Mips::RA, return the number that it corresponds to (e.g. 31).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Adjust the Mips stack frame.
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 000000000000..f0db518b754b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,198 @@
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+class MipsRegWithSubRegs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 64-bit (aliased) FPU Registers
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+}
+class AFPR<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_fpeven, sub_fpodd];
+}
+
+// Mips Hardware Registers
+class HWR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+
+ // General Purpose Registers
+ def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
+ def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
+ def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
+ def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>;
+ def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<[5]>;
+ def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<[6]>;
+ def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<[7]>;
+ def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<[8]>;
+ def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<[9]>;
+ def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<[10]>;
+ def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<[11]>;
+ def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<[12]>;
+ def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<[13]>;
+ def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<[14]>;
+ def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<[15]>;
+ def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<[16]>;
+ def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<[17]>;
+ def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<[18]>;
+ def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<[19]>;
+ def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<[20]>;
+ def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<[21]>;
+ def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<[22]>;
+ def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<[23]>;
+ def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<[24]>;
+ def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
+ def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
+ def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
+ def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+
+ /// Mips single-precision FPU registers
+ def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "F10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "F11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "F12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "F13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "F14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "F15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "F16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "F17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "F18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "F19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "F20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "F21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "F22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "F23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "F24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "F25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "F26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "F27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "F28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
+
+ /// Mips double-precision FPU registers (aliased
+ /// with the single-precision registers to hold 64-bit values)
+ def D0 : AFPR< 0, "F0", [F0, F1]>;
+ def D1 : AFPR< 2, "F2", [F2, F3]>;
+ def D2 : AFPR< 4, "F4", [F4, F5]>;
+ def D3 : AFPR< 6, "F6", [F6, F7]>;
+ def D4 : AFPR< 8, "F8", [F8, F9]>;
+ def D5 : AFPR<10, "F10", [F10, F11]>;
+ def D6 : AFPR<12, "F12", [F12, F13]>;
+ def D7 : AFPR<14, "F14", [F14, F15]>;
+ def D8 : AFPR<16, "F16", [F16, F17]>;
+ def D9 : AFPR<18, "F18", [F18, F19]>;
+ def D10 : AFPR<20, "F20", [F20, F21]>;
+ def D11 : AFPR<22, "F22", [F22, F23]>;
+ def D12 : AFPR<24, "F24", [F24, F25]>;
+ def D13 : AFPR<26, "F26", [F26, F27]>;
+ def D14 : AFPR<28, "F28", [F28, F29]>;
+ def D15 : AFPR<30, "F30", [F30, F31]>;
+
+ // Hi/Lo registers
+ def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def LO : Register<"lo">, DwarfRegNum<[65]>;
+
+ // Status flags register
+ def FCR31 : Register<"31">;
+
+ // Hardware register $29
+ def HWR29 : Register<"29">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"Mips", [i32], 32, (add
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Reserved
+ ZERO, AT, K0, K1, GP, SP, FP, RA)>;
+
+// 64bit fp:
+// * FGR64 - 32 64-bit registers
+// * AFGR64 - 16 64-bit registers, each aliasing an even/odd pair of
+// 32-bit registers (32-bit FP mode)
+//
+// 32bit fp:
+// * FGR32 - 16 32-bit even registers
+// * FGR32 - 32 32-bit registers (single float only mode)
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
+ // Return Values and Arguments
+ D0, D1, D6, D7,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5, D8, D9,
+ // Callee save
+ D10, D11, D12, D13, D14,
+ // Reserved
+ D15)> {
+ let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
+}
+
+// Condition Register for floating point operations
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
+
+// Hi/Lo Registers
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
+
+// Hardware registers
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
new file mode 100644
index 000000000000..00be8ee94431
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -0,0 +1,63 @@
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across Mips chip sets. Based on GCC/Mips backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Mips
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Mips Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..e4d70fcec591
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-selectiondag-info"
+#include "MipsTargetMachine.h"
+using namespace llvm;
+
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
new file mode 100644
index 000000000000..6cafb558b35a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Mips subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSELECTIONDAGINFO_H
+#define MIPSSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MipsTargetMachine;
+
+class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
+ ~MipsSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 000000000000..6eee3333d584
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,62 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MipsGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little) :
+ MipsGenSubtargetInfo(TT, CPU, FS),
+ MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
+ IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
+ HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
+ HasSwap(false), HasBitCount(false)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mips1";
+ MipsArchVersion = Mips1;
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Is the target system Linux?
+ if (TT.find("linux") == std::string::npos)
+ IsLinux = false;
+
+ // When only the target triple is specified and it is
+ // an Allegrex target, set the features. We also match
+ // big- and little-endian Allegrex cores (we don't really
+ // know whether a big-endian one exists).
+ if (TT.find("mipsallegrex") != std::string::npos ||
+ TT.find("psp") != std::string::npos) {
+ MipsABI = EABI;
+ IsSingleFloat = true;
+ MipsArchVersion = Mips2;
+ HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
+ HasSEInReg = true;
+ HasBitCount = true;
+ HasSwap = true;
+ HasCondMov = true;
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 000000000000..533d4afe073e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,128 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MipsGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MipsSubtarget : public MipsGenSubtargetInfo {
+
+public:
+ enum MipsABIEnum {
+ O32, O64, N32, N64, EABI
+ };
+
+protected:
+
+ enum MipsArchEnum {
+ Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
+ };
+
+ // Mips architecture version
+ MipsArchEnum MipsArchVersion;
+
+ // Mips supported ABIs
+ MipsABIEnum MipsABI;
+
+ // IsLittle - The target is Little Endian
+ bool IsLittle;
+
+ // IsSingleFloat - The target only supports single-precision floating-
+ // point operations. This enables the target to use all 32 32-bit
+ // floating-point registers instead of only the even-numbered ones.
+ bool IsSingleFloat;
+
+ // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit;
+
+ // IsGP64bit - General-purpose registers are 64 bits wide.
+ bool IsGP64bit;
+
+ // HasVFPU - Processor has a vector floating point unit.
+ bool HasVFPU;
+
+ // IsLinux - Target system is Linux. If false, we assume a generic ELF OS for now.
+ bool IsLinux;
+
+ /// Features related to the presence of specific instructions.
+
+ // HasSEInReg - SEB and SEH (signext in register) instructions.
+ bool HasSEInReg;
+
+ // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+ bool HasCondMov;
+
+ // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+ // instructions.
+ bool HasMulDivAdd;
+
+ // HasMinMax - MIN and MAX instructions.
+ bool HasMinMax;
+
+ // HasSwap - Byte and half swap instructions.
+ bool HasSwap;
+
+ // HasBitCount - Count leading '1' and '0' bits.
+ bool HasBitCount;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// Only O32 and EABI supported right now.
+ bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_O32() const { return MipsABI == O32; }
+ unsigned getTargetABI() const { return MipsABI; }
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isMips1() const { return MipsArchVersion == Mips1; }
+ bool isMips32() const { return MipsArchVersion >= Mips32; }
+ bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
+
+ bool isLittle() const { return IsLittle; }
+ bool isFP64bit() const { return IsFP64bit; }
+ bool isGP64bit() const { return IsGP64bit; }
+ bool isGP32bit() const { return !IsGP64bit; }
+ bool isSingleFloat() const { return IsSingleFloat; }
+ bool isNotSingleFloat() const { return !IsSingleFloat; }
+ bool hasVFPU() const { return HasVFPU; }
+ bool isLinux() const { return IsLinux; }
+
+ /// Features related to the presence of specific instructions.
+ bool hasSEInReg() const { return HasSEInReg; }
+ bool hasCondMov() const { return HasCondMov; }
+ bool hasMulDivAdd() const { return HasMulDivAdd; }
+ bool hasMinMax() const { return HasMinMax; }
+ bool hasSwap() const { return HasSwap; }
+ bool hasBitCount() const { return HasBitCount; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 000000000000..20b9f4ea3853
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,88 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMipsTarget() {
+ // Register the target.
+ RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment.
+// The stack is always 8-byte aligned.
+// In the function prologue, the stack frame is created by decrementing the
+// stack pointer. Once decremented, all references use positive offsets from
+// the stack/frame pointer; using StackGrowsUp makes this easier to handle.
+// Using CodeModel::Large enables different CALL behavior.
+MipsTargetMachine::
+MipsTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool isLittle=false):
+ LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this) {
+ // Abicall enables PIC by default
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isABI_O32())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+}
+
+MipselTargetMachine::
+MipselTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS) :
+ MipsTargetMachine(T, TT, CPU, FS, true) {}
+
+// Install an instruction selector pass that uses
+// the SelectionDAG to generate Mips code.
+bool MipsTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after these passes.
+bool MipsTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsDelaySlotFillerPass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::
+addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMipsEmitGPRestorePass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::
+addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMipsExpandPseudoPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 000000000000..a021af2ff16d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,82 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsISelLowering.h"
+#include "MipsFrameLowering.h"
+#include "MipsSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MipsInstrInfo InstrInfo;
+ MipsFrameLowering FrameLowering;
+ MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ public:
+ MipsTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool isLittle);
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ };
+
+/// MipselTargetMachine - Mipsel target machine.
+///
+class MipselTargetMachine : public MipsTargetMachine {
+public:
+ MipselTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
new file mode 100644
index 000000000000..cf5d1b58addd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -0,0 +1,102 @@
+//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
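+// Example (illustrative): the threshold can be overridden on the llc command
+// line, e.g. "llc -march=mips -mips-ssection-threshold=16 foo.ll" to allow
+// objects of up to 16 bytes into the small sections.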
+
+void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An object must be placed in a small section if its size is less than or
+// equal to the small section size threshold. Data in these sections must be
+// addressed using the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= SSThreshold;
+}
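+// Example (illustrative): a global placed in .sdata/.sbss is accessed
+// gp-relative, roughly as
+//   lw $2, %gp_rel(counter)($gp)
+// (with "counter" a hypothetical small global), instead of the usual
+// %hi/%lo address materialization.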
+
+bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+
+  // Only use the small section for non-Linux targets.
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isLinux())
+ return false;
+
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+  // If this is an internal constant string, there is a special
+  // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+
+
+const MCSection *MipsTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
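+// Illustrative sketch of the classification above, assuming hypothetical
+// globals, the default 8-byte threshold and a non-Linux target:
+//   int counter;           // 4 bytes, zero-initialized -> .sbss
+//   int seed = 42;         // 4 bytes, initialized      -> .sdata
+//   int table[16] = {1};   // 64 bytes, over threshold  -> regular .data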
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
new file mode 100644
index 000000000000..c394a9dc02e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM, SectionKind Kind)const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+
+ // TODO: Classify globals as mips wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
new file mode 100644
index 000000000000..a8d6fe94b1ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+
+extern "C" void LLVMInitializeMipsTargetInfo() {
+ RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
+
+ RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
+}
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
new file mode 100644
index 000000000000..331266da30b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LLVM_TARGET_DEFINITIONS PTX.td)
+
+tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(PTXGenDAGISel.inc -gen-dag-isel)
+tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
+tablegen(PTXGenInstrNames.inc -gen-instr-enums)
+tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
+tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PTXGenRegisterNames.inc -gen-register-enums)
+tablegen(PTXGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PTXCodeGen
+ PTXAsmPrinter.cpp
+ PTXISelDAGToDAG.cpp
+ PTXISelLowering.cpp
+ PTXInstrInfo.cpp
+ PTXFrameLowering.cpp
+ PTXMCAsmInfo.cpp
+ PTXMCAsmStreamer.cpp
+ PTXMFInfoExtract.cpp
+ PTXRegisterInfo.cpp
+ PTXSubtarget.cpp
+ PTXTargetMachine.cpp
+ )
+
+add_subdirectory(TargetInfo)
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..df0f63fdba60
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPTXDesc
+ PTXMCTargetDesc.cpp
+ PTXMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..35f5a7b2e6ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PTX/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
new file mode 100644
index 000000000000..efefead5341d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -0,0 +1,35 @@
+//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::ptx64)
+ PointerSize = 8;
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasDotTypeDotSizeDirective = false;
+
+ HasSingleParameterDotFile = false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
new file mode 100644
index 000000000000..03f5d66b3d60
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MCASM_INFO_H
+#define PTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct PTXMCAsmInfo : public MCAsmInfo {
+ explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+} // namespace llvm
+
+#endif // PTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 000000000000..23f70bd13787
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCTargetDesc.h"
+#include "PTXMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPTXMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializePTXMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+}
+
+static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
+ createPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
+ createPTXMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializePTXMCAsmInfo() {
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
new file mode 100644
index 000000000000..1003b0b5ece9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXMCTARGETDESC_H
+#define PTXMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePTX32Target;
+extern Target ThePTX64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "PTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
new file mode 100644
index 000000000000..28cab2429c81
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -0,0 +1,48 @@
+//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_H
+#define PTX_H
+
+#include "MCTargetDesc/PTXMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class PTXTargetMachine;
+ class FunctionPass;
+
+ namespace PTX {
+ enum StateSpace {
+ GLOBAL = 0, // default to global state space
+ CONSTANT = 1,
+ LOCAL = 2,
+ PARAMETER = 3,
+ SHARED = 4
+ };
+
+ enum Predicate {
+ PRED_NORMAL = 0,
+ PRED_NEGATE = 1
+ };
+ } // namespace PTX
+
+ FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // namespace llvm;
+
+#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
new file mode 100644
index 000000000000..f6fbe9fffc6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -0,0 +1,135 @@
+//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
+ "Disable Fused-Multiply Add">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0">;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1">;
+
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+ "Use PTX Language Version 2.2">;
+
+def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
+ "Use PTX Language Version 2.3">;
+
+//===- PTX Target ---------------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
+ "Use Shader Model 1.0">;
+def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
+ "Use Shader Model 1.1">;
+def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
+ "Use Shader Model 1.2">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
+ "Use Shader Model 1.3">;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
+ "Use Shader Model 2.0">;
+def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
+ "Use Shader Model 2.1">;
+def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
+ "Use Shader Model 2.2">;
+def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
+ "Use Shader Model 2.3">;
+
+def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
+ "PTX_COMPUTE_1_0",
+ "Use Compute Compatibility 1.0">;
+def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
+ "PTX_COMPUTE_1_1",
+ "Use Compute Compatibility 1.1">;
+def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
+ "PTX_COMPUTE_1_2",
+ "Use Compute Compatibility 1.2">;
+def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
+ "PTX_COMPUTE_1_3",
+ "Use Compute Compatibility 1.3">;
+def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
+ "PTX_COMPUTE_2_0",
+ "Use Compute Compatibility 2.0">;
+
+//===----------------------------------------------------------------------===//
+// PTX supported processors
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+// Processor definitions for compute/shader models
+def : Proc<"compute_10", [FeatureCOMPUTE10]>;
+def : Proc<"compute_11", [FeatureCOMPUTE11]>;
+def : Proc<"compute_12", [FeatureCOMPUTE12]>;
+def : Proc<"compute_13", [FeatureCOMPUTE13]>;
+def : Proc<"compute_20", [FeatureCOMPUTE20]>;
+def : Proc<"sm_10", [FeatureSM10]>;
+def : Proc<"sm_11", [FeatureSM11]>;
+def : Proc<"sm_12", [FeatureSM12]>;
+def : Proc<"sm_13", [FeatureSM13]>;
+def : Proc<"sm_20", [FeatureSM20]>;
+def : Proc<"sm_21", [FeatureSM21]>;
+def : Proc<"sm_22", [FeatureSM22]>;
+def : Proc<"sm_23", [FeatureSM23]>;
+
+// Processor definitions for common GPU architectures
+def : Proc<"g80", [FeatureSM10]>;
+def : Proc<"gt200", [FeatureSM13]>;
+def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
+def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
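+// Example (illustrative): the processor and feature names above map directly
+// to llc's -mcpu and -mattr flags, e.g.
+//   llc -march=ptx32 -mcpu=sm_20 -mattr=+double,+ptx23 kernel.ll
+// where "ptx32" is assumed here to be the registered target name.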
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PTXRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PTXCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrInfo.td"
+
+def PTXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def PTX : Target {
+ let InstructionSet = PTXInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 000000000000..2848d5460eee
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,605 @@
+//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-asm-printer"
+
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ // autogen'd.
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace
+
+static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
+
+static const char *getRegisterTypeName(unsigned RegNo) {
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+ TEST_REGCLS(RegPred, pred);
+ TEST_REGCLS(RegI16, b16);
+ TEST_REGCLS(RegI32, b32);
+ TEST_REGCLS(RegI64, b64);
+ TEST_REGCLS(RegF32, b32);
+ TEST_REGCLS(RegF64, b64);
+#undef TEST_REGCLS
+
+ llvm_unreachable("Not in any register class!");
+ return NULL;
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+ switch (addressSpace) {
+ default: llvm_unreachable("Unknown state space");
+ case PTX::GLOBAL: return "global";
+ case PTX::CONSTANT: return "const";
+ case PTX::LOCAL: return "local";
+ case PTX::PARAMETER: return "param";
+ case PTX::SHARED: return "shared";
+ }
+ return NULL;
+}
+
+static const char *getTypeName(const Type* type) {
+ while (true) {
+ switch (type->getTypeID()) {
+ default: llvm_unreachable("Unknown type");
+ case Type::FloatTyID: return ".f32";
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
+ case Type::ArrayTyID:
+ case Type::PointerTyID:
+ type = dyn_cast<const SequentialType>(type)->getElementType();
+ break;
+ }
+ }
+ return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temporarily remove global variables so that doFinalization() will not
+  // emit them again (global variables are emitted at the beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+ OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ (ST.supportsDouble() ? ""
+ : ", map_f64_to_f32")));
+ // .address_size directive is optional, but it must immediately follow
+ // the .target directive if present within a module
+ if (ST.supportsPTX23()) {
+ std::string addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ }
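+  // For example (illustrative), an sm_20 module built with PTX 2.3 support
+  // on a 64-bit subtarget starts with:
+  //   .version 2.3
+  //   .target sm_20
+  //   .address_size 64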
+
+ OutStreamer.AddBlankLine();
+
+ // Define any .file directives
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ GetOrCreateSourceID(FN, Dir);
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // declare global variables
+ for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+ i != e; ++i)
+ EmitVariableDeclaration(i);
+}
+
+bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ EmitFunctionDeclaration();
+ EmitFunctionBody();
+ return false;
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText(Twine("{"));
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ // Print local variable definition
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
+ unsigned reg = *i;
+
+ std::string def = "\t.reg .";
+ def += getRegisterTypeName(reg);
+ def += ' ';
+ def += getRegisterName(reg);
+ def += ';';
+ OutStreamer.EmitRawText(Twine(def));
+ }
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+ << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ if (FrameInfo->getObjectSize(i) > 0) {
+ std::string def = "\t.reg .b";
+ def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+ def += " s";
+ def += utostr(i);
+ def += ";";
+ OutStreamer.EmitRawText(Twine(def));
+ }
+ }
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ std::string str;
+ str.reserve(64);
+
+ raw_string_ostream OS(str);
+
+ DebugLoc DL = MI->getDebugLoc();
+ if (!DL.isUnknown()) {
+
+ const MDNode *S = DL.getScope(MF->getFunction()->getContext());
+
+ // This is taken from DwarfDebug.cpp, which is conveniently not a public
+ // LLVM class.
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Fn, Dir);
+ }
+ OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
+ 0, 0, 0, Fn);
+
+ const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
+
+ OS << "\t.loc ";
+ OS << utostr(MDL.getFileNum());
+ OS << " ";
+ OS << utostr(MDL.getLine());
+ OS << " ";
+ OS << utostr(MDL.getColumn());
+ OS << "\n";
+ }
+
+
+ // Emit predicate
+ printPredicateOperand(MI, OS);
+
+ // Write instruction to str
+ printInstruction(MI, OS);
+ OS << ';';
+ OS.flush();
+
+ StringRef strref = StringRef(str);
+ OutStreamer.EmitRawText(strref);
+}
+
+void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << (long) MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << *MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_Register:
+ OS << getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_FPImmediate:
+ APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+ // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
+ if (isFloat) {
+ OS << "0F";
+ }
+ else {
+ OS << "0D";
+ }
+ // Emit the encoded floating-point value.
+ if (constFP.getZExtValue() > 0) {
+ OS << constFP.toString(16, false);
+ }
+ else {
+ OS << "00000000";
+      // If we have a double-precision zero, pad to 8 bytes.
+ if (!isFloat) {
+ OS << "00000000";
+ }
+ }
+ break;
+ }
+}
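+// Example (illustrative) of the encoding above: the float 1.0f bitcasts to
+// 0x3F800000 and prints as "0F3F800000", while the double 1.0 bitcasts to
+// 0x3FF0000000000000 and prints as "0D3FF0000000000000".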
+
+void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ printOperand(MI, opNum, OS);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ OS << "+";
+ printOperand(MI, opNum+1, OS);
+}
+
+void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(gv))
+ return;
+
+ MCSymbol *gvsym = Mang->getSymbol(gv);
+
+ assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+ std::string decl;
+
+ // check if it is defined in some other translation unit
+ if (gv->isDeclaration())
+ decl += ".extern ";
+
+ // state space: e.g., .global
+ decl += ".";
+ decl += getStateSpaceName(gv->getType()->getAddressSpace());
+ decl += " ";
+
+ // alignment (optional)
+ unsigned alignment = gv->getAlignment();
+ if (alignment != 0) {
+ decl += ".align ";
+ decl += utostr(Log2_32(gv->getAlignment()));
+ decl += " ";
+ }
+
+
+ if (PointerType::classof(gv->getType())) {
+ const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
+ const Type* elementTy = pointerTy->getElementType();
+
+ decl += ".b8 ";
+ decl += gvsym->getName();
+ decl += "[";
+
+ if (elementTy->isArrayTy())
+ {
+ assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
+
+ const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ unsigned numElements = arrayTy->getNumElements();
+
+ while (elementTy->isArrayTy()) {
+
+ arrayTy = dyn_cast<const ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ numElements *= arrayTy->getNumElements();
+ }
+
+ // FIXME: isPrimitiveType() == false for i16?
+ assert(elementTy->isSingleValueType() &&
+ "Non-primitive types are not handled");
+
+ // Compute the size of the array, in bytes.
+ uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
+ * numElements;
+
+ decl += utostr(arraySize);
+ }
+
+ decl += "]";
+
+ // handle string constants (assume ConstantArray means string)
+
+ if (gv->hasInitializer())
+ {
+ const Constant *C = gv->getInitializer();
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ {
+ decl += " = {";
+
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ {
+ if (i > 0) decl += ",";
+
+ decl += "0x" +
+ utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ }
+
+ decl += "}";
+ }
+ }
+ }
+ else {
+ // Note: this is currently the fall-through case and most likely generates
+ // incorrect code.
+ decl += getTypeName(gv->getType());
+ decl += " ";
+
+ decl += gvsym->getName();
+
+ if (ArrayType::classof(gv->getType()) ||
+ PointerType::classof(gv->getType()))
+ decl += "[]";
+ }
+
+ decl += ";";
+
+ OutStreamer.EmitRawText(Twine(decl));
+
+ OutStreamer.AddBlankLine();
+}
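+// Example (illustrative): for a hypothetical global in the default (global)
+// state space whose pointee type is [10 x i32], the code above emits roughly
+//   .global .b8 data[40];
+// i.e. arrays are flattened to a byte array covering the total size.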
+
+void PTXAsmPrinter::EmitFunctionDeclaration() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (!CurrentFnSym->isUndefined()) {
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+ return;
+ }
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const bool isKernel = MFI->isKernel();
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ std::string decl = isKernel ? ".entry" : ".func";
+
+ unsigned cnt = 0;
+
+ if (!isKernel) {
+ decl += " (";
+ for (PTXMachineFunctionInfo::ret_iterator
+ i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i);
+ decl += " ";
+ decl += getRegisterName(*i);
+ }
+ decl += ")";
+ }
+
+ // Print function name
+ decl += " ";
+ decl += CurrentFnSym->getName().str();
+
+ decl += " (";
+
+ cnt = 0;
+
+ // Print parameters
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ decl += ".param .b";
+ decl += utostr(*i);
+ decl += " ";
+ decl += PARAM_PREFIX;
+ decl += utostr(++cnt);
+ } else {
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i);
+ decl += " ";
+ decl += getRegisterName(*i);
+ }
+ }
+ decl += ")";
+
+ OutStreamer.EmitRawText(Twine(decl));
+}
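+// Example (illustrative): a hypothetical kernel taking two i32 arguments is
+// declared roughly as
+//   .entry my_kernel (.param .b32 __param_1, .param .b32 __param_2)
+// while a non-kernel device function additionally gets the leading
+// "(.reg ...)" return-register group built above.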
+
+void PTXAsmPrinter::
+printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ unsigned reg = MI->getOperand(i).getReg();
+ int predOp = MI->getOperand(i+1).getImm();
+
+ DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
+
+ if (reg != PTX::NoRegister) {
+ O << '@';
+ if (predOp == PTX::PRED_NEGATE)
+ O << '!';
+ O << getRegisterName(reg);
+ }
+}
+
+unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+  // MCStreamer expects the full path name as the filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
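+// Example (illustrative): the first time a hypothetical file "kernel.cu" is
+// seen this emits
+//   .file 1 "kernel.cu"
+// and EmitInstruction() above then prefixes instructions with matching
+//   .loc 1 <line> <column>
+// directives.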
+
+#include "PTXGenAsmWriter.inc"
+
+// Force static initialization.
+extern "C" void LLVMInitializePTXAsmPrinter() {
+ RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
+ RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
new file mode 100644
index 000000000000..3e3ff4896621
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
@@ -0,0 +1,29 @@
+
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
+ CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
+ CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
+]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
new file mode 100644
index 000000000000..b621b9d634d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
@@ -0,0 +1,24 @@
+//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXFrameLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+
+void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
new file mode 100644
index 000000000000..9320676150df
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
@@ -0,0 +1,44 @@
+//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_FRAMEINFO_H
+#define PTX_FRAMEINFO_H
+
+#include "PTX.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class PTXSubtarget;
+
+class PTXFrameLowering : public TargetFrameLowering {
+protected:
+ const PTXSubtarget &STI;
+
+public:
+ explicit PTXFrameLowering(const PTXSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9adfa624b29e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -0,0 +1,182 @@
+//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// PTXDAGToDAGISel - PTX specific code to select PTX machine
+// instructions for SelectionDAG operations.
+class PTXDAGToDAGISel : public SelectionDAGISel {
+ public:
+ PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+ virtual const char *getPassName() const {
+ return "PTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ SDNode *Select(SDNode *Node);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces auto'gened from the target description
+#include "PTXGenDAGISel.inc"
+
+ private:
+    // We need this only because we can't match the instruction BRAdp
+    // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
+ SDNode *SelectBRCOND(SDNode *Node);
+
+ bool isImm(const SDValue &operand);
+ bool SelectImm(const SDValue &operand, SDValue &imm);
+
+ const PTXSubtarget& getSubtarget() const;
+}; // class PTXDAGToDAGISel
+} // namespace
+
+// createPTXISelDag - This pass converts a legalized DAG into a
+// PTX-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXDAGToDAGISel(TM, OptLevel);
+}
+
+PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
+ switch (Node->getOpcode()) {
+ case ISD::BRCOND:
+ return SelectBRCOND(Node);
+ default:
+ return SelectCode(Node);
+ }
+}
+
+SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
+ assert(Node->getNumOperands() >= 3);
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Pred = Node->getOperand(1);
+ SDValue Target = Node->getOperand(2); // branch target
+ SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(Target.getOpcode() == ISD::BasicBlock);
+ assert(Pred.getValueType() == MVT::i1);
+
+ // Emit BRAdp
+ SDValue Ops[] = { Target, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
+}
+
+// Match memory operand of the form [reg+reg]
+bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
+ isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
+ return false;
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() != ISD::ADD) {
+ // let SelectADDRii handle the [imm] case
+ if (isImm(Addr))
+ return false;
+ // it is [reg]
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ if (Addr.getNumOperands() < 2)
+ return false;
+
+ // let SelectADDRii handle the [imm+imm] case
+ if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
+ return false;
+
+ // try [reg+imm] and [imm+reg]
+ for (int i = 0; i < 2; i ++)
+ if (SelectImm(Addr.getOperand(1-i), Offset)) {
+ Base = Addr.getOperand(i);
+ return true;
+ }
+
+ // neither [reg+imm] nor [imm+reg]
+ return false;
+}
+
+// Match memory operand of the form [imm+imm] and [imm]
+bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // is [imm+imm]?
+ if (Addr.getOpcode() == ISD::ADD) {
+ return SelectImm(Addr.getOperand(0), Base) &&
+ SelectImm(Addr.getOperand(1), Offset);
+ }
+
+ // is [imm]?
+ if (SelectImm(Addr, Base)) {
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ return false;
+}
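+// Illustrative summary of the addressing forms handled above, following the
+// comments on each selector:
+//   [reg+reg]                   -> SelectADDRrr
+//   [reg], [reg+imm], [imm+reg] -> SelectADDRri
+//   [imm], [imm+imm]            -> SelectADDRii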
+
+bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
+ return ConstantSDNode::classof(operand.getNode());
+}
+
+bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
+ SDNode *node = operand.getNode();
+ if (!ConstantSDNode::classof(node))
+ return false;
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(node);
+ imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
+ operand.getValueType());
+ return true;
+}
+
+const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
+{
+ return TM.getSubtarget<PTXSubtarget>();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
new file mode 100644
index 000000000000..6fcf710e3f1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -0,0 +1,347 @@
+//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXISelLowering.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "PTXGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the register classes.
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+ addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setMinFunctionAlignment(2);
+
+ ////////////////////////////////////
+ /////////// Expansion //////////////
+ ////////////////////////////////////
+
+ // (any/zero/sign) extload => load + (any/zero/sign) extend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // f32 extload => load + fextend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // f64 truncstore => trunc + store
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // sign_extend_inreg => sign_extend
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // br_cc => brcond
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // select_cc => setcc
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ ////////////////////////////////////
+ //////////// Legal /////////////////
+ ////////////////////////////////////
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ ////////////////////////////////////
+ //////////// Custom ////////////////
+ ////////////////////////////////////
+
+ // customise setcc to use bitwise logic if possible
+
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
+
+ // customize translation of memory addresses
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+}
+
+MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unimplemented operand");
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ }
+}
+
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown opcode");
+ case PTXISD::COPY_ADDRESS:
+ return "PTXISD::COPY_ADDRESS";
+ case PTXISD::LOAD_PARAM:
+ return "PTXISD::LOAD_PARAM";
+ case PTXISD::STORE_PARAM:
+ return "PTXISD::STORE_PARAM";
+ case PTXISD::EXIT:
+ return "PTXISD::EXIT";
+ case PTXISD::RET:
+ return "PTXISD::RET";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Look for X == 0, X == 1, X != 0, or X != 1
+ // We can simplify these to bitwise logic
+
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ }
+
+ return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+ SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+ dl,
+ PtrVT.getSimpleVT(),
+ targetGlobal);
+
+ return movInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ break;
+ case CallingConv::PTX_Kernel:
+ MFI->setKernel(true);
+ break;
+ case CallingConv::PTX_Device:
+ MFI->setKernel(false);
+ break;
+ }
+
+ // We do one of two things here:
+ // IsKernel || SM >= 2.0 -> Use param space for arguments
+ // SM < 2.0 -> Use registers for arguments
+ if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+ // We just need to emit the proper LOAD_PARAM ISDs
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+
+ assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+ "Kernels cannot take pred operands");
+
+ SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ DAG.getTargetConstant(i, MVT::i32));
+ InVals.push_back(ArgValue);
+
+ // Instead of storing a physical register in our argument list, we just
+ // store the total size of the parameter, in bits. The ASM printer
+ // knows how to process this.
+ MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
+ }
+ }
+ else {
+ // For device functions, we use the PTX calling convention to do register
+ // assignments then create CopyFromReg ISDs for the allocated registers
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign& VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass* TRC = 0;
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ InVals.push_back(ArgValue);
+
+ MFI->addArgReg(VA.getLocReg());
+ }
+ }
+
+ return Chain;
+}
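+
+// To illustrate the two paths above: LOAD_PARAM nodes are matched by the LDpi*
+// definitions in PTXInstrInfo.td and turn into .param-space loads such as
+// "ld.param.u32\t$d, [$a]", with the bit sizes recorded via addArgReg feeding
+// the parameter list the ASM printer emits; the CopyFromReg path instead
+// records the physical registers assigned by CC_PTX, so those arguments are
+// expected to arrive directly in registers.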
+
+SDValue PTXTargetLowering::
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention.");
+ case CallingConv::PTX_Kernel:
+ assert(Outs.size() == 0 && "Kernel must return void.");
+ return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+ case CallingConv::PTX_Device:
+ //assert(Outs.size() <= 1 && "Can at most return one value.");
+ break;
+ }
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+
+ SDValue Flag;
+
+ // Even though we could use the .param space for return arguments for
+ // device functions if SM >= 2.0 and the number of return arguments is
+ // only 1, we just always use registers since this makes the codegen
+ // easier.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign& VA = RVLocs[i];
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ unsigned Reg = VA.getLocReg();
+
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+
+ // Glue the emitted copies together so that the CopyToReg nodes stay
+ // adjacent and are scheduled as a unit.
+ Flag = Chain.getValue(1);
+
+ MFI->addRetReg(Reg);
+ }
+
+ if (Flag.getNode() == 0) {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ }
+ else {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ }
+}
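+
+// Summary of the return lowering: a PTX_Kernel function returns void and is
+// lowered to a single PTXISD::EXIT node, while a PTX_Device function copies
+// each return value into the registers chosen by RetCC_PTX, glues those
+// copies together, and ends with PTXISD::RET. The glue operand is attached to
+// the RET node only when at least one value was actually copied out.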
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 000000000000..43185416e1fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,70 @@
+//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+class PTXSubtarget;
+class PTXTargetMachine;
+
+namespace PTXISD {
+ enum NodeType {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ LOAD_PARAM,
+ STORE_PARAM,
+ EXIT,
+ RET,
+ COPY_ADDRESS
+ };
+} // namespace PTXISD
+
+class PTXTargetLowering : public TargetLowering {
+ public:
+ explicit PTXTargetLowering(TargetMachine &TM);
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 000000000000..8cee351ee0df
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,24 @@
+//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Predicate operand, defaults to (0, 0) = (zero-reg, always).
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
+ (ops (i1 zero_reg), (i32 0))>;
+
+let Namespace = "PTX" in {
+ class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+ : Instruction {
+ dag OutOperandList = oops;
+ dag InOperandList = !con(iops, (ins pred:$_p));
+ let AsmString = asmstr; // Predicate printing is defined elsewhere.
+ let Pattern = pattern;
+ let isPredicable = 1;
+ }
+}
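+
+// For reference: because InstPTX appends "pred:$_p" to every instruction's
+// inputs, each machine instruction carries a (predicate register, modifier)
+// pair. The default (zero_reg, 0) means the instruction is unpredicated; when
+// a real predicate register is assigned, it is printed as a PTX guard (an '@'
+// prefix on the instruction in the emitted assembly).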
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 000000000000..425265a2fdb7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,410 @@
+//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-instrinfo"
+
+#include "PTX.h"
+#include "PTXInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PTXGenInstrInfo.inc"
+
+using namespace llvm;
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+ : PTXGenInstrInfo(),
+ RI(_TM, *this), TM(_TM) {}
+
+static const struct map_entry {
+ const TargetRegisterClass *cls;
+ const int opcode;
+} map[] = {
+ { &PTX::RegI16RegClass, PTX::MOVU16rr },
+ { &PTX::RegI32RegClass, PTX::MOVU32rr },
+ { &PTX::RegI64RegClass, PTX::MOVU64rr },
+ { &PTX::RegF32RegClass, PTX::MOVF32rr },
+ { &PTX::RegF64RegClass, PTX::MOVF64rr },
+ { &PTX::RegPredRegClass, PTX::MOVPREDrr }
+};
+
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const {
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
+ if (map[i].cls->contains(DstReg, SrcReg)) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPredicate(MI);
+ return;
+ }
+ }
+
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
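+
+// Example of the table-driven lookup above: a copy between two RegF32
+// registers selects MOVF32rr (printed as "mov.f32\t$d, $a" per
+// PTXInstrInfo.td), and AddDefaultPredicate appends the default, unpredicated
+// guard operands so the move is always executed.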
+
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
+ if (DstRC != SrcRC)
+ return false;
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+ if (DstRC == map[i].cls) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
+ AddDefaultPredicate(MI);
+ return true;
+ }
+
+ return false;
+}
+
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+ "Invalid register-register move instruction");
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+}
+
+// predicate support
+
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int i = MI->findFirstPredOperandIdx();
+ return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+}
+
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ if (Pred.size() < 2)
+ llvm_unreachable("fewer than 2 predicate operands provided");
+
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ MI->getOperand(i).setReg(Pred[0].getReg());
+ MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+ return true;
+}
+
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ const MachineOperand &PredReg1 = Pred1[0];
+ const MachineOperand &PredReg2 = Pred2[0];
+ if (PredReg1.getReg() != PredReg2.getReg())
+ return false;
+
+ const MachineOperand &PredOp1 = Pred1[1];
+ const MachineOperand &PredOp2 = Pred2[1];
+ if (PredOp1.getImm() != PredOp2.getImm())
+ return false;
+
+ return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // If an instruction sets a predicate register, it defines a predicate.
+
+ // TODO: support the 5-operand format of the setp instruction
+
+ if (MI->getNumOperands() < 1)
+ return false;
+
+ const MachineOperand &MO = MI->getOperand(0);
+
+ if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
+ return false;
+
+ Pred.push_back(MO);
+ Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ return true;
+}
+
+// branch support
+
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // TODO implement cases when AllowModify is true
+
+ if (MBB.empty())
+ return true;
+
+ MachineBasicBlock::const_iterator iter = MBB.end();
+ const MachineInstr& instLast1 = *--iter;
+ const MCInstrDesc &desc1 = instLast1.getDesc();
+ // handle the special case where MBB contains only one instruction
+ const bool IsSizeOne = MBB.size() == 1;
+ // if IsSizeOne is true, *--iter would be invalid, so reuse instLast1/desc1
+ // as dummy values for instLast2/desc2, which are still referenced below
+ const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+ const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+
+ DEBUG(dbgs() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
+
+ // this block ends with no branches
+ if (!IsAnyKindOfBranch(instLast1)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+ return false;
+ }
+
+ // this block ends with only an unconditional branch
+ if (desc1.isUnconditionalBranch() &&
+ // when IsSizeOne is true, short-circuiting skips the check of instLast2
+ (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+ TBB = GetBranchTarget(instLast1);
+ return false;
+ }
+
+ // this block ends with a conditional branch and
+ // it falls through to a successor block
+ if (desc1.isConditionalBranch() &&
+ IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+ TBB = GetBranchTarget(instLast1);
+ int i = instLast1.findFirstPredOperandIdx();
+ Cond.push_back(instLast1.getOperand(i));
+ Cond.push_back(instLast1.getOperand(i+1));
+ return false;
+ }
+
+ // when IsSizeOne is true, we are done
+ if (IsSizeOne)
+ return true;
+
+ // this block ends with a conditional branch
+ // followed by an unconditional branch
+ if (desc2.isConditionalBranch() &&
+ desc1.isUnconditionalBranch()) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+ TBB = GetBranchTarget(instLast2);
+ FBB = GetBranchTarget(instLast1);
+ int i = instLast2.findFirstPredOperandIdx();
+ Cond.push_back(instLast2.getOperand(i));
+ Cond.push_back(instLast2.getOperand(i+1));
+ return false;
+ }
+
+ // branch cannot be understood
+ DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+ return true;
+}
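+
+// For reference, the cases recognized above map onto the usual AnalyzeBranch
+// contract as follows:
+//   falls through (no branch)     -> TBB/FBB untouched, Cond empty
+//   unconditional branch only     -> TBB = target, Cond empty
+//   conditional branch + fall     -> TBB = target, Cond = (pred reg, pred imm)
+//   conditional + unconditional   -> TBB = cond target, FBB = uncond target
+// Anything else returns true, meaning the terminator sequence could not be
+// analyzed.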
+
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ unsigned count = 0;
+ while (!MBB.empty())
+ if (IsAnyKindOfBranch(MBB.back())) {
+ MBB.pop_back();
+ ++count;
+ } else
+ break;
+ DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+ return count;
+}
+
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: TBB: (NULL)\n");
+ DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: FBB: (NULL)\n");
+ DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+ assert(TBB && "TBB is NULL");
+
+ if (FBB) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ return 2;
+ } else if (Cond.size()) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ return 1;
+ } else {
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ return 1;
+ }
+}
+
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKSTOREI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKSTOREI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+ // FIXME: this reuses the 32-bit stack-store opcode for 64-bit registers
+ OpCode = PTX::STACKSTOREI32;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKSTOREF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKSTOREF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the store instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addFrameIndex(FrameIdx);
+ MIB.addReg(SrcReg);
+
+ AddDefaultPredicate(MIB);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKLOADI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKLOADI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+ // FIXME: this reuses the 32-bit stack-load opcode for 64-bit registers
+ OpCode = PTX::STACKLOADI32;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKLOADF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKLOADF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the load instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addReg(DestReg);
+ MIB.addFrameIndex(FrameIdx);
+
+ AddDefaultPredicate(MIB);
+}
+
+// static helper routines
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue ops[] = { Op1, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue ops[] = { Op1, Op2, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+ if (MI->findFirstPredOperandIdx() == -1) {
+ MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+ const MCInstrDesc &desc = inst.getDesc();
+ return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+}
+
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+ for (MachineBasicBlock::const_succ_iterator
+ i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+ if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
+ return true;
+ return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+ // FIXME: So far all branch instructions put the destination in the first operand
+ const MachineOperand& target = inst.getOperand(0);
+ assert(target.isMBB() && "FIXME: detect branch target operand");
+ return target.getMBB();
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
new file mode 100644
index 000000000000..871f1ac8d376
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
@@ -0,0 +1,133 @@
+//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
+class PTXInstrInfo : public PTXGenInstrInfo {
+private:
+ const PTXRegisterInfo RI;
+ PTXTargetMachine &TM;
+
+public:
+ explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+ virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ // predicate support
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ // PTX is fully predicable
+ virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+ // branch support
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
+ // we currently use to spill, though I doubt the overall effect on ptxas
+ // output will be large. I have yet to see a case where ptxas fails to see
+ // through the "stack" register usage; it generates efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ // static helper routines
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1);
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2);
+
+ static void AddDefaultPredicate(MachineInstr *MI);
+
+ static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+ static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+ static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // PTX_INSTR_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
new file mode 100644
index 000000000000..6bfe906d40ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -0,0 +1,1102 @@
+//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Code Generation Predicates
+//===----------------------------------------------------------------------===//
+
+// Addressing
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+// Shader Model Support
+def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
+def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
+def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
+def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
+
+// PTX Version Support
+def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
+def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
+
+// Fused-Multiply Add
+def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
+def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::GLOBAL;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::CONSTANT;
+ return false;
+}]>;
+
+def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::LOCAL;
+ return false;
+}]>;
+
+def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::PARAMETER;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::SHARED;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::GLOBAL;
+ return false;
+}]>;
+
+def store_local
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::LOCAL;
+ return false;
+}]>;
+
+def store_parameter
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::PARAMETER;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::SHARED;
+ return false;
+}]>;
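+
+// For reference: each fragment above keys on the address space of the
+// pointer's IR type, so the same generic load/store node is steered to a
+// space-qualified PTX instruction; e.g. a load from a pointer in the PTX
+// global space matches load_global and selects one of the "ld.global.*"
+// instructions defined below (see PTX_LD_ALL and "defm LDg").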
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Branch targets have OtherVT type; call targets are plain i32.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allows generic 3-register shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+ : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+ : SDNode<"PTXISD::RET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PTXcopyaddress
+ : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RegF32:$d, (opnode RegF32:$a))]>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins f32imm:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RegF32:$d, (opnode fpimm:$a))]>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RegF64:$d, (opnode RegF64:$a))]>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins f64imm:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RegF64:$d, (opnode fpimm:$a))]>;
+}
+
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
+}
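+
+// To illustrate the expansion, a single line such as
+//   defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+// produces the four instructions FADDrr32, FADDri32, FADDrr64 and FADDri64,
+// whose assembly strings are "add.rn.f32\t$d, $a, $b" and
+// "add.rn.f64\t$d, $a, $b" for the register/register and register/immediate
+// forms of each width.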
+
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+ def rrr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+ RegF32:$b),
+ RegF32:$c))]>;
+ def rri32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+ RegF32:$b),
+ fpimm:$c))]>;
+ def rrr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+ RegF64:$b),
+ RegF64:$c))]>;
+ def rri64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+ RegF64:$b),
+ fpimm:$c))]>;
+}
+
+multiclass INT3<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+ def ripreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, i1imm:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, RegPred:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+multiclass INT3ntnc<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RegI16:$d),
+ (ins i16imm:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
+ def ir32 : InstPTX<(outs RegI32:$d),
+ (ins i32imm:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
+ def ir64 : InstPTX<(outs RegI64:$d),
+ (ins i64imm:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
+}
+
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode cmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
+ def ri
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+ def rr_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ def rr_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ def rr_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+
+ def rr_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ def ri_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ def rr_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ def ri_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ def rr_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ def ri_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+}
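+
+// To illustrate, an instantiation such as
+//   defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+// yields the basic SETPEQu32rr/SETPEQu32ri forms ("setp.eq.u32\t$p, $a, $b")
+// plus the combined forms (rr_and_r, ri_or_not_r, ...) that fold a following
+// and/or/xor with another predicate into a single setp using the
+// .and/.or/.xor qualifier.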
+
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ def rr_and_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_or_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_xor_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_and_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+
+ def rr_or_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+
+ def rr_xor_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+}
+
+multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+ def rr
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+}
+
+multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
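+
+// As a further example, a single line such as
+//   defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+// (see the Loads section below) expands to a load for every register class
+// and addressing mode, e.g. LDgu32rr32 with assembly "ld.global.u32\t$d, [$a]"
+// for a register-register address on a 32-bit target.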
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : INT3<"add", add>;
+defm SUB : INT3<"sub", sub>;
+defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : INT3<"div", udiv>;
+defm REM : INT3<"rem", urem>;
+
+///===- Floating-Point Arithmetic Instructions ----------------------------===//
+
+// Standard Unary Operations
+defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
+
+// For floating-point division:
+// SM_13+ defaults to .rn for f32 and f64,
+// SM10 must *not* provide a rounding mode.
+
+// TODO:
+// - Allow user selection of rounding modes for fdiv
+// - Add support for -prec-div=false (.approx)
+
+def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+
+def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+
+
+
+// Multi-operation hybrid instructions
+
+// The selection of mad/fma is tricky. In some cases, they are the *same*
+// instruction, but in other cases we may prefer one or the other. Also,
+// different PTX versions differ on whether rounding mode flags are required.
+// In the short term, mad is supported on all PTX versions and we use a
+// default rounding mode no matter what shader model or PTX version.
+// TODO: Allow the rounding mode to be selectable through llc.
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
+ Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
+ Requires<[FMadNoRoundingMode, SupportsFMA]>;
+
+///===- Floating-Point Intrinsic Instructions -----------------------------===//
+
+def FSQRT32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ "sqrt.rn.f32\t$d, $a",
+ [(set RegF32:$d, (fsqrt RegF32:$a))]>;
+
+def FSQRT64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ "sqrt.rn.f64\t$d, $a",
+ [(set RegF64:$d, (fsqrt RegF64:$a))]>;
+
+def FSIN32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ "sin.approx.f32\t$d, $a",
+ [(set RegF32:$d, (fsin RegF32:$a))]>;
+
+def FSIN64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ "sin.approx.f64\t$d, $a",
+ [(set RegF64:$d, (fsin RegF64:$a))]>;
+
+def FCOS32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ "cos.approx.f32\t$d, $a",
+ [(set RegF32:$d, (fcos RegF32:$a))]>;
+
+def FCOS64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ "cos.approx.f64\t$d, $a",
+ [(set RegF64:$d, (fcos RegF64:$a))]>;
+
+
+///===- Comparison and Selection Instructions -----------------------------===//
+
+// .setp
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
+
+///===- Logic and Shift Instructions --------------------------------------===//
+
+defm SHL : INT3ntnc<"shl.b", PTXshl>;
+defm SRL : INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s", PTXsra>;
+
+defm AND : PTX_LOGIC<"and", and>;
+defm OR : PTX_LOGIC<"or", or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
+
+///===- Data Movement and Conversion Instructions -------------------------===//
+
+let neverHasSideEffects = 1 in {
+ def MOVPREDrr
+ : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
+ def MOVU16rr
+ : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
+ : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVPREDri
+ : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+ [(set RegPred:$d, imm:$a)]>;
+ def MOVU16ri
+ : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RegI16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, imm:$a)]>;
+ def MOVU64ri
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, imm:$a)]>;
+ def MOVF32ri
+ : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+ [(set RegF32:$d, fpimm:$a)]>;
+ def MOVF64ri
+ : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RegF64:$d, fpimm:$a)]>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVaddr32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVaddr64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
+// Loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDl : PTX_LD_ALL<"ld.local", load_local>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// These instructions are used to load/store from the .param space for
+// device and kernel parameters
+
+let hasSideEffects = 1 in {
+ def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
+ "ld.param.pred\t$d, [$a]",
+ [(set RegPred:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
+ "ld.param.u16\t$d, [$a]",
+ [(set RegI16:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
+ "ld.param.u32\t$d, [$a]",
+ [(set RegI32:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
+ "ld.param.u64\t$d, [$a]",
+ [(set RegI64:$d, (PTXloadparam timm:$a))]>;
+ def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
+ "ld.param.f32\t$d, [$a]",
+ [(set RegF32:$d, (PTXloadparam timm:$a))]>;
+ def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
+ "ld.param.f64\t$d, [$a]",
+ [(set RegF64:$d, (PTXloadparam timm:$a))]>;
+
+ def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
+ "st.param.pred\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegPred:$a)]>;
+ def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
+ "st.param.u16\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI16:$a)]>;
+ def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
+ "st.param.u32\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI32:$a)]>;
+ def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
+ "st.param.u64\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI64:$a)]>;
+ def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
+ "st.param.f32\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegF32:$a)]>;
+ def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
+ "st.param.f64\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegF64:$a)]>;
+}
+
+// Stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STl : PTX_ST_ALL<"st.local", store_local>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
+// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
+// TODO: Do something with st.param if/when it is needed.
+
+// Conversion to pred
+// PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
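+// For example, a trunc from i32 to the predicate type matches CVT_pred_u32
+// below and is printed as:  setp.gt.u32 %p, %r, 0;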
+def CVT_pred_u16
+ : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI16:$a))]>;
+
+def CVT_pred_u32
+ : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI32:$a))]>;
+
+def CVT_pred_u64
+ : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI64:$a))]>;
+
+def CVT_pred_f32
+ : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
+ [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_pred_f64
+ : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
+ [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u16
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
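+// For example, a zext from the predicate type to i16 matches CVT_u16_pred
+// below and is printed as:  selp.u16 %rh, 1, 0, %p;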
+def CVT_u16_preda
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (anyext RegPred:$a))]>;
+
+def CVT_u16_pred
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (zext RegPred:$a))]>;
+
+def CVT_u16_preds
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (sext RegPred:$a))]>;
+
+def CVT_u16_u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
+ [(set RegI16:$d, (trunc RegI32:$a))]>;
+
+def CVT_u16_u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
+ [(set RegI16:$d, (trunc RegI64:$a))]>;
+
+def CVT_u16_f32
+ : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
+ [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u16_f64
+ : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
+ [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u32
+
+def CVT_u32_pred
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+ [(set RegI32:$d, (zext RegPred:$a))]>;
+
+def CVT_u32_b16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RegI32:$d, (anyext RegI16:$a))]>;
+
+def CVT_u32_u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RegI32:$d, (zext RegI16:$a))]>;
+
+def CVT_u32_preds
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+ [(set RegI32:$d, (sext RegPred:$a))]>;
+
+def CVT_u32_s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
+ [(set RegI32:$d, (sext RegI16:$a))]>;
+
+def CVT_u32_u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
+ [(set RegI32:$d, (trunc RegI64:$a))]>;
+
+def CVT_u32_f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
+ [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u32_f64
+ : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
+ [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u64
+
+def CVT_u64_pred
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+ [(set RegI64:$d, (zext RegPred:$a))]>;
+
+def CVT_u64_preds
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+ [(set RegI64:$d, (sext RegPred:$a))]>;
+
+def CVT_u64_u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
+ [(set RegI64:$d, (zext RegI16:$a))]>;
+
+def CVT_u64_s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
+ [(set RegI64:$d, (sext RegI16:$a))]>;
+
+def CVT_u64_u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
+ [(set RegI64:$d, (zext RegI32:$a))]>;
+
+def CVT_u64_s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
+ [(set RegI64:$d, (sext RegI32:$a))]>;
+
+def CVT_u64_f32
+ : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
+ [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u64_f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
+ [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to f32
+
+def CVT_f32_pred
+ : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+ "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
+ [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
+
+def CVT_f32_u16
+ : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
+
+def CVT_f32_u32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
+
+def CVT_f32_u64
+ : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
+
+def CVT_f32_f64
+ : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
+ [(set RegF32:$d, (fround RegF64:$a))]>;
+
+// Conversion to f64
+
+def CVT_f64_pred
+ : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+ "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
+ [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
+
+def CVT_f64_u16
+ : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
+
+def CVT_f64_u32
+ : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
+
+def CVT_f64_u64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
+
+def CVT_f64_f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
+ [(set RegF64:$d, (fextend RegF32:$a))]>;
+
+///===- Control Flow Instructions -----------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern is empty because it is not yet clear how to use the
+ // first operand of PredicateOperand (a RegPred register) here.
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
+
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
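+// The i32imm operand is the spill slot index; the 's' prefix in the asm string
+// prints it as a virtual stack register name (s0, s1, ...).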
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+ "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+ "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+ "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+ "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+ "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+ "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+ "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+ "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+ "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+ "mov.f64\t$d, s$a", []>;
+
+///===- Intrinsic Instructions --------------------------------------------===//
+
+include "PTXIntrinsicInstrInfo.td"
diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
new file mode 100644
index 000000000000..8d97909d339a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -0,0 +1,84 @@
+//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsic instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI64:$d), (ins),
+ !strconcat("mov.u64\t$d, %", regname),
+ [(set RegI64:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI32:$d), (ins),
+ !strconcat("mov.u32\t$d, %", regname),
+ [(set RegI32:$d, (intop))]>;
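+// For example, PTX_READ_TID_X below is printed as:  mov.u32 %r, %tid.x;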
+
+// TODO: Add reads of the vector versions of the special registers
+
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+
+//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+
+//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
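+// For example, a bar.sync intrinsic call with immediate operand 0 is printed
+// as:  bar.sync 0;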
+def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
+ [(int_ptx_bar_sync imm:$i)]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
new file mode 100644
index 000000000000..b13a3dace130
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -0,0 +1,541 @@
+//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class PTXMCAsmStreamer : public MCStreamer {
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+
+public:
+ PTXMCAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *printer,
+ MCCodeEmitter *emitter,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+
+ ~PTXMCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void InitSections() {}
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitULEB128Value(const MCExpr *Value);
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void Finish();
+
+ /// @}
+
+}; // class PTXMCAsmStreamer
+
+}
+
+/// TODO: Add appropriate implementation of Emit*() methods when needed
+
+void PTXMCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
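+// truncateToSize - Mask Value down to the low Bytes bytes, e.g.
+// truncateToSize(0x1234, 1) == 0x34.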
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+}
+
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ //assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+ Symbol->setSection(*getCurrentSection());
+}
+
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ report_fatal_error("Unimplemented.");
+}
+
+void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {}
+
+void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
+
+void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
+
+void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+
+void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+
+void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
+ MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
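+// PrintQuotedString - Emit Data as a double-quoted, escaped string; e.g. the
+// bytes {'a', '"', '\n'} are printed as "a\"\n".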
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
+
+void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it, otherwise
+ // use .ascii
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
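+/// For example, with a '.zero' directive, EmitFill(8, 0, 0) emits ".zero 8".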
+void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
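+ // For a 16-byte request, for example, we print either 16 or Log2_32(16) == 4
+ // depending on MAI.getAlignmentIsInBytes().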
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+
+void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {}
+
+
+void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+// FIXME: should we inherit from MCAsmStreamer?
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename){
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
+
+void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS);
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::Finish() {}
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ MCInstPrinter *IP,
+ MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ bool ShowInst) {
+ return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+ IP, CE, ShowInst);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 000000000000..6fe9e6c3f657
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,92 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXMFInfoExtract must run after register allocation!
+
+namespace llvm {
+ /// PTXMFInfoExtract - PTX-specific pass that extracts machine function
+ /// information for use by PTXAsmPrinter.
+ ///
+ class PTXMFInfoExtract : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ public:
+ PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX Machine Function Info Extractor";
+ }
+ }; // class PTXMFInfoExtract
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
+
+ DEBUG(dbgs()
+ << "PTX::NoRegister == " << PTX::NoRegister << "\n"
+ << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
+
+ DEBUG(for (unsigned reg = PTX::NoRegister + 1;
+ reg < PTX::NUM_TARGET_REGS; ++reg)
+ if (MRI.isPhysRegUsed(reg))
+ dbgs() << "Used Reg: " << reg << "\n";);
+
+ // FIXME: This is a slow linear scan over all target registers.
+ for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
+ if (MRI.isPhysRegUsed(reg) &&
+ !MFI->isRetReg(reg) &&
+ (MFI->isKernel() || !MFI->isArgReg(reg)))
+ MFI->addLocalVarReg(reg);
+
+ // Notify the MachineFunctionInfo that we are done adding local variable regs.
+ MFI->doneAddLocalVar();
+
+ DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd();
+ i != e; ++i)
+ dbgs() << "Arg Reg: " << *i << "\n";);
+
+ DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
+ i != e; ++i)
+ dbgs() << "Local Var Reg: " << *i << "\n";);
+
+ return false;
+}
+
+FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXMFInfoExtract(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
new file mode 100644
index 000000000000..9d65f5bd1ade
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -0,0 +1,91 @@
+//===- PTXMachineFunctionInfo.h - PTX machine function info ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PTX-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MACHINE_FUNCTION_INFO_H
+#define PTX_MACHINE_FUNCTION_INFO_H
+
+#include "PTX.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+/// PTXMachineFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PTX target-specific information for each MachineFunction.
+///
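+/// Passes obtain it with MF.getInfo<PTXMachineFunctionInfo>(); see
+/// PTXMFInfoExtract for an example.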
+class PTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ bool is_kernel;
+ std::vector<unsigned> reg_arg, reg_local_var;
+ std::vector<unsigned> reg_ret;
+ bool _isDoneAddArg;
+
+public:
+ PTXMachineFunctionInfo(MachineFunction &MF)
+ : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
+ reg_arg.reserve(8);
+ reg_local_var.reserve(32);
+ }
+
+ void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
+
+ void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
+ void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
+ void addRetReg(unsigned reg) {
+ if (!isRetReg(reg)) {
+ reg_ret.push_back(reg);
+ }
+ }
+
+ void doneAddArg(void) {
+ _isDoneAddArg = true;
+ }
+ void doneAddLocalVar(void) {}
+
+ bool isKernel() const { return is_kernel; }
+
+ typedef std::vector<unsigned>::const_iterator reg_iterator;
+ typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
+ typedef std::vector<unsigned>::const_iterator ret_iterator;
+
+ bool argRegEmpty() const { return reg_arg.empty(); }
+ int getNumArg() const { return reg_arg.size(); }
+ reg_iterator argRegBegin() const { return reg_arg.begin(); }
+ reg_iterator argRegEnd() const { return reg_arg.end(); }
+ reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
+ reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
+
+ bool localVarRegEmpty() const { return reg_local_var.empty(); }
+ reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
+ reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
+
+ bool retRegEmpty() const { return reg_ret.empty(); }
+ int getNumRet() const { return reg_ret.size(); }
+ ret_iterator retRegBegin() const { return reg_ret.begin(); }
+ ret_iterator retRegEnd() const { return reg_ret.end(); }
+
+ bool isArgReg(unsigned reg) const {
+ return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
+ }
+
+ bool isRetReg(unsigned reg) const {
+ return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
+ }
+
+ bool isLocalVarReg(unsigned reg) const {
+ return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
+ != reg_local_var.end();
+ }
+}; // class PTXMachineFunctionInfo
+} // namespace llvm
+
+#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
new file mode 100644
index 000000000000..cb56ea98a2b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -0,0 +1,51 @@
+//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &TII)
+ : PTXGenRegisterInfo() {
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const {
+ unsigned Index;
+ MachineInstr& MI = *II;
+
+ Index = 0;
+ while (!MI.getOperand(Index).isFI()) {
+ ++Index;
+ assert(Index < MI.getNumOperands() &&
+ "Instr does not have a FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(Index).getIndex();
+
+ DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
+ DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+ DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+ // By this point the frame index already reflects any stack slot re-use
+ // assignments, so rewrite the operand to the final slot number.
+ MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 000000000000..0b63cb6d458e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,65 @@
+//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+ PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &TII);
+
+ virtual const unsigned
+ *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs; // save nothing
+ }
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved; // reserve no regs
+ }
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+ llvm_unreachable("PTX does not have a frame register");
+ return 0;
+ }
+
+ virtual unsigned getRARegister() const {
+ llvm_unreachable("PTX does not have a return address register");
+ return 0;
+ }
+
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+ }
+ virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
+ }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 000000000000..1313d248325e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,555 @@
+
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+///===- Predicate Registers -----------------------------------------------===//
+
+def P0 : PTXReg<"p0">;
+def P1 : PTXReg<"p1">;
+def P2 : PTXReg<"p2">;
+def P3 : PTXReg<"p3">;
+def P4 : PTXReg<"p4">;
+def P5 : PTXReg<"p5">;
+def P6 : PTXReg<"p6">;
+def P7 : PTXReg<"p7">;
+def P8 : PTXReg<"p8">;
+def P9 : PTXReg<"p9">;
+def P10 : PTXReg<"p10">;
+def P11 : PTXReg<"p11">;
+def P12 : PTXReg<"p12">;
+def P13 : PTXReg<"p13">;
+def P14 : PTXReg<"p14">;
+def P15 : PTXReg<"p15">;
+def P16 : PTXReg<"p16">;
+def P17 : PTXReg<"p17">;
+def P18 : PTXReg<"p18">;
+def P19 : PTXReg<"p19">;
+def P20 : PTXReg<"p20">;
+def P21 : PTXReg<"p21">;
+def P22 : PTXReg<"p22">;
+def P23 : PTXReg<"p23">;
+def P24 : PTXReg<"p24">;
+def P25 : PTXReg<"p25">;
+def P26 : PTXReg<"p26">;
+def P27 : PTXReg<"p27">;
+def P28 : PTXReg<"p28">;
+def P29 : PTXReg<"p29">;
+def P30 : PTXReg<"p30">;
+def P31 : PTXReg<"p31">;
+def P32 : PTXReg<"p32">;
+def P33 : PTXReg<"p33">;
+def P34 : PTXReg<"p34">;
+def P35 : PTXReg<"p35">;
+def P36 : PTXReg<"p36">;
+def P37 : PTXReg<"p37">;
+def P38 : PTXReg<"p38">;
+def P39 : PTXReg<"p39">;
+def P40 : PTXReg<"p40">;
+def P41 : PTXReg<"p41">;
+def P42 : PTXReg<"p42">;
+def P43 : PTXReg<"p43">;
+def P44 : PTXReg<"p44">;
+def P45 : PTXReg<"p45">;
+def P46 : PTXReg<"p46">;
+def P47 : PTXReg<"p47">;
+def P48 : PTXReg<"p48">;
+def P49 : PTXReg<"p49">;
+def P50 : PTXReg<"p50">;
+def P51 : PTXReg<"p51">;
+def P52 : PTXReg<"p52">;
+def P53 : PTXReg<"p53">;
+def P54 : PTXReg<"p54">;
+def P55 : PTXReg<"p55">;
+def P56 : PTXReg<"p56">;
+def P57 : PTXReg<"p57">;
+def P58 : PTXReg<"p58">;
+def P59 : PTXReg<"p59">;
+def P60 : PTXReg<"p60">;
+def P61 : PTXReg<"p61">;
+def P62 : PTXReg<"p62">;
+def P63 : PTXReg<"p63">;
+def P64 : PTXReg<"p64">;
+def P65 : PTXReg<"p65">;
+def P66 : PTXReg<"p66">;
+def P67 : PTXReg<"p67">;
+def P68 : PTXReg<"p68">;
+def P69 : PTXReg<"p69">;
+def P70 : PTXReg<"p70">;
+def P71 : PTXReg<"p71">;
+def P72 : PTXReg<"p72">;
+def P73 : PTXReg<"p73">;
+def P74 : PTXReg<"p74">;
+def P75 : PTXReg<"p75">;
+def P76 : PTXReg<"p76">;
+def P77 : PTXReg<"p77">;
+def P78 : PTXReg<"p78">;
+def P79 : PTXReg<"p79">;
+def P80 : PTXReg<"p80">;
+def P81 : PTXReg<"p81">;
+def P82 : PTXReg<"p82">;
+def P83 : PTXReg<"p83">;
+def P84 : PTXReg<"p84">;
+def P85 : PTXReg<"p85">;
+def P86 : PTXReg<"p86">;
+def P87 : PTXReg<"p87">;
+def P88 : PTXReg<"p88">;
+def P89 : PTXReg<"p89">;
+def P90 : PTXReg<"p90">;
+def P91 : PTXReg<"p91">;
+def P92 : PTXReg<"p92">;
+def P93 : PTXReg<"p93">;
+def P94 : PTXReg<"p94">;
+def P95 : PTXReg<"p95">;
+def P96 : PTXReg<"p96">;
+def P97 : PTXReg<"p97">;
+def P98 : PTXReg<"p98">;
+def P99 : PTXReg<"p99">;
+def P100 : PTXReg<"p100">;
+def P101 : PTXReg<"p101">;
+def P102 : PTXReg<"p102">;
+def P103 : PTXReg<"p103">;
+def P104 : PTXReg<"p104">;
+def P105 : PTXReg<"p105">;
+def P106 : PTXReg<"p106">;
+def P107 : PTXReg<"p107">;
+def P108 : PTXReg<"p108">;
+def P109 : PTXReg<"p109">;
+def P110 : PTXReg<"p110">;
+def P111 : PTXReg<"p111">;
+def P112 : PTXReg<"p112">;
+def P113 : PTXReg<"p113">;
+def P114 : PTXReg<"p114">;
+def P115 : PTXReg<"p115">;
+def P116 : PTXReg<"p116">;
+def P117 : PTXReg<"p117">;
+def P118 : PTXReg<"p118">;
+def P119 : PTXReg<"p119">;
+def P120 : PTXReg<"p120">;
+def P121 : PTXReg<"p121">;
+def P122 : PTXReg<"p122">;
+def P123 : PTXReg<"p123">;
+def P124 : PTXReg<"p124">;
+def P125 : PTXReg<"p125">;
+def P126 : PTXReg<"p126">;
+def P127 : PTXReg<"p127">;
+
+///===- 16-Bit Registers --------------------------------------------------===//
+
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
+def RH10 : PTXReg<"rh10">;
+def RH11 : PTXReg<"rh11">;
+def RH12 : PTXReg<"rh12">;
+def RH13 : PTXReg<"rh13">;
+def RH14 : PTXReg<"rh14">;
+def RH15 : PTXReg<"rh15">;
+def RH16 : PTXReg<"rh16">;
+def RH17 : PTXReg<"rh17">;
+def RH18 : PTXReg<"rh18">;
+def RH19 : PTXReg<"rh19">;
+def RH20 : PTXReg<"rh20">;
+def RH21 : PTXReg<"rh21">;
+def RH22 : PTXReg<"rh22">;
+def RH23 : PTXReg<"rh23">;
+def RH24 : PTXReg<"rh24">;
+def RH25 : PTXReg<"rh25">;
+def RH26 : PTXReg<"rh26">;
+def RH27 : PTXReg<"rh27">;
+def RH28 : PTXReg<"rh28">;
+def RH29 : PTXReg<"rh29">;
+def RH30 : PTXReg<"rh30">;
+def RH31 : PTXReg<"rh31">;
+def RH32 : PTXReg<"rh32">;
+def RH33 : PTXReg<"rh33">;
+def RH34 : PTXReg<"rh34">;
+def RH35 : PTXReg<"rh35">;
+def RH36 : PTXReg<"rh36">;
+def RH37 : PTXReg<"rh37">;
+def RH38 : PTXReg<"rh38">;
+def RH39 : PTXReg<"rh39">;
+def RH40 : PTXReg<"rh40">;
+def RH41 : PTXReg<"rh41">;
+def RH42 : PTXReg<"rh42">;
+def RH43 : PTXReg<"rh43">;
+def RH44 : PTXReg<"rh44">;
+def RH45 : PTXReg<"rh45">;
+def RH46 : PTXReg<"rh46">;
+def RH47 : PTXReg<"rh47">;
+def RH48 : PTXReg<"rh48">;
+def RH49 : PTXReg<"rh49">;
+def RH50 : PTXReg<"rh50">;
+def RH51 : PTXReg<"rh51">;
+def RH52 : PTXReg<"rh52">;
+def RH53 : PTXReg<"rh53">;
+def RH54 : PTXReg<"rh54">;
+def RH55 : PTXReg<"rh55">;
+def RH56 : PTXReg<"rh56">;
+def RH57 : PTXReg<"rh57">;
+def RH58 : PTXReg<"rh58">;
+def RH59 : PTXReg<"rh59">;
+def RH60 : PTXReg<"rh60">;
+def RH61 : PTXReg<"rh61">;
+def RH62 : PTXReg<"rh62">;
+def RH63 : PTXReg<"rh63">;
+def RH64 : PTXReg<"rh64">;
+def RH65 : PTXReg<"rh65">;
+def RH66 : PTXReg<"rh66">;
+def RH67 : PTXReg<"rh67">;
+def RH68 : PTXReg<"rh68">;
+def RH69 : PTXReg<"rh69">;
+def RH70 : PTXReg<"rh70">;
+def RH71 : PTXReg<"rh71">;
+def RH72 : PTXReg<"rh72">;
+def RH73 : PTXReg<"rh73">;
+def RH74 : PTXReg<"rh74">;
+def RH75 : PTXReg<"rh75">;
+def RH76 : PTXReg<"rh76">;
+def RH77 : PTXReg<"rh77">;
+def RH78 : PTXReg<"rh78">;
+def RH79 : PTXReg<"rh79">;
+def RH80 : PTXReg<"rh80">;
+def RH81 : PTXReg<"rh81">;
+def RH82 : PTXReg<"rh82">;
+def RH83 : PTXReg<"rh83">;
+def RH84 : PTXReg<"rh84">;
+def RH85 : PTXReg<"rh85">;
+def RH86 : PTXReg<"rh86">;
+def RH87 : PTXReg<"rh87">;
+def RH88 : PTXReg<"rh88">;
+def RH89 : PTXReg<"rh89">;
+def RH90 : PTXReg<"rh90">;
+def RH91 : PTXReg<"rh91">;
+def RH92 : PTXReg<"rh92">;
+def RH93 : PTXReg<"rh93">;
+def RH94 : PTXReg<"rh94">;
+def RH95 : PTXReg<"rh95">;
+def RH96 : PTXReg<"rh96">;
+def RH97 : PTXReg<"rh97">;
+def RH98 : PTXReg<"rh98">;
+def RH99 : PTXReg<"rh99">;
+def RH100 : PTXReg<"rh100">;
+def RH101 : PTXReg<"rh101">;
+def RH102 : PTXReg<"rh102">;
+def RH103 : PTXReg<"rh103">;
+def RH104 : PTXReg<"rh104">;
+def RH105 : PTXReg<"rh105">;
+def RH106 : PTXReg<"rh106">;
+def RH107 : PTXReg<"rh107">;
+def RH108 : PTXReg<"rh108">;
+def RH109 : PTXReg<"rh109">;
+def RH110 : PTXReg<"rh110">;
+def RH111 : PTXReg<"rh111">;
+def RH112 : PTXReg<"rh112">;
+def RH113 : PTXReg<"rh113">;
+def RH114 : PTXReg<"rh114">;
+def RH115 : PTXReg<"rh115">;
+def RH116 : PTXReg<"rh116">;
+def RH117 : PTXReg<"rh117">;
+def RH118 : PTXReg<"rh118">;
+def RH119 : PTXReg<"rh119">;
+def RH120 : PTXReg<"rh120">;
+def RH121 : PTXReg<"rh121">;
+def RH122 : PTXReg<"rh122">;
+def RH123 : PTXReg<"rh123">;
+def RH124 : PTXReg<"rh124">;
+def RH125 : PTXReg<"rh125">;
+def RH126 : PTXReg<"rh126">;
+def RH127 : PTXReg<"rh127">;
+
+///===- 32-Bit Registers --------------------------------------------------===//
+
+def R0 : PTXReg<"r0">;
+def R1 : PTXReg<"r1">;
+def R2 : PTXReg<"r2">;
+def R3 : PTXReg<"r3">;
+def R4 : PTXReg<"r4">;
+def R5 : PTXReg<"r5">;
+def R6 : PTXReg<"r6">;
+def R7 : PTXReg<"r7">;
+def R8 : PTXReg<"r8">;
+def R9 : PTXReg<"r9">;
+def R10 : PTXReg<"r10">;
+def R11 : PTXReg<"r11">;
+def R12 : PTXReg<"r12">;
+def R13 : PTXReg<"r13">;
+def R14 : PTXReg<"r14">;
+def R15 : PTXReg<"r15">;
+def R16 : PTXReg<"r16">;
+def R17 : PTXReg<"r17">;
+def R18 : PTXReg<"r18">;
+def R19 : PTXReg<"r19">;
+def R20 : PTXReg<"r20">;
+def R21 : PTXReg<"r21">;
+def R22 : PTXReg<"r22">;
+def R23 : PTXReg<"r23">;
+def R24 : PTXReg<"r24">;
+def R25 : PTXReg<"r25">;
+def R26 : PTXReg<"r26">;
+def R27 : PTXReg<"r27">;
+def R28 : PTXReg<"r28">;
+def R29 : PTXReg<"r29">;
+def R30 : PTXReg<"r30">;
+def R31 : PTXReg<"r31">;
+def R32 : PTXReg<"r32">;
+def R33 : PTXReg<"r33">;
+def R34 : PTXReg<"r34">;
+def R35 : PTXReg<"r35">;
+def R36 : PTXReg<"r36">;
+def R37 : PTXReg<"r37">;
+def R38 : PTXReg<"r38">;
+def R39 : PTXReg<"r39">;
+def R40 : PTXReg<"r40">;
+def R41 : PTXReg<"r41">;
+def R42 : PTXReg<"r42">;
+def R43 : PTXReg<"r43">;
+def R44 : PTXReg<"r44">;
+def R45 : PTXReg<"r45">;
+def R46 : PTXReg<"r46">;
+def R47 : PTXReg<"r47">;
+def R48 : PTXReg<"r48">;
+def R49 : PTXReg<"r49">;
+def R50 : PTXReg<"r50">;
+def R51 : PTXReg<"r51">;
+def R52 : PTXReg<"r52">;
+def R53 : PTXReg<"r53">;
+def R54 : PTXReg<"r54">;
+def R55 : PTXReg<"r55">;
+def R56 : PTXReg<"r56">;
+def R57 : PTXReg<"r57">;
+def R58 : PTXReg<"r58">;
+def R59 : PTXReg<"r59">;
+def R60 : PTXReg<"r60">;
+def R61 : PTXReg<"r61">;
+def R62 : PTXReg<"r62">;
+def R63 : PTXReg<"r63">;
+def R64 : PTXReg<"r64">;
+def R65 : PTXReg<"r65">;
+def R66 : PTXReg<"r66">;
+def R67 : PTXReg<"r67">;
+def R68 : PTXReg<"r68">;
+def R69 : PTXReg<"r69">;
+def R70 : PTXReg<"r70">;
+def R71 : PTXReg<"r71">;
+def R72 : PTXReg<"r72">;
+def R73 : PTXReg<"r73">;
+def R74 : PTXReg<"r74">;
+def R75 : PTXReg<"r75">;
+def R76 : PTXReg<"r76">;
+def R77 : PTXReg<"r77">;
+def R78 : PTXReg<"r78">;
+def R79 : PTXReg<"r79">;
+def R80 : PTXReg<"r80">;
+def R81 : PTXReg<"r81">;
+def R82 : PTXReg<"r82">;
+def R83 : PTXReg<"r83">;
+def R84 : PTXReg<"r84">;
+def R85 : PTXReg<"r85">;
+def R86 : PTXReg<"r86">;
+def R87 : PTXReg<"r87">;
+def R88 : PTXReg<"r88">;
+def R89 : PTXReg<"r89">;
+def R90 : PTXReg<"r90">;
+def R91 : PTXReg<"r91">;
+def R92 : PTXReg<"r92">;
+def R93 : PTXReg<"r93">;
+def R94 : PTXReg<"r94">;
+def R95 : PTXReg<"r95">;
+def R96 : PTXReg<"r96">;
+def R97 : PTXReg<"r97">;
+def R98 : PTXReg<"r98">;
+def R99 : PTXReg<"r99">;
+def R100 : PTXReg<"r100">;
+def R101 : PTXReg<"r101">;
+def R102 : PTXReg<"r102">;
+def R103 : PTXReg<"r103">;
+def R104 : PTXReg<"r104">;
+def R105 : PTXReg<"r105">;
+def R106 : PTXReg<"r106">;
+def R107 : PTXReg<"r107">;
+def R108 : PTXReg<"r108">;
+def R109 : PTXReg<"r109">;
+def R110 : PTXReg<"r110">;
+def R111 : PTXReg<"r111">;
+def R112 : PTXReg<"r112">;
+def R113 : PTXReg<"r113">;
+def R114 : PTXReg<"r114">;
+def R115 : PTXReg<"r115">;
+def R116 : PTXReg<"r116">;
+def R117 : PTXReg<"r117">;
+def R118 : PTXReg<"r118">;
+def R119 : PTXReg<"r119">;
+def R120 : PTXReg<"r120">;
+def R121 : PTXReg<"r121">;
+def R122 : PTXReg<"r122">;
+def R123 : PTXReg<"r123">;
+def R124 : PTXReg<"r124">;
+def R125 : PTXReg<"r125">;
+def R126 : PTXReg<"r126">;
+def R127 : PTXReg<"r127">;
+
+///===- 64-Bit Registers --------------------------------------------------===//
+
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
+def RD10 : PTXReg<"rd10">;
+def RD11 : PTXReg<"rd11">;
+def RD12 : PTXReg<"rd12">;
+def RD13 : PTXReg<"rd13">;
+def RD14 : PTXReg<"rd14">;
+def RD15 : PTXReg<"rd15">;
+def RD16 : PTXReg<"rd16">;
+def RD17 : PTXReg<"rd17">;
+def RD18 : PTXReg<"rd18">;
+def RD19 : PTXReg<"rd19">;
+def RD20 : PTXReg<"rd20">;
+def RD21 : PTXReg<"rd21">;
+def RD22 : PTXReg<"rd22">;
+def RD23 : PTXReg<"rd23">;
+def RD24 : PTXReg<"rd24">;
+def RD25 : PTXReg<"rd25">;
+def RD26 : PTXReg<"rd26">;
+def RD27 : PTXReg<"rd27">;
+def RD28 : PTXReg<"rd28">;
+def RD29 : PTXReg<"rd29">;
+def RD30 : PTXReg<"rd30">;
+def RD31 : PTXReg<"rd31">;
+def RD32 : PTXReg<"rd32">;
+def RD33 : PTXReg<"rd33">;
+def RD34 : PTXReg<"rd34">;
+def RD35 : PTXReg<"rd35">;
+def RD36 : PTXReg<"rd36">;
+def RD37 : PTXReg<"rd37">;
+def RD38 : PTXReg<"rd38">;
+def RD39 : PTXReg<"rd39">;
+def RD40 : PTXReg<"rd40">;
+def RD41 : PTXReg<"rd41">;
+def RD42 : PTXReg<"rd42">;
+def RD43 : PTXReg<"rd43">;
+def RD44 : PTXReg<"rd44">;
+def RD45 : PTXReg<"rd45">;
+def RD46 : PTXReg<"rd46">;
+def RD47 : PTXReg<"rd47">;
+def RD48 : PTXReg<"rd48">;
+def RD49 : PTXReg<"rd49">;
+def RD50 : PTXReg<"rd50">;
+def RD51 : PTXReg<"rd51">;
+def RD52 : PTXReg<"rd52">;
+def RD53 : PTXReg<"rd53">;
+def RD54 : PTXReg<"rd54">;
+def RD55 : PTXReg<"rd55">;
+def RD56 : PTXReg<"rd56">;
+def RD57 : PTXReg<"rd57">;
+def RD58 : PTXReg<"rd58">;
+def RD59 : PTXReg<"rd59">;
+def RD60 : PTXReg<"rd60">;
+def RD61 : PTXReg<"rd61">;
+def RD62 : PTXReg<"rd62">;
+def RD63 : PTXReg<"rd63">;
+def RD64 : PTXReg<"rd64">;
+def RD65 : PTXReg<"rd65">;
+def RD66 : PTXReg<"rd66">;
+def RD67 : PTXReg<"rd67">;
+def RD68 : PTXReg<"rd68">;
+def RD69 : PTXReg<"rd69">;
+def RD70 : PTXReg<"rd70">;
+def RD71 : PTXReg<"rd71">;
+def RD72 : PTXReg<"rd72">;
+def RD73 : PTXReg<"rd73">;
+def RD74 : PTXReg<"rd74">;
+def RD75 : PTXReg<"rd75">;
+def RD76 : PTXReg<"rd76">;
+def RD77 : PTXReg<"rd77">;
+def RD78 : PTXReg<"rd78">;
+def RD79 : PTXReg<"rd79">;
+def RD80 : PTXReg<"rd80">;
+def RD81 : PTXReg<"rd81">;
+def RD82 : PTXReg<"rd82">;
+def RD83 : PTXReg<"rd83">;
+def RD84 : PTXReg<"rd84">;
+def RD85 : PTXReg<"rd85">;
+def RD86 : PTXReg<"rd86">;
+def RD87 : PTXReg<"rd87">;
+def RD88 : PTXReg<"rd88">;
+def RD89 : PTXReg<"rd89">;
+def RD90 : PTXReg<"rd90">;
+def RD91 : PTXReg<"rd91">;
+def RD92 : PTXReg<"rd92">;
+def RD93 : PTXReg<"rd93">;
+def RD94 : PTXReg<"rd94">;
+def RD95 : PTXReg<"rd95">;
+def RD96 : PTXReg<"rd96">;
+def RD97 : PTXReg<"rd97">;
+def RD98 : PTXReg<"rd98">;
+def RD99 : PTXReg<"rd99">;
+def RD100 : PTXReg<"rd100">;
+def RD101 : PTXReg<"rd101">;
+def RD102 : PTXReg<"rd102">;
+def RD103 : PTXReg<"rd103">;
+def RD104 : PTXReg<"rd104">;
+def RD105 : PTXReg<"rd105">;
+def RD106 : PTXReg<"rd106">;
+def RD107 : PTXReg<"rd107">;
+def RD108 : PTXReg<"rd108">;
+def RD109 : PTXReg<"rd109">;
+def RD110 : PTXReg<"rd110">;
+def RD111 : PTXReg<"rd111">;
+def RD112 : PTXReg<"rd112">;
+def RD113 : PTXReg<"rd113">;
+def RD114 : PTXReg<"rd114">;
+def RD115 : PTXReg<"rd115">;
+def RD116 : PTXReg<"rd116">;
+def RD117 : PTXReg<"rd117">;
+def RD118 : PTXReg<"rd118">;
+def RD119 : PTXReg<"rd119">;
+def RD120 : PTXReg<"rd120">;
+def RD121 : PTXReg<"rd121">;
+def RD122 : PTXReg<"rd122">;
+def RD123 : PTXReg<"rd123">;
+def RD124 : PTXReg<"rd124">;
+def RD125 : PTXReg<"rd125">;
+def RD126 : PTXReg<"rd126">;
+def RD127 : PTXReg<"rd127">;
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
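+// Note that the 32-bit (R0-R127) and 64-bit (RD0-RD127) registers are shared
+// between the integer and floating-point register classes below.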
+def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
new file mode 100644
index 000000000000..8ec646e46f68
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -0,0 +1,66 @@
+//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+#include "PTX.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PTXGenSubtargetInfo(TT, CPU, FS),
+ PTXTarget(PTX_COMPUTE_1_0),
+ PTXVersion(PTX_VERSION_2_0),
+ SupportsDouble(false),
+ SupportsFMA(true),
+ Is64Bit(is64Bit) {
+ std::string TARGET = CPU;
+ if (TARGET.empty())
+ TARGET = "generic";
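+  // Let the tblgen-generated feature parser refine the defaults above from
+  // the CPU name and feature string.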
+ ParseSubtargetFeatures(TARGET, FS);
+}
+
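+// Returns the architecture string used when emitting the PTX ".target"
+// directive, e.g. "sm_20" for PTX_SM_2_0.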
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXTarget) {
+ default: llvm_unreachable("Unknown PTX target");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_1: return "sm_11";
+ case PTX_SM_1_2: return "sm_12";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ case PTX_SM_2_1: return "sm_21";
+ case PTX_SM_2_2: return "sm_22";
+ case PTX_SM_2_3: return "sm_23";
+ case PTX_COMPUTE_1_0: return "compute_10";
+ case PTX_COMPUTE_1_1: return "compute_11";
+ case PTX_COMPUTE_1_2: return "compute_12";
+ case PTX_COMPUTE_1_3: return "compute_13";
+ case PTX_COMPUTE_2_0: return "compute_20";
+ }
+}
+
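+// Returns the language version string used when emitting the PTX ".version"
+// directive, e.g. "2.1" for PTX_VERSION_2_1.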
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ default: llvm_unreachable("Unknown PTX version");
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ case PTX_VERSION_2_2: return "2.2";
+ case PTX_VERSION_2_3: return "2.3";
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 000000000000..0921f1f22c49
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,121 @@
+//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class PTXSubtarget : public PTXGenSubtargetInfo {
+ public:
+
+ /**
+ * Enumeration of Shader Models supported by the back-end.
+ */
+ enum PTXTargetEnum {
+ PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
+ PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
+ PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
+ PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
+ PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
+ PTX_LAST_COMPUTE,
+
+ PTX_SM_1_0, /*< Shader Model 1.0 */
+ PTX_SM_1_1, /*< Shader Model 1.1 */
+ PTX_SM_1_2, /*< Shader Model 1.2 */
+ PTX_SM_1_3, /*< Shader Model 1.3 */
+ PTX_SM_2_0, /*< Shader Model 2.0 */
+ PTX_SM_2_1, /*< Shader Model 2.1 */
+ PTX_SM_2_2, /*< Shader Model 2.2 */
+ PTX_SM_2_3, /*< Shader Model 2.3 */
+ PTX_LAST_SM
+ };
+
+ /**
+ * Enumeration of PTX versions supported by the back-end.
+ *
+ * Currently, PTX 2.0 is the minimum supported version.
+ */
+ enum PTXVersionEnum {
+ PTX_VERSION_2_0, /*< PTX Version 2.0 */
+ PTX_VERSION_2_1, /*< PTX Version 2.1 */
+ PTX_VERSION_2_2, /*< PTX Version 2.2 */
+ PTX_VERSION_2_3 /*< PTX Version 2.3 */
+ };
+
+ private:
+
+ /// Shader Model supported on the target GPU.
+ PTXTargetEnum PTXTarget;
+
+ /// PTX Language Version.
+ PTXVersionEnum PTXVersion;
+
+ // The native .f64 type is supported on the hardware.
+ bool SupportsDouble;
+
+  // Support for the fused multiply-add (FMA) and multiply-add (MAD)
+  // instructions.
+ bool SupportsFMA;
+
+ // Use .u64 instead of .u32 for addresses.
+ bool Is64Bit;
+
+ public:
+
+ PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ // Target architecture accessors
+ std::string getTargetString() const;
+
+ std::string getPTXVersionString() const;
+
+ bool supportsDouble() const { return SupportsDouble; }
+
+ bool is64Bit() const { return Is64Bit; }
+
+ bool supportsFMA() const { return SupportsFMA; }
+
+ bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+ bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
+
+ bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
+
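+    // The enumerators above are declared in increasing order, so a check of
+    // the form "X <= PTXTarget < PTX_LAST_*" means "target is at least X".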
+ bool fdivNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool fmadNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool useParamSpaceForDeviceArgs() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 000000000000..ab926e02d66f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,87 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI,
+ MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE,
+ TargetAsmBackend *TAB,
+ bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+
+ RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+ RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+ TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+ const char* DataLayout32 =
+ "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+ const char* DataLayout64 =
+ "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+ Subtarget(TT, CPU, FS, is64Bit),
+ FrameLowering(Subtarget),
+ InstrInfo(*this),
+ TLInfo(*this) {
+}
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T,
+ const std::string& TT,
+ const std::string& CPU,
+ const std::string& FS)
+ : PTXTargetMachine(T, TT, CPU, FS, false) {
+}
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T,
+ const std::string& TT,
+ const std::string& CPU,
+ const std::string& FS)
+ : PTXTargetMachine(T, TT, CPU, FS, true) {
+}
+
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createPTXISelDag(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+  // PTXMFInfoExtract must run after register allocation!
+ PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 000000000000..ae4215325211
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,77 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+ private:
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXTargetLowering TLInfo;
+
+ public:
+ PTXTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo(); }
+
+ virtual const PTXTargetLowering *getTargetLowering() const {
+ return &TLInfo; }
+
+ virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+}; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX32TargetMachine(const Target &T, const std::string &TT,
+ const std::string& CPU, const std::string& FS);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX64TargetMachine(const Target &T, const std::string &TT,
+ const std::string& CPU, const std::string& FS);
+}; // class PTX64TargetMachine
+
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..4b09cf5ce099
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPTXInfo
+ PTXTargetInfo.cpp
+ )
+
+add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
new file mode 100644
index 000000000000..8619785889aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXInfo
+
+# Hack: we need to include the 'main' target directory to grab private headers.
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 000000000000..9df6c7567bd1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+ // see llvm/ADT/Triple.h
+ RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+ "PTX (32-bit) [Experimental]");
+ RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+ "PTX (64-bit) [Experimental]");
+}
diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py
new file mode 100755
index 000000000000..15286908961d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/generate-register-td.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+##===- generate-register-td.py --------------------------------*-python-*--===##
+##
+## The LLVM Compiler Infrastructure
+##
+## This file is distributed under the University of Illinois Open Source
+## License. See LICENSE.TXT for details.
+##
+##===----------------------------------------------------------------------===##
+##
+## This file describes the PTX register file generator.
+##
+##===----------------------------------------------------------------------===##
+
+from sys import argv, exit, stdout
+
+
+if len(argv) != 5:
+ print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
+ exit(1)
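+# For example, the checked-in register definitions above (128 registers per
+# class) correspond to:
+#   ./generate-register-td.py 128 128 128 128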
+
+try:
+ num_pred = int(argv[1])
+ num_16bit = int(argv[2])
+ num_32bit = int(argv[3])
+ num_64bit = int(argv[4])
+except ValueError:
+ print('ERROR: Invalid integer parameter')
+ exit(1)
+
+## Print the register definition file
+td_file = open('PTXRegisterInfo.td', 'w')
+
+td_file.write('''
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print predicate registers
+td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
+for r in range(0, num_pred):
+ td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
+
+# Print 16-bit registers
+td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_16bit):
+ td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
+
+# Print 32-bit registers
+td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_32bit):
+ td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
+
+# Print 64-bit registers
+td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_64bit):
+ td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
+
+
+td_file.write('''
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print register classes
+
+td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
+td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
+td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+
+
+td_file.close()
+
+## Now write the PTXCallingConv.td file
+td_file = open('PTXCallingConv.td', 'w')
+
+# Reserve 10% of the available registers for return values, and the other 90%
+# for parameters
+num_ret_pred = int(0.1 * num_pred)
+num_ret_16bit = int(0.1 * num_16bit)
+num_ret_32bit = int(0.1 * num_32bit)
+num_ret_64bit = int(0.1 * num_64bit)
+num_param_pred = num_pred - num_ret_pred
+num_param_16bit = num_16bit - num_ret_16bit
+num_param_32bit = num_32bit - num_ret_32bit
+num_param_64bit = num_64bit - num_ret_64bit
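+# For example, with 128 registers per class int(0.1 * 128) == 12, so P0-P11
+# (and RH0-RH11, R0-R11, RD0-RD11) hold return values while the remaining 116
+# registers of each class are used for parameters.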
+
+param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
+ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
+param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
+ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
+param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
+ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
+param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
+ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
+
+# Use str.join rather than reduce so the script also runs under Python 3,
+# where reduce is no longer a builtin.
+param_list_pred = ', '.join(param_regs_pred)
+ret_list_pred = ', '.join(ret_regs_pred)
+param_list_16bit = ', '.join(param_regs_16bit)
+ret_list_16bit = ', '.join(ret_regs_16bit)
+param_list_32bit = ', '.join(param_regs_32bit)
+ret_list_32bit = ', '.join(ret_regs_32bit)
+param_list_64bit = ', '.join(param_regs_64bit)
+ret_list_64bit = ', '.join(ret_regs_64bit)
+
+td_file.write('''
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[%s]>>,
+ CCIfType<[i16], CCAssignToReg<[%s]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[%s]>>,
+ CCIfType<[i16], CCAssignToReg<[%s]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
+ ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
+
+
+td_file.close()
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..389ea7742b06
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCAsmPrinter
+ PPCInstPrinter.cpp
+ )
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
new file mode 100644
index 000000000000..f097e84248ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmPrinter
+
+# Hack: we need to include the 'main' powerpc target directory to grab private headers.
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 000000000000..1a9bd761359c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,295 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PPCGenAsmWriter.inc"
+
+StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ // Check for slwi/srwi mnemonics.
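+  //   slwi RA, RS, n  ==  rlwinm RA, RS, n, 0, 31-n
+  //   srwi RA, RS, n  ==  rlwinm RA, RS, 32-n, n, 31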
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ return;
+ }
+ }
+
+ printInstruction(MI, O);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ default: assert(0 && "Invalid predicate");
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ char Value = MI->getOperand(OpNo).getImm();
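+  // Sign-extend the low 5 bits; the shifts below act on the int-promoted value.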
+ Value = (Value << (32-5)) >> (32-5);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
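+// The immediate is stored as a word offset (value >> 2), so scale it back up
+// by four when printing.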
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ O << "$+";
+ printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
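+// Prints the one-hot CR field mask for a condition register (CR0 -> 0x80,
+// CR7 -> 0x01), as used by instructions such as mtcrf.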
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: assert(0 && "Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
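+  // As in printMemRegReg below, r0 used as a base register means a literal 0,
+  // so print "0" rather than the register name.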
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo, O);
+ else
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+    // The Linux and AIX assemblers do not take register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "lo16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+  // FIXME: This is a terrible hack because we can't encode ha16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "ha16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 000000000000..d022a4496e84
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,71 @@
+//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+ // 0 -> AIX, 1 -> Darwin.
+ unsigned SyntaxVariant;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+
+ bool isDarwinSyntax() const {
+ return SyntaxVariant == 1;
+ }
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier);
+
+
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // FIXME: Remove
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..a1b81662115a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPowerPCDesc
+ PPCMCTargetDesc.cpp
+ PPCMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..9db66622cced
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDesc
+
+# Hack: we need to include the 'main' target directory to grab private headers.
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
new file mode 100644
index 000000000000..b6dca835b18d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -0,0 +1,62 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MCAsmInfoDarwin properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+using namespace llvm;
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
+ PCSymbol = ".";
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+  SupportsDebugInformation = true; // Debug information.
+}
+
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ if (!is64Bit)
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ HasLCOMMDirective = true;
+ AssemblerDialect = 0; // Old-Style mnemonics.
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
new file mode 100644
index 000000000000..96ae6fbba0e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCAsmInfoDarwin class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ };
+
+ struct PPCLinuxMCAsmInfo : public MCAsmInfo {
+ explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 000000000000..02b887f4d5dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,70 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "PPCMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+ createPPCMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+ if (TheTriple.isOSDarwin())
+ return new PPCMCAsmInfoDarwin(isPPC64);
+ return new PPCLinuxMCAsmInfo(isPPC64);
+
+}
+
+extern "C" void LLVMInitializePowerPCMCAsmInfo() {
+ RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 000000000000..cee235097a0a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
new file mode 100644
index 000000000000..7191dd105f3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,85 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+ class JITCodeEmitter;
+ class Target;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+ class MCCodeEmitter;
+ class MCContext;
+ class MCInstrInfo;
+ class MCSubtargetInfo;
+ class TargetMachine;
+ class TargetAsmBackend;
+
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+ TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
+
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin);
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB = 1,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 4, MO_HA16 = 8,
+
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 16,
+
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 32,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 64
+ };
+ } // end namespace PPCII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
new file mode 100644
index 000000000000..aabf494012e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,112 @@
+//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
+ "Enable GPUL instructions">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603]>;
+def : Processor<"603e", G3Itineraries, [Directive603]>;
+def : Processor<"603ev", G3Itineraries, [Directive603]>;
+def : Processor<"604", G3Itineraries, [Directive604]>;
+def : Processor<"604e", G3Itineraries, [Directive604]>;
+def : Processor<"620", G3Itineraries, [Directive620]>;
+def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"970", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"g5", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc64", G5Itineraries,
+ [Directive64, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+
+ let AssemblyWriters = [PPCAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
new file mode 100644
index 000000000000..4b8cbb711833
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -0,0 +1,123 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "PPC.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {}
+};
+
+class PPCAsmBackend : public TargetAsmBackend {
+const Target &TheTarget;
+public:
+ PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {}
+
+ unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_lo16", 16, 16, 0 },
+ { "fixup_ppc_ha16", 16, 16, 0 },
+ { "fixup_ppc_lo14", 16, 14, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME.
+ return false;
+ }
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ }
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // FIXME: Zero fill for now. That's not right, but at least will get the
+ // section size right.
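+    // (A proper implementation would emit PowerPC nops: "ori 0, 0, 0",
+    // i.e. the 32-bit word 0x60000000.)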
+ for (uint64_t i = 0; i != Count; ++i)
+ OW->Write8(0);
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ assert(0 && "UNIMP");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createMachObjectWriter(new PPCMachObjectWriter(
+ /*Is64Bit=*/is64,
+ (is64 ? object::mach::CTM_PowerPC64 :
+ object::mach::CTM_PowerPC),
+ object::mach::CSPPC_ALL),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+} // end anonymous namespace
+
+
+
+
+TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const std::string &TT) {
+ if (Triple(TT).isOSDarwin())
+ return new DarwinPPCAsmBackend(T);
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 000000000000..9de2200296e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,696 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "InstPrinter/PPCInstPrinter.h"
+using namespace llvm;
+
+namespace {
+ class PPCAsmPrinter : public AsmPrinter {
+ protected:
+ DenseMap<MCSymbol*, MCSymbol*> TOC;
+ const PPCSubtarget &Subtarget;
+ uint64_t TOCLabelID;
+ public:
+ explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitFunctionEntryLabel();
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
+
+ void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+ };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux assembly output.
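+/// For example, "r3" becomes "3", "f2" becomes "2", and "cr7" becomes "7".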
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+    // The Linux assembler (and possibly other assemblers) does not accept
+    // register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ return;
+ case MachineOperand::MO_ExternalSymbol: {
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() == Reloc::Static) {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ }
+
+ MCSymbol *NLPSym =
+ OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
+ MO.getSymbolName()+"$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
+
+ O << *NLPSym;
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) {
+ if (!GV->hasHiddenVisibility()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>()
+ .getGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().
+ getHiddenGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+
+ O << *SymToPrint;
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ break; // PPC never has a prefix.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ assert(MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE: {
+ if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+
+ SmallString<32> Str;
+ raw_svector_ostream O(Str);
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+    // Cast away const; DIVariable and the other DebugInfo wrappers do not
+    // take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
+
+
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
+ Create(PICBase, OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+ return;
+ }
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
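+    // Illustratively, "%X3 = LDtoc <ga:@min1>, %X2" ends up as an 'ld' of the
+    // TOC-entry label relative to X2 (the TOC pointer); doFinalization() then
+    // emits the matching ".tc" entry for that label in the ".toc" section.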
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isGlobal());
+
+ // Map symbol -> label of TOC entry.
+ MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+ if (TOCEntry == 0)
+ TOCEntry = GetTempSymbol("C", TOCLabelID++);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case PPC::MFCRpseud:
+ // Transform: %R3 = MFCRpseud %CR7
+ // Into: %R3 = MFCR ;; cr7
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+ if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ // Emit an official procedure descriptor.
+ // FIXME 64-bit SVR4: Use MCSection here!
+ OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
+ OutStreamer.EmitRawText(StringRef("\t.align 3"));
+ OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
+ ",.TOC.@tocbase");
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+ OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+}
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ if (isPPC64 && !TOC.empty()) {
+ // FIXME 64-bit SVR4: Use MCSection here?
+ OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+
+    // FIXME: This is nondeterministic!
+ for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ E = TOC.end(); I != E; ++I) {
+ OutStreamer.EmitLabel(I->second);
+ OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
+ "[TC]," + I->first->getName());
+ }
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+ // FIXME: This is a total hack, finish mc'izing the PPC backend.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+
+  // Prime the text sections so they are adjacent. This reduces the likelihood
+  // that a large data or debug section will cause a branch to exceed the 16M
+  // limit.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+ // Remove $stub suffix, add $lazy_ptr.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
+ TmpStr += "$lazy_ptr";
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+ // Add $tmp suffix to $stub, yielding $stub$tmp.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
+ TmpStr += "$tmp";
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+ bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ // .lazy_symbol_pointer
+ const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ // FIXME: MCize this.
+ OutStreamer.EmitRawText(StringRef("\tmflr r0"));
+ OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+ OutStreamer.EmitLabel(AnonSymbol);
+ OutStreamer.EmitRawText(StringRef("\tmflr r11"));
+ OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
+ "-" + AnonSymbol->getName() + ")");
+ OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
+
+ if (isPPC64)
+ OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+ "-" + AnonSymbol->getName() + ")(r11)");
+ else
+ OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+ "-" + AnonSymbol->getName() + ")(r11)");
+ OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+ OutStreamer.EmitRawText(StringRef("\tbctr"));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ if (isPPC64)
+ OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+ }
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
+ if (isPPC64)
+ OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+ ")(r11)");
+ else
+ OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+ ")(r11)");
+ OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+ OutStreamer.EmitRawText(StringRef("\tbctr"));
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ if (isPPC64)
+ OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+ }
+
+ OutStreamer.AddBlankLine();
+}
+
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+ // Darwin/PPC always uses mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty())
+ EmitFunctionStubs(Stubs);
+
+ if (MAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (*I) {
+ MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ }
+ }
+ }
+
+ // Output stubs for dynamically-linked functions.
+ Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// target assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin())
+ return new PPCDarwinAsmPrinter(tm, Streamer);
+ return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 000000000000..e161d23600e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,174 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes: one that computes basic
+// block positions, and one that converts branch pseudo ops into machine
+// branch opcodes. This pass should be run last, just before the assembly
+// printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCPredicates.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+  // If the entire function fits within the range of a conditional-branch
+  // displacement field, we know we don't need to expand any branches in this
+  // function. This is a common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
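+            // Illustrative example: if Dest is block 2, this is block 5, blocks
+            // 2-4 are 100, 200 and 300 bytes, and this branch is 40 bytes into
+            // block 5, then BranchSize = 40 + 100 + 200 + 300 = 640.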
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<16>(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+          // The new sequence is 8 bytes where the old branch was 4 bytes, so
+          // grow the block size by 4 and remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 000000000000..441db94581ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,132 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ CCIfType<[f32], CCAssignToReg<[F1]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+/*
+def CC_PPC : CallingConv<[
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+  // Common sub-targets pass FP values in F1 - F13.
+ CCIfType<[f32, f64],
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
+
+ // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
+
+/*
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>*/
+]>;
+
+*/
+
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 ABI
+//===----------------------------------------------------------------------===//
+
+def CC_PPC_SVR4_Common : CallingConv<[
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
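+  // For example, the two halves of a split i64 may land in R3/R4, R5/R6,
+  // R7/R8 or R9/R10, but never straddle a pair such as R4/R5.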
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention always puts vector arguments on the stack. It is
+// used to assign vector arguments that belong to the variable portion of the
+// parameter list of a variadic function.
+def CC_PPC_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
+// vector arguments in vector registers before putting them on the stack.
+def CC_PPC_SVR4 : CallingConv<[
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the caller's stack frame.
+//
+// Still, the address of the aggregate copy in the caller's stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC_SVR4 calling convention.
+//
+// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+
+def CC_PPC_SVR4_ByVal : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+]>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 000000000000..42232a07535b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,261 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPCRelocations.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter : public MachineFunctionPass {
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address in the function into this pointer.
+ void *MovePCtoLROffset;
+ public:
+
+ PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+
+ MachineRelocation GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+}
+
+char PPCCodeEmitter::ID = 0;
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new PPCCodeEmitter(TM, JCE);
+}
+
+bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+ }
+}
+
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
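+  // e.g. CR0 encodes as 0x80 and CR7 as 0x01 in the resulting field mask.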
+ return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const {
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
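+  // Worked example (illustrative numbers): with the 'bl' at 0x100 and &gv at
+  // 0x500, the field is pre-loaded with -0x100-4 = -0x104; the relocation adds
+  // &gv, leaving 0x3FC; at run time LR holds 0x104 (the PIC base, just past
+  // the 'bl'), and 0x3FC + 0x104 = 0x500 = &gv.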
+ intptr_t Cst = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ Cst = -(intptr_t)MovePCtoLROffset - 4;
+ }
+
+ if (MO.isGlobal())
+ return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Cst, isa<Function>(MO.getGlobal()));
+ if (MO.isSymbol())
+ return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ RelocID, MO.getSymbolName(), Cst);
+ if (MO.isCPI())
+ return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+
+ if (MO.isMBB())
+ return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ RelocID, MO.getMBB());
+
+ assert(MO.isJTI());
+ return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+}
+
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
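+  // Illustrative layout of the returned value: bits 20-16 hold the base
+  // register number and bits 15-0 hold the 16-bit displacement, so a base of
+  // r1 with displacement 8 yields (1 << 16) | 0x0008.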
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return RegBits;
+}
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
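+  // Illustrative layout: bits 18-14 hold the base register number and bits
+  // 13-0 hold the 14-bit displacement field.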
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+ return RegBits;
+}
+
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+#include "PPCGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
new file mode 100644
index 000000000000..b3c889e3f8da
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
@@ -0,0 +1,45 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+  /// fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like
+  /// 'b' and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'li'.
+ fixup_ppc_lo16,
+
+ /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
+ /// like 'lis'.
+ fixup_ppc_ha16,
+
+ /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'std'.
+ fixup_ppc_lo14,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 000000000000..375e000fe401
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,978 @@
+//===-- PPCFrameLowering.cpp - PPC Frame Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary bigger than
+// the default (16 bytes on Darwin) when there is a stack local of greater
+// alignment. This does not currently work, because the delta between old and
+// new stack pointers is added to offsets that reference incoming parameters
+// after the prolog is generated, and the code that does that doesn't handle a
+// variable delta. You don't want to do that anyway; a better approach is to
+// reserve another register that retains the incoming stack pointer, and
+// reference parameters relative to that.
+#define ALIGN_STACK 0
+
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+    // If the block ends in a return instruction, it contains an epilog to scan.
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc dl = MI->getDebugLoc();
+
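+  // Compute a VRSAVE-style mask: bit (31-i) is set when vector register Vi is
+  // used; e.g. a function using only V0 and V1 yields a mask of 0xC0000000.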
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+  // Live-in and live-out values must already be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = getStackAlignment();
+  unsigned AlignMask = TargetAlign - 1;
+
+  // If this is a leaf function that uses at most 224 bytes of stack space and
+  // has no frame pointer, no calls, and no dynamic allocas, then we do not
+  // need to adjust the stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+ // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
+ if (!DisableRedZone &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ // No need for frame
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
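+  // (e.g. with a 16-byte TargetAlign, a 100-byte frame is rounded up to 112).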
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+  // early, before the stack layout is calculated, and can thus return different
+  // answers depending on when it is called.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return false;
+
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
+ (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl;
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Prepare for frame info.
+ MCSymbol *FrameLabel = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ // FIXME: determineFrameLayout() may change the frame size. This should be
+  // moved higher up, into some hook.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+ .addImm(FPOffset/4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
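+ // Three cases follow: dynamic realignment of R1 via STWUX/STDUX, a small
+ // frame handled with a single STWU/STDU, and a large frame whose size is
+ // first materialized in R0 with LIS/ORI.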
+ if (!isPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
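+ // These entries are later emitted as DWARF call-frame information, so that
+ // the unwinder can find the CFA and the saved LR/FP at each prologue label.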
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Show update of SP.
+ if (NegFrameSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+ }
+ }
+
+ MCSymbol *ReadyLabel = 0;
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!isPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+
+ if (needsFrameMoves) {
+ ReadyLabel = MMI.getContext().CreateTempSymbol();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+
+ // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
+ // subregisters of CR2. We just need to emit a move of CR2.
+ if (Reg == PPC::CR2LT || Reg == PPC::CR2GT || Reg == PPC::CR2EQ)
+ continue;
+ if (Reg == PPC::CR2UN)
+ Reg = PPC::CR2;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no terminator");
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl;
+
+ assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be non-negative");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!isPPC64) {
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MF.getRegInfo().setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+ // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+ // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+ // r0 for now.
+
+ if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (needsFP(MF) || spillsCR(MF)) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI())
+ return;
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasCRSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegisterClass->contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+// FIXME SVR4: Disable CR save area for now.
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+ || PPC::CRRCRegisterClass->contains(Reg)) {
+// HasCRSaveArea = true;
+ } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR),
+ PPCRegisterInfo::getRegisterNumbering(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // The CR save area is below the general register save area.
+ if (HasCRSaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the CR/CRBIT register class?
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+ PPC::CRRCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding, we need 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 000000000000..0c18de1e2e26
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,322 @@
+//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC declaration of the TargetFrameLowering class.
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+ class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
+
+public:
+ PPCFrameLowering(const PPCSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ }
+
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool needsFP(const MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI)
+ return isPPC64 ? 16 : 8;
+ // SVR4 ABI:
+ return isPPC64 ? 16 : 4;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (isDarwinABI)
+ return isPPC64 ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -8U : -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
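+ /// (On Darwin and 64-bit SVR4 this is six pointer-sized slots, conventionally
+ /// the back chain, saved CR, saved LR, two reserved slots and the saved TOC.)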
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+ }
+
+ /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
+
+ // 32-bit SVR4 ABI:
+ // There is no default stack allocated for the 8 first GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
+ }
+
+ // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return 0;
+ }
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
+ // mode?
+ {PPC::R31, -4},
+ {PPC::R30, -12},
+ {PPC::R29, -20},
+ {PPC::R28, -28},
+ {PPC::R27, -36},
+ {PPC::R26, -44},
+ {PPC::R25, -52},
+ {PPC::R24, -60},
+ {PPC::R23, -68},
+ {PPC::R22, -76},
+ {PPC::R21, -84},
+ {PPC::R20, -92},
+ {PPC::R19, -100},
+ {PPC::R18, -108},
+ {PPC::R17, -116},
+ {PPC::R16, -124},
+ {PPC::R15, -132},
+ {PPC::R14, -140},
+
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 000000000000..cddc9d858adf
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,308 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
+// branch instruction per-cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, and some
+// instructions fill an entire dispatch group. Additionally, only branches can
+// issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
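+// As a rough example, a full dispatch group might be four FXU/LSU/FPU/VALU
+// operations plus one branch in the fifth slot, while a PPC970_Single
+// instruction (such as mtspr) occupies a group by itself.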
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DEBUG(errs() << "=== Start of dispatch group\n");
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
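+ // Machine nodes encode their target opcode bitwise-complemented (as a
+ // negative value), so a non-negative opcode here is a target-independent
+ // node, which we treat as a pseudo.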
+ if ((int)Opcode >= 0) {
+ isFirst = isSingle = isCracked = isLoad = isStore = false;
+ return PPCII::PPC970_Pseudo;
+ }
+ Opcode = ~Opcode;
+
+ const MCInstrDesc &MCID = TII.get(Opcode);
+
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
+
+ uint64_t TSFlags = MCID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
+ return true;
+ if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ return true;
+
+ // Okay, we don't have an exact match; if this is an indexed offset, see if
+ // we have overlap (which happens during fp->int conversion, for example).
+ if (StorePtr2[i] == Ptr2) {
+ if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
+ if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ int StoreOffs = StoreOffset->getZExtValue();
+ int LoadOffs = LoadOffset->getZExtValue();
+ if (StoreOffs < LoadOffs) {
+ if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
+ } else {
+ if (int(LoadOffs+LoadSize) > StoreOffs) return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return Hazard for any non-branch instruction that would
+/// terminate the dispatch group. We return NoopHazard for any instruction that
+/// would not terminate the dispatch group but would cause a pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: llvm_unreachable("Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores) {
+ unsigned LoadSize;
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown load!");
+ case PPC::LBZ: case PPC::LBZU:
+ case PPC::LBZX:
+ case PPC::LBZ8: case PPC::LBZU8:
+ case PPC::LBZX8:
+ case PPC::LVEBX:
+ LoadSize = 1;
+ break;
+ case PPC::LHA: case PPC::LHAU:
+ case PPC::LHAX:
+ case PPC::LHZ: case PPC::LHZU:
+ case PPC::LHZX:
+ case PPC::LVEHX:
+ case PPC::LHBRX:
+ case PPC::LHA8: case PPC::LHAU8:
+ case PPC::LHAX8:
+ case PPC::LHZ8: case PPC::LHZU8:
+ case PPC::LHZX8:
+ LoadSize = 2;
+ break;
+ case PPC::LFS: case PPC::LFSU:
+ case PPC::LFSX:
+ case PPC::LWZ: case PPC::LWZU:
+ case PPC::LWZX:
+ case PPC::LWA:
+ case PPC::LWAX:
+ case PPC::LVEWX:
+ case PPC::LWBRX:
+ case PPC::LWZ8:
+ case PPC::LWZX8:
+ LoadSize = 4;
+ break;
+ case PPC::LFD: case PPC::LFDU:
+ case PPC::LFDX:
+ case PPC::LD: case PPC::LDU:
+ case PPC::LDX:
+ LoadSize = 8;
+ break;
+ case PPC::LVX:
+ case PPC::LVXL:
+ LoadSize = 16;
+ break;
+ }
+
+ if (isLoadOfStoredAddress(LoadSize,
+ Node->getOperand(0), Node->getOperand(1)))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore) {
+ unsigned ThisStoreSize;
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown store instruction!");
+ case PPC::STB: case PPC::STB8:
+ case PPC::STBU: case PPC::STBU8:
+ case PPC::STBX: case PPC::STBX8:
+ case PPC::STVEBX:
+ ThisStoreSize = 1;
+ break;
+ case PPC::STH: case PPC::STH8:
+ case PPC::STHU: case PPC::STHU8:
+ case PPC::STHX: case PPC::STHX8:
+ case PPC::STVEHX:
+ case PPC::STHBRX:
+ ThisStoreSize = 2;
+ break;
+ case PPC::STFS:
+ case PPC::STFSU:
+ case PPC::STFSX:
+ case PPC::STWX: case PPC::STWX8:
+ case PPC::STWUX:
+ case PPC::STW: case PPC::STW8:
+ case PPC::STWU:
+ case PPC::STVEWX:
+ case PPC::STFIWX:
+ case PPC::STWBRX:
+ ThisStoreSize = 4;
+ break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
+ case PPC::STD:
+ case PPC::STDU:
+ case PPC::STFD:
+ case PPC::STFDX:
+ case PPC::STDX:
+ case PPC::STDUX:
+ ThisStoreSize = 8;
+ break;
+ case PPC::STVX:
+ case PPC::STVXL:
+ ThisStoreSize = 16;
+ break;
+ }
+
+ StoreSize[NumStores] = ThisStoreSize;
+ StorePtr1[NumStores] = Node->getOperand(1);
+ StorePtr2[NumStores] = Node->getOperand(2);
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 000000000000..2f81f0f7c7f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,73 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "PPCInstrInfo.h"
+
+namespace llvm {
+
+/// PPCHazardRecognizer970 - This class defines a finite state automata that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group),
+/// or storing then loading from the same address within a dispatch group.
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+ // StorePtr1/StorePtr2 - Keep track of the address of any store. If we see a
+ // load from the same address (or one that aliases it), force it into a new
+ // dispatch group with a noop. We can have up to four stores in one dispatch
+ // group, hence we track up to 4.
+ //
+ // NumStores is zero if we haven't seen a store yet. We keep track of both
+ // operands of each store here, since we support [r+r] and [r+i] addressing.
+ SDValue StorePtr1[4], StorePtr2[4];
+ unsigned StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(unsigned LoadSize,
+ SDValue Ptr1, SDValue Ptr2) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2176c02c8503
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1087 @@
+//===-- PPCISelDAGToDAG.cpp - PPC pattern matching inst selector ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class PPCDAGToDAGISel : public SelectionDAGISel {
+ const PPCTargetMachine &TM;
+ const PPCTargetLowering &PPCLowering;
+ const PPCSubtarget &PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(*TM.getTargetLowering()),
+ PPCSubTarget(*TM.getSubtargetImpl()) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+
+ InsertVRSaveCode(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+ /// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ Out = N;
+ return true;
+ }
+
+ /// SelectAddrIdx - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation. Returns false if it can
+ /// be represented by [r+imm], which is preferred.
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrIdxOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ OutOps.push_back(Op);
+ return false;
+ }
+
+ void InsertVRSaveCode(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDNode *N);
+ };
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+ // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ bool HasVectorVReg = false;
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl;
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
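+/// (The base is set up by MovePCtoLR, a pseudo that branches-and-links to the
+/// next instruction so LR picks up the current PC, followed by an MFLR into
+/// the virtual base register, as emitted below.)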
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl;
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering.getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and sets Imm
+/// to the immediate value.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node has a specific
+// opcode and an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
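+// For example, a mask of 0x00000FF0 (bits 20..27 when numbered from the MSB,
+// as the PPC rotate-and-mask instructions do) yields MB = 20 and ME = 27.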
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (isShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (isShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
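+ /// Illustrative example: (or (and x, 0xFFFFFF00), (and y, 0x000000FF)) can be
+ /// selected here as rlwimi with SH = 0, MB = 24, ME = 31, inserting the low
+ /// byte of y into the masked x.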
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ SH &= 31;
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, DebugLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>((int)Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt<32>(Imm)) {
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+///
+/// If this returns with Other != -1, then the returned comparison is an or of
+/// two simpler comparisons. In this case, Invert is guaranteed to be false.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+ Invert = false;
+ Other = -1;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+ return 0;
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
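+ // Explanatory note: cntlzw returns 32 only when Op is zero, and the rlwinm
+ // below (SH=27, MB=5, ME=31) is a logical shift right by 5, so the result
+ // is 1 iff Op == 0.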
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
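+ // Explanatory note: addic Op, -1 sets the carry iff Op != 0, and the subfe
+ // below produces exactly that carry bit, so the result is 1 iff Op != 0.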
+ if (isPPC64) break;
+ SDValue AD =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ bool Inv;
+ int OtherCondIdx;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ CR7Reg, CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (OtherCondIdx == -1 && !Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ if (Inv) {
+ assert(OtherCondIdx == -1 && "Can't have split plus negation");
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+
+ // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
+ // We already got the bit for the first part of the comparison (e.g. SETULE).
+
+ // Get the other bit of the comparison.
+ Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
+ SDValue OtherCond =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
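+ // Illustrative example: the i64 constant 0x123456789ABC is materialized by
+ // the code below as li 0x1234; rldicr ,,32,31; oris ,,0x5678; ori ,,0x9ABC.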
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(N);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDValue InFlag = N->getOperand(1);
+ // Use MFOCRF if supported.
+ if (PPCSubTarget.isGigaProcessor())
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
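+ // Illustrative example: sdiv x, 4 becomes srawi tmp, x, 2 followed by addze,
+ // which adds back the carry set when a negative value shifted out nonzero
+ // bits, rounding the quotient toward zero; for sdiv x, -4 the result is
+ // additionally negated.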
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (isa<ConstantSDNode>(Offset) ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ // FIXME: PPC64
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ } else {
+ llvm_unreachable("R+R preindex loads not supported yet!");
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
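+ // Illustrative example: (and (srl x, 8), 0xFF) is selected here as
+ // rlwinm dst, x, 24, 24, 31.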
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 000000000000..9741a3902af7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,5768 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+ cl::Hidden);
+
+static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->isDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new TargetLoweringObjectFileELF();
+}
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+ addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+ addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc loads and stores.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // Expand FSQRT unless the subtarget provides a hardware square root instruction.
+ if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ }
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ } else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
+ addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ }
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ setMinFunctionAlignment(2);
+ if (PPCSubTarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ computeRegisterProperties();
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ const TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on 4 byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME SVR4 TBD
+ return 4;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
+ case PPCISD::LOAD: return "PPCISD::LOAD";
+ case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
+ case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
+ case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ }
+}
+
+MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
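+ /// Illustrative example: the v16i8 mask <4,5,6,...,18,19> is a vsldoi of the
+ /// two inputs by 4 bytes, so this returns 4.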
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
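+ /// Illustrative example: with EltSize == 4, the mask
+ /// <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7> splats word element 1 of the first input.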
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
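+ /// Illustrative example: a v16i8 build_vector of sixteen 5's with
+ /// ByteSize == 1 returns a target constant 5 (i.e. vspltisb 5); an all-zero
+ /// vector deliberately returns SDValue() so that vxor can be used instead.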
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only case whose replicated bits would fit into our
+ // immediate field is zero, and we prefer to use vxor for that.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int ShAmt = (4-ByteSize)*8;
+ int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+ /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+ /// or 64-bit immediate, and if the value can be accurately represented as a
+ /// sign extension from a 16-bit value. If so, this returns true and sets Imm
+ /// to the sign-extended value.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i; fold the immediate if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
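+ // For example, (X & ~3) | 2 can be selected as (X & ~3) + 2, since the set
+ // bits cannot overlap and therefore cannot carry.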
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from parent load or store, not from address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
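+/// (DS-form memory instructions such as STD require the low two bits of the
+/// displacement to be zero; the encoded immediate is the byte offset / 4.)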
+bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getZExtValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - Returns true by value, and sets the base
+/// pointer, offset pointer, and addressing mode by reference, if the node's
+/// address can be legally represented as a pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ // Disabled by default for now.
+ if (!EnablePPCPreinc) return false;
+
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ // TODO: Check reg+reg first.
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+ unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ HiOpFlags = PPCII::MO_HA16;
+ LoOpFlags = PPCII::MO_LO16;
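+ // ha16(x) is the high 16 bits adjusted for the sign of lo16(x), so that
+ // (ha16(x) << 16) plus the sign-extended lo16(x) reconstructs x.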
+
+ // Don't use the PIC base if we are not in the PIC relocation model, or if we
+ // are on a non-Darwin platform; we don't support PIC on other platforms yet.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<PPCSubtarget>().isDarwin();
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
+ }
+
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
+ }
+
+ return isPIC;
+}
+
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ DebugLoc DL = HiPart.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
+
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
+
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
+ SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ DebugLoc DL = GSDN->getDebugLoc();
+ const GlobalValue *GV = GSDN->getGlobal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
+
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
+
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, 0);
+ return Ptr;
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
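+ // For i32 this becomes (cntlzw X) >> 5: cntlzw returns 32 only for X == 0,
+ // so the shifted result is 1 exactly when X == 0 and 0 otherwise.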
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ EVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ EVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
+
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index >= 8 (all eight registers used)
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
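+ // (the eight saved GPRs occupy the first 8 * 4 == 32 bytes of the area)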
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr index >= 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ const Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+ *DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ Args, DAG, dl);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+ }
+
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
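+ // With 32-bit pointers the fields land at byte offsets 0 (gpr), 1 (fpr),
+ // 4 (overflow_arg_area) and 8 (reg_save_area), which matches the offsets
+ // computed below.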
+
+
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
+ false, false, 0);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
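+ // For example, if only R3 has been allocated (RegNum == 1), R4 is skipped so
+ // that the next register pair starts at R5/R6.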
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments
+/// on Darwin.
+static const unsigned *GetFPR() {
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
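+ // Round the argument size up to a multiple of the pointer size.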
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+ return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
+ // +-----------------------------------+
+ // +--> | Back chain |
+ // | +-----------------------------------+
+ // | | Floating-point register save area |
+ // | +-----------------------------------+
+ // | | General register save area |
+ // | +-----------------------------------+
+ // | | CR save word |
+ // | +-----------------------------------+
+ // | | VRSAVE save word |
+ // | +-----------------------------------+
+ // | | Alignment padding |
+ // | +-----------------------------------+
+ // | | Vector register save area |
+ // | +-----------------------------------+
+ // | | Local variable space |
+ // | +-----------------------------------+
+ // | | Parameter list area |
+ // | +-----------------------------------+
+ // | | LR save word |
+ // | +-----------------------------------+
+ // SP--> +--- | Back chain |
+ // +-----------------------------------+
+ //
+ // Specifications:
+ // System V Application Binary Interface PowerPC Processor Supplement
+ // AltiVec Technology Programming Interface Manual
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ unsigned PtrByteSize = 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ TargetRegisterClass *RC;
+ EVT ValVT = VA.getValVT();
+
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i32:
+ RC = PPC::GPRCRegisterClass;
+ break;
+ case MVT::f32:
+ RC = PPC::F4RCRegisterClass;
+ break;
+ case MVT::f64:
+ RC = PPC::F8RCRegisterClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ RC = PPC::VRRCRegisterClass;
+ break;
+ }
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Argument stored in memory.
+ assert(VA.isMemLoc());
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Assign locations to all of the incoming aggregate by value arguments.
+ // Aggregates passed by value are stored in the local variable space of the
+ // caller's stack frame, right above the parameter list area.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(false, false));
+
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+
+ FI->setMinReservedArea(MinReservedArea);
+
+ SmallVector<SDValue, 8> MemOps;
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ static const unsigned GPArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+ static const unsigned FPArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
+ NumGPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
+ NumFPArgRegs));
+
+ // Make room for NumGPArgRegs and NumFPArgRegs.
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+ NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
+
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
+ // The double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const unsigned *FPR = GetFPR();
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof( VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples:), but we have to walk the arglist to figure
+ // that out...for the pathological case, compute VecArgOffset as the
+ // start of the vector parameter area. Computing VecArgOffset is the
+ // entire point of the following loop.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
+ ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
+ ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += isPPC64 ? 8 : 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at Nonvector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
+ ObjSize==1 ? MVT::i8 : MVT::i16,
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be the address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ if (!isPPC64) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // All FP arguments reserve stack space in the Darwin ABI.
+ ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isVarArg,
+ unsigned CC,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumOps = Outs.size();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
+ unsigned ParamSize) {
+
+ if (!isTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustment is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!GuaranteedTailCallOpt)
+ return false;
+
+ // Variable argument functions are not supported.
+ if (isVarArg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing byval parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
+
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+
+ return false;
+}
+
+/// isBLACompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 6 >> 6) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
+
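+ // The LI field of an absolute branch encodes the word-aligned target divided
+ // by 4, so return the address shifted right by two.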
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDValue, 8> &MemOpChains,
+ DebugLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to framepointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isDarwinABI,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
+ isDarwinABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewRetAddr),
+ false, false, 0);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ int NewFPLoc =
+ SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(NewFPIdx),
+ false, false, 0);
+ }
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember the argument for later processing and
+/// calculate the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
+/// address stack slots. Returns the chain as result and the loaded values in
+/// LROpOut/FPOpOut. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
+ false, false, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
+ false, false, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
+/// specified by "Src" to the address "Dst". The size and alignment
+/// information are taken from the byval parameter attribute flags. The copy
+/// will be passed as a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, false, MachinePointerInfo(0),
+ MachinePointerInfo(0));
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ DebugLoc dl) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+ DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDValue LROp, SDValue FPOp, bool isDarwinABI,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isDarwinABI, dl);
+
+ // Emit callseq_end just before tailcall node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+ SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ const PPCSubtarget &PPCSubTarget) {
+
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+
+ unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ unsigned OpFlags = 0;
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(),
+ 0, OpFlags);
+ needIndirectCall = false;
+ }
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (needIndirectCall) {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+
+ if (isSVR4ABI && isPPC64) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
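+ // (The entry point is at offset 0, the TOC base at offset 8 and the
+ // environment pointer at offset 16 of the descriptor.)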
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // All those operations are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // operations together, a TOC access in the caller could be scheduled
+ // between the load of the callee TOC and the branch to the callee, which
+ // results in the TOC access going through the TOC of the callee instead
+ // of going through the TOC of the caller, which leads to incorrect code.
+
+ // Load the address of the function entry point from the function
+ // descriptor.
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
+ InFlag.getNode() ? 3 : 2);
+ Chain = LoadFuncPtr.getValue(1);
+ InFlag = LoadFuncPtr.getValue(2);
+
+ // Load environment pointer into r11.
+ // Offset of the environment pointer within the function descriptor.
+ SDValue PtrOff = DAG.getIntPtrConstant(16);
+
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+ SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
+ InFlag);
+ Chain = LoadEnvPtr.getValue(1);
+ InFlag = LoadEnvPtr.getValue(2);
+
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+
+ // Load TOC of the callee into r2. We are using a target-specific load
+ // with r2 hard coded, because the result of a target-independent load
+ // would never go directly into r2, since r2 is a reserved register (which
+ // prevents the register allocator from allocating it), resulting in an
+ // additional register being allocated and an unnecessary move instruction
+ // being generated.
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
+ Callee, InFlag);
+ Chain = LoadTOCPtr.getValue(0);
+ InFlag = LoadTOCPtr.getValue(1);
+
+ MTCTROps[0] = Chain;
+ MTCTROps[1] = LoadFuncPtr;
+ MTCTROps[2] = InFlag;
+ }
+
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+ 2 + (InFlag.getNode() != 0));
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ Ops.push_back(Chain);
+ CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ Callee.setNode(0);
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ return CallOpc;
+}
+
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT VT = VA.getValVT();
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VT, InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
+ InFlag = Chain.getValue(2);
+ }
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+ bool isTailCall, bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const {
+ std::vector<EVT> NodeTys;
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
+ isTailCall, RegsToPass, Ops, NodeTys,
+ PPCSubTarget);
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ // If this is the first return lowered for this function, add the regs
+ // to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
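+ // In other words, the emitted sequence is 'bl <callee>' followed by 'nop',
+ // and the linker may rewrite the nop into a TOC-restoring load such as
+ // 'ld r2, 40(r1)'.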
+ if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (CallOpc == PPCISD::BCTRL_SVR4) {
+ // This is a call through a function pointer.
+ // Restore the caller TOC from the save area into R2.
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ // We are using a target-specific load with r2 hard coded, because the
+ // result of a target-independent load would never go directly into r2,
+ // since r2 is a reserved register (which prevents the register allocator
+ // from allocating it), resulting in an additional register being
+ // allocated and an unnecessary move instruction being generated.
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // Otherwise insert NOP.
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ }
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ unsigned PtrByteSize = 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a tail call. As a
+ // consequence the frame pointer will be used for dynamic allocations and
+ // for restoring the caller's stack pointer in this function's epilog. This
+ // is done because the tail-called function might overwrite the value in
+ // this function's (MF) stack pointer stack slot 0(SP).
+ if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, parameter list area and the part of the local variable space which
+ // contains copies of aggregates which are passed by value.
+
+ // Assign locations to all of the outgoing arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ if (isVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Fixed vector arguments go into registers as long as registers are
+ // available. Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool Result;
+
+ if (Outs[i].IsFixed) {
+ Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
+ } else {
+ Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
+ }
+
+ if (Result) {
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+ } else {
+ // All arguments are treated the same.
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ }
+
+ // Assign locations to all of the outgoing aggregate by value arguments.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+
+ // Size of the linkage area, parameter list area and the part of the local
+ // variable space where copies of aggregates which are passed by value are
+ // stored.
+ unsigned NumBytes = CCByValInfo.getNextStackOffset();
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the callers stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Set CR bit 6 (CR1EQ) to true if this is a vararg call.
+ if (isVarArg) {
+ SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+ RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a tail call. As a
+ // consequence the frame pointer will be used for dynamic allocations and
+ // for restoring the caller's stack pointer in this function's epilog. This
+ // is done because the tail-called function might overwrite the value in
+ // this function's (MF) stack pointer stack slot 0(SP).
+ if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
+ Outs, OutVals,
+ nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const unsigned *FPR = GetFPR();
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ // And copy the pieces of it that fit into registers.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
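+ // No registers are left; the rest of the object stays in the memory
+ // copy made above. Round the remaining size up to a multiple of the
+ // pointer size and skip past it.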
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the '...'. We do it for all
+ // arguments; it seems to work.
+ while (ArgOffset % 16 != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip the first 12 params, which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ SDValue PtrOff = DAG.getIntPtrConstant(40);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get the current return address save index. The users of this index
+ // include the tail call lowering code above.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the return address save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out the fixed offset of the return address save area.
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for the return address save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
+ isDarwinABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
+/// when possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
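+ // PPCISD::FSEL computes 'Cmp >= 0.0 ? TV : FV', so each condition below is
+ // rewritten into that form by choosing the comparison value and, where
+ // needed, swapping TV and FV.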
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ PPCISD::FCTIDZ,
+ dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+
+ // Emit a store to the stack slot.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
+ if (Op.getValueType() == MVT::i32)
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
+ // then lfd it and fcfid it.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 8, 8);
+ SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
+ SDValue Store =
+ DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
+ Ops, 3, MVT::i64, MMO);
+ // Load the value as a double.
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ /*
+ The rounding mode is in bits 30:31 of FPSCR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
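+ // A quick check of the formula: FPSCR values 0, 1, 2 and 3 map to 1, 0, 2
+ // and 3 respectively, matching the FLT_ROUNDS encoding above.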
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<EVT> NodeTys;
+ SDValue MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ NodeTys.push_back(MVT::f64); // return register
+ NodeTys.push_back(MVT::Glue); // unused in this context
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, MachinePointerInfo(), false, false,0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
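+ // OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth));
+ // at most one of the last two terms is non-zero because a PPC shift by
+ // BitWidth or more yields zero.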
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const EVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ EVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ DebugLoc dl, EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
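+ // For example, SplatBits == 0xFE with SplatBitSize == 8 sign-extends to
+ // SextVal == -2, which a single vspltisb can materialize.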
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+ Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // If this is 0x7FFF_FFFF x 4, materialize it as not(0x8000_0000), where
+ // 0x8000_0000 is built with vspltisw -1 followed by vslw. This is
+ // important for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make an all-ones vector with vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // Check for the wide variety of 'vsplti N; binop with self' cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000).
+ int i = SplatCsts[idx];
+
+ // Figure out the shift amount AltiVec actually uses when i is the shift
+ // operand at this splat size (only the low bits of i matter).
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd values in [17,31] (everything else in [0,31] was handled above):
+ // (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
+ }
+ // Odd values in [-31,-17] (everything else in [-31,0] was handled above):
+ // (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
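+ // Rough layout of PFEntry, as decoded above: bits 31-30 hold the cost,
+ // bits 29-26 the operation, and bits 25-13 / 12-0 the LHS/RHS sub-shuffle
+ // IDs. Each 13-bit ID is a base-9 encoding of four element indices (0-7
+ // from the concatenated inputs, 8 for undef), so 0,1,2,3 == (1*9+2)*9+3
+ // means "take LHS as-is" and 4,5,6,7 means "take RHS as-is".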
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ EVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+ EVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
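+ // For example, with v4i32 inputs BytesPerElement is 4, so a mask entry of 5
+ // expands to the byte indices 20, 21, 22 and 23.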
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+ /// altivec comparison. If it is, return true and fill in CompareOpc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
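+ // The CompareOpc values below are the extended-opcode fields of the
+ // corresponding vcmp* instructions; the *_p predicate intrinsics map to the
+ // record ('.') forms, which also update CR6, and isDot records that.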
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ DebugLoc dl = Op.getDebugLoc();
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
+ }
+
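+ // Note the operand numbering: the predicate (_p) intrinsics carry an extra
+ // leading operand that selects which CR6 bit to return, so their vector
+ // operands sit at positions 2 and 3 rather than 1 and 2.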
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ std::vector<EVT> VTs;
+ VTs.push_back(Op.getOperand(2).getValueType());
+ VTs.push_back(MVT::Glue);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
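+ // The CR6 field lands in bits 4-7 of the MFCR result (LT at bit 7, GT at 6,
+ // EQ at 5, SO at 4, counting from the LSB), so 8-(3-BitNo) is 5 for the EQ
+ // bit and 7 for the LT bit, bringing the chosen bit down to bit 0.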
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
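+ // Going through memory here sidesteps the lack of a direct move from a
+ // scalar register into a vector register on these AltiVec targets.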
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
+ false, false, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
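+ // Per 32-bit lane, writing x = xh*2^16 + xl and y = yh*2^16 + yl:
+ //   x*y mod 2^32 = xl*yl + ((xh*yl + xl*yh) << 16)
+ // vmulouh below yields xl*yl, and vmsumuhm on LHS and the half-swapped RHS
+ // yields xh*yl + xl*yh, which is then shifted up by 16 and added in.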
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
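+ // The usable low byte of each 16-bit product is the odd-numbered byte
+ // (big-endian), so this mask interleaves bytes 1,3,5,... of EvenParts with
+ // bytes 17,19,... of OddParts.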
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ llvm_unreachable("Unknown mul to lower!");
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, PPCSubTarget);
+
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
+ Op.getDebugLoc());
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // This sequence changes FPSCR to do round-to-zero, adds the two halves
+ // of the long double, and puts FPSCR back the way it was. We do not
+ // actually model FPSCR.
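+ // FPSCR bits 30-31 form the rounding-mode field; the mtfsb1 31 / mtfsb0 30
+ // pair below sets it to 0b01, i.e. round toward zero, for the add.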
+ std::vector<EVT> NodeTys;
+ SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+ NodeTys.push_back(MVT::f64); // Return register
+ NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
+ Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ MFFSreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = DAG.getConstant(31, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = DAG.getConstant(30, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64); // result of add
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = Lo;
+ Ops[1] = Hi;
+ Ops[2] = InFlag;
+ Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
+ FPreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64);
+ Ops[0] = DAG.getConstant(1, MVT::i32);
+ Ops[1] = MFFSreg;
+ Ops[2] = FPreg;
+ Ops[3] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
+ FPreg = Result.getValue(0);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // <binop> tmp, dest, incr
+ // st[wd]cx. tmp, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // true for i8, false for i16
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64-bit mode we have to use 64-bit registers for addresses, even though
+ // the lwarx/stwcx. accesses themselves are 32-bit. With the word-sized
+ // atomics we can use address registers without caring whether they're 32 or
+ // 64 bits, but here we're doing actual arithmetic on the addresses.
+ bool is64bit = PPCSubTarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
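+ // For example, a byte at the lowest address of its word (big-endian byte 0)
+ // gets shift1 == 0 and shift == 24, so incr and the mask are moved up into
+ // the most-significant byte of the word before the lwarx/stwcx. loop runs.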
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = PPCSubTarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
+ } else {
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ const TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue() || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
+ N->getOperand(2), N->getOperand(3));
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16)) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
+ LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away; we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6
+ // result and a normal vector output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if its flag result has no uses, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ std::vector<EVT> VTs;
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ VTs.push_back(LHS.getOperand(2).getValueType());
+ VTs.push_back(MVT::Glue);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
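+
+// A worked example of the facts recorded above: an i16 PPCISD::LBRX (lhbrx)
+// only fills the low 16 bits, so KnownZero = 0xFFFF0000 can let the generic
+// combiner drop a redundant "and ..., 0xFFFF" after the load; the vcmp*_p
+// predicate intrinsics only ever produce 0 or 1, so everything but bit 0 is
+// known zero and masking or zero-extending their result can fold away.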
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, PPC::G8RCRegisterClass);
+ return std::make_pair(0U, PPC::GPRCRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, PPC::F4RCRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, PPC::F8RCRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, PPC::VRRCRegisterClass);
+ case 'y': // crrc
+ return std::make_pair(0U, PPC::CRRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0,0);
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
+ switch (Letter) {
+ default: llvm_unreachable("Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
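+
+// These letters follow the GCC rs6000 machine constraints, so (as a rough,
+// hypothetical example, with x and r as placeholder variables) user code like
+//
+//   asm ("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(16));
+//
+// reaches the 'I' case above: 16 passes the (short)Value == (int)Value check
+// and is emitted as a target constant, while an out-of-range value such as
+// 70000 fails the check and nothing is added to Ops for it.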
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only supports r+r and r+imm addressing; check the scale.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
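+
+// A few concrete cases of the rules above, as a sketch: "r31 + 8" (a 16-bit
+// immediate) and plain "r3 + r4" are accepted; "r3 + r4 + 8" is rejected
+// (Scale 1 with both a base register and an offset); "2*r3" is accepted only
+// because it can be re-expressed as "r3 + r3"; any larger scale, or a global
+// as the base, is rejected.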
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
+ isPPC64 ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
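+
+// For example, __builtin_return_address(1) takes the Depth > 0 path above:
+// it walks one frame via LowerFRAMEADDR and loads the saved LR from that
+// frame at the ABI-specific return-save offset, while depth 0 simply reloads
+// the LR save slot of the current frame, which setLRStoreRequired() keeps
+// from being optimized away.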
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ MFI->getStackSize() &&
+ !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
+ (is31 ? PPC::R31 : PPC::R1);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo(), false, false, 0);
+ return FrameAddr;
+}
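+
+// For example, __builtin_frame_address(2) starts from R1 (or R31 when a
+// frame pointer is being kept) and performs two chained loads, each one
+// following the back-chain word stored at the start of the previous frame.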
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero, the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero there is no need to check it
+/// against an alignment requirement, probably because the source does not
+/// need to be loaded. If 'NonScalarIntSafe' is true, it is safe to return a
+/// non-scalar-integer type, e.g. for an empty string source, a constant, or
+/// a value loaded from memory. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (this->PPCSubTarget.isPPC64()) {
+ return MVT::i64;
+ } else {
+ return MVT::i32;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 000000000000..986b4e73fbd3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,486 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "PPC.h"
+#include "PPCSubtarget.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// an f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ TOC_ENTRY,
+
+ /// The following three target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// Restore the TOC from the TOC save area of the current stack frame.
+ /// This is basically a hard coded load instruction which additionally
+ /// takes/produces a flag.
+ TOC_RESTORE,
+
+ /// Like a regular LOAD but additionally taking/producing a flag.
+ LOAD,
+
+ /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
+ /// a hard coded load instruction.
+ LOAD_TOC,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+
+ /// CALL - A direct function call.
+ CALL_Darwin, CALL_SVR4,
+
+ /// NOP - Special NOP which follows 64-bit SVR4 calls.
+ NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL_Darwin, BCTRL_SVR4,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+ /// instructions. This copies the bits corresponding to the specified
+ /// CRREG into the resultant GPR. Bits corresponding to other CR regs
+ /// are undefined.
+ MFCR,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ // The following 5 instructions are used only as part of the
+ // long double-to-int conversion sequence.
+
+ /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
+ /// register.
+ MFFS,
+
+ /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
+ MTFSB0,
+
+ /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
+ MTFSB1,
+
+ /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+ /// rounding towards zero. It has flags added so it won't move past the
+ /// FPSCR-setting instructions.
+ FADDRTZ,
+
+ /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
+ MTFSF,
+
+ /// LARX = This corresponds to the PPC l{w|d}arx instruction: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX = This corresponds to the PPC stcx. instruction: store conditional
+ /// indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1, 2, or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1, 2, or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ const PPCSubtarget &PPCSubTarget;
+
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
+ /// offset pointer, and addressing mode by reference, if the node's address
+ /// can be legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegReg - Given the specified address, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegRegOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero, the destination alignment can satisfy any
+ /// constraint. Similarly, if SrcAlign is zero there is no need to check it
+ /// against an alignment requirement, probably because the source does not
+ /// need to be loaded. If 'NonScalarIntSafe' is true, it is safe to return a
+ /// non-scalar-integer type, e.g. for an empty string source, a constant, or
+ /// a value loaded from memory. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+ bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 000000000000..e88ad378ccd9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,749 @@
+//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>,
+ PPC970_Unit_BRU;
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_Darwin : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_Darwin : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+ }
+}
+
+// ELF 64 ABI Calls = Darwin ABI Calls
+// Used to define BL8_ELF and BLA8_ELF
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ }
+}
+
+
+// Calls
+def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
+ (BL8_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
+ (BL8_Darwin texternalsym:$dst)>;
+
+def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
+ (BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
+ (BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCnop),
+ (NOP)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
+ [(set G8RC:$dst,
+ (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
+ [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ }
+}
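+
+// These pseudos are expanded after selection (usesCustomInserter routes them
+// through EmitInstrWithCustomInserter / EmitAtomicBinary) into a
+// load-reserved / store-conditional retry loop, roughly (register names here
+// are placeholders):
+//
+//   loop:  ldarx   rT, 0, rPtr
+//          add     rD, rT, rIncr      ; or sub/and/or/xor/nand as requested
+//          stdcx.  rD, 0, rPtr
+//          bne-    cr0, loop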
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
+ let isReturn = 1 in {
+ def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
+ [(set G8RC:$result,
+ (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// Copies, extends, truncates.
+def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+
+def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+
+let Defs = [CARRY] in {
+def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+}
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+
+let Defs = [CARRY] in {
+def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+}
+
+
+def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+let Defs = [CARRY] in {
+def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+}
+
+def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+
+let Defs = [CARRY] in {
+def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+}
+def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDCL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
+ "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
+ "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ []>, isPPC64;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
+let mayLoad = 1 in
+def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+ ptr_rc:$rA),
+ "lhau $rD, $disp($rA)", LdStGeneral,
+ []>, RegConstraint<"$rA = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+}
+
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
+ "ld 2, 8($reg)", LdStLD,
+ [(PPCload_toc G8RC:$reg)]>, isPPC64;
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
+def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
+ "ld 2, 40(1)", LdStLD,
+ [(PPCtoc_restore)]>, isPPC64;
+def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+
+let mayLoad = 1 in
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", LdStLD,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+}
+
+def : Pat<(PPCload ixaddr:$src),
+ (LD ixaddr:$src)>;
+def : Pat<(PPCload xaddr:$src),
+ (LDX xaddr:$src)>;
+
+let PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+let PPC970_Unit = 2 in {
+
+def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
+def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
+ "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+let mayStore = 1 in
+def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTD,
+ []>, isPPC64;
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext GPRC:$in)),
+ (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+def : Pat<(i64 (anyext GPRC:$in)),
+ (OR4To8 GPRC:$in, GPRC:$in)>;
+def : Pat<(i32 (trunc G8RC:$in)),
+ (OR8To4 G8RC:$in, G8RC:$in)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra G8RC:$rS, GPRC:$rB),
+ (SRAD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(srl G8RC:$rS, GPRC:$rB),
+ (SRD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(shl G8RC:$rS, GPRC:$rB),
+ (SLD G8RC:$rS, GPRC:$rB)>;
+
+// SHL/SRL
+def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
+ (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
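+
+// Worked example of the transforms used here: a constant shl by 5 selects
+// (RLDICR $in, 5, 58) since SHL64 yields 63 - 5, and a constant srl by 5
+// selects (RLDICL $in, 59, 5) since SRL64 yields 64 - 5; both are
+// rotate-and-mask forms that clear the vacated bits.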
+
+// ROTL
+def : Pat<(rotl G8RC:$in, GPRC:$sh),
+ (RLDCL G8RC:$in, GPRC:$sh, 0)>;
+def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 G8RC:$in, tconstpool:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 G8RC:$in, tjumptable:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 G8RC:$in, tblockaddress:$g)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 000000000000..256370fa5f52
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,695 @@
+//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+// Since we canonicalize buildvectors to v16i8, all vnots' "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
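+// V_immneg0 - Matches a build_vector in which every element is -0.0.  It is
+// used below (see VNMSUBFP) to negate a fused multiply-subtract result by
+// subtracting it from negative zero.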
+def V_immneg0 : PatLeaf<(build_vector), [{
+ return PPC::isAllNegativeZeroVector(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", LdStGeneral /*FIXME*/, []>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", LdStGeneral /*FIXME*/, []>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+ "mfvscr $vD", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+ "mtvscr $vB", LdStGeneral,
+ [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+ "lvx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
+def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fsub V_immneg0,
+ (fsub (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
+
+def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
+def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
+def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
+def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
+def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set VRRC:$vD,
+ (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
+def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
+def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
+def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
+def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
+def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
+def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+
+
+def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
+ (vnot_ppc VRRC:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
+def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
+def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
+def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
+def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
+def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
+
+def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
+def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
+def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
+def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
+def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
+def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
+def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
+def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
+def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
+def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
+def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
+def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
+def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
+def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+
+def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
+def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
+def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+
+def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
+ VRRC:$vB)))]>;
+def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
+def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
+def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+
+def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
+def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
+def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
+def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+
+def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+
+def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
+def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
+def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
+def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
+def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
+def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
+def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
+def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+
+
+def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
+def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
+def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
+def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
+def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
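+// Instructions built from VCMPo (the dot-suffixed mnemonics below) are the
+// record forms: besides producing the vector result they also set CR6 from
+// the outcome of the comparison (Defs = [CR6], RC = 1).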
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
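+// V_SET0 materializes the all-zeros vector by xor'ing a register with itself;
+// VXForm_setzero ties all three register operands to the same register.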
+def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
+ (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
+ (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
+ (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
+ (VANDC VRRC:$A, VRRC:$B)>;
+
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+ (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Raphson (used by divide, sqrt).
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 000000000000..b424d1101416
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  The
+/// reference uses the abstract FrameIndex as its base register until it is
+/// resolved, and an additional constant offset can be specified as well.
+///
+static inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
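+// A minimal usage sketch (illustrative only; MBB, MI, DL, TII, DestReg and
+// FrameIdx are assumed to come from the caller):
+//
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(PPC::LWZ), DestReg),
+//                     FrameIdx);
+//
+// With the default mem = true this appends the zero displacement first and
+// then the frame index, matching the "reg, imm(base)" operand order of the
+// PPC fixed-displacement loads and stores.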
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 000000000000..84a15b1ca942
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,907 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{30-31} = xo;
+}
+
+class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> ST;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ string cstr, InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> RT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+ let Constraints = cstr;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = RT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 000000000000..143444fdc22b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,655 @@
+//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCHazardRecognizers.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+}
+
+using namespace llvm;
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*TII);
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
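Editorial note: the comment block above rewrites a zero-rotate rlwimi by swapping its two source operands and replacing mask(MB,ME) with mask((ME+1)&31,(MB-1)&31), i.e. the complement of the original mask. The standalone sketch below checks that identity with plain 32-bit arithmetic; it is an illustration only, not part of the patch, and mask() is a hypothetical helper modelling PowerPC's wrap-around masks in big-endian bit numbering, not an LLVM API.

// Illustration only -- not part of the patch.  Verifies the operand/mask swap
// used by PPCInstrInfo::commuteInstruction for rlwimi with a zero rotate.
#include <cassert>
#include <cstdint>
#include <cstdio>

// PowerPC-style mask: set bits MB..ME in big-endian numbering (bit 0 = MSB),
// wrapping around when MB > ME.  Hypothetical helper for this sketch.
static uint32_t mask(unsigned MB, unsigned ME) {
  uint32_t M = 0;
  for (unsigned i = MB; ; i = (i + 1) & 31) {
    M |= 0x80000000u >> i;                 // big-endian bit i
    if (i == ME) break;
  }
  return M;
}

int main() {
  for (unsigned MB = 0; MB < 32; ++MB)
    for (unsigned ME = 0; ME < 32; ++ME) {
      if (((ME + 1) & 31) == MB) continue; // all-ones mask has no complement form
      uint32_t Op1 = 0x12345678u, Op2 = 0x9ABCDEF0u;
      uint32_t M  = mask(MB, ME);
      uint32_t M2 = mask((ME + 1) & 31, (MB - 1) & 31);
      assert(M2 == ~M);                    // swapped mask is the complement
      assert(((Op1 & ~M) | (Op2 & M)) == ((Op2 & ~M2) | (Op1 & M2)));
    }
  puts("rlwimi commute identity holds");
}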
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC::BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC::Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ DebugLoc DL;
+ if (RC == PPC::GPRCRegisterClass) {
+ if (SrcReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (SrcReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(PPC::X11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ // FIXME (64-bit): Enable
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big that the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+
+ // We need to store the CR in the low 4-bits of the saved value. First,
+ // issue a MFCR to save all of the CRBits.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ .addReg(SrcReg, getKillRegState(isKill)));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are
+ // in CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ // rlwinm scratch, scratch, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(ShiftBits)
+ .addImm(0).addImm(31));
+ }
+
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(ScratchReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+ // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
+ // backend currently only uses CR1EQ as an individual bit, this should
+ // not cause any bug. If we need other uses of CR bits, the following
+ // code may be invalid.
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // We don't have indexed addressing for vector stores. Emit:
+ // R0 = ADDI FI#
+ // STVX VAL, 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
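Editorial note: the CR spill path above copies the whole condition register into a GPR with MFCR and, for any field other than CR0, rotates left by 4*(field number) so the saved field lands in CR0's slot; the reload path later rotates by 32 minus that amount to put it back. The sketch below only illustrates that bit movement under the layout the comments assume (CR0 in the most-significant nibble of the MFCR result); rotl32 and crField are hypothetical helpers, not LLVM or PowerPC APIs.

// Illustration only -- not part of the patch.  Shows why the spill code
// rotates by 4*N to move field CRn into CR0's slot, and by 32-4*N to move
// it back (cf. the RLWINM lines above and in LoadRegFromStackSlot below).
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t rotl32(uint32_t V, unsigned N) {   // hypothetical helper
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// Extract 4-bit field CRn, assuming CR0 occupies the most-significant nibble.
static uint32_t crField(uint32_t CR, unsigned N) {
  return (CR >> (28 - 4 * N)) & 0xF;
}

int main() {
  uint32_t CR = 0x12345678u;                       // arbitrary CR image
  for (unsigned N = 0; N < 8; ++N) {
    uint32_t Spilled = rotl32(CR, 4 * N);          // rlwinm scratch, scratch, 4*N, 0, 31
    assert(crField(Spilled, 0) == crField(CR, N)); // CRn now sits in CR0's slot
    uint32_t Reloaded = rotl32(Spilled, 32 - 4 * N); // rlwinm ..., 32-4*N, 0, 31
    assert(crField(Reloaded, N) == crField(CR, N));  // and is back in place
  }
  puts("CR field rotate round-trips");
}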
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+void
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (RC == PPC::GPRCRegisterClass) {
+ if (DestReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (DestReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big that the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+ // rlwinm scratch, scratch, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+
+ unsigned Reg = 0;
+ if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
+ DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
+ DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
+ DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
+ DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
+ DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
+ DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
+ DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
+ DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // We don't have indexed addressing for vector loads. Emit:
+ // R0 = ADDI FI#
+ // Dest = LVX 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+MachineInstr*
+PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE));
+ addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may occupy. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ case PPC::PROLOG_LABEL:
+ case PPC::EH_LABEL:
+ case PPC::GC_LABEL:
+ case PPC::DBG_VALUE:
+ return 0;
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 000000000000..90bacc96c87e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,149 @@
+//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_INSTRUCTIONINFO_H
+#define POWERPC32_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "PPCRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+} // end namespace PPCII
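Editorial note: the PPCII enums above pack three dispatch-group bits and a 3-bit execution-unit code into each instruction's target-specific flags, with the unit values stored pre-shifted by PPC970_Shift. The snippet below is a small, self-contained sketch of how such a flag word could be composed and decoded using the constants exactly as declared here; the TSFlags name and the sample instruction are illustrative assumptions, not taken from the patch.

// Illustration only -- not part of the patch.  Mirrors the PPCII flag layout
// declared above: three group-formation bits plus a pre-shifted 3-bit unit.
#include <cassert>
#include <cstdint>
#include <cstdio>

enum {
  PPC970_First   = 0x1,
  PPC970_Single  = 0x2,
  PPC970_Cracked = 0x4,
  PPC970_Shift   = 3,
  PPC970_Mask    = 0x07 << PPC970_Shift
};
enum PPC970_Unit {                  // subset of the unit codes listed above
  PPC970_Pseudo = 0 << PPC970_Shift,
  PPC970_FXU    = 1 << PPC970_Shift,
  PPC970_LSU    = 2 << PPC970_Shift
};

int main() {
  // Hypothetical flag word for a cracked load/store-unit instruction.
  uint64_t TSFlags = PPC970_Cracked | PPC970_LSU;

  bool Cracked  = (TSFlags & PPC970_Cracked) != 0;  // group-formation bit
  unsigned Unit = TSFlags & PPC970_Mask;            // unit code, already shifted
  assert(Cracked && Unit == PPC970_LSU);
  printf("cracked=%d unit=%u\n", (int)Cracked, Unit >> PPC970_Shift);
}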
+
+
+class PPCInstrInfo : public PPCGenInstrInfo {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+public:
+ explicit PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// GetInstSize - Return the number of bytes of code the specified
+ /// instruction may occupy. This returns the maximum number of bytes.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 000000000000..773578c6ea81
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1476 @@
+//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+def SDT_PPCnop : SDTypeProfile<0, 0, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// This sequence is used for long double->int conversions. It changes the
+// bits in the FPSCR, which is not modelled.
+def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPOutGlue]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
+ [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+ SDTCisVT<3, f64>]>,
+ [SDNPInGlue]>;
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: get the high 16 bits, adjusted for the sign of the low 16 bits.
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sign-extended correctly.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
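Editorial note: HA16, LO16 and the imm16Shifted* leaves above implement the usual addis/addi split of a 32-bit constant: because addi sign-extends its 16-bit immediate, the high half must be adjusted for the sign of the low half, which is what (Val - (signed short)Val) >> 16 computes. The standalone check below verifies that round trip; ha16/lo16 are hypothetical stand-ins for the SDNodeXForms, not LLVM APIs.

// Illustration only -- not part of the patch.  Checks that the HA16/LO16
// split above reassembles the original constant,
//   V == (ha16(V) << 16) + lo16(V),
// when the low half is treated as a signed 16-bit immediate (as addi does).
#include <cassert>
#include <cstdint>
#include <cstdio>

static int32_t lo16(uint32_t V) { return (int16_t)V; }       // LO16, sign-extended by addi
static int32_t ha16(uint32_t V) {                            // HA16, as in the XForm above
  return (int32_t)(V - (uint32_t)(int16_t)V) >> 16;
}

int main() {
  const uint32_t Samples[] = {0x00000000u, 0x00007FFFu, 0x00008000u,
                              0x12348000u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t V : Samples) {
    uint32_t Rebuilt = ((uint32_t)ha16(V) << 16) + (uint32_t)lo16(V);  // addis + addi
    assert(Rebuilt == V);
  }
  puts("HA16/LO16 round-trip holds");
}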
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
+ let PrintMethod = "printS16X4ImmOperand";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+}
+// Address operands
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
+
+// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+ (ops (i32 20), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
+ [(set GPRC:$result,
+ (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "",
+ []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+ "", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
+ "b${p:cc}lr ${p:reg}", BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins), "", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_Darwin : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_Darwin : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+ }
+}
+
+// SVR4 ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_SVR4 : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_SVR4 : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB,
+ [(PPCcall_SVR4 (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ }
+}
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+ "lfs $rD, $src", LdStLFDU,
+ [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfs $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfd $rD, $addr", LdStLFD,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", LdStLFDU,
+ [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", LdStLFDU,
+ [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStUX,
+ [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStUX,
+ [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2 in {
+def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1a<53, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1a<55, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(store GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+let mayStore = 1 in {
+def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
+ "stwux $rS, $rA, $rB", LdStGeneral,
+ []>;
+}
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStUX,
+ [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStUX,
+ [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStUX,
+ [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "sync", LdStSync,
+ [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+}
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+}
+
+let isReMaterializable = 1 in {
+ def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, immSExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+ []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CARRY] in {
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+}
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+let Uses = [RM] in {
+ def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ }
+}
+
+/// Note that FMR is defined as a pseudo-op on the PPC970 because it is often
+/// coalesced away, and we don't want the dispatch group builder to think that
+/// it will fill a slot (which could cause the load of an LSU reject to sneak
+/// into a d-group with a store).
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. Condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
+// vreg = MCRF CR0
+// MFCR <kill of whatever preg got assigned to vreg>
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Instructions to manipulate FPSCR. Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IntMTFSB0,
+ [(PPCmtfsb0 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IntMTFSB0,
+ [(PPCmtfsb1 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ // MTFSF does not actually produce an FP result. We pretend it copies
+ // input reg B to the output. If we didn't do this it would look like the
+ // instruction had no outputs (because we aren't modelling the FPSCR) and
+ // it would be deleted.
+ def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
+ (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+ "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+ [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
+ F8RC:$rT, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ "mffs $rT", IntMFFS,
+ [(set F8RC:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def FADDrtz: AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_Cracked;
+}
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+let Defs = [CARRY] in {
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+let Uses = [RM] in {
+ def FMADD : AForm_1<63, 29,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMADDS : AForm_1<59, 29,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUB : AForm_1<63, 28,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUBS : AForm_1<59, 28,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FNMADD : AForm_1<63, 31,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMADDS : AForm_1<59, 31,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUB : AForm_1<63, 30,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUBS : AForm_1<59, 30,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+}
+// FSEL is artificially split into 4- and 8-byte forms for the result. To avoid
+// needing four variants, the comparison operand is forced to always be an
+// 8-byte double (code should use an FMRSD if the input comparison value really
+// wants to be a float); only the result and select operands get 4/8 byte forms.
+def FSELD : AForm_1<63, 23,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+let Uses = [RM] in {
+ def FADD : AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ def FADDS : AForm_2<59, 21,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ def FDIV : AForm_2<63, 18,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ def FDIVS : AForm_2<59, 18,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ def FMUL : AForm_3<63, 25,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fmul $FRT, $FRA, $FRB", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ def FMULS : AForm_3<59, 25,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fmuls $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ def FSUB : AForm_2<63, 20,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ def FSUBS : AForm_2<59, 20,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. Rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
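+// For example, the 32-bit constant 0x12345678 is materialized as (register
+// chosen arbitrarily for illustration):
+//   lis r3, 0x1234        // r3 = 0x12340000
+//   ori r3, r3, 0x5678    // r3 = 0x12345678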
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+ (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add GPRC:$in, imm:$imm),
+ (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+ (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+ (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
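+// Note that the add expansion above uses HA16 (the "high-adjusted" half, which
+// compensates for ADDI sign-extending its 16-bit immediate), while the or/xor
+// expansions can use the plain HI16 because ORI/ORIS and XORI/XORIS treat
+// their immediates as zero-extended bit patterns.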
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+ (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
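+// Assuming SHL32 and SRL32 are the usual 31-n and 32-n transforms defined
+// earlier in this file, shl-by-n becomes "rlwinm SH=n, MB=0, ME=31-n" and
+// srl-by-n becomes "rlwinm SH=32-n, MB=n, ME=31": a rotate combined with a
+// mask that clears the bits shifted out.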
+
+// ROTL
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+ (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+ (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
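+// MB and ME extract the begin/end bit positions of the contiguous mask
+// immediate, so a rotate followed by an AND with such a mask folds into a
+// single rlwnm.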
+
+// Calls
+def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
+ (BL_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
+ (BL_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
+ (BL_SVR4 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
+ (BL_SVR4 texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS GPRC:$in, tjumptable:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS GPRC:$in, tblockaddress:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB),
+ (SRAW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(srl GPRC:$rS, GPRC:$rB),
+ (SRW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(shl GPRC:$rS, GPRC:$rB),
+ (SLW GPRC:$rS, GPRC:$rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend F4RC:$src)),
+ (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+
+// Memory barriers
+def : Pat<(membarrier (i32 imm /*ll*/),
+ (i32 imm /*ls*/),
+ (i32 imm /*sl*/),
+ (i32 imm /*ss*/),
+ (i32 imm /*device*/)),
+ (SYNC)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 000000000000..4590f0045641
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,468 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
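+// The BUILD_* macros below assemble raw 32-bit PowerPC instruction words from
+// their opcode and operand fields; EmitBranchToAt uses them to patch stubs and
+// call sites at runtime.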
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
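+// EmitBranchToAt - Overwrite the code at 'At' with a branch (or call) to 'To'.
+// A single b/bl suffices when the target is within the +/-32MB direct-branch
+// range; otherwise the address is materialized in r12 and reached through
+// mtctr/bctr (4 instructions on 32-bit, 7 on 64-bit).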
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !(defined(__ppc64__) || defined(__FreeBSD__))
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC32 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+#ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+#endif
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
+ // Save all int arg registers
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+#ifdef __ELF__
+ "bl PPCCompilationCallbackC\n"
+ "nop\n"
+#else
+ "bl _PPCCompilationCallbackC\n"
+#endif
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
+ // Restore all FP arg registers
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
+
+ // Return the target address; the compilation callback stub above moves it
+ // into CTR and branches to it after restoring all of the saved registers.
+ return Target;
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
+ }
+
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
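+ // Reserve room for the worst-case (7-instruction) branch sequence; it is
+ // filled in by EmitBranchToAt below.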
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: llvm_unreachable("Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
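+ // For example, a final value of 0x10008000 must be split as hi = 0x1001 and
+ // lo = 0x8000, since the sign-extended low half contributes -0x8000.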
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Old, 7*4);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 000000000000..47ead59b587d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,49 @@
+//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual StubLayout getStubLayout();
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
new file mode 100644
index 000000000000..cf73d861fa4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
@@ -0,0 +1,194 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx) {
+ }
+
+ ~PPCMCCodeEmitter() {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+
+ // Output the constant in big endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ OS << (char)(Bits >> 24);
+ Bits <<= 8;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCMCCodeEmitter(MCII, STI, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the ha16 half of the symbol.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_ha16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the lo16 half of the symbol.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo14));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
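+ // The FXM field is an 8-bit mask whose most-significant bit selects CR0, so
+ // CRn is encoded as 0x80 >> n.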
+ return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 000000000000..33af4269a3ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,173 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ MCContext &Ctx = AP.OutContext;
+
+ SmallString<128> Name;
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Name += AP.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+ (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+ isImplicitlyPrivate = true;
+
+ AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+ // If the target flags on the operand change the name of the symbol, do that
+ // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer, bool isDarwin) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ if (MO.getTargetFlags() & PPCII::MO_LO16)
+ RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16;
+ else if (MO.getTargetFlags() & PPCII::MO_HA16)
+ RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16;
+
+ // FIXME: This isn't right, but we don't have a good way to express this in
+ // the MC Level, see below.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
+ RefKind = MCSymbolRefExpr::VK_None;
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
+ // since it is not a symbol!
+ }
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
+ isDarwin);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
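A minimal sketch of the expected call site for the entry point added above, assuming the PPCAsmPrinter::EmitInstruction override and its OutStreamer and Subtarget members found elsewhere in this tree (none of which are part of this file):

// Sketch only -- the surrounding class and members are assumed, not added here.
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  // Lower the MachineInstr to an MCInst, selecting Darwin- or GAS-flavored
  // lo16/ha16 symbol variants as GetSymbolRef above requires.
  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
  OutStreamer.EmitInstruction(TmpInst);
}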
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 000000000000..e2649c8b380f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,132 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and holds
+/// private PowerPC target-specific information for each MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// LRStoreRequired - Indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call? Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
+public:
+ explicit PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ SpillsCR(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+};
+
+} // end of namespace llvm
+
+
+#endif
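Passes and frame lowering code reach this per-function state through MachineFunction::getInfo<PPCFunctionInfo>(). A minimal sketch of how the accessors above are typically used, with recordFramePointerSpill being a hypothetical helper that is not part of this header:

#include "PPCMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;

// Hypothetical helper, shown only to illustrate the accessors declared above.
static void recordFramePointerSpill(MachineFunction &MF, int FPSaveIndex) {
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  FI->setFramePointerSaveIndex(FPSaveIndex);  // remember the FP spill slot
  if (MF.getFrameInfo()->hasCalls())          // calls clobber LR on PowerPC
    FI->setMustSaveLR(true);
}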
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 000000000000..3164e33faae9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
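The 6561 entries are 9^4 shuffle signatures: each of the four result lanes can name one of the four LHS lanes (0-3), one of the four RHS lanes (4-7), or be undef ('u'), i.e. nine choices per lane, with the last lane varying fastest in the table that follows. Below is a minimal sketch of the lookup, with the base-9 index encoding inferred from the entry comments; lookupPerfectShuffle is a hypothetical helper, and the packed per-entry layout of cost and lowering recipe is left to the consuming code rather than restated here.

// Sketch only: PFIndexes[i] is in [0,8]; 0-3 = LHS lane, 4-7 = RHS lane,
// 8 = undef ('u').  Returns the packed entry (cost plus lowering recipe).
static unsigned lookupPerfectShuffle(const unsigned PFIndexes[4]) {
  unsigned PFTableIndex = PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 +
                          PFIndexes[2]*9 + PFIndexes[3];
  return PerfectShuffleTable[PFTableIndex];
}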
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp
new file mode 100644
index 000000000000..12bb0a143406
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp
@@ -0,0 +1,31 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown PPC branch opcode!");
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h
new file mode 100644
index 000000000000..b2c831579f79
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h
@@ -0,0 +1,39 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+#include "PPC.h"
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_ALWAYS = (0 << 5) | 20,
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
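The enum above packs each predicate as "(BI << 5) | BO": the condition-register bit to test (BI) in the upper bits and the branch-options field (BO) in the low five bits. A minimal standalone sketch of unpacking that encoding, with illustrative helper names that are not part of this patch:

    #include <cassert>

    // Assumed helpers mirroring the "(BI << 5) | BO" packing documented above.
    static unsigned predicateBO(unsigned Pred) { return Pred & 0x1F; } // low 5 bits
    static unsigned predicateBI(unsigned Pred) { return Pred >> 5; }   // CR bit index

    int main() {
      const unsigned EQ = (2 << 5) | 12;  // same value as PRED_EQ in the enum
      assert(predicateBO(EQ) == 12);      // BO 12: branch if the CR bit is set
      assert(predicateBI(EQ) == 2);       // BI 2: CR0's EQ bit
      return 0;
    }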
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 000000000000..9c2428b92e65
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,714 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameLowering.h"
+#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
+// FIXME (64-bit): Eventually enable by default.
+namespace llvm {
+cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC32 register scavenger"),
+ cl::Hidden);
+cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC64 register scavenger"),
+ cl::Hidden);
+}
+
+using namespace llvm;
+
+// FIXME (64-bit): Should be inlined.
+bool
+PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
+ return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
+ (EnablePPC64RS && Subtarget.isPPC64()));
+}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
+ }
+}
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ return &PPC::GPRCRegClass;
+}
+
+const unsigned*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ // 32-bit Darwin calling convention.
+ static const unsigned Darwin32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+
+ // 32-bit SVR4 calling convention.
+ static const unsigned SVR4_CalleeSavedRegs[] = {
+ PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
+ // 64-bit Darwin calling convention.
+ static const unsigned Darwin64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR8, 0
+ };
+
+ // 64-bit SVR4 calling convention.
+ static const unsigned SVR4_64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
+
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
+ Darwin32_CalleeSavedRegs;
+
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
+ Reserved.set(PPC::R0);
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ // The SVR4 ABI reserves r2 and r13
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::R2); // System-reserved register
+ Reserved.set(PPC::R13); // Small Data Area pointer register
+ }
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::R2);
+ }
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is overly conservative, as it also prevents allocation of R31
+ // when the FP is not needed.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+ Reserved.set(PPC::R31);
+
+ if (!requiresRegisterScavenging(MF))
+ Reserved.set(PPC::R0); // FIXME (64-bit): Remove
+
+ Reserved.set(PPC::X0);
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+ Reserved.set(PPC::X31);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::X2);
+ }
+ }
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
+ addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(StackReg)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
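When the callee-popped amount does not fit in a signed 16-bit immediate, the else-branch above materializes it with the usual lis/ori pair before adding it to the stack pointer. A small self-contained check of how the two halves recombine; plain C++ with an assumed helper name, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Mirrors the split above: LIS gets CalleeAmt >> 16, ORI ORs in
    // CalleeAmt & 0xFFFF; reassembling the halves yields the original value.
    static int32_t rebuild(int32_t CalleeAmt) {
      int32_t Hi = CalleeAmt >> 16;      // immediate handed to LIS/LIS8
      uint32_t Lo = CalleeAmt & 0xFFFF;  // immediate handed to ORI/ORI8
      return static_cast<int32_t>((static_cast<uint32_t>(Hi) << 16) | Lo);
    }

    int main() {
      assert(rebuild(0x00123456) == 0x00123456);
      assert(rebuild(-700000) == -700000);  // an amount that needs this path
      return 0;
    }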
+
+/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
+ const TargetRegisterClass *RC, int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ // FIXME: move ARM callee-saved reg scan to target independent code, then
+ // search for already spilled CS register here.
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+///   addi R0, SP, \#frameSize          ; get the address of the previous frame
+///   stwux R0, SP, Rnegsize            ; add and update the SP with the negated size
+///   addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
+
+ // FIXME (64-bit): Use "findScratchRegister"
+ unsigned Reg;
+ if (requiresRegisterScavenging(MF))
+ Reg = findScratchRegister(II, RS, RC, SPAdj);
+ else
+ Reg = PPC::R0;
+
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X0, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
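A brief sketch of the choice made above for the "previous frame address" step: frames that fit a signed 16-bit immediate and need no extra alignment use an addi off R31, while everything else reloads the back chain word from 0(SP). The helper below is hypothetical and only restates that condition:

    #include <cassert>
    #include <cstdint>

    // Hypothetical predicate mirroring the branch above: true when
    // "addi Reg, R31, FrameSize" suffices.
    static bool canUseAddiForPrevFrame(uint64_t FrameSize, unsigned MaxAlign,
                                       unsigned TargetAlign) {
      return MaxAlign < TargetAlign && FrameSize <= 32767;  // isInt<16>(FrameSize)
    }

    int main() {
      assert(canUseAddiForPrevFrame(1024, 8, 16));    // small frame: addi path
      assert(!canUseAddiForPrevFrame(40000, 8, 16));  // large frame: load from 0(SP)
      return 0;
    }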
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ bool LP64 = Subtarget.isPPC64();
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ if (SrcReg != PPC::CR0)
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
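The rlwinm above rotates by 4 * getRegisterNumbering(SrcReg) with a full 0..31 mask, i.e. a plain rotate-left that moves the spilled CR field into CR0's position (the top nibble of the mfcr image). A standalone sketch of that arithmetic, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Rotate-left, matching rlwinm rA, rA, N, 0, 31 (the mask keeps all 32 bits).
    static uint32_t rotl32(uint32_t V, unsigned N) {
      N &= 31;
      return N ? (V << N) | (V >> (32 - N)) : V;
    }

    int main() {
      // In the mfcr image, CR field n occupies bits (31-4n)..(28-4n) counting
      // from the LSB, so CR0 is the top nibble. Pretend CR6 holds 0b1010.
      const unsigned CRNum = 6;
      uint32_t CRWord = 0xAu << (28 - 4 * CRNum);    // CR6's nibble
      uint32_t Rotated = rotl32(CRWord, 4 * CRNum);  // shift amount used above
      assert((Rotated >> 28) == 0xA);                // now sits in CR0's slot
      return 0;
    }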
+
+void
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Find out which operand is the frame index.
+ unsigned FIOperandNo = 0;
+ while (!MI.getOperand(FIOperandNo).isFI()) {
+ ++FIOperandNo;
+ assert(FIOperandNo != MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNo-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II, SPAdj, RS);
+ return;
+ }
+
+ // Special case for pseudo-op SPILL_CR.
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ }
+
+ // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ PPC::R31 : PPC::R1,
+ false);
+
+ // Figure out if the offset in the instruction is shifted right two bits. This
+ // is true for instructions like "STD", for which the machine implicitly
+ // appends two low zero bits.
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ isIXAddr = true;
+ break;
+ }
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ // Naked functions have stack size 0, although getStackSize may not reflect that
+ // because we didn't call all the pieces that compute it for naked functions.
+ if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ Offset += MFI->getStackSize();
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into the instruction's immediate field, so scavenge a
+ // register to build the offset in.
+ // FIXME: figure out what SPAdj is doing here.
+
+ // FIXME (64-bit): Use "findScratchRegister".
+ unsigned SReg;
+ if (requiresRegisterScavenging(MF))
+ SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
+ else
+ SReg = PPC::R0;
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ .addReg(SReg, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA, 1:rB, 2:imm ==> add 0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (OpC != TargetOpcode::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+}
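For reference, the legality test above distinguishes ordinary D-form ("ri") displacements from DS-form ("ix") instructions such as LD/STD, whose encoding drops the low two bits of the offset. A minimal sketch with an assumed helper name, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // True when the frame offset folds directly into the instruction, mirroring
    // isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0) above.
    static bool offsetEncodesDirectly(int64_t Offset, bool IsIXAddr) {
      bool Fits16 = Offset >= -32768 && Offset <= 32767;
      return Fits16 && (!IsIXAddr || (Offset & 3) == 0);
    }

    int main() {
      assert(offsetEncodesDirectly(-8, true));      // STD at -8: stored as -8 >> 2 = -2
      assert(!offsetEncodesDirectly(6, true));      // misaligned DS offset
      assert(offsetEncodesDirectly(6, false));      // plain D-form takes any 16-bit value
      assert(!offsetEncodesDirectly(70000, false)); // too large to encode directly
      return 0;
    }

When the test fails, the code builds the offset in a scavenged register with lis/ori and rewrites the instruction to its indexed form via ImmToIdxMap, as shown above.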
+
+unsigned PPCRegisterInfo::getRARegister() const {
+ return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!Subtarget.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
+
+/// DWARFFlavour - Flavour of DWARF register numbers
+///
+namespace DWARFFlavour {
+ enum {
+ PPC64 = 0, PPC32 = 1
+ };
+}
+
+int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably DWARF numbers differ for Linux and Darwin
+ unsigned Flavour = Subtarget.isPPC64() ?
+ DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
+
+ return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, Flavour);
+}
+
+int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably DWARF numbers differ for Linux and Darwin
+ unsigned Flavour = Subtarget.isPPC64() ?
+ DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
+
+ return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 000000000000..33fe5ebcf4cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,78 @@
+//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include <map>
+
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ std::map<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// PPC::F14, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - We require a register scavenger.
+ /// FIXME (64-bit): Should be inlined.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 000000000000..1acdf4eb853b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,326 @@
+//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ field bits<5> Num = SubReg.Num;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ field bits<10> Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ field bits<3> Num = num;
+ let SubRegs = subregs;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+
+// General-purpose registers
+def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
+def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
+def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
+def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
+def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
+def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
+def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
+def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
+def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
+def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
+def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
+def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
+def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
+def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
+def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
+def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
+def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
+def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
+def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
+def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
+def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
+def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
+def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
+def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
+def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
+def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
+def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
+def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
+def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
+def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
+def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
+def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+
+// 64-bit General-purpose registers
+def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
+def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
+def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
+def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
+def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
+def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
+def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
+def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
+def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
+def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
+def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
+def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
+def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
+def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
+def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
+def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
+def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
+def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
+def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
+def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
+def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
+def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
+def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
+def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
+def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
+def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
+def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
+def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
+def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
+def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
+def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
+def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
+def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
+def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
+def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
+def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
+def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
+def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
+def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
+def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
+def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
+def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
+def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
+def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
+def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
+def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
+def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
+def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
+def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
+def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
+def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
+def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
+def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
+def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
+def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
+def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
+def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
+def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
+def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
+def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
+def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
+def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
+def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+
+// Vector registers
+def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
+def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
+def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
+def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
+def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
+def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
+def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
+def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
+def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
+def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
+def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
+def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
+def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
+def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
+def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
+def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
+def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
+def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
+def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
+def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
+def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
+def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
+def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
+def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
+def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
+def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
+def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
+def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
+def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
+def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
+def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
+def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">;
+def CR0GT : CRBIT< 1, "1">;
+def CR0EQ : CRBIT< 2, "2">;
+def CR0UN : CRBIT< 3, "3">;
+def CR1LT : CRBIT< 4, "4">;
+def CR1GT : CRBIT< 5, "5">;
+def CR1EQ : CRBIT< 6, "6">;
+def CR1UN : CRBIT< 7, "7">;
+def CR2LT : CRBIT< 8, "8">;
+def CR2GT : CRBIT< 9, "9">;
+def CR2EQ : CRBIT<10, "10">;
+def CR2UN : CRBIT<11, "11">;
+def CR3LT : CRBIT<12, "12">;
+def CR3GT : CRBIT<13, "13">;
+def CR3EQ : CRBIT<14, "14">;
+def CR3UN : CRBIT<15, "15">;
+def CR4LT : CRBIT<16, "16">;
+def CR4GT : CRBIT<17, "17">;
+def CR4EQ : CRBIT<18, "18">;
+def CR4UN : CRBIT<19, "19">;
+def CR5LT : CRBIT<20, "20">;
+def CR5GT : CRBIT<21, "21">;
+def CR5EQ : CRBIT<22, "22">;
+def CR5UN : CRBIT<23, "23">;
+def CR6LT : CRBIT<24, "24">;
+def CR6GT : CRBIT<25, "25">;
+def CR6EQ : CRBIT<26, "26">;
+def CR6UN : CRBIT<27, "27">;
+def CR7LT : CRBIT<28, "28">;
+def CR7GT : CRBIT<29, "29">;
+def CR7EQ : CRBIT<30, "30">;
+def CR7UN : CRBIT<31, "31">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with
+// most registers; it has to be done in code. To make this work, all
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
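+// (Illustrative only: in TableGen terms this amounts to tagging such
+// instructions with something like "let Uses = [RM]" or "let Defs = [RM]",
+// so the rounding-mode writers stay live because the calls and returns
+// read RM.)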
+// Also, in the architecture it is not really a SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first,
+// then non-volatiles in reverse order since stmw/lmw save from rN to r31.
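+// For example, saving r29, r30 and r31 takes a single "stmw r29, ..." rather
+// than three separate stores, so handing out the non-volatiles downward from
+// r30 keeps the used registers packed up against r31.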
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, LR)>;
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, LR8)>;
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
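+// For example, if F14 is the lowest-numbered non-volatile that gets allocated,
+// it is spilled to offset 8 * (32 - 14) = 144 below the back chain word, and
+// the resulting 144-byte save area is completely filled by F14 through F31.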
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+def CRBITRC : RegisterClass<"PPC", [i32], 32,
+ (add CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN)>
+{
+ let CopyCost = -1;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)> {
+ let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
+}
+
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 000000000000..a33e7e03370c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC32RELOCATIONS_H
+#define PPC32RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+      // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+      // instructions, which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
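+
+    // Illustrative note: materializing a 32-bit global address typically
+    // pairs an addis fixed up with reloc_absolute_high with an addi (or a
+    // load/store) fixed up with reloc_absolute_low, so the two 16-bit halves
+    // of the address are patched in separately at emission time.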
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 000000000000..9664f1457171
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,505 @@
+//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chip sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // special register unit
+def IU1 : FuncUnit; // integer unit 1 (simple)
+def IU2 : FuncUnit; // integer unit 2 (complex)
+def FPU1 : FuncUnit; // floating point unit 1
+def FPU2 : FuncUnit; // floating point unit 2
+def VPU : FuncUnit; // vector permutation unit
+def VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def VFPU : FuncUnit; // vector floating point unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStGeneral : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStUX : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When adding new opcodes to the
+// supported set, refer to the following table to determine which itinerary
+// class the opcode belongs to.
+//
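+// For example, a new rotate-with-mask-insert style opcode would be listed
+// here under IntRotate, and it then picks up a per-processor latency from the
+// InstrItinData<IntRotate, ...> entries in the included PPCScheduleG*.td
+// files.
+//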
+// opcode itinerary class
+// ====== ===============
+// add IntGeneral
+// addc IntGeneral
+// adde IntGeneral
+// addi IntGeneral
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntGeneral
+// addme IntGeneral
+// addze IntGeneral
+// and IntGeneral
+// andc IntGeneral
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStGeneral
+// dcbtst LdStGeneral
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStGeneral
+// ecowx LdStGeneral
+// eieio LdStGeneral
+// eqv IntGeneral
+// extsb IntGeneral
+// extsh IntGeneral
+// extsw IntRotateD
+// fabs FPGeneral
+// fadd FPGeneral
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPGeneral
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStGeneral
+// lbzu LdStGeneral
+// lbzux LdStUX
+// lbzx LdStGeneral
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLD
+// ldux LdStLD
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFDU
+// lfs LdStLFDU
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFDU
+// lha LdStLHA
+// lhau LdStLHA
+// lhaux LdStLHA
+// lhax LdStLHA
+// lhbrx LdStGeneral
+// lhz LdStGeneral
+// lhzu LdStGeneral
+// lhzux LdStUX
+// lhzx LdStGeneral
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHA
+// lwax LdStLHA
+// lwbrx LdStGeneral
+// lwz LdStGeneral
+// lwzu LdStGeneral
+// lwzux LdStUX
+// lwzx LdStGeneral
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntGeneral
+// neg IntGeneral
+// nor IntGeneral
+// or IntGeneral
+// orc IntGeneral
+// ori IntGeneral
+// oris IntGeneral
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateD
+// rldicl IntRotateD
+// rldicr IntRotateD
+// rldimi IntRotateD
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateD
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStGeneral
+// stbu LdStGeneral
+// stbux LdStGeneral
+// stbx LdStGeneral
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTD
+// stdux LdStSTD
+// stdx LdStSTD
+// stfd LdStUX
+// stfdu LdStUX
+// stfdux LdStUX
+// stfdx LdStUX
+// stfiwx LdStUX
+// stfs LdStUX
+// stfsu LdStUX
+// stfsux LdStUX
+// stfsx LdStUX
+// sth LdStGeneral
+// sthbrx LdStGeneral
+// sthu LdStGeneral
+// sthux LdStGeneral
+// sthx LdStGeneral
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStGeneral
+// stwbrx LdStGeneral
+// stwcx. LdStSTWCX
+// stwu LdStGeneral
+// stwux LdStGeneral
+// stwx LdStGeneral
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntGeneral
+// xori IntGeneral
+// xoris IntGeneral
+//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 000000000000..ad4da1fe224f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,64 @@
+//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
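+// Each InstrItinData entry below binds one itinerary class to a single
+// InstrStage; for example, InstrStage<19, [IU1]> means the instruction
+// occupies integer unit 1 for 19 cycles (the divw latency used here).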
+def G3Itineraries : ProcessorItineraries<
+ [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 000000000000..03c3b29cc101
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,74 @@
+//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 000000000000..00cac3c7cab2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,80 @@
+//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def IU3 : FuncUnit; // integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+
+def G4PlusItineraries : ProcessorItineraries<
+ [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 000000000000..1671f22b30ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,84 @@
+//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..d4258b4a0eb1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+#include "PPCTargetMachine.h"
+using namespace llvm;
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
new file mode 100644
index 000000000000..341b69cdfb5f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSELECTIONDAGINFO_H
+#define POWERPCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PPCTargetMachine;
+
+class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ ~PPCSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 000000000000..5ea9b0f6596c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,141 @@
+//===- PPCSubtarget.cpp - PPC Subtarget Information ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include <cstdlib>
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+
+/// GetCurrentPowerPCCPU - Returns the name of the current CPU.
+static const char *GetCurrentPowerPCCPU() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PPCGenSubtargetInfo(TT, CPU, FS)
+ , StackAlignment(16)
+ , DarwinDirective(PPC::DIR_NONE)
+ , IsGigaProcessor(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasFSQRT(false)
+ , HasSTFIWX(false)
+ , HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
+ , TargetTriple(TT) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+#if defined(__APPLE__)
+ if (CPUName == "generic")
+ CPUName = GetCurrentPowerPCCPU();
+#endif
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ }
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Set up darwin-specific properties.
+ if (isDarwin())
+ HasLazyResolverStubs = true;
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+  // JIT mode doesn't want lazy resolver stubs; it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 000000000000..e028de6b09de
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,151 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+class StringRef;
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_64
+ };
+}
+
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public PPCGenSubtargetInfo {
+protected:
+  /// StackAlignment - The minimum alignment known to hold for the stack frame
+  /// on entry to the function, and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+  /// Used by the ISel to turn on optimizations for POWER4-derived architectures.
+ bool IsGigaProcessor;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasFSQRT;
+ bool HasSTFIWX;
+ bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+  /// getStackAlignment - Returns the minimum alignment known to hold for the
+  /// stack frame on entry to the function, and which must be maintained by
+  /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+  /// getInstrItineraryData - Return the instruction itineraries based on the
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
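+    // As a rough guide to these strings: "E" marks big-endian, "p:64:64" a
+    // 64-bit pointer with 64-bit alignment, entries such as "f64:64:64" a
+    // type's ABI and preferred alignments, and "n32"/"n32:64" the native
+    // integer widths.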
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
+ : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
+ }
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+  /// registers in 32-bit mode when possible. This can only be true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool isGigaProcessor() const { return IsGigaProcessor; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
+
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 000000000000..e0ea5adba751
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,135 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin())
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+
+ return NULL;
+}
+
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer);
+}
+
+
+PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, is64Bit),
+ DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ FrameLowering(Subtarget), JITInfo(*this, is64Bit),
+ TLInfo(*this), TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isDarwin())
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+}
+
+/// Override this for PowerPC. Tail merging happily breaks up instruction issue
+/// groups, which typically degrades performance.
+bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : PPCTargetMachine(T, TT, CPU, FS, false) {
+}
+
+
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : PPCTargetMachine(T, TT, CPU, FS, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPPCISelDag(*this));
+ return false;
+}
+
+bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
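+    // (Roughly five instructions per arbitrary 64-bit absolute address when
+    // built from immediates: lis/ori for the high half, a shift, then
+    // oris/ori for the low half; PIC code can load the address from memory
+    // instead.)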
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 000000000000..baf07e3498f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,94 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameLowering.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameLowering FrameLowering;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+
+public:
+ PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const PPCTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+ virtual bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+ PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+ PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 000000000000..ad607d0ade6a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+}
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 000000000000..dab35e5e4e6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,323 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
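+//
+// For example, an arithmetic instruction that immediately precedes a branch
+// and has no dependence on it can usually be moved into the branch's delay
+// slot; when no safe candidate is found, the slot is filled with a NOP.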
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-sparc-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the Sparc delay slot filler."),
+ cl::Hidden);
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ MachineBasicBlock::iterator
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+ bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// We assume there is only one delay slot per delayed instruction.
+///
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB, I);
+
+ ++FilledSlots;
+ Changed = true;
+
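+ // If no suitable instruction was found (or the filler is disabled),
+ // fill the delay slot with a NOP; otherwise move the chosen
+ // instruction into it.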
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ unsigned structSize = 0;
+ if (needsUnimp(I, structSize)) {
+ MachineBasicBlock::iterator J = I;
+ ++J; // Skip the delay-slot filler.
+ BuildMI(MBB, ++J, I->getDebugLoc(),
+ TII->get(SP::UNIMP)).addImm(structSize);
+ }
+ }
+ return Changed;
+}
+
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot)
+{
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ MachineBasicBlock::iterator I = slot;
+
+ if (slot->getOpcode() == SP::RET)
+ return MBB.end();
+
+ if (slot->getOpcode() == SP::RETL) {
+ --I;
+ if (I->getOpcode() != SP::RESTORErr)
+ return MBB.end();
+ // Change the RETL into a RET; the RESTORE becomes the delay-slot filler.
+ slot->setDesc(TII->get(SP::RET));
+ return I;
+ }
+
+ // A call's delay-slot filler may define some of the call's register uses.
+ if (slot->getDesc().isCall())
+ insertCallUses(slot, RegUses);
+ else
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool done = false;
+
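+ // Scan backwards from the delay-slotted instruction, looking for an
+ // instruction that can safely be moved into the slot.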
+ while (!done) {
+ done = (I == MBB.begin());
+
+ if (!done)
+ --I;
+
+ // Skip debug values.
+ if (I->isDebugValue())
+ continue;
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || I->getDesc().hasDelaySlot()
+ || isDelayFiller(MBB, I))
+ break;
+
+ if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(I, RegDefs, RegUses);
+ continue;
+ }
+
+ return I;
+ }
+ return MBB.end();
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses)
+{
+
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
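+ // Be conservative about memory: a candidate load may not move past a
+ // store between it and the slot, and a candidate store may not move
+ // past any other memory operation.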
+ if (candidate->getDesc().mayLoad()) {
+ sawLoad = true;
+ if (sawStore)
+ return true;
+ }
+
+ if (candidate->getDesc().mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ if (!MO.isReg())
+ continue; // skip
+
+ unsigned Reg = MO.getReg();
+
+ if (MO.isDef()) {
+ // Check whether Reg is defined or used before the delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ // Check whether Reg is defined before the delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses)
+{
+
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown opcode.");
+ case SP::CALL: break;
+ case SP::JMPLrr:
+ case SP::JMPLri:
+ assert(MI->getNumOperands() >= 2);
+ const MachineOperand &Reg = MI->getOperand(0);
+ assert(Reg.isReg() && "JMPL first operand is not a register.");
+ assert(Reg.isUse() && "JMPL first operand is not a use.");
+ RegUses.insert(Reg.getReg());
+
+ const MachineOperand &RegOrImm = MI->getOperand(1);
+ if (RegOrImm.isImm())
+ break;
+ assert(RegOrImm.isReg() && "JMPLrr second operand is not a register.");
+ assert(RegOrImm.isUse() && "JMPLrr second operand is not a use.");
+ RegUses.insert(RegOrImm.getReg());
+ break;
+ }
+}
+
+ // Insert the defs and uses of MI into the RegDefs and RegUses sets.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses)
+{
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ if (MO.isUse())
+ RegUses.insert(Reg);
+
+ }
+}
+
+ // Return true if Reg or any of its aliases is in RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+ if (RegSet.count(Reg))
+ return true;
+ // Check aliased registers.
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++ Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
+
+ // Return true if the candidate is a delay-slot filler.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate)
+{
+ if (candidate == MBB.begin())
+ return false;
+ if (candidate->getOpcode() == SP::UNIMP)
+ return true;
+ const MCInstrDesc &prevdesc = (--candidate)->getDesc();
+ return prevdesc.hasDelaySlot();
+}
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+ if (!I->getDesc().isCall())
+ return false;
+
+ unsigned structSizeOpNum = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unknown call opcode.");
+ case SP::CALL: structSizeOpNum = 1; break;
+ case SP::JMPLrr:
+ case SP::JMPLri: structSizeOpNum = 2; break;
+ }
+
+ const MachineOperand &MO = I->getOperand(structSizeOpNum);
+ if (!MO.isImm())
+ return false;
+ StructSize = MO.getImm();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/FPMover.cpp b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 000000000000..1423b1e64d66
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,141 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ explicit FPMover(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const unsigned EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const unsigned OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const unsigned DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ llvm_unreachable("Can't find reg");
+}
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ DebugLoc dl = MI->getDebugLoc();
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
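+ // Rewrite the double-precision pseudo into its single-precision
+ // counterpart operating on the even halves of the register pair; a
+ // plain FMOVS for the odd halves is inserted below when needed.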
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setDesc(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setDesc(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setDesc(TII->get(SP::FABSS));
+ else
+ llvm_unreachable("Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DEBUG(errs() << "FPMover: the modified instr is: " << *MI);
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DEBUG(errs() << "FPMover: the inserted instr is: " << *MI);
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..1e8c02979887
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMSparcDesc
+ SparcMCTargetDesc.cpp
+ SparcMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..abcbe2da18ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Sparc/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
new file mode 100644
index 000000000000..6a7e0902354e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::sparcv9)
+ PointerSize = 8;
+
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ PrivateGlobalPrefix = ".L";
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
new file mode 100644
index 000000000000..0cb6827d2771
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct SparcELFMCAsmInfo : public MCAsmInfo {
+ explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
new file mode 100644
index 000000000000..cb92a2bfd417
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCTargetDesc.h"
+#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SparcGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSparcMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSparcMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSparcMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSparcMCAsmInfo() {
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
new file mode 100644
index 000000000000..2fd9e3f4cbd3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCMCTARGETDESC_H
+#define SPARCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSparcTarget;
+extern Target TheSparcV9Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SparcGenRegisterInfo.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SparcGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h
new file mode 100644
index 000000000000..7b2c6141dbf8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,109 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+ class FunctionPass;
+ class SparcTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+
+} // end namespace llvm
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Greater or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ }
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
new file mode 100644
index 000000000000..764336665d0b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,72 @@
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
new file mode 100644
index 000000000000..edde8427aa89
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -0,0 +1,251 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SparcAsmPrinter : public AsmPrinter {
+ public:
+ explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+
+ virtual void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+ !MO.isReg() && !MO.isImm()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isReg() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobal() ||
+ MI->getOperand(opNum+1).isCPI()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1, O);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1, O);
+ }
+}
+
+bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
+ raw_ostream &O) {
+ std::string operand = "";
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ default: assert(0 && "Operand is not a register ");
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Operand is not a physical register ");
+ assert(MO.getReg() != SP::O7 &&
+ "%o7 is assigned as destination for getpcx!");
+ operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+ break;
+ }
+
+ unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
+ unsigned bbNum = MI->getParent()->getNumber();
+
+ O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
+
+ O << "\t sethi\t"
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n' ;
+
+ O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
+ O << "\tor\t" << operand
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n';
+ O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
+
+ return true;
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+ RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 000000000000..856f87ad1d37
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,36 @@
+//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+ // Custom-assign SRet to [%sp+64].
+ CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+ // i32 and f32 arguments are passed in integer registers if there is space.
+ CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // f64 arguments are split and passed through registers or through stack.
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
+ // Otherwise, arguments are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 000000000000..320c8ca26d7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,80 @@
+//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 000000000000..9a2ddc83f5aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,41 @@
+//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+ const SparcSubtarget &STI;
+public:
+ explicit SparcFrameLowering(const SparcSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 000000000000..8c6103dd8a39
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,212 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+ SparcTargetMachine& TM;
+public:
+ explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
+ : SelectionDAGISel(tm),
+ Subtarget(tm.getSubtarget<SparcSubtarget>()),
+ TM(tm) {
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+
+private:
+ SDNode* getGlobalBaseReg();
+};
+} // end anonymous namespace
+
+SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
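+/// SelectADDRri - Match an address in the reg+simm13 form used by SPARC
+/// loads and stores: a frame index, a base register plus a 13-bit signed
+/// immediate, or a base register plus the %lo() part of an address.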
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (isInt<13>(CN->getSExtValue())) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
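+/// SelectADDRrr - Match a reg+reg address. Addresses that fit the reg+imm
+/// form are rejected here so that SelectADDRri can handle them.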
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case SPISD::GLOBAL_BASE_REG:
+ return getGlobalBaseReg();
+
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDValue DivLHS = N->getOperand(0);
+ SDValue DivRHS = N->getOperand(1);
+
+ // Set the Y register to the high-part.
+ SDValue TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDValue MulLHS = N->getOperand(0);
+ SDValue MulRHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectADDRrr(Op, Op0, Op1))
+ SelectADDRri(Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 000000000000..6f30d3fd6c35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1273 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ assert (ArgFlags.isSRet());
+
+ // Assign the SRet argument.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ 0,
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const unsigned RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ } else {
+ // Assign the whole value on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8,4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 6))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4,4),
+ LocVT, LocInfo));
+ return true;
+}
+
+#include "SparcGenCallingConv.inc"
+
+SDValue
+SparcTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (MF.getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ unsigned RetAddrOffset = 8; // Call Inst + Delay Slot
+ // If the function returns a struct, copy the SRetReturnReg to I0
+ if (MF.getFunction()->hasStructRetAttr()) {
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+ Flag = Chain.getValue(1);
+ if (MF.getRegInfo().liveout_empty())
+ MF.getRegInfo().addLiveOut(SP::I0);
+ RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
+ }
+
+ SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+
+ if (Flag.getNode())
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode, Flag);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode);
+}
+
+/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values. TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue
+SparcTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
+
+ const unsigned StackOffset = 92;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (i == 0 && Ins[i].Flags.isSRet()) {
+ // Get the SRet argument from [%fp+64].
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+ SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+ assert(i+1 < e);
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ SDValue LoVal;
+ if (NextVA.isMemLoc()) {
+ int FrameIdx = MF.getFrameInfo()->
+ CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+ &SP::IntRegsRegClass);
+ LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
+ }
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ if (VA.getLocVT() == MVT::f32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+ else if (VA.getLocVT() != MVT::i32) {
+ Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+ DAG.getValueType(VA.getLocVT()));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+ }
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::f64);
+ // If it is double-word aligned, just load.
+ if (Offset % 8 == 0) {
+ int FI = MF.getFrameInfo()->CreateFixedObject(8,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false,false, 0);
+ InVals.push_back(Load);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset+4,
+ true);
+ SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+ SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load;
+ if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+ Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ MachinePointerInfo(),
+ VA.getValVT(), false, false,0);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
+ }
+ InVals.push_back(Load);
+ }
+
+ if (MF.getFunction()->hasStructRetAttr()) {
+    // Copy the SRet argument to SRetReturnReg.
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+ SFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (isVarArg) {
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+ const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
+ if (NumAllocated == 6)
+ ArgOffset += StackOffset;
+ else {
+ assert(!ArgOffset);
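+      // Register arguments have home slots starting at %fp+68 (just past the
+      // 64-byte window save area and the 4-byte struct-return slot).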
+ ArgOffset = 68+4*NumAllocated;
+ }
+
+ // Remember the vararg offset for the va_start implementation.
+ FuncInfo->setVarArgsFrameOffset(ArgOffset);
+
+ std::vector<SDValue> OutChains;
+
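+    // Spill any argument registers that were not claimed by fixed arguments
+    // into their reserved slots, so va_arg can read every variadic argument
+    // from memory.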
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
+ SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += 4;
+ }
+
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Sparc target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+  // Create local copies for byval args.
+ SmallVector<SDValue, 8> ByValArgs;
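+  // Each byval argument is copied into a fresh stack object via memcpy, and
+  // the address of that copy is what actually gets passed below.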
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Arg = OutVals[i];
+ unsigned Size = Flags.getByValSize();
+ unsigned Align = Flags.getByValAlign();
+
+ int FI = MFI->CreateStackObject(Size, Align, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+ false, //isVolatile,
+ (Size <= 32), //AlwaysInline if size <= 32
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(FIPtr);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ const unsigned StackOffset = 92;
+ bool hasStructRetAttr = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+    // Use the local copy if it is a byval arg.
+ if (Flags.isByVal())
+ Arg = ByValArgs[byvalArgIdx++];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (Flags.isSRet()) {
+ assert(VA.needsCustom());
+      // Store the SRet argument at [%sp+64].
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(64);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ hasStructRetAttr = true;
+ continue;
+ }
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
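+      // f64 arguments are passed as two i32 words. Unless the value goes to
+      // a naturally aligned stack slot, spill it to a temporary and reload
+      // the two halves so each can be placed independently.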
+
+ if (VA.isMemLoc()) {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+        // If it is double-word aligned, just store.
+ if (Offset % 8 == 0) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ continue;
+ }
+ }
+
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Arg, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ // Sparc is big-endian, so the high part comes first.
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Increment the pointer to the other half.
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(4));
+ // Load the low part.
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ assert(i+1 != e);
+ CCValAssign &NextVA = ArgLocs[++i];
+ if (NextVA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ } else {
+          // Store the low part on the stack.
+ unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ } else {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ // Store the high part.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ // Store the low part.
+ PtrOff = DAG.getIntPtrConstant(Offset+4);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ continue;
+ }
+
+    // Arguments that are passed in registers must be kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ if (VA.getLocVT() != MVT::f32) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+
+  // Emit all stores, making sure they occur before any copies into physregs.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ // Remap I0->I7 -> O0->O7.
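+    // The calling convention hands out the callee's incoming %i registers;
+    // before the callee's SAVE shifts the register window, the caller sees
+    // those same locations as its %o registers.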
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+  unsigned SRetArgSize = hasStructRetAttr ? getSRetArgSize(DAG, Callee) : 0;
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // Returns a chain & a flag for retval copy to use
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
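+  // For struct-returning calls the SPARC ABI expects an UNIMP instruction
+  // after the call's delay slot whose immediate encodes the size of the
+  // returned struct, so the size is carried as an operand of the call node.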
+ if (hasStructRetAttr)
+ Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ unsigned Reg = RVLocs[i].getLocReg();
+
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+unsigned
+SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
+{
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ CalleeFn = M->getFunction(E->getSymbol());
+ }
+
+ if (!CalleeFn)
+ return 0;
+
+ assert(CalleeFn->hasStructRetAttr() &&
+ "Callee does not have the StructRet attribute.");
+
+ const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ const Type *ElementTy = Ty->getElementType();
+ return getTargetData()->getTypeAllocSize(ElementTy);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Sparc doesn't have i1 sign extending load
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // SPARC has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // FIXME: Sparc provides these multiplies, but we don't have them yet.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ // No debug info support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+ setMinFunctionAlignment(2);
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+ case SPISD::FLUSHW: return "SPISD::FLUSHW";
+ }
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+/// Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets. Op is expected to be a target-specific node.
+/// Used by the DAG combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2, KnownOne2;
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
+ Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
+// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
+ CC == ISD::SETNE &&
+ ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
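+  // Materialize the address as a %hi/%lo pair (SETHI plus the low 10 bits).
+  // Under PIC the pair is an offset from the global base register, and the
+  // actual address is loaded from the slot it selects.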
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Glue);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<EVT> VTs;
+ VTs.push_back(LHS.getValueType()); // subcc returns a value
+ VTs.push_back(MVT::Glue);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ Opc = SPISD::SELECT_ICC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Offset =
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
+ MVT::i32));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+ MachinePointerInfo(SV), false, false, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ MVT::i32));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ VAListPtr, MachinePointerInfo(SV), false, false, 0);
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64)
+ return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
+
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
+
+ // Bit-Convert the value to f64.
+ SDValue Ops[2] = {
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
+ V.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0); // Legalize the chain.
+ SDValue Size = Op.getOperand(1); // Legalize the size.
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+
+  // The resultant pointer sits 96 bytes above the new stack pointer, past
+  // the reserved area (the 16-word register window spill area plus the
+  // struct-return and outgoing-argument slots, rounded up for 8-byte
+  // alignment).
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ SDValue Ops[2] = { NewVal, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = DAG.getNode(SPISD::FLUSHW,
+ dl, MVT::Other, DAG.getEntryNode());
+ return Chain;
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned FrameReg = SP::I6;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue FrameAddr;
+ if (depth == 0)
+ FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ else {
+    // Flush first so the windowed registers' values are on the stack.
+ SDValue Chain = getFLUSHW(Op, DAG);
+ FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
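+    // A frame pointer is the caller's %sp; the caller's own %fp (%i6) is
+    // spilled at offset 56 of that frame's 16-word window save area
+    // (%l0-%l7 at 0-28, %i0-%i7 at 32-60), so chase that slot 'depth' times.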
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ FrameAddr, DAG.getIntPtrConstant(56));
+ FrameAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned RetReg = SP::I7;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue RetAddr;
+ if (depth == 0)
+ RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+ else {
+    // Flush first so the windowed registers' values are on the stack.
+ SDValue Chain = getFLUSHW(Op, DAG);
+ RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+
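+    // Walk up 'depth' frames through the saved %fp slots (offset 56 of each
+    // window save area) and read the saved return address (%i7) from offset
+    // 60 of the final frame.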
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ RetAddr,
+ DAG.getIntPtrConstant((i == depth-1)?60:56));
+ RetAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return RetAddr;
+}
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ DebugLoc dl = MI->getDebugLoc();
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, SP::IntRegsRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Sparc target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 000000000000..8a1886a856e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,103 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Sparc.h"
+
+namespace llvm {
+ namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ GLOBAL_BASE_REG, // Global base reg for PIC
+ FLUSHW // FLUSH register windows to stack
+ };
+ }
+
+ class SparcTargetLowering : public TargetLowering {
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+ };
+} // end namespace llvm
+
+#endif // SPARC_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 000000000000..6535259e16ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,114 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : F2<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 000000000000..4e3ddf839985
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,346 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "Sparc.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SparcGenInstrInfo.inc"
+
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
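+// The integer condition codes occupy the low end of SPCC::CondCodes, with
+// ICC_VC as the largest; the FP conditions are numbered above it.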
+static bool IsIntegerCC(unsigned CC)
+{
+ return (CC <= SPCC::ICC_VC);
+}
+
+
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
+{
+ switch(CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return SPCC::ICC_E;
+ case SPCC::ICC_E: return SPCC::ICC_NE;
+ case SPCC::ICC_G: return SPCC::ICC_LE;
+ case SPCC::ICC_LE: return SPCC::ICC_G;
+ case SPCC::ICC_GE: return SPCC::ICC_L;
+ case SPCC::ICC_L: return SPCC::ICC_GE;
+ case SPCC::ICC_GU: return SPCC::ICC_LEU;
+ case SPCC::ICC_LEU: return SPCC::ICC_GU;
+ case SPCC::ICC_CC: return SPCC::ICC_CS;
+ case SPCC::ICC_CS: return SPCC::ICC_CC;
+ case SPCC::ICC_POS: return SPCC::ICC_NEG;
+ case SPCC::ICC_NEG: return SPCC::ICC_POS;
+ case SPCC::ICC_VC: return SPCC::ICC_VS;
+ case SPCC::ICC_VS: return SPCC::ICC_VC;
+
+ case SPCC::FCC_U: return SPCC::FCC_O;
+ case SPCC::FCC_O: return SPCC::FCC_U;
+ case SPCC::FCC_G: return SPCC::FCC_LE;
+ case SPCC::FCC_LE: return SPCC::FCC_G;
+ case SPCC::FCC_UG: return SPCC::FCC_ULE;
+ case SPCC::FCC_ULE: return SPCC::FCC_UG;
+ case SPCC::FCC_L: return SPCC::FCC_GE;
+ case SPCC::FCC_GE: return SPCC::FCC_L;
+ case SPCC::FCC_UL: return SPCC::FCC_UGE;
+ case SPCC::FCC_UGE: return SPCC::FCC_UL;
+ case SPCC::FCC_LG: return SPCC::FCC_UE;
+ case SPCC::FCC_UE: return SPCC::FCC_LG;
+ case SPCC::FCC_NE: return SPCC::FCC_E;
+ case SPCC::FCC_E: return SPCC::FCC_NE;
+ }
+}
+
+
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+    // When we see a non-terminator, we are done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+    // Terminator is not a branch.
+ if (!I->getDesc().isBranch())
+ return true;
+
+    // Handle unconditional branches.
+ if (I->getOpcode() == SP::BA) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ unsigned Opcode = I->getOpcode();
+ if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+      return true; // Unknown opcode.
+
+ SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+
+        // Transform the code
+ //
+ // brCC L1
+ // ba L2
+ // L1:
+ // ..
+ // L2:
+ //
+ // into
+ //
+ // brnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+ .addMBB(TargetBB);
+ MBB.addSuccessor(TargetBB);
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+    // FIXME: Handle subsequent conditional branches.
+    // For now, we can't handle multiple conditional branches.
+ return true;
+ }
+ return false;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Sparc branch conditions should have one component!");
+
+ if (Cond.empty()) {
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+ return 1;
+ }
+
+  // Conditional branch.
+ unsigned CC = Cond[0].getImm();
+
+ if (IsIntegerCC(CC))
+ BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+ else
+ BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+ return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ if (I->getOpcode() != SP::BA
+ && I->getOpcode() != SP::BCOND
+ && I->getOpcode() != SP::FBCOND)
+ break; // Not a branch
+
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
+}
+
+void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+}
+
+unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
+{
+ SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
+ unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+
+ DebugLoc dl;
+
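+  // GETPCX is a pseudo that is expanded later into the code sequence that
+  // materializes the PIC base address into GlobalBaseReg.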
+ BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg);
+ SparcFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 000000000000..eda64efb7a03
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,100 @@
+//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SparcRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SparcGenInstrInfo.inc"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public SparcGenInstrInfo {
+ const SparcRegisterInfo RI;
+ const SparcSubtarget& Subtarget;
+public:
+ explicit SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const ;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 000000000000..cf5c48fd18d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,825 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
+
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
+
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+ (unsigned)N->getZExtValue();
+}], HI22>;
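LO10 keeps the low 10 bits of an immediate, HI22 the remaining upper bits, and SETHIimm only matches values whose low 10 bits are zero. A standalone check of that bit arithmetic, independent of the SelectionDAG types used above:

    #include <cassert>
    #include <cstdint>

    uint32_t lo10(uint32_t V) { return V & 1023; }   // mirrors the LO10 transform
    uint32_t hi22(uint32_t V) { return V >> 10; }    // mirrors the HI22 transform

    // SETHIimm-style test: true when the value survives dropping the low 10 bits.
    bool fitsSethi(uint32_t V) { return ((V >> 10) << 10) == V; }

    int main() {
      uint32_t V = 0xdeadbeef;
      // sethi places hi22 into bits 31..10; or-ing in lo10 rebuilds the constant.
      assert(((hi22(V) << 10) | lo10(V)) == V);
      assert(fitsSethi(0xfffffc00));   // low 10 bits clear: a single SETHI suffices
      assert(!fitsSethi(V));           // otherwise an OR with %lo is also needed
      return 0;
    }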
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
+ [SDNPHasChain]>;
+
+def getPCX : Operand<i32> {
+ let PrintMethod = "printGetPCX";
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the CCOp::CondCode enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC  : ICC_VAL<13>;  // Carry Clear/Greater or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE : FCC_VAL<24>; // Unordered or Equal
+def FCC_GE : FCC_VAL<25>; // Greater or Equal
+def FCC_UGE : FCC_VAL<26>; // Unordered or Greater or Equal
+def FCC_LE : FCC_VAL<27>; // Less or Equal
+def FCC_ULE : FCC_VAL<28>; // Unordered or Less or Equal
+def FCC_O : FCC_VAL<29>; // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 instruction pair in one
+/// shot, with no selection pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern>;
+
+// GETPCX for PIC
+let Defs = [O7] in {
+ def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
+}
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let hasSideEffects = 1, mayStore = 1 in {
+ let rd = 0, rs1 = 0, rs2 = 0 in
+ def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+ "flushw",
+ [(flushw)]>, Requires<[HasV9]>;
+ let rd = 0, rs1 = 1, simm13 = 3 in
+ def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+ "ta 3",
+ [(flushw)]>;
+}
+
+def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
+// instruction selection into a branch sequence.  This has to handle all
+// permutations of selection between i32/f32/f64 on ICC and FCC.  The pseudos
+// below are expanded by the target's custom inserter after selection.
+let Uses = [ICC], usesCustomInserter = 1 in {
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
+ let rd = O7.Num, rs1 = G0.Num in
+ def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %o7+$val", [(retflag simm13:$val)]>;
+
+ let rd = I7.Num, rs1 = G0.Num in
+ def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %i7+$val", []>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (outs), (ins MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (outs), (ins MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (outs), (ins MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (outs), (ins MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (outs IntRegs:$dst), (ins i32imm:$src),
+ "sethi $src, $dst",
+ [(set IntRegs:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set IntRegs:$dst, ADDRri:$addr)]>;
+
+let Defs = [ICC] in
+ defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+
+let Uses = [ICC] in
+ defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+let Uses = [ICC] in
+ defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+
+let Defs = [ICC] in
+ defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+let Uses = [ICC], Defs = [ICC] in
+ def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+ defm UMUL : F3_12np<"umul", 0b001010>;
+ defm SMUL : F3_12 <"smul", 0b001011, mul>;
+}
+
+// Section B.19 - Divide Instructions, p. 115
+let Defs = [Y] in {
+ defm UDIV : F3_12np<"udiv", 0b001110>;
+ defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ins brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [ICC] in
+ def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [FCC] in
+ def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O6],
+ hasDelaySlot = 1, isCall = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ ICC, FCC, Y] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (outs), (ins MEMrr:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
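CALL is the only Format 1 instruction: op = 1 in the top two bits and a 30-bit word displacement below it, so the target is PC + 4*disp. A small sketch of that packing, assuming this reading of the format (the encoder helper is hypothetical, not the backend's actual emitter):

    #include <cassert>
    #include <cstdint>

    // Pack a SPARC Format 1 CALL: op (bits 31-30) = 1, disp30 in bits 29-0.
    uint32_t encodeCall(uint32_t PC, uint32_t Target) {
      int32_t Disp = (int32_t)(Target - PC) >> 2;      // word displacement
      return (1u << 30) | ((uint32_t)Disp & 0x3fffffffu);
    }

    int main() {
      uint32_t Insn = encodeCall(0x1000, 0x2000);      // forward call by 0x1000 bytes
      assert((Insn >> 30) == 1);                       // op field
      assert((Insn & 0x3fffffffu) == 0x400);           // 0x1000 / 4 words
      return 0;
    }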
+
+// Section B.28 - Read State Register Instructions
+let Uses = [Y] in
+ def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+let Defs = [Y] in {
+ def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+ def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+}
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstod $src, $dst",
+ [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fabss $src, $dst",
+ [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
+ (fextend FPRegs:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the 2nd template arg is different for these guys.
+// Note 2: the result of an FCMP is not available until the 2nd cycle
+// after the instr is retired, but there is no interlock. This behavior
+// is modelled with a forced noop after the instruction.
+let Defs = [FCC] in {
+ def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], Constraints = "$T = $dst" in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
+ let Uses = [ICC] in {
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [ICC] in {
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register.  As such, we have to clear
+// the top 32 bits before using it.  To do this clearing, we use an SLLri X, 0.
+def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop IntRegs:$src),
+ (POPCrr (SLLri IntRegs:$src, 0))>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri G0, imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
+
+// subc
+def : Pat<(subc IntRegs:$b, IntRegs:$c),
+ (SUBCCrr IntRegs:$b, IntRegs:$c)>;
+def : Pat<(subc IntRegs:$b, simm13:$val),
+ (SUBCCri IntRegs:$b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
+ (ADDri IntRegs:$r, tglobaladdr:$in)>;
+def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
+ (ADDri IntRegs:$r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+// Map integer extload's to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
new file mode 100644
index 000000000000..0b74308eb0ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -0,0 +1,47 @@
+//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sparc specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SPARCMACHINEFUNCTIONINFO_H
+#define SPARCMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ private:
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameOffset - Frame offset to start of varargs area.
+ int VarArgsFrameOffset;
+
+ /// SRetReturnReg - Holds the virtual register into which the sret
+ /// argument is passed.
+ unsigned SRetReturnReg;
+ public:
+ SparcMachineFunctionInfo()
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF)
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ };
+}
+
+#endif
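Elsewhere in the backend this object is reached through MachineFunction::getInfo<>. A hedged usage sketch; the helper functions are invented for illustration, only the accessors come from the header above:

    #include "SparcMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    // Record where the varargs save area starts while lowering formal arguments
    // (hypothetical helper; the real code of this kind lives in SparcISelLowering).
    static void rememberVarArgsStart(MachineFunction &MF, int FrameOffset) {
      SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
      FuncInfo->setVarArgsFrameOffset(FrameOffset);
    }

    // Later, e.g. when lowering VASTART, the same offset is read back.
    static int varArgsStart(MachineFunction &MF) {
      return MF.getInfo<SparcMachineFunctionInfo>()->getVarArgsFrameOffset();
    }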
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 000000000000..0acdd2c55d6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,140 @@
+//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcRegisterInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(), Subtarget(st), TII(tii) {
+}
+
+const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ Reserved.set(SP::G1);
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
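A reserved register is simply a bit set in the mask returned above; the allocator never hands those registers out. A minimal model of that mask in plain C++ (the register numbers follow the Ri<> definitions in SparcRegisterInfo.td, not the TableGen-generated SP:: enum values):

    #include <bitset>
    #include <cassert>

    enum { G0 = 0, G1 = 1, O6 = 14, L0 = 16, I6 = 30, NumIntRegs = 32 };

    std::bitset<NumIntRegs> reservedIntRegs() {
      std::bitset<NumIntRegs> R;
      R.set(G0);   // constant zero
      R.set(G1);   // scratch for large frame offsets (see the FIXME above)
      R.set(O6);   // stack pointer
      R.set(I6);   // frame pointer
      return R;
    }

    int main() {
      std::bitset<NumIntRegs> R = reservedIntRegs();
      assert(R.test(O6) && R.test(I6) && !R.test(L0));   // L0 stays allocatable
      return 0;
    }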
+
+void SparcRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+void
+SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+  // Addressable stack objects are accessed using negative offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImm();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(SP::I6, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+    // Insert G1+%lo(offset) into the user instruction.
+ MI.getOperand(i).ChangeToRegister(SP::G1, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ }
+}
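The large-offset path above splits the frame offset into a %hi part built into G1 with SETHI and a %lo part folded into the user instruction. A standalone check of that arithmetic, including the simm13 range test that picks the path:

    #include <cassert>
    #include <cstdint>

    // simm13 test used to decide whether the offset fits the immediate field.
    bool fitsSimm13(int32_t Off) { return Off >= -4096 && Off <= 4095; }

    // Split used on the large-offset path: SETHI gets the upper bits (via G1),
    // the user instruction keeps the low 10 bits.
    void split(int32_t Off, uint32_t &Hi, uint32_t &Lo) {
      Hi = (uint32_t)Off >> 10;              // matches "(unsigned)Offset >> 10U"
      Lo = (uint32_t)Off & ((1u << 10) - 1);
    }

    int main() {
      assert(fitsSimm13(-4096) && fitsSimm13(4095) && !fitsSimm13(8192));

      int32_t Off = 20000;                   // too large for simm13
      uint32_t Hi, Lo;
      split(Off, Hi, Lo);
      // sethi materializes Hi<<10; adding Lo in the user rebuilds the offset.
      assert((int32_t)((Hi << 10) | Lo) == Off);
      return 0;
    }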
+
+void SparcRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+
+unsigned SparcRegisterInfo::getRARegister() const {
+ return SP::I7;
+}
+
+unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return SP::I6;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 000000000000..ec9e63a686bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,62 @@
+//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SparcGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 000000000000..cf928293c169
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,159 @@
+//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+class SparcCtrlReg<string n>: Register<n> {
+ let Namespace = "SP";
+}
+
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+}
+
+// Control Registers
+def ICC : SparcCtrlReg<"ICC">;
+def FCC : SparcCtrlReg<"FCC">;
+
+// Y register
+def Y : SparcCtrlReg<"Y">;
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[72]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[73]>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[74]>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[75]>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[76]>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[77]>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[78]>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[79]>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[80]>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[81]>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[82]>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[83]>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[84]>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"SP", [i32], 32,
+ (add L0, L1, L2, L3, L4, L5, L6,
+ L7, I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )>;
+
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..190c575a52de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SparcSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparc-selectiondag-info"
+#include "SparcTargetMachine.h"
+using namespace llvm;
+
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
new file mode 100644
index 000000000000..dcd42037253d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCSELECTIONDAGINFO_H
+#define SPARCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SparcTargetMachine;
+
+class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
+ ~SparcSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 000000000000..de647e8221a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,44 @@
+//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "Sparc.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SparcGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit) :
+ SparcGenSubtargetInfo(TT, CPU, FS),
+ IsV9(false),
+ V8DeprecatedInsts(false),
+ IsVIS(false),
+ Is64Bit(is64Bit) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+ if (is64Bit)
+ CPUName = "v9";
+ else
+ CPUName = "v8";
+ }
+ IsV9 = CPUName == "v9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 000000000000..00a04c3bea57
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,58 @@
+//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SparcGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class SparcSubtarget : public SparcGenSubtargetInfo {
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+ bool Is64Bit;
+
+public:
+ SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64bit);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+  /// ParseSubtargetFeatures - Parses the features string, setting the
+  /// specified subtarget options.  The definition is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool is64Bit() const { return Is64Bit; }
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit()) {
+ p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64";
+ } else {
+ p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32";
+ }
+ return std::string(p);
+ }
+};
+
+} // end namespace llvm
+
+#endif
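Field by field, the layout strings returned by getDataLayout above say: E = big-endian, p:size:abi:pref describes pointers, iN/fN:abi:pref give per-type alignments, and nNN lists the native integer widths. An annotated restatement of the 32-bit string, assuming that reading of the fields:

    #include <cstdio>

    // Annotated copy of the 32-bit layout string (assumed reading of the fields).
    static const char SparcV8Layout[] =
        "E"            // big-endian
        "-p:32:32:32"  // pointers are 32 bits, ABI and preferred alignment 32
        "-i64:64:64"   // i64 has 64-bit ABI and preferred alignment
        "-f64:64:64"   // f64 is 64-bit aligned
        "-f128:64:64"  // f128 (long double) is only 64-bit aligned on V8
        "-n32";        // native integer width is 32 bits

    int main() { std::puts(SparcV8Layout); return 0; }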
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 000000000000..cbe6d8754efd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,65 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSparcTarget() {
+ // Register the target.
+ RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
+ RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 (or LP64, when is64bit is set)
+/// architecture model.
+SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, is64bit),
+ DataLayout(Subtarget.getDataLayout()),
+ TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
+}
+
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createSparcISelDag(*this));
+ return false;
+}
+
+/// addPreEmitPass - This hook lets targets run passes immediately before
+/// machine code is emitted.  It should return true if -print-machineinstrs
+/// should print the code after these passes run.
+bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createSparcFPMoverPass(*this));
+ PM.add(createSparcDelaySlotFillerPass(*this));
+ return true;
+}
+
+SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : SparcTargetMachine(T, TT, CPU, FS, false) {
+}
+
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : SparcTargetMachine(T, TT, CPU, FS, true) {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 000000000000..799fc497f4ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,79 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "SparcInstrInfo.h"
+#include "SparcISelLowering.h"
+#include "SparcFrameLowering.h"
+#include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ SparcSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
+ SparcInstrInfo InstrInfo;
+ SparcFrameLowering FrameLowering;
+public:
+ SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64bit);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const SparcTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+/// SparcV8TargetMachine - Sparc 32-bit target machine
+///
+class SparcV8TargetMachine : public SparcTargetMachine {
+public:
+ SparcV8TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+/// SparcV9TargetMachine - Sparc 64-bit target machine
+///
+class SparcV9TargetMachine : public SparcTargetMachine {
+public:
+ SparcV9TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
new file mode 100644
index 000000000000..5c06f0727e96
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSparcTarget;
+Target llvm::TheSparcV9Target;
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..2ac90164721f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMSystemZDesc
+ SystemZMCTargetDesc.cpp
+ SystemZMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..08f1a9d51fb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
new file mode 100644
index 000000000000..8540546b62d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -0,0 +1,32 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SystemZMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ PointerSize = 8;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+}
+
+const MCSection *SystemZMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
new file mode 100644
index 000000000000..a6a27e2f4b6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -0,0 +1,30 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SystemZMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct SystemZMCAsmInfo : public MCAsmInfo {
+ explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 000000000000..5a826a6ef887
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSystemZMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSystemZMCAsmInfo() {
+ RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 000000000000..e2ad5afd6e57
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSystemZTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 000000000000..88960b9cc601
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,52 @@
+//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_H
+#define LLVM_TARGET_SystemZ_H
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SystemZTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ namespace SystemZCC {
+    // SystemZ-specific condition codes. These correspond to SYSTEMZ_*_COND in
+    // SystemZInstrInfo.td and must be kept in sync with it.
+ enum CondCodes {
+ O = 0,
+ H = 1,
+ NLE = 2,
+ L = 3,
+ NHE = 4,
+ LH = 5,
+ NE = 6,
+ E = 7,
+ NLH = 8,
+ HE = 9,
+ NL = 10,
+ LE = 11,
+ NH = 12,
+ NO = 13,
+ INVALID = -1
+ };
+ }
+
+ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm;
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 000000000000..4c08c087225e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,61 @@
+//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the SystemZ target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true",
+ "Support Z10 instructions">;
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"z9", []>;
+def : Proc<"z10", [FeatureZ10]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZInstrInfo.td"
+include "SystemZInstrFP.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+ let InstructionSet = SystemZInstrInfo;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 000000000000..fd4d8b70c75e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,223 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SystemZ assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SystemZAsmPrinter : public AsmPrinter {
+ public:
+ SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+ void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (int16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (int32_t)MI->getOperand(OpNum).getImm();
+ }
+ void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint32_t)MI->getOperand(OpNum).getImm();
+ }
+
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ O << *Mang->getSymbol(GV);
+
+ // Assemble calls via PLT for externally visible symbols if PIC.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ O << Name;
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "@PLT";
+
+ return;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+}
+
+
+void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
+ if (strncmp(Modifier + 7, "even", 4) == 0)
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
+ else if (strncmp(Modifier + 7, "odd", 3) == 0)
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
+ else
+ assert(0 && "Invalid subreg modifier");
+ }
+
+ O << '%' << getRegisterName(Reg);
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case SystemZII::MO_NO_FLAG:
+ break;
+ case SystemZII::MO_GOTENT: O << "@GOTENT"; break;
+ case SystemZII::MO_PLT: O << "@PLT"; break;
+ }
+
+ printOffset(MO.getOffset(), O);
+}
+
+void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+
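+  // Operands are emitted in the usual D(B) assembler form, e.g. "8(%r15)" for
+  // displacement 8 with base register %r15; the parenthesised base is omitted
+  // when no base register is present.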
+ // Print displacement operand.
+ printOperand(MI, OpNum+1, O);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Index = MI->getOperand(OpNum+2);
+
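+  // Operands are emitted in the D(X,B) assembler form, e.g. "0(%r1,%r15)";
+  // the index register is only printed when one is present.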
+ // Print displacement operand.
+ printOperand(MI, OpNum+1, O);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ if (Index.getReg()) {
+ O << ',';
+ printOperand(MI, OpNum+2, O);
+ }
+ O << ')';
+ } else
+ assert(!Index.getReg() && "Should allocate base register first!");
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+ RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 000000000000..c799a9e501aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,46 @@
+//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the SystemZ architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // i64 is returned in register R2
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+ // f32 / f64 are returned in F0
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>
+]>;
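+
+// For example, an i32 return value is promoted to i64 and comes back in r2,
+// while a double result is returned in f0.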
+
+//===----------------------------------------------------------------------===//
+// SystemZ Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // The first 5 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+ // The first 4 floating point arguments of non-varargs functions are passed
+ // in FP registers.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>,
+
+  // Any remaining i64, f32 or f64 arguments are passed on the stack in
+  // slots that are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 000000000000..2ad84a2d052e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,386 @@
+//===-- SystemZFrameLowering.cpp - SystemZ Frame Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
+ // Fill the spill offsets map
+ static const unsigned SpillOffsTab[][2] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 }
+ };
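+  // Each entry maps a callee-saved GPR to its 8-byte slot in the 160-byte
+  // register save area addressed off R15; for example, R6D is spilled at
+  // offset 0x30.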
+
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+ for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+ RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
+/// hasFP - Return true if the specified function should have a dedicated
+/// frame pointer register. This is true if the function has variable-sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, const TargetInstrInfo &TII) {
+ unsigned Opc; uint64_t Chunk;
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+ if (Offset >= (1LL << 15) - 1) {
+ Opc = SystemZ::ADD64ri32;
+ Chunk = (1LL << 31) - 1;
+ } else {
+ Opc = SystemZ::ADD64ri16;
+ Chunk = (1LL << 15) - 1;
+ }
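+  // For example, an adjustment of -200000 bytes exceeds the 16-bit immediate
+  // of ADD64ri16, so a single ADD64ri32 of -200000 is emitted; only amounts
+  // beyond the selected chunk limit are split into multiple adds.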
+
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
+ // The PSW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+  // Note that the area for callee-saved registers is already allocated, so we
+  // need to 'undo' that part of the stack movement.
+ uint64_t StackSize = MFI->getStackSize();
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
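+  // getOffsetOfLocalArea() is -160 here (see the constructor), so this adds
+  // the 160-byte register save area to the amount being allocated.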
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::MOV64mr ||
+ MBBI->getOpcode() == SystemZ::MOV64mrm))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // adjust stack pointer: R15 -= numbytes
+ if (StackSize || MFI->hasCalls()) {
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+ }
+
+ if (hasFP(MF)) {
+ // Update R11 with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+
+ }
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ switch (RetOpcode) {
+ case SystemZ::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+  // Note that the area for callee-saved registers is already allocated, so we
+  // need to 'undo' that part of the stack movement.
+ uint64_t StackSize =
+ MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+ // Skip the final terminator instruction.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ --MBBI;
+ if (!PI->getDesc().isTerminator())
+ break;
+ }
+
+  // When the callee-saved register restores were emitted, the stack frame was
+  // not yet finalized (and thus the stack size was unknown). Adjust the
+  // restore offset now that the full stack size is known.
+ if (StackSize || MFI->hasCalls()) {
+ assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+ MBBI->getOpcode() == SystemZ::MOV64rm) &&
+ "Expected to see callee-save register restore code");
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+
+ unsigned i = 0;
+ MachineInstr &MI = *MBBI;
+ while (!MI.getOperand(i).isImm()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Unexpected restore code!");
+ }
+
+ uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+    // If Offset does not fit into the 20-bit signed displacement field, we
+    // need to emit some additional code...
+ if (Offset > 524287) {
+      // Fold the displacement into the load instruction as much as possible.
+ NumBytes = Offset - 524287;
+ Offset = 524287;
+ emitSPUpdate(MBB, MBBI, NumBytes, TII);
+ }
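+    // E.g. a combined offset of 600000 results in an extra SP adjustment of
+    // 75713 bytes, leaving 524287 as the displacement of the restore.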
+
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Fixed objects are really located in the "previous" frame.
+ if (FI < 0)
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ Offset += StackSize - getOffsetOfLocalArea();
+
+ // Skip the register save area if we generated the stack frame.
+ if (StackSize || MFI->hasCalls())
+ Offset -= getOffsetOfLocalArea();
+
+ return Offset;
+}
+
+bool
+SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned CalleeFrameSize = 0;
+
+ // Scan the callee-saved and find the bounds of register spill area.
+ unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!SystemZ::FP64RegClass.contains(Reg)) {
+ unsigned Offset = RegSpillOffsets[Reg];
+ CalleeFrameSize += 8;
+ if (StartOffset > Offset) {
+ LowReg = Reg; StartOffset = Offset;
+ }
+ if (EndOffset < Offset) {
+ HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+ }
+ }
+ }
+
+ // Save information for epilogue inserter.
+ MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+ MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+ // Save GPRs
+ if (StartOffset) {
+    // Build a store instruction. Use a STORE MULTIPLE instruction if there is
+    // more than one register to store, otherwise just a plain STORE.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+ // Add store operands.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+ MIB.addReg(LowReg, RegState::Kill);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Kill);
+
+ // Do a second scan adding regs as being killed by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitKill);
+ }
+ }
+
+ // Save FPRs
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+bool
+SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Restore FP registers
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg))
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+
+ // Restore GP registers
+ unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+ unsigned StartOffset = RegSpillOffsets[LowReg];
+
+ if (StartOffset) {
+    // Build a load instruction. Use a LOAD MULTIPLE instruction if there is
+    // more than one register to load, otherwise just a plain LOAD.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add load operands.
+ MIB.addReg(LowReg, RegState::Define);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Define);
+
+ MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+ MIB.addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+
+ // Do a second scan adding regs as being defined by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+void
+SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+  // Determine whether R14/R15 will ever be clobbered inside the function and,
+  // if so, mark them as callee-saved.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether any of the high FPRs are ever used; if so, we need to save
+  // R15 as well.
+ static const unsigned HighFPRs[] = {
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ };
+
+ bool HighFPRsUsed = false;
+ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+ HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+ if (FFI->hasCalls())
+ /* FIXME: function is varargs */
+ /* FIXME: function grabs RA */
+ /* FIXME: function calls eh_return */
+ MRI.setPhysRegUsed(SystemZ::R14D);
+
+ if (HighFPRsUsed ||
+ FFI->hasCalls() ||
+ FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+ FFI->hasVarSizedObjects() // Function calls dynamic alloca's
+ /* FIXME: function is varargs */)
+ MRI.setPhysRegUsed(SystemZ::R15D);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 000000000000..1284b6802b3a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,57 @@
+//=- SystemZFrameLowering.h - Define frame lowering for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_FRAMEINFO_H
+#define SYSTEMZ_FRAMEINFO_H
+
+#include "SystemZ.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace llvm {
+ class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+protected:
+ const SystemZSubtarget &STI;
+
+public:
+ explicit SystemZFrameLowering(const SystemZSubtarget &sti);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
+ bool hasFP(const MachineFunction &MF) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2186ff1fed54
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,779 @@
+//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValues
+  /// instead of register numbers for the leaves of the matched tree.
+ struct SystemZRRIAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ SDValue IndexReg;
+ int64_t Disp;
+ bool isRI;
+
+ SystemZRRIAddressMode(bool RI = false)
+ : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
+ }
+
+ void dump() {
+ errs() << "SystemZRRIAddressMode " << this << '\n';
+ if (BaseType == RegBase) {
+ errs() << "Base.Reg ";
+ if (Base.Reg.getNode() != 0)
+ Base.Reg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << '\n';
+ } else {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ if (!isRI) {
+ errs() << "IndexReg ";
+ if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+ else errs() << "nul";
+ }
+ errs() << " Disp " << Disp << '\n';
+ }
+ };
+}
+
+/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class SystemZDAGToDAGISel : public SelectionDAGISel {
+ const SystemZTargetLowering &Lowering;
+ const SystemZSubtarget &Subtarget;
+
+ void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp);
+ void getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index);
+
+ public:
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "SystemZ DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDValue getI16Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "SystemZGenDAGISel.inc"
+
+ private:
+ bool SelectAddrRI12Only(SDValue& Addr,
+ SDValue &Base, SDValue &Disp);
+ bool SelectAddrRI12(SDValue& Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly = false);
+ bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
+ bool SelectAddrRRI12(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectAddrRRI20(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectLAAddr(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ SDNode *Select(SDNode *Node);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth = 0);
+ bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
+ };
+} // end anonymous namespace
+
+/// createSystemZISelDag - This pass converts a legalized DAG into a
+/// SystemZ-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new SystemZDAGToDAGISel(TM, OptLevel);
+}
+
+/// isImmSExt20 - This method tests whether the given value, a 32-bit or
+/// 64-bit immediate, can be accurately represented as a sign extension of a
+/// 20-bit value. If so, it returns true and provides the immediate through
+/// Imm.
+static bool isImmSExt20(int64_t Val, int64_t &Imm) {
+ if (Val >= -524288 && Val <= 524287) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
+
+/// isImmZExt12 - This method tests whether the given value, a 32-bit or
+/// 64-bit immediate, can be accurately represented as a zero extension of a
+/// 12-bit value. If so, it returns true and provides the immediate through
+/// Imm.
+static bool isImmZExt12(int64_t Val, int64_t &Imm) {
+ if (Val >= 0 && Val <= 0xFFF) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
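+
+// The two forms mirror the two SystemZ displacement encodings: the classic
+// instruction formats take an unsigned 12-bit displacement, while the
+// long-displacement ("Y"-form) instructions take a signed 20-bit one.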
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // FIXME: We can perform better here. If we have something like
+ // (shift (add A, imm), N), we can try to reassociate stuff and fold shift of
+ // imm into addressing mode.
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ int64_t Imm = 0;
+ bool Match = (is12Bit ?
+ isImmZExt12(AM.Disp + Val, Imm) :
+ isImmSExt20(AM.Disp + Val, Imm));
+ if (Match) {
+ AM.Disp = Imm;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SUB: {
+    // Given A-B, if A can be completely folded into the address while leaving
+    // the index field unused, use -B as the index. This is a win if A has
+    // multiple parts that can be folded into the address. It also saves a mov
+    // if the base register has other uses, since it avoids a two-address sub
+    // instruction; however, it costs an additional mov if the index register
+    // has other uses.
+
+ // Test if the LHS of the sub can be folded.
+ SystemZRRIAddressMode Backup = AM;
+ if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRI) {
+ AM = Backup;
+ break;
+ }
+
+ // If the base is a register with multiple uses, this transformation may
+ // save a mov. Otherwise it's probably better not to do it.
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue RHS = N.getNode()->getOperand(1);
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ SystemZRRIAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (!AM.isRI &&
+ AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
+ AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.IndexReg = N.getNode()->getOperand(1);
+ return false;
+ }
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
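+    // For example, (or (shl X, 3), 6): the shift guarantees that the low three
+    // bits are zero, so the 6 can simply be folded into the displacement.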
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ SystemZRRIAddressMode Backup = AM;
+ int64_t Offset = CN->getSExtValue();
+ int64_t Imm = 0;
+ bool MatchOffset = (is12Bit ?
+ isImmZExt12(AM.Disp + Offset, Imm) :
+ isImmSExt20(AM.Disp + Offset, Imm));
+ // The resultant disp must fit in 12 or 20-bits.
+ if (MatchOffset &&
+ // LHS should be an addr mode.
+ !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp = Imm;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
+ SystemZRRIAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, check to see if the index register is set.
+ if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
+ AM.IndexReg = N;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = SystemZRRIAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp) {
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ Base = AM.Base.Reg;
+ else
+ Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ getAddressOperandsRI(AM, Base, Disp);
+ Index = AM.IndexReg;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// an unsigned 12-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
+ SDValue &Base, SDValue &Disp) {
+ return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
+}
+
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly) {
+ SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+  // Check whether we can also match the address using a 20-bit displacement.
+ if (!Done && !is12BitOnly &&
+ !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM12, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// a signed 20-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
+ SDValue &Base, SDValue &Disp) {
+ SystemZRRIAddressMode AM(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus an unsigned 12-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM20, AM12;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+  // Check whether we can also match the address using a 20-bit displacement.
+ if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM12.IndexReg.getNode())
+ AM12.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM12, Base, Disp, Index);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus a signed 20-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Disp, Index);
+
+ return true;
+}
+
+/// SelectLAAddr - Match the address and determine whether the maximal
+/// addressing mode it matches can be cost-effectively emitted as LA/LAY.
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+
+ if (MatchAddress(Addr, AM, false))
+ return false;
+
+ EVT VT = Addr.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ if (AM.Base.Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity += 1;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity += 1;
+
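+  // Only emit an LA/LAY when the matched mode combines enough components to
+  // pay off, e.g. a frame index or a base plus index plus displacement.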
+ if (Complexity > 2) {
+ getAddressOperands(AM, Base, Disp, Index);
+ return true;
+ }
+
+ return false;
+}
+
+bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ if (ISD::isNON_EXTLoad(N.getNode()) &&
+ IsLegalToFold(N, P, P, OptLevel))
+ return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
+ return false;
+}
+
+SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SDIVREM: {
+ unsigned Opc, MOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ EVT ResVT;
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
+ ResVT = MVT::v2i64;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend;
+ if (is32Bit)
+ Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
+ else
+ Dividend = N0.getNode();
+
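+    // SystemZ divide instructions operate on an even/odd register pair: the
+    // dividend is placed in the odd half, and after the divide the odd half
+    // holds the quotient while the even half holds the remainder.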
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, ResVT);
+ Dividend =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
+
+ SDNode *Result;
+ SDValue DivVal = SDValue(Dividend, 0);
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ return NULL;
+ }
+ case ISD::UDIVREM: {
+ unsigned Opc, MOpc, ClrOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ EVT ResVT;
+
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
+ ClrOpc = SystemZ::MOV64Pr0_even;
+ ResVT = MVT::v2i32;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
+ ClrOpc = SystemZ::MOV128r0_even;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend = N0.getNode();
+
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, ResVT);
+ {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ Dividend =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
+ }
+
+ // Zero out even subreg
+ Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
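+ // (The unsigned divide treats the register pair as one wide dividend, so its
+ // high (even) half must be zero before the operation.)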
+
+ SDValue DivVal = SDValue(Dividend, 0);
+ SDNode *Result;
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ return NULL;
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << "\n";
+ );
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 000000000000..871c2972a8c4
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,867 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation -----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-lower"
+
+#include "SystemZISelLowering.h"
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ RegInfo = TM.getRegisterInfo();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
+ addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
+ addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
+ addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
+
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
+ addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
+ }
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+
+ // TODO: It may be better to default to latency-oriented scheduling; however,
+ // LLVM's current latency-oriented scheduler can't handle physreg definitions
+ // such as SystemZ's PSW, so use the register-pressure scheduler, which can.
+ setSchedulingPreference(Sched::RegPressure);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
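+ // (SDIV/UDIV/SREM/UREM are expanded so that combined ISD::SDIVREM and
+ // ISD::UDIVREM nodes are formed instead; the instruction selector matches
+ // those to the paired divide/remainder instructions.)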
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+ // FIXME: Can we lower these 2 efficiently?
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ // FIXME: Can we support these natively?
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+ // Lower some FP stuff
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+
+ // We have only 64-bit bitconverts
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ default:
+ llvm_unreachable("Should not custom lower this!");
+ return SDValue();
+ }
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64))
+ return false;
+
+ // +0.0f lzer
+ // +0.0  lzdr
+ // -0.0f lzer + lner
+ // -0.0  lzdr + lndr
+ return Imm.isZero() || Imm.isNegZero();
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i32)
+ return std::make_pair(0U, SystemZ::GR32RegisterClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, SystemZ::GR128RegisterClass);
+
+ return std::make_pair(0U, SystemZ::GR64RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
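+// The generated include above provides the CC_SystemZ and RetCC_SystemZ
+// calling-convention functions referenced below.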
+
+SDValue
+SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ }
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // SystemZ target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - Transform physical registers into virtual registers and
+/// generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+ if (isVarArg)
+ report_fatal_error("Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[i];
+ EVT LocVT = VA.getLocVT();
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ TargetRegisterClass *RC;
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << LocVT.getSimpleVT().SimpleTy
+ << "\n";
+#endif
+ llvm_unreachable(0);
+ case MVT::i64:
+ RC = SystemZ::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ RC = SystemZ::FP32RegisterClass;
+ break;
+ case MVT::f64:
+ RC = SystemZ::FP64RegisterClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+
+ return Chain;
+}
+
+/// LowerCCCCallTo - Function arguments are copied from virtual registers to
+/// physical registers or the stack frame; CALLSEQ_START and CALLSEQ_END are
+/// emitted.
+/// TODO: sret.
+SDValue
+SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+
+ // Offset to first argument stack slot.
+ const unsigned FirstArgOffset = 160;
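+ // (In the zSeries ELF ABI the caller reserves a 160-byte register save area
+ // at the stack pointer, so outgoing stack arguments begin just past it.)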
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register must be kept in the RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr =
+ DAG.getCopyFromReg(Chain, dl,
+ (TFI->hasFP(MF) ?
+ SystemZ::R11D : SystemZ::R15D),
+ getPointerTy());
+
+ unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(Offset));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+ // flag operands which copy the outgoing args into registers. The InFlag is
+ // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represents the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ResValue = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // If this is an 8/16/32-bit value, it really should be passed promoted
+ // to 64 bits.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::AExt)
+ ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
+
+ // Guarantee that all emitted copies are stuck together by threading them
+ // through the flag value, so they are not scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) const {
+ // FIXME: Emit a test if RHS is zero
+
+ bool isUnsigned = false;
+ SystemZCC::CondCodes TCC;
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ TCC = SystemZCC::E;
+ break;
+ case ISD::SETUEQ:
+ TCC = SystemZCC::NLH;
+ break;
+ case ISD::SETNE:
+ case ISD::SETONE:
+ TCC = SystemZCC::NE;
+ break;
+ case ISD::SETUNE:
+ TCC = SystemZCC::LH;
+ break;
+ case ISD::SETO:
+ TCC = SystemZCC::O;
+ break;
+ case ISD::SETUO:
+ TCC = SystemZCC::NO;
+ break;
+ case ISD::SETULE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NH;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ TCC = SystemZCC::LE;
+ break;
+ case ISD::SETUGE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NL;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ TCC = SystemZCC::HE;
+ break;
+ case ISD::SETUGT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NLE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ TCC = SystemZCC::H;
+ break;
+ case ISD::SETULT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NHE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ TCC = SystemZCC::L;
+ break;
+ }
+
+ SystemZCC = DAG.getConstant(TCC, MVT::i32);
+
+ DebugLoc dl = LHS.getDebugLoc();
+ return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
+ dl, MVT::i64, LHS, RHS);
+}
+
+
+SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+ return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, SystemZCC, Flag);
+}
+
+SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(SystemZCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ SDValue Result;
+ if (!IsPic && !ExtraLoadRequired) {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ unsigned char OpFlags = 0;
+ if (ExtraLoadRequired)
+ OpFlags = SystemZII::MO_GOTENT;
+
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
+ getPointerTy(), Result);
+
+ if (ExtraLoadRequired)
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+// FIXME: PIC here
+SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+
+// FIXME: PIC here
+// FIXME: This is just a dirty hack. We need to lower the constant pool properly.
+SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG";
+ case SystemZISD::CALL: return "SystemZISD::CALL";
+ case SystemZISD::BRCOND: return "SystemZISD::BRCOND";
+ case SystemZISD::CMP: return "SystemZISD::CMP";
+ case SystemZISD::UCMP: return "SystemZISD::UCMP";
+ case SystemZISD::SELECT: return "SystemZISD::SELECT";
+ case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper";
+ default: return NULL;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const SystemZInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == SystemZ::Select32 ||
+ MI->getOpcode() == SystemZ::SelectF32 ||
+ MI->getOpcode() == SystemZ::Select64 ||
+ MI->getOpcode() == SystemZ::SelectF64) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 000000000000..bab3dc23eead
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,145 @@
+//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
+#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace SystemZISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// PCRelativeWrapper - PC relative address
+ PCRelativeWrapper,
+
+ /// CMP, UCMP - Compare instruction
+ CMP,
+ UCMP,
+
+ /// BRCOND - Conditional branch. Operand 0 is chain operand, operand 1 is
+ /// the block to branch if condition is true, operand 2 is condition code
+ /// and operand 3 is the flag operand produced by a CMP instruction.
+ BRCOND,
+
+ /// SELECT - Operands 0 and 1 are selection variables, operand 2 is
+ /// condition code and operand 3 is the flag operand.
+ SELECT
+ };
+ }
+
+ class SystemZSubtarget;
+ class SystemZTargetMachine;
+
+ class SystemZTargetLowering : public TargetLowering {
+ public:
+ explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) const;
+
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ const SystemZSubtarget &Subtarget;
+ const SystemZTargetMachine &TM;
+ const SystemZRegisterInfo *RegInfo;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 000000000000..ab45ec5984e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,128 @@
+//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+// The functions below may be used together with BuildMI to add entire
+// memory references in a single, typed function call.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Displacement, Index.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// SystemZAddressMode - This struct holds a generalized full SystemZ address
+/// mode. The base register can be a frame index, which will eventually be
+/// replaced with R15 or R11, with Disp adjusted accordingly.
+struct SystemZAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned IndexReg;
+ int32_t Disp;
+ const GlobalValue *GV;
+
+ SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
+ Base.Reg = 0;
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no index or displacement.
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with 3
+ // values, this adds: Reg, [0, NoReg] to the instruction.
+ return MIB.addReg(Reg).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no index, but with a displacement.
+/// An example is: 10(%r15).
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &
+addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
+ .addReg(Reg2, getKillRegState(isKill2));
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
+ if (AM.BaseType == SystemZAddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+ assert(0);
+
+ return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference uses the FrameIndex as its base register until it is resolved.
+/// A constant offset can be specified as well.
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI), Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
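+
+// A minimal usage sketch (LoadOpc and FrameIdx are placeholders, not names
+// defined in this backend):
+//
+//   addFrameReference(BuildMI(MBB, InsertPos, DL, TII.get(LoadOpc), DestReg),
+//                     FrameIdx);
+//
+// builds an instruction whose memory operands follow the Base, Displacement,
+// Index order described at the top of this file.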
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 000000000000..a65828061d3b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,340 @@
+//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ (binary) floating point instructions in
+// TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: multiclassify!
+
+//===----------------------------------------------------------------------===//
+// FP Pattern fragments
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+let Uses = [PSW], usesCustomInserter = 1 in {
+ def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
+ "# SelectF32 PSEUDO",
+ [(set FP32:$dst,
+ (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>;
+ def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
+ "# SelectF64 PSEUDO",
+ [(set FP64:$dst,
+ (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// Floating point constant loads.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins),
+ "lzer\t{$dst}",
+ [(set FP32:$dst, fpimm0)]>;
+def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins),
+ "lzdr\t{$dst}",
+ [(set FP64:$dst, fpimm0)]>;
+}
+
+let neverHasSideEffects = 1 in {
+def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "ler\t{$dst, $src}",
+ []>;
+def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "ldr\t{$dst, $src}",
+ []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "le\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr12:$src))]>;
+def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src),
+ "ley\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr:$src))]>;
+def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ld\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr12:$src))]>;
+def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src),
+ "ldy\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr:$src))]>;
+}
+
+def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src),
+ "ste\t{$src, $dst}",
+ [(store FP32:$src, rriaddr12:$dst)]>;
+def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src),
+ "stey\t{$src, $dst}",
+ [(store FP32:$src, rriaddr:$dst)]>;
+def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src),
+ "std\t{$src, $dst}",
+ [(store FP64:$src, rriaddr12:$dst)]>;
+def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src),
+ "stdy\t{$src, $dst}",
+ [(store FP64:$src, rriaddr:$dst)]>;
+
+def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>;
+def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+
+let Defs = [PSW] in {
+def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lcebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg FP32:$src)),
+ (implicit PSW)]>;
+def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lcdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg FP64:$src)),
+ (implicit PSW)]>;
+
+def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lpebr\t{$dst, $src}",
+ [(set FP32:$dst, (fabs FP32:$src)),
+ (implicit PSW)]>;
+def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lpdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fabs FP64:$src)),
+ (implicit PSW)]>;
+
+def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lnebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg(fabs FP32:$src))),
+ (implicit PSW)]>;
+def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lndbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg(fabs FP64:$src))),
+ (implicit PSW)]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+let Defs = [PSW] in {
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "aebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "adbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+}
+
+def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "aeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "adb\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+
+def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "sebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "sdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+
+def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "seb\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "sdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "meebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>;
+def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "mdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>;
+}
+
+def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "meeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>;
+def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "mdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>;
+
+def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "maebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "maeb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "madbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "madb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "msebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "mseb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "msdbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "msdb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "debr\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>;
+def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "ddbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>;
+
+def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "deb\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>;
+def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "ddb\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
+
+} // Constraints = "$src1 = $dst"
+
+def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "sqebr\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt FP32:$src))]>;
+def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "sqdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt FP64:$src))]>;
+
+def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "sqeb\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>;
+def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "sqdb\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>;
+
+def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src),
+ "ledbr\t{$dst, $src}",
+ [(set FP32:$dst, (fround FP64:$src))]>;
+
+def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src),
+ "ldebr\t{$dst, $src}",
+ [(set FP64:$dst, (fextend FP32:$src))]>;
+def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ldeb\t{$dst, $src}",
+ [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>;
+
+let Defs = [PSW] in {
+def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src),
+ "cefbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src),
+ "cegbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src),
+ "cdfbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "cdgbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src),
+ "cfebr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src),
+ "cfdbr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src),
+ "cgebr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "cgdbr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "lgdr\t{$dst, $src}",
+ [(set GR64:$dst, (bitconvert FP64:$src))]>;
+def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "ldgr\t{$dst, $src}",
+ [(set FP64:$dst, (bitconvert GR64:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Compare instructions (set PSW but do not produce any other result)
+
+// Floating-point comparisons
+let Defs = [PSW] in {
+def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
+ "cebr\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
+def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
+ "cdbr\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
+
+def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
+ "ceb\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP32:$src1,
+ (load rriaddr12:$src2)))]>;
+def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
+ "cdb\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP64:$src1,
+ (load rriaddr12:$src2)))]>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Floating point constant -0.0
+def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>;
+def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 000000000000..b4a8993c1971
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,133 @@
+//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo : Format<0>;
+def EForm : Format<1>;
+def IForm : Format<2>;
+def RIForm : Format<3>;
+def RIEForm : Format<4>;
+def RILForm : Format<5>;
+def RISForm : Format<6>;
+def RRForm : Format<7>;
+def RREForm : Format<8>;
+def RRFForm : Format<9>;
+def RRRForm : Format<10>;
+def RRSForm : Format<11>;
+def RSForm : Format<12>;
+def RSIForm : Format<13>;
+def RSILForm : Format<14>;
+def RSYForm : Format<15>;
+def RXForm : Format<16>;
+def RXEForm : Format<17>;
+def RXFForm : Format<18>;
+def RXYForm : Format<19>;
+def SForm : Format<20>;
+def SIForm : Format<21>;
+def SILForm : Format<22>;
+def SIYForm : Format<23>;
+def SSForm : Format<24>;
+def SSEForm : Format<25>;
+def SSFForm : Format<26>;
+
+class InstSystemZ<bits<16> op, Format f, dag outs, dag ins> : Instruction {
+ let Namespace = "SystemZ";
+
+ bits<16> Opcode = op;
+
+ Format Form = f;
+ bits<5> FormBits = Form.Value;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+}
+
+class I8<bits<8> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-7} = op;
+ let Opcode{8-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I12<bits<12> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-11} = op;
+ let Opcode{12-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I16<bits<16> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<op, f, outs, ins> {
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class RRI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RRForm, outs, ins, asmstr, pattern>;
+
+class RII<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RIForm, outs, ins, asmstr, pattern>;
+
+class RILI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RILForm, outs, ins, asmstr, pattern>;
+
+class RREI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RREForm, outs, ins, asmstr, pattern>;
+
+class RXI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RXForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RXYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RXYForm, outs, ins, asmstr, pattern>;
+
+class RSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RSForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RSYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RSYForm, outs, ins, asmstr, pattern>;
+
+class SII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, SIForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class SIYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SIYForm, outs, ins, asmstr, pattern>;
+
+class SILI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SILForm, outs, ins, asmstr, pattern>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<0, Pseudo, outs, ins> {
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
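An illustrative sketch, not part of the patch: the I8/I12/I16 classes above place their opcode in the low bits of the 16-bit Opcode field and zero the rest (TableGen bit 0 is the least significant bit). In C++ terms, assuming a hypothetical encoder helper that mirrors those let statements:

    #include <cstdint>

    // Mirrors I8:  Opcode{0-7}  = op, Opcode{8-15} = 0
    static inline uint16_t packI8(uint8_t Op)   { return Op; }
    // Mirrors I12: Opcode{0-11} = op, Opcode{12-15} = 0
    static inline uint16_t packI12(uint16_t Op) { return Op & 0x0FFF; }
    // Mirrors I16: the full 16-bit opcode is used as-is
    static inline uint16_t packI16(uint16_t Op) { return Op; }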
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 000000000000..99e2730609e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,439 @@
+//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
+ RI(tm, *this), TM(tm) {
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
+ return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32mr;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64mr;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32mr;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64mr;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Pmr;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128mr;
+ } else
+ llvm_unreachable("Unsupported regclass to store");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32rm;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64rm;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32rm;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64rm;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Prm;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128rm;
+ } else
+ llvm_unreachable("Unsupported regclass to load");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rr;
+ else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV32rr;
+ else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rrP;
+ else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV128rr;
+ else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV32rr;
+ else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV64rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32rm:
+ case SystemZ::MOV32rmy:
+ case SystemZ::MOV64rm:
+ case SystemZ::MOVSX32rm8:
+ case SystemZ::MOVSX32rm16y:
+ case SystemZ::MOVSX64rm8:
+ case SystemZ::MOVSX64rm16:
+ case SystemZ::MOVSX64rm32:
+ case SystemZ::MOVZX32rm8:
+ case SystemZ::MOVZX32rm16:
+ case SystemZ::MOVZX64rm8:
+ case SystemZ::MOVZX64rm16:
+ case SystemZ::MOVZX64rm32:
+ case SystemZ::FMOV32rm:
+ case SystemZ::FMOV32rmy:
+ case SystemZ::FMOV64rm:
+ case SystemZ::FMOV64rmy:
+ case SystemZ::MOV64Prm:
+ case SystemZ::MOV64Prmy:
+ case SystemZ::MOV128rm:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
+ MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32mr:
+ case SystemZ::MOV32mry:
+ case SystemZ::MOV64mr:
+ case SystemZ::MOV32m8r:
+ case SystemZ::MOV32m8ry:
+ case SystemZ::MOV32m16r:
+ case SystemZ::MOV32m16ry:
+ case SystemZ::MOV64m8r:
+ case SystemZ::MOV64m8ry:
+ case SystemZ::MOV64m16r:
+ case SystemZ::MOV64m16ry:
+ case SystemZ::MOV64m32r:
+ case SystemZ::MOV64m32ry:
+ case SystemZ::FMOV32mr:
+ case SystemZ::FMOV32mry:
+ case SystemZ::FMOV64mr:
+ case SystemZ::FMOV64mry:
+ case SystemZ::MOV64Pmr:
+ case SystemZ::MOV64Pmry:
+ case SystemZ::MOV128mr:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
+ MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(3).getReg();
+ }
+ break;
+ }
+ return 0;
+}
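An illustrative sketch, not part of the patch: isLoadFromStackSlot and isStoreToStackSlot above rely on the memory-operand layout used throughout this backend, i.e. three consecutive machine operands (frame index or base, immediate displacement, index register), and treat an access as a plain stack slot reference only when the displacement is 0 and no index register is used. The check in isolation, with a hypothetical helper name:

    #include "llvm/CodeGen/MachineInstr.h"

    // True when operands [Base, Base+2] form a bare frame-index reference
    // (frame index, zero displacement, no index register).
    static bool isBareFrameIndexRef(const llvm::MachineInstr *MI, unsigned Base) {
      return MI->getOperand(Base).isFI() &&
             MI->getOperand(Base + 1).isImm() &&
             MI->getOperand(Base + 1).getImm() == 0 &&
             MI->getOperand(Base + 2).isReg() &&
             MI->getOperand(Base + 2).getReg() == 0;
    }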
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(Cond.size() == 1 && "Invalid branch condition!");
+
+ SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
+ Cond[0].setImm(getOppositeCondition(CC));
+ return false;
+}
+
+bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == SystemZ::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+      // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == SystemZCC::INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination.
+    assert(Cond.size() == 1);
+    assert(TBB);
+
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
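An illustrative sketch, not part of the patch, of how the TBB/FBB/Cond triple filled in by AnalyzeBranch above is conventionally interpreted by callers; the helper name is hypothetical:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/Target/TargetInstrInfo.h"

    // Summarize how a block ends, based on the AnalyzeBranch() contract.
    static const char *describeBlockEnding(const llvm::TargetInstrInfo &TII,
                                            llvm::MachineBasicBlock &MBB) {
      llvm::MachineBasicBlock *TBB = 0, *FBB = 0;
      llvm::SmallVector<llvm::MachineOperand, 1> Cond;
      if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/ false))
        return "unanalyzable terminator (e.g. indirect branch)";
      if (Cond.empty())
        return TBB ? "one unconditional branch" : "falls through";
      return FBB ? "conditional branch to TBB plus unconditional branch to FBB"
                 : "conditional branch to TBB, otherwise falls through";
    }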
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != SystemZ::JMP &&
+ getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "SystemZ branch conditions have at most one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
+ BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+const MCInstrDesc&
+SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case SystemZCC::O: return get(SystemZ::JO);
+ case SystemZCC::H: return get(SystemZ::JH);
+ case SystemZCC::NLE: return get(SystemZ::JNLE);
+ case SystemZCC::L: return get(SystemZ::JL);
+ case SystemZCC::NHE: return get(SystemZ::JNHE);
+ case SystemZCC::LH: return get(SystemZ::JLH);
+ case SystemZCC::NE: return get(SystemZ::JNE);
+ case SystemZCC::E: return get(SystemZ::JE);
+ case SystemZCC::NLH: return get(SystemZ::JNLH);
+ case SystemZCC::HE: return get(SystemZ::JHE);
+ case SystemZCC::NL: return get(SystemZ::JNL);
+ case SystemZCC::LE: return get(SystemZ::JLE);
+ case SystemZCC::NH: return get(SystemZ::JNH);
+ case SystemZCC::NO: return get(SystemZ::JNO);
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: return SystemZCC::INVALID;
+ case SystemZ::JO: return SystemZCC::O;
+ case SystemZ::JH: return SystemZCC::H;
+ case SystemZ::JNLE: return SystemZCC::NLE;
+ case SystemZ::JL: return SystemZCC::L;
+ case SystemZ::JNHE: return SystemZCC::NHE;
+ case SystemZ::JLH: return SystemZCC::LH;
+ case SystemZ::JNE: return SystemZCC::NE;
+ case SystemZ::JE: return SystemZCC::E;
+ case SystemZ::JNLH: return SystemZCC::NLH;
+ case SystemZ::JHE: return SystemZCC::HE;
+ case SystemZ::JNL: return SystemZCC::NL;
+ case SystemZ::JLE: return SystemZCC::LE;
+ case SystemZ::JNH: return SystemZCC::NH;
+ case SystemZ::JNO: return SystemZCC::NO;
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid condition!");
+ case SystemZCC::O: return SystemZCC::NO;
+ case SystemZCC::H: return SystemZCC::NH;
+ case SystemZCC::NLE: return SystemZCC::LE;
+ case SystemZCC::L: return SystemZCC::NL;
+ case SystemZCC::NHE: return SystemZCC::HE;
+ case SystemZCC::LH: return SystemZCC::NLH;
+ case SystemZCC::NE: return SystemZCC::E;
+ case SystemZCC::E: return SystemZCC::NE;
+ case SystemZCC::NLH: return SystemZCC::LH;
+ case SystemZCC::HE: return SystemZCC::NHE;
+ case SystemZCC::NL: return SystemZCC::L;
+ case SystemZCC::LE: return SystemZCC::NLE;
+ case SystemZCC::NH: return SystemZCC::H;
+ case SystemZCC::NO: return SystemZCC::O;
+ }
+}
+
+const MCInstrDesc&
+SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Don't have long disp version of this instruction");
+ case SystemZ::MOV32mr: return get(SystemZ::MOV32mry);
+ case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy);
+ case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y);
+ case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry);
+ case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry);
+ case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry);
+ case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry);
+ case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry);
+ case SystemZ::MOV8mi: return get(SystemZ::MOV8miy);
+ case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy);
+ case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy);
+ case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy);
+ case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry);
+ case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry);
+ case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy);
+ case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy);
+ case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry);
+ case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy);
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 000000000000..6a31e9496365
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,113 @@
+//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+/// SystemZII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SystemZII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // SystemZ Specific MachineOperand flags.
+
+ MO_NO_FLAG = 0,
+
+ /// MO_GOTENT - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// SYMBOL_LABEL @GOTENT
+ MO_GOTENT = 1,
+
+    /// MO_PLT - On a symbol operand this indicates that the immediate is the
+    /// offset to the PLT entry of the symbol name from the current code location.
+ ///
+ /// SYMBOL_LABEL @PLT
+ MO_PLT = 2
+ };
+}
+
+class SystemZInstrInfo : public SystemZGenInstrInfo {
+ const SystemZRegisterInfo RI;
+ SystemZTargetMachine &TM;
+public:
+ explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
+ SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
+ const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+ const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
+
+ const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+ if (Offset < 0 || Offset >= 4096)
+ return getLongDispOpc(Opc);
+ else
+ return get(Opc);
+ }
+};
+
+}
+
+#endif
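An illustrative sketch, not part of the patch: getMemoryInstr above keeps the base opcode only while the offset fits the unsigned 12-bit displacement range [0, 4095] and otherwise switches to the long-displacement opcode returned by getLongDispOpc (e.g. MOV32mr/"st" versus MOV32mry/"sty"). The same rule in isolation:

    #include <cstdint>

    // True when a memory offset forces the long-displacement ('y') opcode.
    static inline bool needsLongDisplacement(int64_t Offset) {
      return Offset < 0 || Offset >= 4096; // outside the unsigned 12-bit range
    }
    // needsLongDisplacement(4000) == false, needsLongDisplacement(8000) == true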
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 000000000000..11a39fcd023a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,1147 @@
+//===- SystemZInstrInfo.td - SystemZ Instruction defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Instruction Predicate Definitions.
+def IsZ10 : Predicate<"Subtarget.isZ10()">;
+
+include "SystemZInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
+class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
+def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
+def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisI8<1>, SDTCisVT<2, i64>]>;
+def SDT_SelectCC : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisI8<3>, SDTCisVT<4, i64>]>;
+def SDT_Address : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def SystemZcallseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def SystemZcallseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
+def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
+def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
+ [SDNPHasChain]>;
+def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>;
+def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
+
+
+include "SystemZOperands.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(SystemZcallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
+
+let Uses = [PSW], usesCustomInserter = 1 in {
+ def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
+ "# Select32 PSEUDO",
+ [(set GR32:$dst,
+ (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>;
+ def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
+ "# Select64 PSEUDO",
+ [(set GR64:$dst,
+ (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1 in {
+ def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>;
+
+ let isIndirectBranch = 1 in
+ def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>;
+ }
+
+ let Uses = [PSW] in {
+ def JO : Pseudo<(outs), (ins brtarget:$dst),
+ "jo\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>;
+ def JH : Pseudo<(outs), (ins brtarget:$dst),
+ "jh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>;
+ def JNLE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>;
+ def JL : Pseudo<(outs), (ins brtarget:$dst),
+ "jl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>;
+ def JNHE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>;
+ def JLH : Pseudo<(outs), (ins brtarget:$dst),
+ "jlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>;
+ def JNE : Pseudo<(outs), (ins brtarget:$dst),
+ "jne\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>;
+ def JE : Pseudo<(outs), (ins brtarget:$dst),
+ "je\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>;
+ def JNLH: Pseudo<(outs), (ins brtarget:$dst),
+ "jnlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>;
+ def JHE : Pseudo<(outs), (ins brtarget:$dst),
+ "jhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>;
+ def JNL : Pseudo<(outs), (ins brtarget:$dst),
+ "jnl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>;
+ def JLE : Pseudo<(outs), (ins brtarget:$dst),
+ "jle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>;
+ def JNH : Pseudo<(outs), (ins brtarget:$dst),
+ "jnh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>;
+ def JNO : Pseudo<(outs), (ins brtarget:$dst),
+ "jno\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>;
+ } // Uses = [PSW]
+} // isBranch = 1
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. Uses for argument
+ // registers are added manually.
+ let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
+ F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in {
+ def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops),
+ "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>;
+ def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops),
+ "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>;
+ }
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let isReMaterializable = 1 in
+// FIXME: Provide imm12 variant
+// FIXME: Address should be halfword aligned...
+def LA64r : RXI<0x47,
+ (outs GR64:$dst), (ins laaddr:$src),
+ "lay\t{$dst, $src}",
+ [(set GR64:$dst, laaddr:$src)]>;
+def LA64rm : RXYI<0x71E3,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "larl\t{$dst, $src}",
+ [(set GR64:$dst,
+ (SystemZpcrelwrapper tglobaladdr:$src))]>;
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "# no-op", []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+let neverHasSideEffects = 1 in {
+def MOV32rr : RRI<0x18,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lr\t{$dst, $src}",
+ []>;
+def MOV64rr : RREI<0xB904,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgr\t{$dst, $src}",
+ []>;
+def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlgr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+}
+
+def MOVSX64rr32 : RREI<0xB914,
+ (outs GR64:$dst), (ins GR32:$src),
+ "lgfr\t{$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVZX64rr32 : RREI<0xB916,
+ (outs GR64:$dst), (ins GR32:$src),
+ "llgfr\t{$dst, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV32ri16 : RII<0x8A7,
+ (outs GR32:$dst), (ins s16imm:$src),
+ "lhi\t{$dst, $src}",
+ [(set GR32:$dst, immSExt16:$src)]>;
+def MOV64ri16 : RII<0x9A7,
+ (outs GR64:$dst), (ins s16imm64:$src),
+ "lghi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt16:$src)]>;
+
+def MOV64rill16 : RII<0xFA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llill\t{$dst, $src}",
+ [(set GR64:$dst, i64ll16:$src)]>;
+def MOV64rilh16 : RII<0xEA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llilh\t{$dst, $src}",
+ [(set GR64:$dst, i64lh16:$src)]>;
+def MOV64rihl16 : RII<0xDA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llihl\t{$dst, $src}",
+ [(set GR64:$dst, i64hl16:$src)]>;
+def MOV64rihh16 : RII<0xCA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llihh\t{$dst, $src}",
+ [(set GR64:$dst, i64hh16:$src)]>;
+
+def MOV64ri32 : RILI<0x1C0,
+ (outs GR64:$dst), (ins s32imm64:$src),
+ "lgfi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt32:$src)]>;
+def MOV64rilo32 : RILI<0xFC0,
+ (outs GR64:$dst), (ins u32imm:$src),
+ "llilf\t{$dst, $src}",
+ [(set GR64:$dst, i64lo32:$src)]>;
+def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
+ "llihf\t{$dst, $src}",
+ [(set GR64:$dst, i64hi32:$src)]>;
+}
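An illustrative sketch, not part of the patch: the llill/llilh/llihl/llihh moves above each load one 16-bit slice of the 64-bit register (ll = bits 0-15, lh = bits 16-31, hl = bits 32-47, hh = bits 48-63, counting from the least significant bit), while llilf/llihf load the low or high 32 bits; the i64ll16/i64lh16/... operand predicates match immediates that fit entirely inside the corresponding slice. A hypothetical helper extracting such a slice:

    #include <cstdint>

    // Index 0 = ll (bits 0-15), 1 = lh (16-31), 2 = hl (32-47), 3 = hh (48-63).
    static inline uint16_t halfwordSlice(uint64_t Imm, unsigned Index) {
      return uint16_t(Imm >> (16 * Index));
    }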
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV32rm : RXI<0x58,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "l\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr12:$src))]>;
+def MOV32rmy : RXYI<0x58E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "ly\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr:$src))]>;
+def MOV64rm : RXYI<0x04E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lg\t{$dst, $src}",
+ [(set GR64:$dst, (load rriaddr:$src))]>;
+def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tl\t${dst:subreg_odd}, $src\n"
+ "\tl\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr12:$src))]>;
+def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tly\t${dst:subreg_odd}, $src\n"
+ "\tly\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr:$src))]>;
+def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlg\t${dst:subreg_odd}, $src\n"
+ "\tlg\t${dst:subreg_even}, 8+$src",
+ [(set GR128:$dst, (load rriaddr:$src))]>;
+}
+
+def MOV32mr : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "st\t{$src, $dst}",
+ [(store GR32:$src, rriaddr12:$dst)]>;
+def MOV32mry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sty\t{$src, $dst}",
+ [(store GR32:$src, rriaddr:$dst)]>;
+def MOV64mr : RXYI<0x24E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stg\t{$src, $dst}",
+ [(store GR64:$src, rriaddr:$dst)]>;
+def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tst\t${src:subreg_odd}, $dst\n"
+ "\tst\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr12:$dst)]>;
+def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tsty\t${src:subreg_odd}, $dst\n"
+ "\tsty\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr:$dst)]>;
+def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tstg\t${src:subreg_odd}, $dst\n"
+ "\tstg\t${src:subreg_even}, 8+$dst",
+ [(store GR128:$src, rriaddr:$dst)]>;
+
+def MOV8mi : SII<0x92,
+ (outs), (ins riaddr12:$dst, i32i8imm:$src),
+ "mvi\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>;
+def MOV8miy : SIYI<0x52EB,
+ (outs), (ins riaddr:$dst, i32i8imm:$src),
+ "mviy\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>;
+
+let AddedComplexity = 2 in {
+def MOV16mi : SILI<0xE544,
+ (outs), (ins riaddr12:$dst, s16imm:$src),
+ "mvhhi\t{$dst, $src}",
+ [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV32mi16 : SILI<0xE54C,
+ (outs), (ins riaddr12:$dst, s32imm:$src),
+ "mvhi\t{$dst, $src}",
+ [(store (i32 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV64mi16 : SILI<0xE548,
+ (outs), (ins riaddr12:$dst, s32imm64:$src),
+ "mvghi\t{$dst, $src}",
+ [(store (i64 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+}
+
+// sexts
+def MOVSX32rr8 : RREI<0xB926,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lbr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>;
+def MOVSX64rr8 : RREI<0xB906,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgbr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>;
+def MOVSX32rr16 : RREI<0xB927,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lhr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>;
+def MOVSX64rr16 : RREI<0xB907,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lghr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>;
+
+// extloads
+def MOVSX32rm8 : RXYI<0x76E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lb\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>;
+def MOVSX32rm16 : RXI<0x48,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "lh\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>;
+def MOVSX32rm16y : RXYI<0x78E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lhy\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>;
+def MOVSX64rm8 : RXYI<0x77E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgb\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>;
+def MOVSX64rm16 : RXYI<0x15E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgh\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>;
+def MOVSX64rm32 : RXYI<0x14E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgf\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>;
+
+def MOVZX32rm8 : RXYI<0x94E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llc\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>;
+def MOVZX32rm16 : RXYI<0x95E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llh\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>;
+def MOVZX64rm8 : RXYI<0x90E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgc\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>;
+def MOVZX64rm16 : RXYI<0x91E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgh\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>;
+def MOVZX64rm32 : RXYI<0x16E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgf\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>;
+
+// truncstores
+def MOV32m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr:$dst)]>;
+
+def MOV32m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr:$dst)]>;
+
+def MOV64m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m32r : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "st\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m32ry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sty\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr:$dst)]>;
+
+// Multiple-register moves
+// FIXME: should we use multiple arg nodes?
+def MOV32mrm : RSYI<0x90EB,
+ (outs), (ins riaddr:$dst, GR32:$from, GR32:$to),
+ "stmy\t{$from, $to, $dst}",
+ []>;
+def MOV64mrm : RSYI<0x24EB,
+ (outs), (ins riaddr:$dst, GR64:$from, GR64:$to),
+ "stmg\t{$from, $to, $dst}",
+ []>;
+def MOV32rmm : RSYI<0x90EB,
+ (outs GR32:$from, GR32:$to), (ins riaddr:$dst),
+ "lmy\t{$from, $to, $dst}",
+ []>;
+def MOV64rmm : RSYI<0x04EB,
+ (outs GR64:$from, GR64:$to), (ins riaddr:$dst),
+ "lmg\t{$from, $to, $dst}",
+ []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ Constraints = "$src = $dst" in {
+def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "lhi\t${dst:subreg_even}, 0",
+ []>;
+def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "lghi\t${dst:subreg_even}, 0",
+ []>;
+}
+
+// Byte swaps
+def BSWAP32rr : RREI<0xB91F,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lrvr\t{$dst, $src}",
+ [(set GR32:$dst, (bswap GR32:$src))]>;
+def BSWAP64rr : RREI<0xB90F,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lrvgr\t{$dst, $src}",
+ [(set GR64:$dst, (bswap GR64:$src))]>;
+
+// FIXME: this is an invalid pattern for big-endian
+//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src),
+// "lrvh\t{$dst, $src}",
+// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>;
+def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src),
+ "lrv\t{$dst, $src}",
+ [(set GR32:$dst, (bswap (load rriaddr:$src)))]>;
+def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src),
+ "lrvg\t{$dst, $src}",
+ [(set GR64:$dst, (bswap (load rriaddr:$src)))]>;
+
+//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src),
+// "strvh\t{$src, $dst}",
+// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src),
+ "strv\t{$src, $dst}",
+ [(store (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src),
+ "strvg\t{$src, $dst}",
+ [(store (bswap GR64:$src), rriaddr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Defs = [PSW] in {
+def NEG32rr : RRI<0x13,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lcr\t{$dst, $src}",
+ [(set GR32:$dst, (ineg GR32:$src)),
+ (implicit PSW)]>;
+def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src),
+ "lcgr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg GR64:$src)),
+ (implicit PSW)]>;
+def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
+ "lcgfr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg (sext GR32:$src))),
+ (implicit PSW)]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+
+let Defs = [PSW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "ar\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "agr\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "a\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ay\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ag\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+
+def ADD32ri16 : RII<0xA7A,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "ahi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD32ri : RILI<0xC29,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "afi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit PSW)]>;
+def ADD64ri16 : RILI<0xA7B,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "aghi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD64ri32 : RILI<0xC28,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "agfi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)),
+ (implicit PSW)]>;
+
+let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y
+def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alr\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>;
+def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "algr\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>;
+}
+
+def ADC32ri : RILI<0xC2B,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "alfi\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>;
+def ADC64ri32 : RILI<0xC2A,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "algfi\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alcr\t{$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "alcgr\t{$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND32rr : RRI<0x14,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "nr\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
+def AND64rr : RREI<0xB980,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ngr\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
+}
+
+def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "n\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ny\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def AND64rm : RXYI<0xE380, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ng\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def AND32rill16 : RII<0xA57,
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
+def AND64rill16 : RII<0xA57,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
+
+def AND32rilh16 : RII<0xA56,
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
+def AND64rilh16 : RII<0xA56,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
+
+def AND64rihl16 : RII<0xA55,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nihl\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
+def AND64rihh16 : RII<0xA54,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nihh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
+
+def AND32ri : RILI<0xC0B,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
+def AND64rilo32 : RILI<0xC0B,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
+def AND64rihi32 : RILI<0xC0A,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "nihf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR32rr : RRI<0x16,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "or\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
+def OR64rr : RREI<0xB981,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ogr\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
+}
+
+def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "o\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "oy\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "og\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+ // FIXME: Provide proper encoding!
+def OR32ri16 : RII<0xA5B,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
+def OR32ri16h : RII<0xA5A,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
+def OR32ri : RILI<0xC0D,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
+
+def OR64rill16 : RII<0xA5B,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
+def OR64rilh16 : RII<0xA5A,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
+def OR64rihl16 : RII<0xA59,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oihl\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
+def OR64rihh16 : RII<0xA58,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oihh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
+
+def OR64rilo32 : RILI<0xC0D,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
+def OR64rihi32 : RILI<0xC0C,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "oihf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
+
+def SUB32rr : RRI<0x1B,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "sr\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+def SUB64rr : RREI<0xB909,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+
+def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "s\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "sy\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "sg\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def SBC32rr : RRI<0x1F,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slr\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>;
+def SBC64rr : RREI<0xB90B,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slgr\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>;
+
+def SBC32ri : RILI<0xC25,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "sllfi\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>;
+def SBC64ri32 : RILI<0xC24,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "slgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slbr\t{$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slbgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR32rr : RRI<0x17,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xr\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+def XOR64rr : RREI<0xB982,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "xgr\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
+}
+
+def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "x\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "xy\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "xg\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def XOR32ri : RILI<0xC07,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "xilf\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
+
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def MUL32rr : RREI<0xB252,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "msr\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>;
+def MUL64rr : RREI<0xB90C,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "msgr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>;
+}
+
+def MUL64rrP : RRI<0x1C,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mr\t{$dst, $src2}",
+ []>;
+def UMUL64rrP : RREI<0xB996,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mlr\t{$dst, $src2}",
+ []>;
+def UMUL128rrP : RREI<0xB986,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "mlgr\t{$dst, $src2}",
+ []>;
+
+def MUL32ri16 : RII<0xA7C,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "mhi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>;
+def MUL64ri16 : RII<0xA7D,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "mghi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>;
+
+let AddedComplexity = 2 in {
+def MUL32ri : RILI<0xC21,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "msfi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>,
+ Requires<[IsZ10]>;
+def MUL64ri32 : RILI<0xC20,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "msgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>,
+ Requires<[IsZ10]>;
+}
+
+def MUL32rm : RXI<0x71,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "ms\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>;
+def MUL32rmy : RXYI<0xE351,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "msy\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>;
+def MUL64rm : RXYI<0xE30C,
+ (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "msg\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>;
+
+def MULSX64rr32 : RREI<0xB91C,
+ (outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
+ "msgfr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
+
+def SDIVREM32r : RREI<0xB91D,
+ (outs GR128:$dst), (ins GR128:$src1, GR32:$src2),
+ "dsgfr\t{$dst, $src2}",
+ []>;
+def SDIVREM64r : RREI<0xB90D,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dsgr\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32r : RREI<0xB997,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "dlr\t{$dst, $src2}",
+ []>;
+def UDIVREM64r : RREI<0xB987,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dlgr\t{$dst, $src2}",
+ []>;
+let mayLoad = 1 in {
+def SDIVREM32m : RXYI<0xE31D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsgf\t{$dst, $src2}",
+ []>;
+def SDIVREM64m : RXYI<0xE30D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsg\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
+ "dl\t{$dst, $src2}",
+ []>;
+def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dlg\t{$dst, $src2}",
+ []>;
+} // mayLoad
+} // Constraints = "$src1 = $dst"
+
+//===----------------------------------------------------------------------===//
+// Shifts
+
+let Constraints = "$src = $dst" in
+def SRL32rri : RSI<0x88,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "srl\t{$src, $amt}",
+ [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>;
+def SRL64rri : RSYI<0xEB0C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srlg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
+
+let Constraints = "$src = $dst" in
+def SHL32rri : RSI<0x89,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sll\t{$src, $amt}",
+ [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>;
+def SHL64rri : RSYI<0xEB0D,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "sllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
+
+let Defs = [PSW] in {
+let Constraints = "$src = $dst" in
+def SRA32rri : RSI<0x8A,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sra\t{$src, $amt}",
+ [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)),
+ (implicit PSW)]>;
+
+def SRA64rri : RSYI<0xEB0A,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srag\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def ROTL32rri : RSYI<0xEB1D,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "rll\t{$dst, $src, $amt}",
+ [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>;
+def ROTL64rri : RSYI<0xEB1C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "rllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>;
+
+//===----------------------------------------------------------------------===//
+// Compare instructions (set PSW but do not produce any register result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def CMP32rr : RRI<0x19,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "cr\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
+def CMP64rr : RREI<0xB920,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "cgr\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
+
+def CMP32ri : RILI<0xC2D,
+ (outs), (ins GR32:$src1, s32imm:$src2),
+ "cfi\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
+def CMP64ri32 : RILI<0xC2C,
+ (outs), (ins GR64:$src1, s32imm64:$src2),
+ "cgfi\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
+
+def CMP32rm : RXI<0x59,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "c\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
+def CMP32rmy : RXYI<0xE359,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cy\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
+def CMP64rm : RXYI<0xE320,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cg\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
+
+def UCMP32rr : RRI<0x15,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "clr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
+def UCMP64rr : RREI<0xB921,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "clgr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
+
+def UCMP32ri : RILI<0xC2F,
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "clfi\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
+def UCMP64ri32 : RILI<0xC2E,
+ (outs), (ins GR64:$src1, i64i32imm:$src2),
+ "clgfi\t$src1, $src2",
+ [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
+
+def UCMP32rm : RXI<0x55,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "cl\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr12:$src2)))]>;
+def UCMP32rmy : RXYI<0xE355,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cly\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr:$src2)))]>;
+def UCMP64rm : RXYI<0xE351,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clg\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (load rriaddr:$src2)))]>;
+
+def CMPSX64rr32 : RREI<0xB930,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "cgfr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sext GR32:$src2)))]>;
+def UCMPZX64rr32 : RREI<0xB931,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "clgfr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zext GR32:$src2)))]>;
+
+def CMPSX64rm32 : RXYI<0xE330,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cgf\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sextloadi64i32 rriaddr:$src2)))]>;
+def UCMPZX64rm32 : RXYI<0xE331,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clgf\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zextloadi64i32 rriaddr:$src2)))]>;
+
+// FIXME: Add other crazy ucmp forms
+
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Other crazy stuff
+let Defs = [PSW] in {
+def FLOGR64 : RREI<0xB983,
+ (outs GR128:$dst), (ins GR64:$src),
+ "flogr\t{$dst, $src}",
+ []>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns.
+//===----------------------------------------------------------------------===//
+
+// ConstPools, JumpTables
+def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
+def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
+
+// anyext
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// calls
+def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
+def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// FIXME: use add/sub tricks with 32768/-32768
+
+// Arbitrary immediate support.
+def : Pat<(i32 imm:$src),
+ (EXTRACT_SUBREG (MOV64ri32 (GetI64FromI32 (i32 imm:$src))),
+ subreg_32bit)>;
+
+// Implement in terms of LLIHF/OILF.
+def : Pat<(i64 imm:$imm),
+ (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
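+// For illustration: the 64-bit immediate 0x0000000100000002 would be
+// materialized as MOV64rihi32 0x00000001 followed by OR64rilo32 0x00000002,
+// i.e. an llihf/oilf pair.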
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// extload patterns
+def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>;
+def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>;
+def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
+
+// muls
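+// The patterns below synthesize the high half of a product from the widening
+// multiplies: the first operand is inserted into the odd half of an even/odd
+// register pair, the pair-wide multiply leaves the full product in the pair,
+// and the high half is then extracted from the even half of the pair
+// (subreg_32bit / subreg_even).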
+def : Pat<(mulhs GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_32bit)>;
+
+def : Pat<(mulhu GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_32bit)>;
+def : Pat<(mulhu GR64:$src1, GR64:$src2),
+ (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ GR64:$src1, subreg_odd),
+ GR64:$src2),
+ subreg_even)>;
+
+def : Pat<(ctlz GR64:$src),
+ (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
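+// FLOGR writes the position of the leftmost one bit (64 when the operand is
+// zero) into the even register of the result pair, which matches the ctlz
+// value; hence the subreg_even extraction above.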
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 000000000000..fd6e330344b6
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,51 @@
+//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares SystemZ-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SystemZMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private SystemZ target-specific information for each
+/// MachineFunction.
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// LowReg - Low register of range of callee-saved registers to store.
+ unsigned LowReg;
+
+ /// HighReg - High register of range of callee-saved registers to store.
+ unsigned HighReg;
+public:
+ SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getLowReg() const { return LowReg; }
+ void setLowReg(unsigned Reg) { LowReg = Reg; }
+
+ unsigned getHighReg() const { return HighReg; }
+ void setHighReg(unsigned Reg) { HighReg = Reg; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 000000000000..8b835cc26e29
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,325 @@
+//=====- SystemZOperands.td - SystemZ Operands defs ---------*- tblgen-*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various SystemZ instruction operands.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff.
+//===----------------------------------------------------------------------===//
+
+// SystemZ-specific condition codes. These correspond to CondCode in
+// SystemZ.h and must be kept in sync with it.
+def SYSTEMZ_COND_O : PatLeaf<(i8 0)>;
+def SYSTEMZ_COND_H : PatLeaf<(i8 1)>;
+def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
+def SYSTEMZ_COND_L : PatLeaf<(i8 3)>;
+def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
+def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>;
+def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>;
+def SYSTEMZ_COND_E : PatLeaf<(i8 7)>;
+def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
+def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>;
+def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>;
+def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>;
+def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>;
+def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>;
+
+def LO8 : SDNodeXForm<imm, [{
+ // Transformation function: return low 8 bits.
+ return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
+}]>;
+
+def LL16 : SDNodeXForm<imm, [{
+ // Transformation function: return low 16 bits.
+ return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
+}]>;
+
+def LH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 16-31.
+ return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
+}]>;
+
+def HL16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-47.
+ return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
+}]>;
+
+def HH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 48-63.
+ return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
+}]>;
+
+def LO32 : SDNodeXForm<imm, [{
+ // Transformation function: return low 32 bits.
+ return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
+}]>;
+
+def HI32 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-63.
+ return getI32Imm(N->getZExtValue() >> 32);
+}]>;
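+// For illustration, applied to N = 0x0123456789ABCDEF the transforms above
+// yield LO8 = 0xEF, LL16 = 0xCDEF, LH16 = 0x89AB, HL16 = 0x4567,
+// HH16 = 0x0123, LO32 = 0x89ABCDEF and HI32 = 0x01234567.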
+
+def GetI64FromI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+}]>;
+
+def i32ll16 : PatLeaf<(i32 imm), [{
+ // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16 : PatLeaf<(i32 imm), [{
+ // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i32ll16c : PatLeaf<(i32 imm), [{
+ // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
+ return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16c : PatLeaf<(i32 imm), [{
+ // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
+ // bits set.
+ return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64ll16 : PatLeaf<(i64 imm), [{
+ // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16 : PatLeaf<(i64 imm), [{
+ // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16 : PatLeaf<(i64 imm), [{
+ // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
+ return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16 : PatLeaf<(i64 imm), [{
+ // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
+ return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
+}], HH16>;
+
+def i64ll16c : PatLeaf<(i64 imm), [{
+ // i64ll16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) the rightmost 16 bits.
+ return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16c : PatLeaf<(i64 imm), [{
+ // i64lh16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 16-31.
+ return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16c : PatLeaf<(i64 imm), [{
+ // i64hl16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 32-47.
+ return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16c : PatLeaf<(i64 imm), [{
+ // i64hh16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 48-63.
+ return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
+}], HH16>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
+ // field.
+ if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int16_t)val);
+ } else if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((int32_t)val == (int16_t)val);
+ }
+
+ return false;
+}], LL16>;
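+// For example, 0xFFFFFFFFFFFF8000 (-32768) satisfies immSExt16, while
+// 0x0000000000008000 (32768) does not: sign-extending its low 16 bits gives
+// -32768 rather than 32768.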
+
+def immSExt32 : PatLeaf<(i64 imm), [{
+ // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
+ // field.
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int32_t)val);
+}], LO32>;
+
+def i64lo32 : PatLeaf<(i64 imm), [{
+ // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
+ // bits set.
+ return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32 : PatLeaf<(i64 imm), [{
+ // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
+ return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], HI32>;
+
+def i64lo32c : PatLeaf<(i64 imm), [{
+ // i64lo32c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) the rightmost 32 bits.
+ return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32c : PatLeaf<(i64 imm), [{
+ // i64hi32c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 32-63.
+ return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], HI32>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+ // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}], LO8>;
+
+def i32immSExt16 : PatLeaf<(i32 imm), [{
+ // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
+ // sign extended field.
+ return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
+}], LL16>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}], LO32>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero extended field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}], LO32>;
+
+// extloads
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+// A few more descriptive operand definitions.
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+// 32-bits but only 16 bits are significant.
+def i32i16imm : Operand<i32>;
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Unsigned i12
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+}
+def u12imm64 : Operand<i64> {
+ let PrintMethod = "printU12ImmOperand";
+}
+
+// Signed i16
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+// Unsigned i16
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+// Signed i20
+def s20imm : Operand<i32> {
+ let PrintMethod = "printS20ImmOperand";
+}
+def s20imm64 : Operand<i64> {
+ let PrintMethod = "printS20ImmOperand";
+}
+// Signed i32
+def s32imm : Operand<i32> {
+ let PrintMethod = "printS32ImmOperand";
+}
+def s32imm64 : Operand<i64> {
+ let PrintMethod = "printS32ImmOperand";
+}
+// Unsigned i32
+def u32imm : Operand<i32> {
+ let PrintMethod = "printU32ImmOperand";
+}
+def u32imm64 : Operand<i64> {
+ let PrintMethod = "printU32ImmOperand";
+}
+
+def imm_pcrel : Operand<i64> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+
+// riaddr := reg + imm
+def riaddr32 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
+}
+
+def riaddr12 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
+}
+
+def riaddr : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
+}
+
+//===----------------------------------------------------------------------===//
+
+// rriaddr := reg + reg + imm
+def rriaddr12 : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
+}
+def rriaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
+def laaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectLAAddr", [add, sub, or, frameindex], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
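+
+// Note (informal): with the printRIAddrOperand/printRRIAddrOperand methods
+// above, the ri forms print roughly as D(B) and the rri forms as D(X,B) in
+// SystemZ assembler syntax; the u12 variants restrict the displacement to an
+// unsigned 12-bit value, while the s20 variants allow a signed 20-bit one.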
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 000000000000..59692e883366
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,158 @@
+//===- SystemZRegisterInfo.cpp - SystemZ Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+ const SystemZInstrInfo &tii)
+ : SystemZGenRegisterInfo(), TM(tm), TII(tii) {
+}
+
+const unsigned*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
+ SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
+ SystemZ::R14D, SystemZ::R15D,
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R10P);
+ Reserved.set(SystemZ::R10Q);
+ }
+
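+ // R14 (holding the return address) and R15 (the stack pointer) are always
+ // reserved, together with their 32-bit and register-pair aliases.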
+ Reserved.set(SystemZ::R14D);
+ Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R14W);
+ Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R14P);
+ Reserved.set(SystemZ::R14Q);
+ return Reserved;
+}
+
+const TargetRegisterClass*
+SystemZRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ switch(Idx) {
+ // Exact sub-classes don't exist for the other sub-register indexes.
+ default: return 0;
+ case SystemZ::subreg_32bit:
+ if (B == SystemZ::ADDR32RegisterClass)
+ return A->getSize() == 8 ? SystemZ::ADDR64RegisterClass : 0;
+ return A;
+ }
+}
+
+void SystemZRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MBB.erase(I);
+}
+
+void
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+
+ // This must be part of an rri or ri operand memory reference. Replace the
+ // FrameIndex with the base register BasePtr and fold the frame offset into
+ // the displacement field.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Offset is either a 12-bit unsigned or a 20-bit signed integer.
+ // FIXME: handle "too long" displacements.
+ int Offset =
+ TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
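+
+ // For example, a frame-index offset of 160 combined with an original
+ // displacement of 8 yields Offset = 168, addressed off R15D (or R11D when a
+ // frame pointer is in use).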
+
+ // Pick the instruction form based on whether the displacement fits into a
+ // 12-bit zero-extended field.
+ MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
+
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+unsigned SystemZRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 000000000000..2e262e1acc30
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,64 @@
+//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZREGISTERINFO_H
+#define SystemZREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SystemZSubtarget;
+class SystemZInstrInfo;
+class Type;
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+ SystemZTargetMachine &TM;
+ const SystemZInstrInfo &TII;
+
+ SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ const TargetRegisterClass*
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 000000000000..a24cbcf4ccd8
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,205 @@
+//===- SystemZRegisterInfo.td - The SystemZ Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+ let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "SystemZ";
+}
+
+// We identify all our registers with a 4-bit ID, for consistency's sake.
+
+// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// GPR64 - One of the 16 64-bit general-purpose registers
+class GPR64<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// GPR128 - 8 even-odd register pairs
+class GPR128<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPRS<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// FPRL - One of the 16 64-bit floating-point registers
+class FPRL<bits<4> num, string n, list<Register> subregs>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+}
+
+let Namespace = "SystemZ" in {
+def subreg_32bit : SubRegIndex;
+def subreg_odd32 : SubRegIndex;
+def subreg_even : SubRegIndex;
+def subreg_odd : SubRegIndex;
+}
+
+// General-purpose registers
+def R0W : GPR32< 0, "r0">;
+def R1W : GPR32< 1, "r1">;
+def R2W : GPR32< 2, "r2">;
+def R3W : GPR32< 3, "r3">;
+def R4W : GPR32< 4, "r4">;
+def R5W : GPR32< 5, "r5">;
+def R6W : GPR32< 6, "r6">;
+def R7W : GPR32< 7, "r7">;
+def R8W : GPR32< 8, "r8">;
+def R9W : GPR32< 9, "r9">;
+def R10W : GPR32<10, "r10">;
+def R11W : GPR32<11, "r11">;
+def R12W : GPR32<12, "r12">;
+def R13W : GPR32<13, "r13">;
+def R14W : GPR32<14, "r14">;
+def R15W : GPR32<15, "r15">;
+
+let SubRegIndices = [subreg_32bit] in {
+def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
+def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
+def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
+def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>;
+def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>;
+def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>;
+def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>;
+def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>;
+def R8D : GPR64< 8, "r8", [R8W]>, DwarfRegNum<[8]>;
+def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>;
+def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
+def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
+def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
+def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
+def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
+def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+}
+
+// Register pairs
+let SubRegIndices = [subreg_32bit, subreg_odd32] in {
+def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>;
+def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>;
+def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>;
+def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>;
+def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>;
+def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>;
+def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>;
+def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>;
+}
+
+let SubRegIndices = [subreg_even, subreg_odd],
+ CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
+def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>;
+def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>;
+def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>;
+def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>;
+def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>;
+def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>;
+def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>;
+def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>;
+}
+
+// Floating-point registers
+def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
+def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>;
+def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>;
+def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>;
+def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>;
+def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>;
+def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>;
+def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>;
+def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>;
+def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>;
+def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>;
+def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>;
+def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>;
+def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
+def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
+def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
+
+let SubRegIndices = [subreg_32bit] in {
+def F0L : FPRL< 0, "f0", [F0S]>;
+def F1L : FPRL< 1, "f1", [F1S]>;
+def F2L : FPRL< 2, "f2", [F2S]>;
+def F3L : FPRL< 3, "f3", [F3S]>;
+def F4L : FPRL< 4, "f4", [F4S]>;
+def F5L : FPRL< 5, "f5", [F5S]>;
+def F6L : FPRL< 6, "f6", [F6S]>;
+def F7L : FPRL< 7, "f7", [F7S]>;
+def F8L : FPRL< 8, "f8", [F8S]>;
+def F9L : FPRL< 9, "f9", [F9S]>;
+def F10L : FPRL<10, "f10", [F10S]>;
+def F11L : FPRL<11, "f11", [F11S]>;
+def F12L : FPRL<12, "f12", [F12S]>;
+def F13L : FPRL<13, "f13", [F13S]>;
+def F14L : FPRL<14, "f14", [F14S]>;
+def F15L : FPRL<15, "f15", [F15S]>;
+}
+
+// Status register
+def PSW : SystemZReg<"psw">;
+
+/// Register classes.
+/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5),
+ (sequence "R%uW", 15, 6))>;
+
+/// Registers used to generate addresses. Everything except R0.
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
+
+def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))> {
+ let SubRegClasses = [(GR32 subreg_32bit)];
+}
+
+def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
+ let SubRegClasses = [(ADDR32 subreg_32bit)];
+}
+
+// Even-odd register pairs
+def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
+ R12P, R10P, R8P, R6P,
+ R14P)> {
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
+}
+
+def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
+ R12Q, R10Q, R8Q, R6Q,
+ R14Q)> {
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
+ (GR64 subreg_even, subreg_odd)];
+}
+
+def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
+
+def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
+ let SubRegClasses = [(FP32 subreg_32bit)];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..3eabcd24c598
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZTargetMachine.h"
+using namespace llvm;
+
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 000000000000..1450401d0403
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+ ~SystemZSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 000000000000..b3ed06639758
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,54 @@
+//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "SystemZ.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "z9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
+
+/// True if accessing the GV requires an extra load.
+bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // An extra load is needed for all externally visible globals.
+ if (isDirectCall)
+ return false;
+
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 000000000000..55cfd80002bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,48 @@
+//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
+#define LLVM_TARGET_SystemZ_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ bool HasZ10Insts;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ ///
+ SystemZSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isZ10() const { return HasZ10Insts; }
+
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 000000000000..48298cc744e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,43 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "SystemZ.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+ // Register the target.
+ RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+}
+
+/// SystemZTargetMachine ctor - Create an LP64 architecture model
+///
+SystemZTargetMachine::SystemZTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) {
+
+ if (getRelocationModel() == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+}
+
+bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSystemZISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 000000000000..e40b556c0c3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,67 @@
+//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+
+#include "SystemZInstrInfo.h"
+#include "SystemZISelLowering.h"
+#include "SystemZFrameLowering.h"
+#include "SystemZSelectionDAGInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// SystemZTargetMachine
+///
+class SystemZTargetMachine : public LLVMTargetMachine {
+ SystemZSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+public:
+ SystemZTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const SystemZRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const SystemZTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // SystemZTargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 000000000000..8272b1188201
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSystemZTarget;
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+ RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ");
+}
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
new file mode 100644
index 000000000000..a42ce548c895
--- /dev/null
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -0,0 +1,102 @@
+//===-- Target.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMTarget.a, which implements target information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeTarget(PassRegistry &Registry) {
+ initializeTargetDataPass(Registry);
+ initializeTargetLibraryInfoPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+ initializeTarget(*unwrap(R));
+}
+
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
+ return wrap(new TargetData(StringRep));
+}
+
+void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetData(*unwrap(TD)));
+}
+
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
+ std::string StringRep = unwrap(TD)->getStringRepresentation();
+ return strdup(StringRep.c_str());
+}
+
+LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
+ return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian;
+}
+
+unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
+ return unwrap(TD)->getPointerSize();
+}
+
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
+}
+
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
+}
+
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeStoreSize(unwrap(Ty));
+}
+
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeAllocSize(unwrap(Ty));
+}
+
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
+ LLVMValueRef GlobalVar) {
+ return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+}
+
+unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned long long Offset) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
+}
+
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned Element) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
+}
+
+void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
+ delete unwrap(TD);
+}
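+
+// A minimal usage sketch of the C bindings above (layout string chosen purely
+// for illustration; error handling omitted):
+//
+//   LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:64:64:64");
+//   unsigned PtrBytes = LLVMPointerSize(TD);   // 8
+//   LLVMDisposeTargetData(TD);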
diff --git a/contrib/llvm/lib/Target/TargetAsmInfo.cpp b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 000000000000..a97b0e868989
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,23 @@
+//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
+ TLOF = &TM.getTargetLowering()->getObjFileLowering();
+ TFI = TM.getFrameLowering();
+ TRI = TM.getRegisterInfo();
+ TFI->getInitialFrameState(InitialFrameState);
+}
diff --git a/contrib/llvm/lib/Target/TargetAsmLexer.cpp b/contrib/llvm/lib/Target/TargetAsmLexer.cpp
new file mode 100644
index 000000000000..d4893ff2521e
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetAsmLexer.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/Target/TargetAsmLexer.cpp - Target Assembly Lexer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmLexer.h"
+using namespace llvm;
+
+TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {}
+TargetAsmLexer::~TargetAsmLexer() {}
diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp
new file mode 100644
index 000000000000..17d022a339e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetData.cpp
@@ -0,0 +1,589 @@
+//===-- TargetData.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct, and then passed around by const&. None of the member functions
+// require modification of the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetData.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+
+// Register the TargetData pass.
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
+char TargetData::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+ assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ const Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
+}
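+
+// Worked example: with ABI alignments of 1, 4 and 2 bytes for i8, i32 and
+// i16, the struct { i8, i32, i16 } gets member offsets 0, 4 and 8, a raw size
+// of 10 bytes, and a final StructSize of 12 after padding to the 4-byte
+// struct alignment.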
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
+
+//===----------------------------------------------------------------------===//
+// TargetAlignElem, TargetAlign support
+//===----------------------------------------------------------------------===//
+
+TargetAlignElem
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ TargetAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const TargetAlignElem TargetData::InvalidAlignmentElem =
+ TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// TargetData Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// getInt - Get an integer ignoring errors.
+static unsigned getInt(StringRef R) {
+ unsigned Result = 0;
+ R.getAsInteger(10, Result);
+ return Result;
+}
+
+void TargetData::init(StringRef Desc) {
+ initializeTargetDataPass(*PassRegistry::getPassRegistry());
+
+ LayoutMap = 0;
+ LittleEndian = false;
+ PointerMemSize = 8;
+ PointerABIAlign = 8;
+ PointerPrefAlign = PointerABIAlign;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+
+ while (!Desc.empty()) {
+ std::pair<StringRef, StringRef> Split = Desc.split('-');
+ StringRef Token = Split.first;
+ Desc = Split.second;
+
+ if (Token.empty())
+ continue;
+
+ Split = Token.split(':');
+ StringRef Specifier = Split.first;
+ Token = Split.second;
+
+ assert(!Specifier.empty() && "Can't be empty here");
+
+ switch (Specifier[0]) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p':
+ Split = Token.split(':');
+ PointerMemSize = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerABIAlign = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerPrefAlign = getInt(Split.first) / 8;
+ if (PointerPrefAlign == 0)
+ PointerPrefAlign = PointerABIAlign;
+ break;
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum AlignType;
+ switch (Specifier[0]) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
+ }
+ unsigned Size = getInt(Specifier.substr(1));
+ Split = Token.split(':');
+ unsigned ABIAlign = getInt(Split.first) / 8;
+
+ Split = Split.second.split(':');
+ unsigned PrefAlign = getInt(Split.first) / 8;
+ if (PrefAlign == 0)
+ PrefAlign = ABIAlign;
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+ break;
+ }
+ case 'n': // Native integer types.
+ Specifier = Specifier.substr(1);
+ do {
+ if (unsigned Width = getInt(Specifier))
+ LegalIntWidths.push_back(Width);
+ Split = Token.split(':');
+ Specifier = Split.first;
+ Token = Split.second;
+ } while (!Specifier.empty() || !Token.empty());
+ break;
+
+ default:
+ break;
+ }
+ }
+}
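+
+// A minimal usage sketch (illustrative only; the layout string is an example,
+// not taken from any particular target): feeding a little-endian 64-bit
+// description through the parser above and querying a few of the parsed values.
+//
+//   TargetData TD("e-p:64:64:64-i64:64:64-f80:128:128-n8:16:32:64");
+//   assert(TD.isLittleEndian());
+//   assert(TD.getPointerSize() == 8);          // bytes, from "p:64:..."
+//   assert(TD.getPointerABIAlignment() == 8);  // bytes, from "p:...:64:..."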
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetData::TargetData() : ImmutablePass(ID) {
+ report_fatal_error("Bad TargetData ctor used. "
+ "Tool did not specify a TargetData to use?");
+}
+
+TargetData::TargetData(const Module *M)
+ : ImmutablePass(ID) {
+ init(M->getDataLayout());
+}
+
+void
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the target wants for the specified datatype.
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ const Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+ // If we didn't find an integer alignment, fall back on most conservative.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ // If the alignment is not a power of 2, round up to the next power of 2.
+ // This happens for non-power-of-2 length vectors.
+ if (Align & (Align-1))
+ Align = llvm::NextPowerOf2(Align);
+ return Align;
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
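+
+// Illustrative behavior of the fallback logic above, assuming only the default
+// table from init(): a query for i24 has no exact entry, so the smallest wider
+// integer entry (i32) is used; a query for i128 is wider than every entry, so
+// it falls back to the largest integer entry (i64).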
+
+namespace {
+
+class StructLayoutMap {
+ typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
+ LayoutInfoTy LayoutInfo;
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+ I != E; ++I) {
+ StructLayout *Value = I->second;
+ Value->~StructLayout();
+ free(Value);
+ }
+ }
+
+ StructLayout *&operator[](const StructType *STy) {
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end anonymous namespace
+
+TargetData::~TargetData() {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+}
+
+const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+ if (!LayoutMap)
+ LayoutMap = new StructLayoutMap();
+
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to the layout map, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+
+ return L;
+}
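+
+// Usage sketch (illustrative; assumes an LLVMContext Ctx and a TargetData TD
+// carrying the default alignments above):
+//
+//   Type *Elts[] = { Type::getInt8Ty(Ctx), Type::getInt32Ty(Ctx) };
+//   StructType *STy = StructType::get(Ctx, Elts);
+//   const StructLayout *SL = TD.getStructLayout(STy);
+//   uint64_t Size = SL->getSizeInBytes();     // 8: i8 at 0, i32 padded to 4
+//   uint64_t Off  = SL->getElementOffset(1);  // 4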
+
+std::string TargetData::getStringRepresentation() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E")
+ << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
+ << ':' << PointerPrefAlign*8;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const TargetAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
+ }
+ return OS.str();
+}
+
+
+uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return getPointerSizeInBits();
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
+ }
+ case Type::StructTyID:
+ // Get the layout annotation... which is lazily created on demand.
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ case Type::IntegerTyID:
+ return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::VoidTyID:
+ return 8;
+ case Type::FloatTyID:
+ return 32;
+ case Type::DoubleTyID:
+ case Type::X86_MMXTyID:
+ return 64;
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ return 128;
+ // In memory objects this is always aligned to a higher boundary, but
+ // only 80 bits contain information.
+ case Type::X86_FP80TyID:
+ return 80;
+ case Type::VectorTyID:
+ return cast<VectorType>(Ty)->getBitWidth();
+ default:
+ llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
+ break;
+ }
+ return 0;
+}
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment()
+ : getPointerPrefAlignment());
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ case Type::VoidTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::X86_MMXTyID:
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ llvm_unreachable("Bad type for getAlignment!!!");
+ break;
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
+
+unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+/// an integer type of the specified bitwidth.
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+ return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
+}
+
+
+unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an integer type that is the same width as the
+/// target's pointer type.
+IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
+ return IntegerType::get(C, getPointerSizeInBits());
+}
+
+
+uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
+ unsigned NumIndices) const {
+ const Type *Ty = ptrTy;
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
+ for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
+ if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() ==
+ Type::getInt32Ty(ptrTy->getContext()) &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
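+
+// Worked example (illustrative): for a GEP such as
+//   getelementptr { i32, [4 x i16] }* %p, i32 0, i32 1, i32 2
+// the loop above computes 4 (offset of field 1) + 2 * 2 (third i16 element),
+// i.e. 8 bytes, given the default alignments from init().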
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
+ const Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ unsigned GVAlignment = GV->getAlignment();
+ if (GVAlignment >= Alignment) {
+ Alignment = GVAlignment;
+ } else if (GVAlignment != 0) {
+ Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+ }
+
+ if (GV->hasInitializer() && GVAlignment == 0) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
new file mode 100644
index 000000000000..a661ee9c0c65
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
@@ -0,0 +1,25 @@
+//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetELFWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
+ is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
+}
+
+TargetELFWriterInfo::~TargetELFWriterInfo() {}
+
diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
new file mode 100644
index 000000000000..19fd581c7dd5
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Default is to do nothing.
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
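+
+// Worked example (illustrative numbers): for an object at offset -8 in a frame
+// with a stack size of 32, a local-area offset of 0 and no offset adjustment,
+// the displacement returned above is -8 + 32 - 0 + 0 = 24.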
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
diff --git a/contrib/llvm/lib/Target/TargetInstrInfo.cpp b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..d52ecb32cf75
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
@@ -0,0 +1,174 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo
+//===----------------------------------------------------------------------===//
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(*Str)) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
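+
+// Illustrative sketch (the maximum instruction length is a made-up value): for
+// the string "movl %eax, %ebx\n\taddl $1, %ebx", with newline as the separator
+// and an MCAsmInfo reporting a maximum instruction length of 4, the loop above
+// counts two instructions and returns 8.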
diff --git a/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
new file mode 100644
index 000000000000..e049a1d3b62f
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -0,0 +1,30 @@
+//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetIntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+TargetIntrinsicInfo::TargetIntrinsicInfo() {
+}
+
+TargetIntrinsicInfo::~TargetIntrinsicInfo() {
+}
+
+unsigned TargetIntrinsicInfo::getIntrinsicID(Function *F) const {
+ const ValueName *ValName = F->getValueName();
+ if (!ValName)
+ return 0;
+ return lookupName(ValName->getKeyData(), ValName->getKeyLength());
+}
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
new file mode 100644
index 000000000000..709dfd283f98
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
@@ -0,0 +1,74 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+// Register the default implementation.
+INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfo::ID = 0;
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.getOS() == Triple::IOS) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+}
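+
+// Usage sketch (illustrative; the triple string is just an example): on a
+// modern Darwin triple memset_pattern16 remains available, so a client can
+// test for it before emitting a call:
+//
+//   TargetLibraryInfo TLI(Triple("x86_64-apple-darwin10"));
+//   if (TLI.has(LibFunc::memset_pattern16)) {
+//     // ... it is safe to form a call to memset_pattern16 here ...
+//   }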
+
+
+TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple());
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T);
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
+ : ImmutablePass(ID) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+}
+
+
+/// disableAllFunctions - This disables all builtins, which is used for options
+/// like -fno-builtin.
+void TargetLibraryInfo::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
new file mode 100644
index 000000000000..703431b3806e
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -0,0 +1,360 @@
+//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Code
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFile::TargetLoweringObjectFile() :
+ Ctx(0),
+ TextSection(0),
+ DataSection(0),
+ BSSSection(0),
+ ReadOnlySection(0),
+ StaticCtorSection(0),
+ StaticDtorSection(0),
+ LSDASection(0),
+ CompactUnwindSection(0),
+ DwarfAbbrevSection(0),
+ DwarfInfoSection(0),
+ DwarfLineSection(0),
+ DwarfFrameSection(0),
+ DwarfPubNamesSection(0),
+ DwarfPubTypesSection(0),
+ DwarfDebugInlineSection(0),
+ DwarfStrSection(0),
+ DwarfLocSection(0),
+ DwarfARangesSection(0),
+ DwarfRangesSection(0),
+ DwarfMacroInfoSection(0),
+ TLSExtraDataSection(0),
+ CommDirectiveSupportsAlignment(true),
+ SupportsWeakOmittedEHFrame(true),
+ IsFunctionEHFrameSymbolPrivate(true) {
+}
+
+TargetLoweringObjectFile::~TargetLoweringObjectFile() {
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV) {
+ const Constant *C = GV->getInitializer();
+
+ // Must have zero initializer.
+ if (!C->isNullValue())
+ return false;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared.
+ if (GV->isConstant())
+ return false;
+
+ // If the global has an explicit section specified, don't put it in BSS.
+ if (!GV->getSection().empty())
+ return false;
+
+ // If -nozero-initialized-in-bss is specified, don't ever use BSS.
+ if (NoZerosInBSS)
+ return false;
+
+ // Otherwise, put it in BSS!
+ return true;
+}
+
+/// IsNullTerminatedString - Return true if the specified constant (which is
+/// known to have a type that is an array of 1/2/4 byte elements) ends with a
+/// nul value and contains no other nuls in it.
+static bool IsNullTerminatedString(const Constant *C) {
+ const ArrayType *ATy = cast<ArrayType>(C->getType());
+
+ // First check: do we have a constant array of i8 terminated with a zero?
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
+ if (ATy->getNumElements() == 0) return false;
+
+ ConstantInt *Null =
+ dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
+ if (Null == 0 || !Null->isZero())
+ return false; // Not null terminated.
+
+ // Verify that the null doesn't occur anywhere else in the string.
+ for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i)
+ // Reject constantexpr elements etc.
+ if (!isa<ConstantInt>(CVA->getOperand(i)) ||
+ CVA->getOperand(i) == Null)
+ return false;
+ return true;
+ }
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C))
+ return ATy->getNumElements() == 1;
+
+ return false;
+}
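+
+// For example (illustrative IR): the initializer c"hello\00" (a [6 x i8] array
+// whose last element is zero and which contains no interior nul) is accepted;
+// c"he\00llo\00" is rejected because of the embedded nul; and
+// [1 x i8] zeroinitializer is accepted by the second check.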
+
+MCSymbol *TargetLoweringObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+}
+
+
+/// getKindForGlobal - This is a top-level target-independent classifier for
+/// a global variable. Given a global variable and information from TM, it
+/// classifies the global in a variety of ways that make various target
+/// implementations simpler. The target implementation is free to ignore this
+/// extra info of course.
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
+ const TargetMachine &TM){
+ assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() &&
+ "Can only be used for global definitions");
+
+ Reloc::Model ReloModel = TM.getRelocationModel();
+
+ // Early exit - functions should always be in text sections.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar == 0)
+ return SectionKind::getText();
+
+ // Handle thread-local data first.
+ if (GVar->isThreadLocal()) {
+ if (isSuitableForBSS(GVar))
+ return SectionKind::getThreadBSS();
+ return SectionKind::getThreadData();
+ }
+
+ // Variables with common linkage always get classified as common.
+ if (GVar->hasCommonLinkage())
+ return SectionKind::getCommon();
+
+ // This variable can easily be put into a BSS section.
+ if (isSuitableForBSS(GVar)) {
+ if (GVar->hasLocalLinkage())
+ return SectionKind::getBSSLocal();
+ else if (GVar->hasExternalLinkage())
+ return SectionKind::getBSSExtern();
+ return SectionKind::getBSS();
+ }
+
+ const Constant *C = GVar->getInitializer();
+
+ // If the global is marked constant, we can put it into a mergable section,
+ // a mergable string section, or general .data if it contains relocations.
+ if (GVar->isConstant()) {
+ // If the initializer for the global contains something that requires a
+ // relocation, then we may have to drop this into a writable data section
+ // even though it is marked const.
+ switch (C->getRelocationInfo()) {
+ default: assert(0 && "unknown relocation info kind");
+ case Constant::NoRelocation:
+ // If the global is required to have a unique address, it can't be put
+ // into a mergable section: just drop it into the general read-only
+ // section instead.
+ if (!GVar->hasUnnamedAddr())
+ return SectionKind::getReadOnly();
+
+ // If initializer is a null-terminated string, put it in a "cstring"
+ // section of the right width.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (const IntegerType *ITy =
+ dyn_cast<IntegerType>(ATy->getElementType())) {
+ if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
+ ITy->getBitWidth() == 32) &&
+ IsNullTerminatedString(C)) {
+ if (ITy->getBitWidth() == 8)
+ return SectionKind::getMergeable1ByteCString();
+ if (ITy->getBitWidth() == 16)
+ return SectionKind::getMergeable2ByteCString();
+
+ assert(ITy->getBitWidth() == 32 && "Unknown width");
+ return SectionKind::getMergeable4ByteCString();
+ }
+ }
+ }
+
+ // Otherwise, just drop it into a mergable constant section. If we have
+ // a section for this size, use it, otherwise use the arbitrary sized
+ // mergable section.
+ switch (TM.getTargetData()->getTypeAllocSize(C->getType())) {
+ case 4: return SectionKind::getMergeableConst4();
+ case 8: return SectionKind::getMergeableConst8();
+ case 16: return SectionKind::getMergeableConst16();
+ default: return SectionKind::getMergeableConst();
+ }
+
+ case Constant::LocalRelocation:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel.local section.
+ return SectionKind::getReadOnlyWithRelLocal();
+
+ case Constant::GlobalRelocations:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel section.
+ return SectionKind::getReadOnlyWithRel();
+ }
+ }
+
+ // Okay, this isn't a constant. If the initializer for the global is going
+ // to require a runtime relocation by the dynamic linker, put it into a more
+ // specific section to improve startup time of the app. This coalesces these
+ // globals together onto fewer pages, improving the locality of the dynamic
+ // linker.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getDataNoRel();
+
+ switch (C->getRelocationInfo()) {
+ default: assert(0 && "unknown relocation info kind");
+ case Constant::NoRelocation:
+ return SectionKind::getDataNoRel();
+ case Constant::LocalRelocation:
+ return SectionKind::getDataRelLocal();
+ case Constant::GlobalRelocations:
+ return SectionKind::getDataRel();
+ }
+}
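+
+// Illustrative classifications (hypothetical globals):
+//   @s = private unnamed_addr constant [6 x i8] c"hello\00"  -> Mergeable1ByteCString
+//   @z = internal global [256 x i8] zeroinitializer          -> BSSLocal
+//   @p = constant i32* @g (a reference to another global)    -> ReadOnly under the
+//        static relocation model, ReadOnlyWithRel otherwise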
+
+/// SectionForGlobal - This method computes the appropriate section to emit
+/// the specified global variable or function definition. This should not
+/// be passed external (or available externally) globals.
+const MCSection *TargetLoweringObjectFile::
+SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang,
+ const TargetMachine &TM) const {
+ // Select section name.
+ if (GV->hasSection())
+ return getExplicitSectionGlobal(GV, Kind, Mang, TM);
+
+
+ // Use default section depending on the 'type' of global
+ return SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+
+// Lame default implementation. Calculate the section name for global.
+const MCSection *
+TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const{
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isBSS() && BSSSection != 0)
+ return BSSSection;
+
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return getDataSection();
+}
+
+/// getSectionForConstant - Given a mergable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *
+TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return DataSection;
+}
+
+/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
+/// reference to the specified global variable from exception
+/// handling information.
+const MCExpr *TargetLoweringObjectFile::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ return getExprForDwarfReference(Sym, Encoding, Streamer);
+}
+
+const MCExpr *TargetLoweringObjectFile::
+getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
+
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ // Do nothing special
+ return Res;
+ case dwarf::DW_EH_PE_pcrel: {
+ // Emit a label to the streamer for the current position. This gives us
+ // .-foo addressing.
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ }
+ }
+}
+
+unsigned TargetLoweringObjectFile::getPersonalityEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+unsigned TargetLoweringObjectFile::getTTypeEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
new file mode 100644
index 000000000000..74a1f4e8da56
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -0,0 +1,315 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool LessPreciseFPMADOption;
+ bool PrintMachineCode;
+ bool NoFramePointerElim;
+ bool NoFramePointerElimNonLeaf;
+ bool NoExcessFPPrecision;
+ bool UnsafeFPMath;
+ bool NoInfsFPMath;
+ bool NoNaNsFPMath;
+ bool HonorSignDependentRoundingFPMathOption;
+ bool UseSoftFloat;
+ FloatABI::ABIType FloatABIType;
+ bool NoImplicitFloat;
+ bool NoZerosInBSS;
+ bool JITExceptionHandling;
+ bool JITEmitDebugInfo;
+ bool JITEmitDebugInfoToDisk;
+ Reloc::Model RelocationModel;
+ CodeModel::Model CMModel;
+ bool GuaranteedTailCallOpt;
+ unsigned StackAlignmentOverride;
+ bool RealignStack;
+ bool DisableJumpTables;
+ bool StrongPHIElim;
+ bool HasDivModLibcall;
+ bool AsmVerbosityDefault(false);
+}
+
+static cl::opt<bool, true>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::location(PrintMachineCode), cl::init(false));
+static cl::opt<bool, true>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::location(NoFramePointerElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::location(NoFramePointerElimNonLeaf),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::location(NoExcessFPPrecision),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::location(LessPreciseFPMADOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::location(UnsafeFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::location(NoInfsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::location(NoNaNsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::location(HonorSignDependentRoundingFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::location(UseSoftFloat),
+ cl::init(false));
+static cl::opt<llvm::FloatABI::ABIType, true>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::location(FloatABIType),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::location(NoZerosInBSS),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableJITExceptionHandling("jit-enable-eh",
+ cl::desc("Emit exception handling information"),
+ cl::location(JITExceptionHandling),
+ cl::init(false));
+// In debug builds, make this default to true.
+#ifdef NDEBUG
+#define EMIT_DEBUG false
+#else
+#define EMIT_DEBUG true
+#endif
+static cl::opt<bool, true>
+EmitJitDebugInfo("jit-emit-debug",
+ cl::desc("Emit debug information to debugger"),
+ cl::location(JITEmitDebugInfo),
+ cl::init(EMIT_DEBUG));
+#undef EMIT_DEBUG
+static cl::opt<bool, true>
+EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
+ cl::Hidden,
+ cl::desc("Emit debug info objfiles to disk"),
+ cl::location(JITEmitDebugInfoToDisk),
+ cl::init(false));
+
+static cl::opt<llvm::Reloc::Model, true>
+DefRelocationModel("relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::location(RelocationModel),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ "Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+static cl::opt<llvm::CodeModel::Model, true>
+DefCodeModel("code-model",
+ cl::desc("Choose code model"),
+ cl::location(CMModel),
+ cl::init(CodeModel::Default),
+ cl::values(
+ clEnumValN(CodeModel::Default, "default",
+ "Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ "Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ "Large code model"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::location(GuaranteedTailCallOpt),
+ cl::init(false));
+static cl::opt<unsigned, true>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::location(StackAlignmentOverride),
+ cl::init(0));
+static cl::opt<bool, true>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::location(RealignStack),
+ cl::init(true));
+static cl::opt<bool, true>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::location(DisableJumpTables),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
+ cl::desc("Use strong PHI elimination."),
+ cl::location(StrongPHIElim),
+ cl::init(false));
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+static cl::opt<bool>
+DataSections("fdata-sections",
+ cl::desc("Emit data into separate sections"),
+ cl::init(false));
+static cl::opt<bool>
+FunctionSections("ffunction-sections",
+ cl::desc("Emit functions into separate sections"),
+ cl::init(false));
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS)
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0),
+ MCRelaxAll(false),
+ MCNoExecStack(false),
+ MCSaveTempLabels(false),
+ MCUseLoc(true),
+ MCUseCFI(true) {
+ // Typically it will be subtargets that will adjust FloatABIType from Default
+ // to Soft or Hard.
+ if (UseSoftFloat)
+ FloatABIType = FloatABI::Soft;
+}
+
+TargetMachine::~TargetMachine() {
+ delete AsmInfo;
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() {
+ return RelocationModel;
+}
+
+/// setRelocationModel - Sets the code generation relocation model.
+void TargetMachine::setRelocationModel(Reloc::Model Model) {
+ RelocationModel = Model;
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() {
+ return CMModel;
+}
+
+/// setCodeModel - Sets the code model.
+void TargetMachine::setCodeModel(CodeModel::Model Model) {
+ CMModel = Model;
+}
+
+bool TargetMachine::getAsmVerbosityDefault() {
+ return AsmVerbosityDefault;
+}
+
+void TargetMachine::setAsmVerbosityDefault(bool V) {
+ AsmVerbosityDefault = V;
+}
+
+bool TargetMachine::getFunctionSections() {
+ return FunctionSections;
+}
+
+bool TargetMachine::getDataSections() {
+ return DataSections;
+}
+
+void TargetMachine::setFunctionSections(bool V) {
+ FunctionSections = V;
+}
+
+void TargetMachine::setDataSections(bool V) {
+ DataSections = V;
+}
+
+namespace llvm {
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ bool DisableFramePointerElim(const MachineFunction &MF) {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+ }
+
+ /// LessPreciseFPMAD - This returns true when the -enable-fp-mad option is
+ /// specified on the command line. When this flag is off (the default), the
+ /// code generator is not allowed to generate mad (multiply-add) if the
+ /// result is "less precise" than doing those operations individually.
+ bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
+
+ /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+ /// that the rounding mode of the FPU can change from its default.
+ bool HonorSignDependentRoundingFPMath() {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ }
+
+ /// getTrapFunctionName - If this returns a non-empty string, this means isel
+ /// should lower Intrinsic::trap to a call to the specified function name
+ /// instead of an ISD::TRAP node.
+ StringRef getTrapFunctionName() {
+ return TrapFuncName;
+ }
+}
diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..90a8f8d8fdcc
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
@@ -0,0 +1,141 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *subregindexnames)
+ : InfoDesc(ID), SubRegIndexNames(subregindexnames),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+/// getMinimalPhysRegClass - Returns the register class of a physical
+/// register of the given type, picking the most specific (smallest) sub
+/// register class of the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Set the bits that represent the allocatable
+/// registers for the specified register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ getAllocatableSetForRC(MF, RC, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+const TargetRegisterClass *
+llvm::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) {
+ // First take care of the trivial cases
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // If B is a subclass of A, it will be handled in the loop below
+ if (B->hasSubClass(A))
+ return A;
+
+ const TargetRegisterClass *Best = 0;
+ for (TargetRegisterClass::sc_iterator I = A->subclasses_begin();
+ const TargetRegisterClass *X = *I; ++I) {
+ if (X == B)
+ return B; // B is a subclass of A
+
+ // X must be a common subclass of A and B
+ if (!B->hasSubClass(X))
+ continue;
+
+ // A superclass is definitely better.
+ if (!Best || Best->hasSuperClass(X)) {
+ Best = X;
+ continue;
+ }
+
+ // A subclass is definitely worse
+ if (Best->hasSubClass(X))
+ continue;
+
+ // Best and X have no super/sub class relation - pick the larger class, or
+ // the smaller spill size.
+ int nb = std::distance(Best->begin(), Best->end());
+ int ni = std::distance(X->begin(), X->end());
+ if (ni>nb || (ni==nb && X->getSize() < Best->getSize()))
+ Best = X;
+ }
+ return Best;
+}
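+
+// Illustrative example (X86 names, assuming the usual generated register
+// classes): getCommonSubClass(&X86::GR32RegClass, &X86::GR32_ABCDRegClass)
+// returns GR32_ABCD, since it is a subclass of GR32 and is therefore the
+// largest class whose registers belong to both.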
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
new file mode 100644
index 000000000000..59ffdea00ea6
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
@@ -0,0 +1,33 @@
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtargetInfo Class
+//
+TargetSubtargetInfo::TargetSubtargetInfo() {}
+
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
+
+bool TargetSubtargetInfo::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
new file mode 100644
index 000000000000..ec73087a3305
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -0,0 +1,165 @@
+//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "X86.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86AsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ bool tentativeIsValid;
+ AsmToken tentativeToken;
+
+ const AsmToken &lexTentative() {
+ tentativeToken = getLexer()->Lex();
+ tentativeIsValid = true;
+ return tentativeToken;
+ }
+
+ const AsmToken &lexDefinite() {
+ if (tentativeIsValid) {
+ tentativeIsValid = false;
+ return tentativeToken;
+ }
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenATT();
+ AsmToken LexTokenIntel();
+protected:
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenATT();
+ case 1:
+ return LexTokenIntel();
+ }
+ }
+public:
+ X86AsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
+ }
+};
+
+} // end anonymous namespace
+
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
+
+AsmToken X86AsmLexer::LexTokenATT() {
+ AsmToken lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return lexedToken;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return lexedToken;
+
+ case AsmToken::Percent: {
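+    // A '%' may begin a register reference. Peek at the next token and, if it
+    // names a register (or a db[0-7] debug-register alias), fuse the two
+    // tokens into a single Register token.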
+ const AsmToken &nextToken = lexTentative();
+ if (nextToken.getKind() != AsmToken::Identifier)
+ return lexedToken;
+
+
+ if (unsigned regID = MatchRegisterName(nextToken.getString())) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(regID));
+ }
+
+      // Matching the register name failed. If this is "db[0-7]", match it as
+      // an alias for dr[0-7].
+ if (nextToken.getString().size() == 3 &&
+ nextToken.getString().startswith("db")) {
+ int RegNo = -1;
+ switch (nextToken.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != -1) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(RegNo));
+ }
+ }
+
+
+ return lexedToken;
+ }
+ }
+}
+
+AsmToken X86AsmLexer::LexTokenIntel() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch(lexedToken.getKind()) {
+ default:
+ return lexedToken;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return lexedToken;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ return lexedToken;
+ }
+ }
+}
+
+extern "C" void LLVMInitializeX86AsmLexer() {
+ RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
+ RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
+}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
new file mode 100644
index 000000000000..d45dd352fbc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -0,0 +1,1141 @@
+//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmParser.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+struct X86Operand;
+
+class X86ATTAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ X86Operand *ParseOperand();
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
+  /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
+ bool isSrcOp(X86Operand &Op);
+
+ /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
+ /// or %es:(%edi) in 32bit mode.
+ bool isDstOp(X86Operand &Op);
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : TargetAsmParser(), STI(sti), Parser(parser) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+} // end anonymous namespace
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+namespace {
+
+/// X86Operand - Instances of this class represent a parsed X86 machine
+/// instruction.
+struct X86Operand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNo;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ } Mem;
+ };
+
+ X86Operand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End) {}
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ virtual void print(raw_ostream &OS) const {}
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+ void setTokenValue(StringRef Value) {
+ assert(Kind == Token && "Invalid access!");
+ Tok.Data = Value.data();
+ Tok.Length = Value.size();
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNo;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getMemDisp() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Disp;
+ }
+ unsigned getMemSegReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.SegReg;
+ }
+ unsigned getMemBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.BaseReg;
+ }
+ unsigned getMemIndexReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.IndexReg;
+ }
+ unsigned getMemScale() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Scale;
+ }
+
+ bool isToken() const {return Kind == Token; }
+
+ bool isImm() const { return Kind == Immediate; }
+
+ bool isImmSExti16i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+
+ bool isMem() const { return Kind == Memory; }
+
+ bool isAbsMem() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1;
+ }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
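+  // Memory operands are flattened into the standard five MCOperands: base
+  // register, scale, index register, displacement, and segment register.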
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 5) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+ Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ addExpr(Inst, getMemDisp());
+ Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ }
+
+ void addAbsMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 1) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ }
+
+ static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
+ X86Operand *Res = new X86Operand(Token, Loc, Loc);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ return Res;
+ }
+
+ static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
+ X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
+ Res->Reg.RegNo = RegNo;
+ return Res;
+ }
+
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
+ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
+ Res->Imm.Val = Val;
+ return Res;
+ }
+
+ /// Create an absolute memory operand.
+ static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = 0;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = 0;
+ Res->Mem.IndexReg = 0;
+ Res->Mem.Scale = 1;
+ return Res;
+ }
+
+ /// Create a generalized memory operand.
+ static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
+ // We should never just have a displacement, that should be parsed as an
+ // absolute memory operand.
+ assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+
+ // The scale should always be one of {1,2,4,8}.
+ assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+ "Invalid scale!");
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = SegReg;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = BaseReg;
+ Res->Mem.IndexReg = IndexReg;
+ Res->Mem.Scale = Scale;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
+
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
+
+ return (Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
+
+ return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
+
+bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = 0;
+ const AsmToken &TokPercent = Parser.getTok();
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ StartLoc = TokPercent.getLoc();
+ Parser.Lex(); // Eat percent token.
+
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(Tok.getLoc(), "invalid register name");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ RegNo = MatchRegisterName(Tok.getString());
+
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
+ // can be also checked.
+ if (RegNo == X86::RIZ && !is64BitMode())
+ return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+
+ // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
+ RegNo = X86::ST0;
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat 'st'
+
+ // Check to see if we have '(4)' after %st.
+ if (getLexer().isNot(AsmToken::LParen))
+ return false;
+ // Lex the paren.
+ getParser().Lex();
+
+ const AsmToken &IntTok = Parser.getTok();
+ if (IntTok.isNot(AsmToken::Integer))
+ return Error(IntTok.getLoc(), "expected stack index");
+ switch (IntTok.getIntVal()) {
+ case 0: RegNo = X86::ST0; break;
+ case 1: RegNo = X86::ST1; break;
+ case 2: RegNo = X86::ST2; break;
+ case 3: RegNo = X86::ST3; break;
+ case 4: RegNo = X86::ST4; break;
+ case 5: RegNo = X86::ST5; break;
+ case 6: RegNo = X86::ST6; break;
+ case 7: RegNo = X86::ST7; break;
+ default: return Error(IntTok.getLoc(), "invalid stack index");
+ }
+
+ if (getParser().Lex().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat ')'
+ return false;
+ }
+
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
+ if (RegNo == 0)
+ return Error(Tok.getLoc(), "invalid register name");
+
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat identifier token.
+ return false;
+}
+
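+/// ParseOperand - Parse a single operand in AT&T syntax: "%reg" yields a
+/// register (or, when followed by ':', starts a segment-prefixed memory
+/// reference), "$expr" yields an immediate, and anything else is parsed as a
+/// memory operand.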
+X86Operand *X86ATTAsmParser::ParseOperand() {
+ switch (getLexer().getKind()) {
+ default:
+ // Parse a memory operand with no segment register.
+ return ParseMemOperand(0, Parser.getTok().getLoc());
+ case AsmToken::Percent: {
+ // Read the register.
+ unsigned RegNo;
+ SMLoc Start, End;
+ if (ParseRegister(RegNo, Start, End)) return 0;
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+
+ getParser().Lex(); // Eat the colon.
+ return ParseMemOperand(RegNo, Start);
+ }
+ case AsmToken::Dollar: {
+ // $42 -> immediate.
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ Parser.Lex();
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return 0;
+ return X86Operand::CreateImm(Val, Start, End);
+ }
+ }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
+/// has already been parsed if present.
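+/// For example, "8(%ebx,%ecx,4)" yields Disp=8, BaseReg=EBX, IndexReg=ECX and
+/// Scale=4; any segment register has already been consumed by the caller and
+/// is passed in as SegReg.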
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the '(' and see what is
+ // after it.
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getLexer().isNot(AsmToken::LParen)) {
+ SMLoc ExprEnd;
+ if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ } else {
+    // Okay, we have a '('. We don't know if this is an expression or not, so
+    // we have to eat the '(' to see beyond it.
+ SMLoc LParenLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '('.
+
+ if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+ SMLoc ExprEnd;
+
+      // It must be a parenthesized expression; parse it now.
+ if (getParser().ParseParenExpression(Disp, ExprEnd))
+ return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+ }
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Following the comma we should have either an index register, or a scale
+    // value. We don't support the latter form, but we want to parse it
+ // correctly.
+ //
+    // Note that even though it would be completely consistent to support syntax
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(IndexReg, L, L)) return 0;
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(),
+ "expected comma in scale expression");
+ return 0;
+ }
+ Parser.Lex(); // Eat the comma.
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t ScaleVal;
+ if (getParser().ParseAbsoluteExpression(ScaleVal))
+ return 0;
+
+ // Validate the scale amount.
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
+ Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+ return 0;
+ }
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ } else if (getLexer().isNot(AsmToken::RParen)) {
+      // A scale amount without an index register is ignored.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
+ return 0;
+ }
+ SMLoc MemEnd = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ MemStart, MemEnd);
+}
+
+bool X86ATTAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef PatchedName = Name;
+
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName.startswith("vcmp");
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Case("eq_uq", 8)
+ .Case("nge", 9)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ }
+ }
+ }
+
+ Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+
+ if (ExtraImmOp)
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ Name == "lock" || Name == "rep" ||
+ Name == "repe" || Name == "repz" ||
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
+
+ // Parse '*' modifier.
+ if (getLexer().is(AsmToken::Star)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("*", Loc));
+ Parser.Lex(); // Eat the star.
+ }
+
+ // Read the first operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
+
+ // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+ // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
+ // documented form in various unofficial manuals, so a lot of code uses it.
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+ if (Name.startswith("ins") && Operands.size() == 3 &&
+ (Name == "insb" || Name == "insw" || Name == "insl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+  // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
+ if (Name.startswith("outs") && Operands.size() == 3 &&
+ (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+ if (Name.startswith("movs") && Operands.size() == 3 &&
+ (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+ (is64BitMode() && Name == "movsq"))) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+ // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+ if (Name.startswith("lods") && Operands.size() == 3 &&
+ (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+ Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isSrcOp(*Op1) && Op2->isReg()) {
+ const char *ins;
+ unsigned reg = Op2->getReg();
+ bool isLods = Name == "lods";
+ if (reg == X86::AL && (isLods || Name == "lodsb"))
+ ins = "lodsb";
+ else if (reg == X86::AX && (isLods || Name == "lodsw"))
+ ins = "lodsw";
+ else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+ ins = "lodsl";
+ else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+ ins = "lodsq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+ // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+ if (Name.startswith("stos") && Operands.size() == 3 &&
+ (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+ Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isDstOp(*Op2) && Op1->isReg()) {
+ const char *ins;
+ unsigned reg = Op1->getReg();
+ bool isStos = Name == "stos";
+ if (reg == X86::AL && (isStos || Name == "stosb"))
+ ins = "stosb";
+ else if (reg == X86::AX && (isStos || Name == "stosw"))
+ ins = "stosw";
+ else if (reg == X86::EAX && (isStos || Name == "stosl"))
+ ins = "stosl";
+ else if (reg == X86::RAX && (isStos || Name == "stosq"))
+ ins = "stosq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+
+  // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl") || Name.startswith("sal") ||
+ Name.startswith("rcl") || Name.startswith("rcr") ||
+ Name.startswith("rol") || Name.startswith("ror")) &&
+ Operands.size() == 3) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
+
+ return false;
+}
+
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+  assert(!Operands.empty() && "Unexpected empty operand list!");
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+ // First, handle aliases that expand to multiple instructions.
+ // FIXME: This should be replaced with a real .td file alias mechanism.
+  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
+ // call.
+ if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+ Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+ Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+ Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ MCInst Inst;
+ Inst.setOpcode(X86::WAIT);
+ Out.EmitInstruction(Inst);
+
+ const char *Repl =
+ StringSwitch<const char*>(Op->getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(0);
+ assert(Repl && "Unknown wait-prefixed instruction");
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+ }
+
+ bool WasOriginallyInvalidOperand = false;
+ unsigned OrigErrorInfo;
+ MCInst Inst;
+
+ // First, try a direct match.
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
+
+ // FIXME: Ideally, we would only attempt suffix matches for things which are
+ // valid prefixes, and we could just infer the right unambiguous
+ // type. However, that requires substantially more matcher support than the
+ // following hack.
+
+ // Change the operand to point to a temporary token.
+ StringRef Base = Op->getToken();
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
+
+ // If this instruction starts with an 'f', then it is a floating point stack
+ // instruction. These come in up to three forms for 32-bit, 64-bit, and
+ // 80-bit floating point, which use the suffixes s,l,t respectively.
+ //
+ // Otherwise, we assume that this may be an integer instruction, which comes
+ // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
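+  //
+  // For example, an unsuffixed "add" is retried as "addb", "addw", "addl" and
+  // "addq"; if exactly one of those candidates matches, that form is emitted
+  // below.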
+ const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+
+ // Check for the various suffix matches.
+ Tmp[Base.size()] = Suffixes[0];
+ unsigned ErrorInfoIgnore;
+ MatchResultTy Match1, Match2, Match3, Match4;
+
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[1];
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[2];
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[3];
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+
+ // Restore the old token.
+ Op->setTokenValue(Base);
+
+ // If exactly one matched, then we treat that as a successful match (and the
+ // instruction will already have been filled in correctly, since the failing
+ // matches won't have modified it).
+ unsigned NumSuccessfulMatches =
+ (Match1 == Match_Success) + (Match2 == Match_Success) +
+ (Match3 == Match_Success) + (Match4 == Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ Out.EmitInstruction(Inst);
+ return false;
+ }
+
+ // Otherwise, the match failed, try to produce a decent error message.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (NumSuccessfulMatches > 1) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+ if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+ if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+ if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ return true;
+ }
+
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+ (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+ return true;
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ SMLoc ErrorLoc = IDLoc;
+ if (OrigErrorInfo != ~0U) {
+ if (OrigErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+ (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+ (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction");
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ return true;
+}
+
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
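+/// For example, ".word 1, 2, 3" emits three 2-byte values (Size is 2 here).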
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+
+
+
+extern "C" void LLVMInitializeX86AsmLexer();
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmParser() {
+ RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ LLVMInitializeX86AsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "X86GenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 000000000000..4a0d2ec727a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,560 @@
+//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+// MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+#include "X86GenEDInfo.inc"
+
+using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s) {
+ dbgs() << file << ":" << line << ": " << s;
+}
+
+#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
+
+namespace llvm {
+
+// Fill-ins to make the compiler happy. These constants are never actually
+// assigned; they are just filler to make an automatically-generated switch
+// statement work.
+namespace X86 {
+ enum {
+ BX_SI = 500,
+ BX_DI = 501,
+ BP_SI = 502,
+ BP_DI = 503,
+ sib = 504,
+ sib64 = 505
+ };
+}
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+}
+
+static bool translateInstruction(MCInst &target,
+ InternalInstruction &source);
+
+X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
+ MCDisassembler(),
+ fMode(mode) {
+}
+
+X86GenericDisassembler::~X86GenericDisassembler() {
+}
+
+EDInstInfo *X86GenericDisassembler::getEDInfo() const {
+ return instInfoX86;
+}
+
+/// regionReader - a callback function that wraps the readByte method from
+/// MemoryObject.
+///
+/// @param arg - The generic callback parameter. In this case, this should
+/// be a pointer to a MemoryObject.
+/// @param byte - A pointer to the byte to be read.
+/// @param address - The address to be read.
+static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
+ MemoryObject* region = static_cast<MemoryObject*>(arg);
+ return region->readByte(address, byte);
+}
+
+/// logger - a callback function that wraps the operator<< method from
+/// raw_ostream.
+///
+/// @param arg - The generic callback parameter. This should be a pointer
+/// to a raw_ostream.
+/// @param log - A string to be logged. logger() adds a newline.
+static void logger(void* arg, const char* log) {
+ if (!arg)
+ return;
+
+ raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
+ vStream << log << "\n";
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const {
+ InternalInstruction internalInstr;
+
+ int ret = decodeInstruction(&internalInstr,
+ regionReader,
+ (void*)&region,
+ logger,
+ (void*)&vStream,
+ address,
+ fMode);
+
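+  // A nonzero return from decodeInstruction means the decode failed; record
+  // how many bytes were consumed so the caller can resynchronize, and return
+  // false to indicate that no instruction was produced.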
+ if (ret) {
+ size = internalInstr.readerCursor - address;
+ return false;
+ }
+ else {
+ size = internalInstr.length;
+ return !translateInstruction(instr, internalInstr);
+ }
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+/// register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param reg - The Reg to append.
+static void translateRegister(MCInst &mcInst, Reg reg) {
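+  // Build a table mapping the decoder's Reg enumeration to LLVM register
+  // numbers by expanding ALL_REGS with the ENTRY macro, then index it with
+  // the decoded register.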
+#define ENTRY(x) X86::x,
+ uint8_t llvmRegnums[] = {
+ ALL_REGS
+ 0
+ };
+#undef ENTRY
+
+ uint8_t llvmRegnum = llvmRegnums[reg];
+ mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+}
+
+/// translateImmediate - Appends an immediate operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param immediate - The immediate value to append.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+ const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = operand.type;
+
+ if (type == TYPE_RELv) {
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 1:
+ type = TYPE_MOFFS8;
+ break;
+ case 2:
+ type = TYPE_MOFFS16;
+ break;
+ case 4:
+ type = TYPE_MOFFS32;
+ break;
+ case 8:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_MOFFS8:
+ case TYPE_REL8:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_MOFFS32:
+ case TYPE_REL32:
+ case TYPE_REL64:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction to extract the R/M field
+/// from.
+/// @return - false on success; true otherwise
+static bool translateRMRegister(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ debug("A R/M register operand may not have a SIB byte");
+ return true;
+ }
+
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected EA base register");
+ return true;
+ case EA_BASE_NONE:
+ debug("EA_BASE_NONE for ModR/M base");
+ return true;
+#define ENTRY(x) case EA_BASE_##x:
+ ALL_EA_BASES
+#undef ENTRY
+ debug("A R/M register operand may not have a base; "
+ "the operand must be a register.");
+ return true;
+#define ENTRY(x) \
+ case EA_REG_##x: \
+ mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+ ALL_REGS
+#undef ENTRY
+ }
+
+ return false;
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+/// fields of an internal instruction (and possibly its SIB byte) to a memory
+/// operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
+ // Addresses in an MCInst are represented as five operands:
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // SIB base
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // scale amount
+ // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
+ // the index (which is multiplied by the
+ // scale amount)
+ // 4. displacement (immediate) 0, or the displacement if there is one
+ // 5. segmentreg (register) x86_registerNONE for now, but could be set
+ // if we have segment overrides
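+  //
+  // For example, the AT&T operand "8(%eax,%ebx,4)" becomes basereg=EAX,
+  // scaleamount=4, indexreg=EBX, displacement=8, segmentreg=0.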
+
+ MCOperand baseReg;
+ MCOperand scaleAmount;
+ MCOperand indexReg;
+ MCOperand displacement;
+ MCOperand segmentReg;
+
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ if (insn.sibBase != SIB_BASE_NONE) {
+ switch (insn.sibBase) {
+ default:
+ debug("Unexpected sibBase");
+ return true;
+#define ENTRY(x) \
+ case SIB_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_SIB_BASES
+#undef ENTRY
+ }
+ } else {
+ baseReg = MCOperand::CreateReg(0);
+ }
+
+ if (insn.sibIndex != SIB_INDEX_NONE) {
+ switch (insn.sibIndex) {
+ default:
+ debug("Unexpected sibIndex");
+ return true;
+#define ENTRY(x) \
+ case SIB_INDEX_##x: \
+ indexReg = MCOperand::CreateReg(X86::x); break;
+ EA_BASES_32BIT
+ EA_BASES_64BIT
+#undef ENTRY
+ }
+ } else {
+ indexReg = MCOperand::CreateReg(0);
+ }
+
+ scaleAmount = MCOperand::CreateImm(insn.sibScale);
+ } else {
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ if (insn.eaDisplacement == EA_DISP_NONE) {
+ debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+ return true;
+ }
+ if (insn.mode == MODE_64BIT)
+ baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ else
+ baseReg = MCOperand::CreateReg(0);
+
+ indexReg = MCOperand::CreateReg(0);
+ break;
+ case EA_BASE_BX_SI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BX_DI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ case EA_BASE_BP_SI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BP_DI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ default:
+ indexReg = MCOperand::CreateReg(0);
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected eaBase");
+ return true;
+ // Here, we will use the fill-ins defined above. However,
+ // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+ // sib and sib64 were handled in the top-level if, so they're only
+ // placeholders to keep the compiler happy.
+#define ENTRY(x) \
+ case EA_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+ ALL_REGS
+#undef ENTRY
+ debug("A R/M memory operand may not be a register; "
+ "the base field must be a base.");
+ return true;
+ }
+ }
+
+ scaleAmount = MCOperand::CreateImm(1);
+ }
+
+ displacement = MCOperand::CreateImm(insn.displacement);
+
+ static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+ 0, // SEG_OVERRIDE_NONE
+ X86::CS,
+ X86::SS,
+ X86::DS,
+ X86::ES,
+ X86::FS,
+ X86::GS
+ };
+
+ segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+
+ mcInst.addOperand(baseReg);
+ mcInst.addOperand(scaleAmount);
+ mcInst.addOperand(indexReg);
+ mcInst.addOperand(displacement);
+ mcInst.addOperand(segmentReg);
+ return false;
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+/// byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.type) {
+ default:
+ debug("Unexpected type for a R/M operand");
+ return true;
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ case TYPE_Rv:
+ case TYPE_MM:
+ case TYPE_MM32:
+ case TYPE_MM64:
+ case TYPE_XMM:
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ case TYPE_XMM256:
+ case TYPE_DEBUGREG:
+ case TYPE_CONTROLREG:
+ return translateRMRegister(mcInst, insn);
+ case TYPE_M:
+ case TYPE_M8:
+ case TYPE_M16:
+ case TYPE_M32:
+ case TYPE_M64:
+ case TYPE_M128:
+ case TYPE_M256:
+ case TYPE_M512:
+ case TYPE_Mv:
+ case TYPE_M32FP:
+ case TYPE_M64FP:
+ case TYPE_M80FP:
+ case TYPE_M16INT:
+ case TYPE_M32INT:
+ case TYPE_M64INT:
+ case TYPE_M1616:
+ case TYPE_M1632:
+ case TYPE_M1664:
+ case TYPE_LEA:
+ return translateRMMemory(mcInst, insn);
+ }
+}
+
+/// translateFPRegister - Translates a stack position on the FPU stack to its
+/// LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param stackPos - The stack position to translate.
+/// @return - 0 on success; nonzero otherwise.
+static bool translateFPRegister(MCInst &mcInst,
+ uint8_t stackPos) {
+ if (stackPos >= 8) {
+ debug("Invalid FP stack position");
+ return true;
+ }
+
+ mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+
+ return false;
+}
+
+/// translateOperand - Translates an operand stored in an internal instruction
+/// to LLVM's format and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.encoding) {
+ default:
+ debug("Unhandled operand encoding during translation");
+ return true;
+ case ENCODING_REG:
+ translateRegister(mcInst, insn.reg);
+ return false;
+ case ENCODING_RM:
+ return translateRM(mcInst, operand, insn);
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ debug("Translation of code offsets isn't supported.");
+ return true;
+ case ENCODING_IB:
+ case ENCODING_IW:
+ case ENCODING_ID:
+ case ENCODING_IO:
+ case ENCODING_Iv:
+ case ENCODING_Ia:
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn);
+ return false;
+ case ENCODING_RB:
+ case ENCODING_RW:
+ case ENCODING_RD:
+ case ENCODING_RO:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_I:
+ return translateFPRegister(mcInst, insn.opcodeModifier);
+ case ENCODING_Rv:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_VVVV:
+ translateRegister(mcInst, insn.vvvv);
+ return false;
+ case ENCODING_DUP:
+ return translateOperand(mcInst,
+ insn.spec->operands[operand.type - TYPE_DUP0],
+ insn);
+ }
+}
+
+/// translateInstruction - Translates an internal instruction and all its
+/// operands to an MCInst.
+///
+/// @param mcInst - The MCInst to populate with the instruction's data.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateInstruction(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (!insn.spec) {
+ debug("Instruction has no specification");
+ return true;
+ }
+
+ mcInst.setOpcode(insn.instructionID);
+
+ int index;
+
+ insn.numImmediatesTranslated = 0;
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ if (insn.spec->operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static MCDisassembler *createX86_32Disassembler(const Target &T) {
+ return new X86Disassembler::X86_32Disassembler;
+}
+
+static MCDisassembler *createX86_64Disassembler(const Target &T) {
+ return new X86Disassembler::X86_64Disassembler;
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ createX86_32Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ createX86_64Disassembler);
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 000000000000..550cf9d40de2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,155 @@
+//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets. The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+// These attributes, recorded in enum attributeBits
+// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
+// provides a mapping from bitmasks to contexts, which are represented by
+// enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is. The
+// disassembler distinguishes four kinds of opcodes, which are enumerated in
+// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
+//    OpcodeDecision (ibid.) in which to look up the opcode. Looking up the
+//    opcode yields a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
+// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+// ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
+// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+// meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+// (X86DisassemblerDecoderCommon.h) and its type is an entry from
+// OperandType (ibid.). The encoding indicates how to read it from the
+// instruction; the type indicates how to interpret the value once it has
+// been read. For example, a register operand could be stored in the R/M
+// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
+// register, for instance). Given this information, the operands can be
+// extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+// and operands into a format understandable by the client - in this case, an
+// MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself. The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+// adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+// invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+// table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+// factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.c contains the source code of the decoder, which is
+// responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
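For orientation, a minimal sketch (not part of the patch) of how a consumer might drive the C decoder that performs steps 1-6; the buffer struct and function names are invented for the example, while byteReader_t, decodeInstruction(), and MODE_64BIT come from X86DisassemblerDecoder.h below.

    /* Assumes #include "X86DisassemblerDecoder.h" and <stdint.h>.          */
    /* Hypothetical flat-buffer reader: 'arg' is the buffer and, because we
       pass startLoc == 0, addresses are simply offsets into it.  A nonzero
       return tells the decoder the byte is unavailable.                     */
    struct ByteBuffer { const uint8_t *bytes; uint64_t size; };

    static int bufferReader(void *arg, uint8_t *byte, uint64_t address) {
      struct ByteBuffer *buf = (struct ByteBuffer *)arg;
      if (address >= buf->size)
        return -1;
      *byte = buf->bytes[address];
      return 0;
    }

    static int decodeOne(struct ByteBuffer *buf) {
      struct InternalInstruction insn;
      /* Logger is NULL; dbgprintf() in the decoder checks for that. */
      if (decodeInstruction(&insn, bufferReader, buf, 0, 0, 0, MODE_64BIT))
        return -1;
      /* insn.length, insn.instructionID and insn.spec->name are now set;
         translating them into an MCInst (step 7) is the job of
         X86Disassembler.cpp.                                            */
      return 0;
    }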
+
+#ifndef X86DISASSEMBLER_H
+#define X86DISASSEMBLER_H
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ const InstrUID *instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+namespace X86Disassembler {
+
+/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
+/// All each platform class needs to do is subclass the constructor and
+/// provide a different disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+protected:
+ /// Constructor - Initializes the disassembler.
+ ///
+ /// @param mode - The X86 architecture mode to decode for.
+ X86GenericDisassembler(DisassemblerMode mode);
+public:
+ ~X86GenericDisassembler();
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ DisassemblerMode fMode;
+};
+
+/// X86_16Disassembler - 16-bit X86 disassembler.
+class X86_16Disassembler : public X86GenericDisassembler {
+public:
+ X86_16Disassembler() :
+ X86GenericDisassembler(MODE_16BIT) {
+ }
+};
+
+/// X86_32Disassembler - 32-bit X86 disassembler.
+class X86_32Disassembler : public X86GenericDisassembler {
+public:
+ X86_32Disassembler() :
+ X86GenericDisassembler(MODE_32BIT) {
+ }
+};
+
+/// X86_64Disassembler - 64-bit X86 disassembler.
+class X86_64Disassembler : public X86GenericDisassembler {
+public:
+ X86_64Disassembler() :
+ X86GenericDisassembler(MODE_64BIT) {
+ }
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
new file mode 100644
index 000000000000..de1610ba3d66
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -0,0 +1,1610 @@
+/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the implementation of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include <stdarg.h> /* for va_*() */
+#include <stdio.h> /* for vsnprintf() */
+#include <stdlib.h> /* for exit() */
+#include <string.h> /* for memset() */
+
+#include "X86DisassemblerDecoder.h"
+
+#include "X86GenDisassemblerTables.inc"
+
+#define TRUE 1
+#define FALSE 0
+
+typedef int8_t bool;
+
+#ifndef NDEBUG
+#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
+#else
+#define debug(s) do { } while (0)
+#endif
+
+
+/*
+ * contextForAttrs - Client for the instruction context table. Takes a set of
+ * attributes and returns the appropriate decode context.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits.
+ * @return - The InstructionContext to use when looking up an
+ * an instruction with these attributes.
+ */
+static InstructionContext contextForAttrs(uint8_t attrMask) {
+ return CONTEXTS_SYM[attrMask];
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ * the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ * contextForAttrs.
+ * @param opcode - The last byte of the instruction's opcode, not counting
+ * ModR/M extensions and escapes.
+ * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
+ */
+static int modRMRequired(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode) {
+ const struct ContextDecision* decision = 0;
+
+ switch (type) {
+ case ONEBYTE:
+ decision = &ONEBYTE_SYM;
+ break;
+ case TWOBYTE:
+ decision = &TWOBYTE_SYM;
+ break;
+ case THREEBYTE_38:
+ decision = &THREEBYTE38_SYM;
+ break;
+ case THREEBYTE_3A:
+ decision = &THREEBYTE3A_SYM;
+ break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
+ }
+
+ return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+ modrm_type != MODRM_ONEENTRY;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ * an instruction.
+ *
+ * @param type - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode - See modRMRequired().
+ * @param modRM - The ModR/M byte if required, or any value if not.
+ * @return - The UID of the instruction, or 0 on failure.
+ */
+static InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ const struct ModRMDecision* dec;
+
+ switch (type) {
+ default:
+ debug("Unknown opcode type");
+ return 0;
+ case ONEBYTE:
+ dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case TWOBYTE:
+ dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_38:
+ dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_3A:
+ dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ }
+
+ switch (dec->modrm_type) {
+ default:
+ debug("Corrupt table! Unknown modrm_type");
+ return 0;
+ case MODRM_ONEENTRY:
+ return dec->instructionIDs[0];
+ case MODRM_SPLITRM:
+ if (modFromModRM(modRM) == 0x3)
+ return dec->instructionIDs[1];
+ else
+ return dec->instructionIDs[0];
+ case MODRM_FULL:
+ return dec->instructionIDs[modRM];
+ }
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ * that instruction.
+ *
+ * @param uid - The unique ID for the instruction. This should be returned by
+ * decode(); specifierForUID will not check bounds.
+ * @return - A pointer to the specification for that instruction.
+ */
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
+ return &INSTRUCTIONS_SYM[uid];
+}
+
+/*
+ * consumeByte - Uses the reader function provided by the user to consume one
+ * byte from the instruction's memory and advance the cursor.
+ *
+ * @param insn - The instruction with the reader function to use. The cursor
+ * for this instruction is advanced.
+ * @param byte - A pointer to a pre-allocated memory buffer to be populated
+ * with the data read.
+ * @return - 0 if the read was successful; nonzero otherwise.
+ */
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+ int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
+
+ if (!ret)
+ ++(insn->readerCursor);
+
+ return ret;
+}
+
+/*
+ * lookAtByte - Like consumeByte, but does not advance the cursor.
+ *
+ * @param insn - See consumeByte().
+ * @param byte - See consumeByte().
+ * @return - See consumeByte().
+ */
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+ return insn->reader(insn->readerArg, byte, insn->readerCursor);
+}
+
+static void unconsumeByte(struct InternalInstruction* insn) {
+ insn->readerCursor--;
+}
+
+#define CONSUME_FUNC(name, type) \
+ static int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) \
+ return ret; \
+ combined = combined | ((type)byte << ((type)offset * 8)); \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); \
+ return 0; \
+ }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ * values of various sizes from the instruction's memory and advance the
+ * cursor appropriately. These readers perform endian conversion.
+ *
+ * @param insn - See consumeByte().
+ * @param ptr - A pointer to a pre-allocated memory of appropriate size to
+ * be populated with the data read.
+ * @return - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
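As a worked example of the endian conversion these readers perform (bytes chosen for illustration): if the four bytes at the cursor are 0x78 0x56 0x34 0x12, consumeUInt32() assembles them least-significant byte first:

    /* offset 0: combined = 0x00000078
     * offset 1: combined |= 0x56 <<  8   -> 0x00005678
     * offset 2: combined |= 0x34 << 16   -> 0x00345678
     * offset 3: combined |= 0x12 << 24   -> 0x12345678
     * readerCursor then advances by sizeof(uint32_t) == 4.
     */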
+/*
+ * dbgprintf - Uses the logging function provided by the user to log a single
+ * message, typically without a carriage-return.
+ *
+ * @param insn - The instruction containing the logging function.
+ * @param format - See printf().
+ * @param ... - See printf().
+ */
+static void dbgprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
+ char buffer[256];
+ va_list ap;
+
+ if (!insn->dlog)
+ return;
+
+ va_start(ap, format);
+ (void)vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
+
+ insn->dlog(insn->dlogArg, buffer);
+
+ return;
+}
+
+/*
+ * setPrefixPresent - Marks that a particular prefix is present at a particular
+ * location.
+ *
+ * @param insn - The instruction to be marked as having the prefix.
+ * @param prefix - The prefix that is present.
+ * @param location - The location where the prefix is located (in the address
+ * space of the instruction's reader).
+ */
+static void setPrefixPresent(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ insn->prefixPresent[prefix] = 1;
+ insn->prefixLocations[prefix] = location;
+}
+
+/*
+ * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
+ * present at a given location.
+ *
+ * @param insn - The instruction to be queried.
+ * @param prefix - The prefix.
+ * @param location - The location to query.
+ * @return - Whether the prefix is at that location.
+ */
+static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ if (insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ * instruction as having them. Also sets the instruction's default operand,
+ * address, and other relevant data sizes to report operands correctly.
+ *
+ * @param insn - The instruction whose prefixes are to be read.
+ * @return - 0 if the instruction could be read until the end of the prefix
+ * bytes, and no prefixes conflicted; nonzero otherwise.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+ BOOL isPrefix = TRUE;
+ BOOL prefixGroups[4] = { FALSE };
+ uint64_t prefixLocation;
+ uint8_t byte = 0;
+
+ BOOL hasAdSize = FALSE;
+ BOOL hasOpSize = FALSE;
+
+ dbgprintf(insn, "readPrefixes()");
+
+ while (isPrefix) {
+ prefixLocation = insn->readerCursor;
+
+ if (consumeByte(insn, &byte))
+ return -1;
+
+ switch (byte) {
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ case 0xf3: /* REP or REPE/REPZ */
+ if (prefixGroups[0])
+ dbgprintf(insn, "Redundant Group 1 prefix");
+ prefixGroups[0] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x2e: /* CS segment override -OR- Branch not taken */
+ case 0x36: /* SS segment override -OR- Branch taken */
+ case 0x3e: /* DS segment override */
+ case 0x26: /* ES segment override */
+ case 0x64: /* FS segment override */
+ case 0x65: /* GS segment override */
+ switch (byte) {
+ case 0x2e:
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36:
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e:
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26:
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64:
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65:
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+ default:
+ debug("Unhandled override");
+ return -1;
+ }
+ if (prefixGroups[1])
+ dbgprintf(insn, "Redundant Group 2 prefix");
+ prefixGroups[1] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x66: /* Operand-size override */
+ if (prefixGroups[2])
+ dbgprintf(insn, "Redundant Group 3 prefix");
+ prefixGroups[2] = TRUE;
+ hasOpSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x67: /* Address-size override */
+ if (prefixGroups[3])
+ dbgprintf(insn, "Redundant Group 4 prefix");
+ prefixGroups[3] = TRUE;
+ hasAdSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ default: /* Not a prefix byte */
+ isPrefix = FALSE;
+ break;
+ }
+
+ if (isPrefix)
+ dbgprintf(insn, "Found prefix 0x%hhx", byte);
+ }
+
+ insn->vexSize = 0;
+
+ if (byte == 0xc4) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ }
+
+ if (insn->mode == MODE_16BIT) {
+ insn->registerSize = (hasOpSize ? 4 : 2);
+ insn->addressSize = (hasAdSize ? 4 : 2);
+ insn->displacementSize = (hasAdSize ? 4 : 2);
+ insn->immediateSize = (hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 2 : 4);
+ insn->displacementSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+ insn->registerSize = 8;
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else if (insn->rexPrefix) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ * extended or escape opcodes).
+ *
+ * @param insn - The instruction whose opcode is to be read.
+ * @return - 0 if the opcode could be read successfully; nonzero otherwise.
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+ /* Determine the length of the primary opcode */
+
+ uint8_t current;
+
+ dbgprintf(insn, "readOpcode()");
+
+ insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x0f) {
+ dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+ insn->twoByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x38) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_38;
+ } else if (current == 0x3a) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
+ } else {
+ dbgprintf(insn, "Didn't find a three-byte escape prefix");
+
+ insn->opcodeType = TWOBYTE;
+ }
+ }
+
+ /*
+ * At this point we have consumed the full opcode.
+ * Anything we consume from here on must be unconsumed.
+ */
+
+ insn->opcode = current;
+
+ return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ * the ModR/M byte as appropriate for extended and escape opcodes,
+ * and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ * instruction.
+ * @param insn - The instruction whose ID is to be determined.
+ * @param attrMask - The attribute mask to search.
+ * @return - 0 if the ModR/M could be read when needed or was not
+ * needed; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+ struct InternalInstruction* insn,
+ uint8_t attrMask) {
+ BOOL hasModRMExtension;
+
+ uint8_t instructionClass;
+
+ instructionClass = contextForAttrs(attrMask);
+
+ hasModRMExtension = modRMRequired(insn->opcodeType,
+ instructionClass,
+ insn->opcode);
+
+ if (hasModRMExtension) {
+ if (readModRM(insn))
+ return -1;
+
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ insn->modRM);
+ } else {
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ 0);
+ }
+
+ return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ */
+static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ continue;
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
+/*
+ * is64BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 64-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 64-bit
+ * @param equiv - The instruction that is 64-bit
+ */
+static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
+ continue;
+ if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
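A few illustrative name pairs (the mnemonics are made up) and what these checks return:

    /* is16BitEquivalent("FOO32rr", "FOO16rr") -> TRUE   ('3'->'1', '2'->'6')
     * is64BitEquivalent("FOO32rr", "FOO64rr") -> TRUE   ('3'->'6', '2'->'4')
     * is64BitEquivalent("FOOL",    "FOOQ")    -> TRUE   ('L'->'Q')
     * is64BitEquivalent("FOO32rr", "BAR64rr") -> FALSE  (first characters differ)
     */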
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
+ * context for the instruction before doing so.
+ *
+ * @param insn - The instruction whose ID is to be determined.
+ * @return - 0 if the ModR/M could be read when needed or was not needed;
+ * nonzero otherwise.
+ */
+static int getID(struct InternalInstruction* insn) {
+ uint8_t attrMask;
+ uint16_t instructionID;
+
+ dbgprintf(insn, "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (wFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_REXW;
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+
+ }
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
+ /*
+ * Although for SSE instructions it is usually necessary to treat REX.W+F2
+ * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
+ * an occasional instruction where F2 is incidental and REX.W is the more
+ * significant. If the decoded instruction is 32-bit and adding REX.W
+ * instead of F2 changes a 32 to a 64, we adopt the new encoding.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithREXw;
+ const struct InstructionSpecifier *specWithREXw;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithREXw,
+ insn,
+ attrMask & (~ATTR_XD))) {
+ /*
+ * Decoding with REX.w would yield nothing; give up and return original
+ * decode.
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithREXw = specifierForUID(instructionIDWithREXw);
+
+ if (is64BitEquivalent(spec->name, specWithREXw->name)) {
+ insn->instructionID = instructionIDWithREXw;
+ insn->spec = specWithREXw;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * The instruction tables make no distinction between instructions that
+ * allow OpSize anywhere (i.e., 16-bit operations) and those that need it in a
+ * particular spot (i.e., many MMX operations). In general we're
+ * conservative, but in the specific case where OpSize is present but not
+ * in the right place we check if there's a 16-bit operation.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithOpsize,
+ insn,
+ attrMask | ATTR_OPSIZE)) {
+ /*
+ * ModRM required with OpSize but not present; give up and return version
+ * without OpSize set
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithOpsize = specifierForUID(instructionIDWithOpsize);
+
+ if (is16BitEquivalent(spec->name, specWithOpsize->name)) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = specWithOpsize;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(insn->instructionID);
+
+ return 0;
+}
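A brief, illustrative walk-through of how the routines above combine for the byte sequence f2 0f 58 c1 in 64-bit mode (the particular opcode is arbitrary):

    /* readPrefixes(): consumes 0xf2 (Group 1) and records its location, which
     *                 becomes necessaryPrefixLocation.
     * readOpcode():   consumes 0x0f 0x58 -> opcodeType = TWOBYTE, opcode = 0x58.
     * getID():        attrMask = ATTR_64BIT | ATTR_XD, since 0xf2 sits at the
     *                 necessary prefix location; getIDWithAttrMask() maps the
     *                 mask to a context via CONTEXTS_SYM, consults TWOBYTE_SYM
     *                 at opcode 0x58, and reads the ModR/M byte 0xc1 if the
     *                 table says it is required.
     */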
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ * instruction.
+ *
+ * @param insn - The instruction whose SIB byte is to be read.
+ * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
+ */
+static int readSIB(struct InternalInstruction* insn) {
+ SIBIndex sibIndexBase = 0;
+ SIBBase sibBaseBase = 0;
+ uint8_t index, base;
+
+ dbgprintf(insn, "readSIB()");
+
+ if (insn->consumedSIB)
+ return 0;
+
+ insn->consumedSIB = TRUE;
+
+ switch (insn->addressSize) {
+ case 2:
+ dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ return -1;
+ case 4:
+ sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consumeByte(insn, &insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ switch (index) {
+ case 0x4:
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ default:
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
+ if (insn->sibIndex == SIB_INDEX_sib ||
+ insn->sibIndex == SIB_INDEX_sib64)
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ }
+
+ switch (scaleFromSIB(insn->sib)) {
+ case 0:
+ insn->sibScale = 1;
+ break;
+ case 1:
+ insn->sibScale = 2;
+ break;
+ case 2:
+ insn->sibScale = 4;
+ break;
+ case 3:
+ insn->sibScale = 8;
+ break;
+ }
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x3:
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
+ }
+ break;
+ default:
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * @param insn - The instruction whose displacement is to be read.
+ * @return - 0 if the displacement byte was successfully read; nonzero
+ * otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {
+ int8_t d8;
+ int16_t d16;
+ int32_t d32;
+
+ dbgprintf(insn, "readDisplacement()");
+
+ if (insn->consumedDisplacement)
+ return 0;
+
+ insn->consumedDisplacement = TRUE;
+
+ switch (insn->eaDisplacement) {
+ case EA_DISP_NONE:
+ insn->consumedDisplacement = FALSE;
+ break;
+ case EA_DISP_8:
+ if (consumeInt8(insn, &d8))
+ return -1;
+ insn->displacement = d8;
+ break;
+ case EA_DISP_16:
+ if (consumeInt16(insn, &d16))
+ return -1;
+ insn->displacement = d16;
+ break;
+ case EA_DISP_32:
+ if (consumeInt32(insn, &d32))
+ return -1;
+ insn->displacement = d32;
+ break;
+ }
+
+ insn->consumedDisplacement = TRUE;
+ return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ * displacement) for an instruction and interprets it.
+ *
+ * @param insn - The instruction whose addressing information is to be read.
+ * @return - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {
+ uint8_t mod, rm, reg;
+
+ dbgprintf(insn, "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
+ insn->consumedModRM = TRUE;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ /*
+ * This goes by insn->registerSize to pick the correct register, which messes
+ * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ * fixupReg().
+ */
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = MODRM_REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = MODRM_REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = MODRM_REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2:
+ insn->eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) {
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3:
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 4:
+ case 8:
+ insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REX.b is set */
+ insn->eaBase = (insn->addressSize == 4 ?
+ EA_BASE_sib : EA_BASE_sib64);
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5:
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REX.b is set */
+ insn->eaBase = EA_BASE_sib;
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3:
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ break;
+ }
+ break;
+ } /* switch (insn->addressSize) */
+
+ return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
+ static uint8_t name(struct InternalInstruction *insn, \
+ OperandType type, \
+ uint8_t index, \
+ uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ if (insn->rexPrefix && \
+ index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
+ case TYPE_XMM128: \
+ case TYPE_XMM64: \
+ case TYPE_XMM32: \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_MM64: \
+ case TYPE_MM32: \
+ case TYPE_MM: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_MM0 + index; \
+ case TYPE_SEGMENTREG: \
+ if (index > 5) \
+ *valid = 0; \
+ return prefix##_ES + index; \
+ case TYPE_DEBUGREG: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_DR0 + index; \
+ case TYPE_CONTROLREG: \
+ if (index > 8) \
+ *valid = 0; \
+ return prefix##_CR0 + index; \
+ } \
+ }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ * reg or R/M field. If the operand is an XMM operand, for example, the
+ * field selects XMM0 rather than AX, which readModRM() would otherwise
+ * report.
+ *
+ * @param insn - The instruction containing the operand.
+ * @param type - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ * field is valid for the register class; 0 if not.
+ * @return - The proper value.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
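For example (values chosen for illustration), if readModRM() reported a reg field of 2 against a 32-bit register base but the operand's type says it is an XMM register:

    /* readModRM():  insn->reg = MODRM_REG_EAX + 2                 (i.e. EDX)
     * fixupReg():   index = insn->reg - insn->regBase = 2
     * fixupRegValue(insn, TYPE_XMM128, 2, &valid)
     *               returns MODRM_REG_XMM0 + 2 with valid == 1    (i.e. XMM2)
     */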
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ * fixup*Value functions to use in correcting readModRM()'s interpretation.
+ *
+ * @param insn - See fixup*Value().
+ * @param op - The operand specifier.
+ * @return - 0 if fixup was successful; -1 if the register returned was
+ * invalid for its class.
+ */
+static int fixupReg(struct InternalInstruction *insn,
+ const struct OperandSpecifier *op) {
+ uint8_t valid;
+
+ dbgprintf(insn, "fixupReg()");
+
+ switch ((OperandEncoding)op->encoding) {
+ default:
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_REG:
+ insn->reg = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->reg - insn->regBase,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_RM:
+ if (insn->eaBase >= insn->eaRegBase) {
+ insn->eaBase = (EABase)fixupRMValue(insn,
+ (OperandType)op->type,
+ insn->eaBase - insn->eaRegBase,
+ &valid);
+ if (!valid)
+ return -1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcodeModifier - Reads an operand from the opcode field of an
+ * instruction. Handles AddRegFrm instructions.
+ *
+ * @param insn - The instruction whose opcode field is to be read.
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeModifier(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readOpcodeModifier()");
+
+ if (insn->consumedOpcodeModifier)
+ return 0;
+
+ insn->consumedOpcodeModifier = TRUE;
+
+ switch (insn->spec->modifierType) {
+ default:
+ debug("Unknown modifier type.");
+ return -1;
+ case MODIFIER_NONE:
+ debug("No modifier but an operand expects one.");
+ return -1;
+ case MODIFIER_OPCODE:
+ insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
+ return 0;
+ case MODIFIER_MODRM:
+ insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
+ return 0;
+ }
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an
+ * instruction and interprets it appropriately given the operand width.
+ * Handles AddRegFrm instructions.
+ *
+ * @param insn - See readOpcodeModifier().
+ * @param size - The width (in bytes) of the register being specified.
+ * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ * RAX.
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+ dbgprintf(insn, "readOpcodeRegister()");
+
+ if (readOpcodeModifier(insn))
+ return -1;
+
+ if (size == 0)
+ size = insn->registerSize;
+
+ switch (size) {
+ case 1:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if (insn->rexPrefix &&
+ insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ + (insn->opcodeRegister - MODRM_REG_AL - 4));
+ }
+
+ break;
+ case 2:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 4:
+ insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 8:
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ * desired operand size.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @param size - The width (in bytes) of the operand.
+ * @return - 0 if the immediate was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+ uint8_t imm8;
+ uint16_t imm16;
+ uint32_t imm32;
+ uint64_t imm64;
+
+ dbgprintf(insn, "readImmediate()");
+
+ if (insn->numImmediatesConsumed == 2) {
+ debug("Already consumed two immediates");
+ return -1;
+ }
+
+ if (size == 0)
+ size = insn->immediateSize;
+ else
+ insn->immediateSize = size;
+
+ switch (size) {
+ case 1:
+ if (consumeByte(insn, &imm8))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm8;
+ break;
+ case 2:
+ if (consumeUInt16(insn, &imm16))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm16;
+ break;
+ case 4:
+ if (consumeUInt32(insn, &imm32))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm32;
+ break;
+ case 8:
+ if (consumeUInt64(insn, &imm64))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm64;
+ break;
+ }
+
+ insn->numImmediatesConsumed++;
+
+ return 0;
+}
+
+/*
+ * readVVVV - Reads the VEX.vvvv field of the instruction, which encodes an
+ * additional register operand for some AVX instructions.
+ *
+ * @param insn - The instruction whose VEX.vvvv field is to be read.
+ * @return - 0 if the field was successfully read; nonzero otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ * operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+ int index;
+
+ dbgprintf(insn, "readOperands()");
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ switch (insn->spec->operands[index].encoding) {
+ case ENCODING_NONE:
+ break;
+ case ENCODING_REG:
+ case ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ dbgprintf(insn, "We currently don't hande code-offset encodings");
+ return -1;
+ case ENCODING_IB:
+ if (readImmediate(insn, 1))
+ return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
+ break;
+ case ENCODING_Ia:
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
+ break;
+ case ENCODING_RB:
+ if (readOpcodeRegister(insn, 1))
+ return -1;
+ break;
+ case ENCODING_RW:
+ if (readOpcodeRegister(insn, 2))
+ return -1;
+ break;
+ case ENCODING_RD:
+ if (readOpcodeRegister(insn, 4))
+ return -1;
+ break;
+ case ENCODING_RO:
+ if (readOpcodeRegister(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Rv:
+ if (readOpcodeRegister(insn, 0))
+ return -1;
+ break;
+ case ENCODING_I:
+ if (readOpcodeModifier(insn))
+ return -1;
+ break;
+ case ENCODING_VVVV:
+ if (readVVVV(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ dbgprintf(insn, "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ * user.
+ *
+ * @param insn - A pointer to the instruction to be populated. Must be
+ * pre-allocated.
+ * @param reader - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ * any internal state.
+ * @param logger - If non-NULL, the function to be used to write log messages
+ * and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ * any internal state.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ * decode the instruction in.
+ * @return - 0 if the instruction's memory could be read; nonzero if
+ * not.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode) {
+ memset(insn, 0, sizeof(struct InternalInstruction));
+
+ insn->reader = reader;
+ insn->readerArg = readerArg;
+ insn->dlog = logger;
+ insn->dlogArg = loggerArg;
+ insn->startLocation = startLoc;
+ insn->readerCursor = startLoc;
+ insn->mode = mode;
+ insn->numImmediatesConsumed = 0;
+
+ if (readPrefixes(insn) ||
+ readOpcode(insn) ||
+ getID(insn) ||
+ insn->instructionID == 0 ||
+ readOperands(insn))
+ return -1;
+
+ insn->length = insn->readerCursor - insn->startLocation;
+
+ dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+ startLoc, insn->readerCursor, insn->length);
+
+ if (insn->length > 15)
+ dbgprintf(insn, "Instruction exceeds 15-byte limit");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 000000000000..a9c90f8f9bda
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,575 @@
+/*===- X86DisassemblerDecoder.h - Disassembler decoder -------------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the public interface of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef X86DISASSEMBLERDECODER_H
+#define X86DISASSEMBLERDECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ const InstrUID *instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+/*
+ * Accessor functions for various fields of an Intel instruction
+ */
+#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) & 0x8) >> 3)
+#define rFromREX(rex) (((rex) & 0x4) >> 2)
+#define xFromREX(rex) (((rex) & 0x2) >> 1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
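Two worked examples of these accessors (byte values chosen for illustration):

    /* REX byte 0x4d (0100 1101b):
     *   wFromREX(0x4d) == 1, rFromREX(0x4d) == 1,
     *   xFromREX(0x4d) == 0, bFromREX(0x4d) == 1
     * ModR/M byte 0x9c (10 011 100b):
     *   modFromModRM(0x9c) == 2, regFromModRM(0x9c) == 3, rmFromModRM(0x9c) == 4
     */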
+
+/*
+ * These enums represent Intel registers for use by the decoder.
+ */
+
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
+ ENTRY(DIL)
+
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_MMX \
+ ENTRY(MM0) \
+ ENTRY(MM1) \
+ ENTRY(MM2) \
+ ENTRY(MM3) \
+ ENTRY(MM4) \
+ ENTRY(MM5) \
+ ENTRY(MM6) \
+ ENTRY(MM7)
+
+#define REGS_XMM \
+ ENTRY(XMM0) \
+ ENTRY(XMM1) \
+ ENTRY(XMM2) \
+ ENTRY(XMM3) \
+ ENTRY(XMM4) \
+ ENTRY(XMM5) \
+ ENTRY(XMM6) \
+ ENTRY(XMM7) \
+ ENTRY(XMM8) \
+ ENTRY(XMM9) \
+ ENTRY(XMM10) \
+ ENTRY(XMM11) \
+ ENTRY(XMM12) \
+ ENTRY(XMM13) \
+ ENTRY(XMM14) \
+ ENTRY(XMM15)
+
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15)
+
+#define REGS_SEGMENT \
+ ENTRY(ES) \
+ ENTRY(CS) \
+ ENTRY(SS) \
+ ENTRY(DS) \
+ ENTRY(FS) \
+ ENTRY(GS)
+
+#define REGS_DEBUG \
+ ENTRY(DR0) \
+ ENTRY(DR1) \
+ ENTRY(DR2) \
+ ENTRY(DR3) \
+ ENTRY(DR4) \
+ ENTRY(DR5) \
+ ENTRY(DR6) \
+ ENTRY(DR7)
+
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
+
+#define ALL_EA_BASES \
+ EA_BASES_16BIT \
+ EA_BASES_32BIT \
+ EA_BASES_64BIT
+
+#define ALL_SIB_BASES \
+ REGS_32BIT \
+ REGS_64BIT
+
+#define ALL_REGS \
+ REGS_8BIT \
+ REGS_16BIT \
+ REGS_32BIT \
+ REGS_64BIT \
+ REGS_MMX \
+ REGS_XMM \
+ REGS_YMM \
+ REGS_SEGMENT \
+ REGS_DEBUG \
+ REGS_CONTROL \
+ ENTRY(RIP)
+
+/*
+ * EABase - All possible values of the base field for effective-address
+ * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
+ * distinguish between bases (EA_BASE_*) and registers that just happen to be
+ * referred to when Mod == 0b11 (EA_REG_*).
+ */
+typedef enum {
+ EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ EA_max
+} EABase;
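To make the base-versus-register split concrete (an illustrative pair, assuming a 32-bit address size in readModRM()):

    /* mod == 0x0, rm == 0x3  ->  eaBase = EA_BASE_EBX   (memory operand [ebx])
     * mod == 0x3, rm == 0x3  ->  eaBase = EA_REG_EBX    (the register ebx itself)
     */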
+
+/*
+ * SIBIndex - All possible values of the SIB index field.
+ * Borrows entries from ALL_EA_BASES with the special case that
+ * sib is synonymous with NONE.
+ */
+typedef enum {
+ SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+ ALL_EA_BASES
+#undef ENTRY
+ SIB_INDEX_max
+} SIBIndex;
+
+/*
+ * SIBBase - All possible values of the SIB base field.
+ */
+typedef enum {
+ SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+ ALL_SIB_BASES
+#undef ENTRY
+ SIB_BASE_max
+} SIBBase;
+
+/*
+ * EADisplacement - Possible displacement types for effective-address
+ * computations.
+ */
+typedef enum {
+ EA_DISP_NONE,
+ EA_DISP_8,
+ EA_DISP_16,
+ EA_DISP_32
+} EADisplacement;
+
+/*
+ * Reg - All possible values of the reg field in the ModR/M byte.
+ */
+typedef enum {
+#define ENTRY(x) MODRM_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ MODRM_REG_max
+} Reg;
+
+/*
+ * SegmentOverride - All possible segment overrides.
+ */
+typedef enum {
+ SEG_OVERRIDE_NONE,
+ SEG_OVERRIDE_CS,
+ SEG_OVERRIDE_SS,
+ SEG_OVERRIDE_DS,
+ SEG_OVERRIDE_ES,
+ SEG_OVERRIDE_FS,
+ SEG_OVERRIDE_GS,
+ SEG_OVERRIDE_max
+} SegmentOverride;
+
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+ VEX_LOB_0F = 0x1,
+ VEX_LOB_0F38 = 0x2,
+ VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+ VEX_PREFIX_NONE = 0x0,
+ VEX_PREFIX_66 = 0x1,
+ VEX_PREFIX_F3 = 0x2,
+ VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
+
+typedef uint8_t BOOL;
+
+/*
+ * byteReader_t - Type for the byte reader that the consumer must provide to
+ * the decoder. Reads a single byte from the instruction's address space.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param byte - A pointer to a single byte in memory that should be set to
+ * contain the value at address.
+ * @param address - The address in the instruction's address space that should
+ * be read from.
+ * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
+ */
+typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+
+/*
+ * dlog_t - Type for the logging function that the consumer can provide to
+ * get debugging output from the decoder.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param log - A string that contains the message. Will be reused after
+ * the logger returns.
+ */
+typedef void (*dlog_t)(void* arg, const char *log);
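A minimal sketch of a conforming logger (not part of the patch; the function name is invented and <stdio.h> is assumed):

    static void stderrLogger(void *arg, const char *log) {
      (void)arg;                       /* no consumer state needed here */
      fprintf(stderr, "x86 decoder: %s\n", log);
    }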
+
+/*
+ * The x86 internal instruction, which is produced by the decoder.
+ */
+struct InternalInstruction {
+ /* Reader interface (C) */
+ byteReader_t reader;
+ /* Opaque value passed to the reader */
+ void* readerArg;
+ /* The address of the next byte to read via the reader */
+ uint64_t readerCursor;
+
+ /* Logger interface (C) */
+ dlog_t dlog;
+ /* Opaque value passed to the logger */
+ void* dlogArg;
+
+ /* General instruction information */
+
+ /* The mode to disassemble for (64-bit, protected, real) */
+ DisassemblerMode mode;
+ /* The start of the instruction, usable with the reader */
+ uint64_t startLocation;
+ /* The length of the instruction, in bytes */
+ size_t length;
+
+ /* Prefix state */
+
+ /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+ uint8_t prefixPresent[0x100];
+ /* contains the location (for use with the reader) of the prefix byte */
+ uint64_t prefixLocations[0x100];
+ /* The value of the VEX prefix, if present */
+ uint8_t vexPrefix[3];
+ /* The length of the VEX prefix (0 if not present) */
+ uint8_t vexSize;
+ /* The value of the REX prefix, if present */
+ uint8_t rexPrefix;
+ /* The location where a mandatory prefix would have to be (i.e., right before
+ the opcode, or right before the REX prefix if one is present) */
+ uint64_t necessaryPrefixLocation;
+ /* The segment override type */
+ SegmentOverride segmentOverride;
+
+ /* Sizes of various critical pieces of data, in bytes */
+ uint8_t registerSize;
+ uint8_t addressSize;
+ uint8_t displacementSize;
+ uint8_t immediateSize;
+
+ /* opcode state */
+
+ /* The value of the two-byte escape prefix (usually 0x0f) */
+ uint8_t twoByteEscape;
+ /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
+ uint8_t threeByteEscape;
+ /* The last byte of the opcode, not counting any ModR/M extension */
+ uint8_t opcode;
+ /* The ModR/M byte of the instruction, if it is an opcode extension */
+ uint8_t modRMExtension;
+
+ /* decode state */
+
+ /* The type of opcode, used for indexing into the array of decode tables */
+ OpcodeType opcodeType;
+ /* The instruction ID, extracted from the decode table */
+ uint16_t instructionID;
+ /* The specifier for the instruction, from the instruction info table */
+ const struct InstructionSpecifier *spec;
+
+ /* state for additional bytes, consumed during operand decode. Pattern:
+ consumed___ indicates that the byte was already consumed and does not
+ need to be consumed again */
+
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
+ instructions */
+ Reg vvvv;
+
+ /* The ModR/M byte, which contains most register operands and some portion of
+ all memory operands */
+ BOOL consumedModRM;
+ uint8_t modRM;
+
+ /* The SIB byte, used for more complex 32- or 64-bit memory operands */
+ BOOL consumedSIB;
+ uint8_t sib;
+
+ /* The displacement, used for memory operands */
+ BOOL consumedDisplacement;
+ int32_t displacement;
+
+ /* Immediates. There can be two in some cases */
+ uint8_t numImmediatesConsumed;
+ uint8_t numImmediatesTranslated;
+ uint64_t immediates[2];
+
+ /* A register or immediate operand encoded into the opcode */
+ BOOL consumedOpcodeModifier;
+ uint8_t opcodeModifier;
+ Reg opcodeRegister;
+
+ /* Portions of the ModR/M byte */
+
+ /* These fields determine the allowable values for the ModR/M fields, which
+ depend on operand and address widths */
+ EABase eaBaseBase;
+ EABase eaRegBase;
+ Reg regBase;
+
+ /* The Mod and R/M fields can encode a base for an effective address, or a
+ register. These are separated into two fields here */
+ EABase eaBase;
+ EADisplacement eaDisplacement;
+ /* The reg field always encodes a register */
+ Reg reg;
+
+ /* SIB state */
+ SIBIndex sibIndex;
+ uint8_t sibScale;
+ SIBBase sibBase;
+};
+
+/* decodeInstruction - Decode one instruction and store the decoding results in
+ * a buffer provided by the consumer.
+ * @param insn - The buffer to store the instruction in. Allocated by the
+ * consumer.
+ * @param reader - The byteReader_t for the bytes to be read.
+ * @param readerArg - An argument to pass to the reader for storing context
+ * specific to the consumer. May be NULL.
+ * @param logger - The dlog_t to be used in printing status messages from the
+ * disassembler. May be NULL.
+ * @param loggerArg - An argument to pass to the logger for storing context
+ * specific to the logger. May be NULL.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
+ * @return - Nonzero if there was an error during decode, 0 otherwise.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode);
+
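+/*
+ * Example usage (illustrative sketch only; ByteSpan, bufferReader, and the
+ * calling code below are assumptions for the example, not part of this API):
+ *
+ *   struct ByteSpan { const uint8_t *bytes; uint64_t size; };
+ *
+ *   static int bufferReader(void *arg, uint8_t *byte, uint64_t address) {
+ *     struct ByteSpan *span = (struct ByteSpan *)arg;
+ *     if (address >= span->size)
+ *       return -1;                       // out of bounds; decode stops here
+ *     *byte = span->bytes[address];
+ *     return 0;
+ *   }
+ *
+ *   struct InternalInstruction insn;
+ *   struct ByteSpan span = { code, codeSize };
+ *   if (!decodeInstruction(&insn, bufferReader, &span, NULL, NULL,
+ *                          0, MODE_64BIT))
+ *     handleDecoded(insn.instructionID, insn.length);
+ */
+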
+/* x86DisassemblerDebug - C-accessible function for printing a message to
+ * debugs()
+ * @param file - The name of the file printing the debug message.
+ * @param line - The line number that printed the debug message.
+ * @param s - The message to print.
+ */
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 000000000000..70315ed572b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,379 @@
+/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains common definitions used by both the disassembler and the table
+ * generator.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+/*
+ * This header file provides those definitions that need to be shared between
+ * the decoder and the table generator in a C-friendly manner.
+ */
+
+#ifndef X86DISASSEMBLERDECODERCOMMON_H
+#define X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/Support/DataTypes.h"
+
+#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM x86DisassemblerContexts
+#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
+
+#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
+#define CONTEXTS_STR "x86DisassemblerContexts"
+#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
+#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
+#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
+#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
+
+/*
+ * Attributes of an instruction that must be known before the opcode can be
+ * processed correctly. Most of these indicate the presence of particular
+ * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
+ */
+#define ATTRIBUTE_BITS \
+ ENUM_ENTRY(ATTR_NONE, 0x00) \
+ ENUM_ENTRY(ATTR_64BIT, 0x01) \
+ ENUM_ENTRY(ATTR_XS, 0x02) \
+ ENUM_ENTRY(ATTR_XD, 0x04) \
+ ENUM_ENTRY(ATTR_REXW, 0x08) \
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+ ENUM_ENTRY(ATTR_VEX, 0x20) \
+ ENUM_ENTRY(ATTR_VEXL, 0x40)
+
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+ ATTRIBUTE_BITS
+ ATTR_max
+};
+#undef ENUM_ENTRY
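+
+/*
+ * ENUM_ENTRY is used as an X-macro: each consumer defines it, expands the
+ * ATTRIBUTE_BITS list, then undefines it again. The enum above expands to
+ *   enum attributeBits { ATTR_NONE = 0x00, ATTR_64BIT = 0x01, ..., ATTR_max };
+ * and a hypothetical name table (not defined anywhere in this header) could
+ * be generated from the very same list with
+ *   #define ENUM_ENTRY(n, v) #n,
+ *   static const char *attributeNames[] = { ATTRIBUTE_BITS };
+ *   #undef ENUM_ENTRY
+ */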
+
+/*
+ * Combinations of the above attributes that are relevant to instruction
+ * decode. Although other combinations are possible, they can be reduced to
+ * these without affecting the ultimately decoded instruction.
+ */
+
+/* Class name Rank Rationale for rank assignment */
+#define INSTRUCTION_CONTEXTS \
+ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
+ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
+ "64-bit mode but no more") \
+ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
+ "change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
+ "secondary") \
+ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
+ "opcode") \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
+ "IC_64BIT_REXW_XS") \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
+ "else because this changes most " \
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+
+
+#define ENUM_ENTRY(n, r, d) n,
+typedef enum {
+ INSTRUCTION_CONTEXTS
+ IC_max
+} InstructionContext;
+#undef ENUM_ENTRY
+
+/*
+ * Opcode types, which determine which decode table to use, both in the Intel
+ * manual and also for the decoder.
+ */
+typedef enum {
+ ONEBYTE = 0,
+ TWOBYTE = 1,
+ THREEBYTE_38 = 2,
+ THREEBYTE_3A = 3,
+ THREEBYTE_A6 = 4,
+ THREEBYTE_A7 = 5
+} OpcodeType;
+
+/*
+ * The following structs are used for the hierarchical decode table. After
+ * determining the instruction's class (i.e., which IC_* constant applies to
+ * it), the decoder reads the opcode. Some instructions require specific
+ * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+ *
+ * If a ModR/M byte is not required, "required" is left unset, and the values
+ * for each instructionID are identical.
+ */
+
+typedef uint16_t InstrUID;
+
+/*
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * consumer to determine the number of entries in it.
+ *
+ * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+ * instruction is the same.
+ * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+ * corresponds to one instruction; otherwise, it corresponds to
+ * a different instruction.
+ * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+ * to a different instruction.
+ */
+
+#define MODRMTYPES \
+ ENUM_ENTRY(MODRM_ONEENTRY) \
+ ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODRMTYPES
+ MODRM_max
+} ModRMDecisionType;
+#undef ENUM_ENTRY
+
+/*
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * instruction each possible value of the ModR/M byte corresponds to. Once
+ * this information is known, we have narrowed down to a single instruction.
+ */
+struct ModRMDecision {
+ uint8_t modrm_type;
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_IDS
+};
+
+/*
+ * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
+ * given a particular opcode.
+ */
+struct OpcodeDecision {
+ struct ModRMDecision modRMDecisions[256];
+};
+
+/*
+ * ContextDecision - Specifies which opcode->instruction tables to look at given
+ * a particular context (set of attributes). Since there are many possible
+ * contexts, the decoder first uses CONTEXTS_SYM to determine which context
+ * applies given a specific set of attributes. Hence there are only IC_max
+ * entries in this table, rather than 2^(ATTR_max).
+ */
+struct ContextDecision {
+ struct OpcodeDecision opcodeDecisions[IC_max];
+};
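+
+/*
+ * Taken together, resolving an instruction is a walk down this hierarchy.
+ * The sketch below is illustrative only: the "instructionIDs" member is
+ * whatever INSTRUCTION_IDS expands to at the point of inclusion, and the
+ * indexing for MODRM_SPLITRM is simplified away.
+ *
+ *   InstructionContext ctx = CONTEXTS_SYM[attributes];
+ *   const struct ModRMDecision *dec =
+ *     &TWOBYTE_SYM.opcodeDecisions[ctx].modRMDecisions[opcode];
+ *   InstrUID uid = (dec->modrm_type == MODRM_ONEENTRY)
+ *                    ? dec->instructionIDs[0]
+ *                    : dec->instructionIDs[modRM];   // MODRM_FULL case
+ */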
+
+/*
+ * Physical encodings of instruction operands.
+ */
+
+#define ENCODINGS \
+ ENUM_ENTRY(ENCODING_NONE, "") \
+ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
+ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
+ ENUM_ENTRY(ENCODING_CW, "2-byte") \
+ ENUM_ENTRY(ENCODING_CD, "4-byte") \
+ ENUM_ENTRY(ENCODING_CP, "6-byte") \
+ ENUM_ENTRY(ENCODING_CO, "8-byte") \
+ ENUM_ENTRY(ENCODING_CT, "10-byte") \
+ ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
+ ENUM_ENTRY(ENCODING_IW, "2-byte") \
+ ENUM_ENTRY(ENCODING_ID, "4-byte") \
+ ENUM_ENTRY(ENCODING_IO, "8-byte") \
+ ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
+ "the opcode byte") \
+ ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
+ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
+ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
+ ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
+ "opcode byte") \
+ \
+ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
+ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
+ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
+ "opcode byte") \
+ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
+ "in type")
+
+#define ENUM_ENTRY(n, d) n,
+ typedef enum {
+ ENCODINGS
+ ENCODING_max
+ } OperandEncoding;
+#undef ENUM_ENTRY
+
+/*
+ * Semantic interpretations of instruction operands.
+ */
+
+#define TYPES \
+ ENUM_ENTRY(TYPE_NONE, "") \
+ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
+ ENUM_ENTRY(TYPE_REL16, "2-byte") \
+ ENUM_ENTRY(TYPE_REL32, "4-byte") \
+ ENUM_ENTRY(TYPE_REL64, "8-byte") \
+ ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
+ ENUM_ENTRY(TYPE_R16, "2-byte") \
+ ENUM_ENTRY(TYPE_R32, "4-byte") \
+ ENUM_ENTRY(TYPE_R64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
+ ENUM_ENTRY(TYPE_IMM16, "2-byte") \
+ ENUM_ENTRY(TYPE_IMM32, "4-byte") \
+ ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
+ ENUM_ENTRY(TYPE_RM16, "2-byte") \
+ ENUM_ENTRY(TYPE_RM32, "4-byte") \
+ ENUM_ENTRY(TYPE_RM64, "8-byte") \
+ ENUM_ENTRY(TYPE_M, "Memory operand") \
+ ENUM_ENTRY(TYPE_M8, "1-byte") \
+ ENUM_ENTRY(TYPE_M16, "2-byte") \
+ ENUM_ENTRY(TYPE_M32, "4-byte") \
+ ENUM_ENTRY(TYPE_M64, "8-byte") \
+ ENUM_ENTRY(TYPE_LEA, "Effective address") \
+ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "32-byte (AVX)") \
+ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
+ "base)") \
+ ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
+ ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
+ ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
+ ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
+ "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
+ ENUM_ENTRY(TYPE_M32FP, "32-bit IEEE 754 memory floating-point operand") \
+ ENUM_ENTRY(TYPE_M64FP, "64-bit") \
+ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
+ "floating-point instructions") \
+ ENUM_ENTRY(TYPE_M32INT, "4-byte") \
+ ENUM_ENTRY(TYPE_M64INT, "8-byte") \
+ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
+ ENUM_ENTRY(TYPE_MM, "MMX register operand") \
+ ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
+ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
+ ENUM_ENTRY(TYPE_XMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
+ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
+ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
+ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ \
+ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
+ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
+ ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
+ ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
+ ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
+ ENUM_ENTRY(TYPE_DUP1, "operand 1") \
+ ENUM_ENTRY(TYPE_DUP2, "operand 2") \
+ ENUM_ENTRY(TYPE_DUP3, "operand 3") \
+ ENUM_ENTRY(TYPE_DUP4, "operand 4") \
+ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n,
+typedef enum {
+ TYPES
+ TYPE_max
+} OperandType;
+#undef ENUM_ENTRY
+
+/*
+ * OperandSpecifier - The specification for how to extract and interpret one
+ * operand.
+ */
+struct OperandSpecifier {
+ OperandEncoding encoding;
+ OperandType type;
+};
+
+/*
+ * Indicates where the opcode modifier (if any) is to be found. Extended
+ * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
+ */
+
+#define MODIFIER_TYPES \
+ ENUM_ENTRY(MODIFIER_NONE) \
+ ENUM_ENTRY(MODIFIER_OPCODE) \
+ ENUM_ENTRY(MODIFIER_MODRM)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODIFIER_TYPES
+ MODIFIER_max
+} ModifierType;
+#undef ENUM_ENTRY
+
+#define X86_MAX_OPERANDS 5
+
+/*
+ * The specification for how to extract and interpret a full instruction and
+ * its operands.
+ */
+struct InstructionSpecifier {
+ ModifierType modifierType;
+ uint8_t modifierBase;
+ struct OperandSpecifier operands[X86_MAX_OPERANDS];
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_SPECIFIER_FIELDS
+};
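+
+/*
+ * A purely hypothetical table entry, for illustration only (the fields added
+ * by INSTRUCTION_SPECIFIER_FIELDS are omitted): a 32-bit register-register
+ * instruction with one operand in the ModR/M reg field and the other in the
+ * r/m field would be described roughly as
+ *
+ *   { MODIFIER_NONE, 0,
+ *     { { ENCODING_REG,  TYPE_R32  },
+ *       { ENCODING_RM,   TYPE_RM32 },
+ *       { ENCODING_NONE, TYPE_NONE },
+ *       { ENCODING_NONE, TYPE_NONE },
+ *       { ENCODING_NONE, TYPE_NONE } } }
+ */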
+
+/*
+ * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
+ * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
+ * respectively.
+ */
+typedef enum {
+ MODE_16BIT,
+ MODE_32BIT,
+ MODE_64BIT
+} DisassemblerMode;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..033973eeeff9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86AsmPrinter
+ X86ATTInstPrinter.cpp
+ X86IntelInstPrinter.cpp
+ X86InstComments.cpp
+ )
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
new file mode 100644
index 000000000000..c82aa330a20c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..c37d8797b39c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,140 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <map>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter.inc"
+
+X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {
+}
+
+void X86ATTInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ OS << '%' << getRegisterName(RegNo);
+}
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+
+StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value (e.g. for jumps and calls). These
+/// print slightly differently than normal immediates. For example, a $ is not
+/// emitted.
+void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ // Print this as a signed 32-bit value.
+ O << (int)Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << '%' << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << '$' << Op.getImm();
+
+ if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+ *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
+
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '$' << *Op.getExpr();
+ }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op, O);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
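+
+// For reference (illustrative only, not exhaustive), the AT&T forms the code
+// above produces look like:
+//   %gs:8(%rax,%rcx,4)   segment override, displacement, base, index, scale
+//   (%rbx)               base register only; a zero displacement is elided
+//   42                   bare displacement when neither base nor index is set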
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
new file mode 100644
index 000000000000..5426e5cf38d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,86 @@
+//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+ X86ATTInstPrinter(const MCAsmInfo &MAI);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
new file mode 100644
index 000000000000..4e28dfe7fa81
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -0,0 +1,260 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "../Utils/X86ShuffleDecode.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<unsigned, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(2, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLBWMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLWDMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLDQMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLQDQMask(2, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLPDMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLPSMask(8, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ DecodeUNPCKHPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ DecodeUNPCKHPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
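+
+// Example of the comment this emits when verbose assembly is enabled
+// (illustrative only): for
+//   pshufd $27, %xmm1, %xmm0
+// the immediate 27 (0b00011011) selects elements 3,2,1,0, so the printed
+// comment reads
+//   xmm0 = xmm1[3,2,1,0]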
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
new file mode 100644
index 000000000000..6b86db4f9e5c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..506e26cbf7cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,143 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+ printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+ for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+ O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ PrintRegName(O, getRegisterName(Op.getReg()));
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, O);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2, O);
+ NeedPlus = true;
+ }
+
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+
+ O << ']';
+}
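+
+// For reference (illustrative only), together with the size directives added
+// by the print*mem wrappers in X86IntelInstPrinter.h, the Intel forms look
+// like:
+//   DWORD PTR [EAX + 4*ECX + 16]   base, scaled index, positive displacement
+//   QWORD PTR [RBP - 8]            negative displacement printed as " - 8"
+//   DWORD PTR FS:[16]              bare displacement with a segment override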
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
new file mode 100644
index 000000000000..e84a1940017d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,96 @@
+//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to Intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+ X86IntelInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "OPAQUE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ca88f8ffd08c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMX86Desc
+ X86MCTargetDesc.cpp
+ X86MCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..b19774ee379e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/X86/TargetDesc/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
new file mode 100644
index 000000000000..27031005bd09
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -0,0 +1,138 @@
+//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1
+};
+
+static cl::opt<AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+static const char *const x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{fpcr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ if (is64Bit)
+ PointerSize = 8;
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+
+ // Use ## as a comment string so that .s files generated by llvm can go
+ // through the GCC preprocessor without causing an error. This is needed
+ // because "clang foo.s" runs the C preprocessor, which is usually reserved
+ // for .S files on other systems. Perhaps this is because the file system
+ // wasn't always case preserving or something.
+ CommentString = "##";
+ PCSymbol = ".";
+
+ SupportsDebugInformation = true;
+ DwarfUsesInlineInfoSection = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
+}
+
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+ if (T.getArch() == Triple::x86_64)
+ PointerSize = 8;
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
+ // .words.
+ if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+ Data64bitsDirective = 0;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+const MCSection *X86ELFMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
+
+X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
new file mode 100644
index 000000000000..2cd4c8eb30ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -0,0 +1,46 @@
+//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+ class Triple;
+
+ struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+ };
+
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
+ struct X86ELFMCAsmInfo : public MCAsmInfo {
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+ struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ };
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 000000000000..b77f37b03f19
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,185 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return "+64bit-mode";
+ return "-64bit-mode";
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know that cpuid clobbers ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
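+
+// Worked example (illustrative): for a leaf-1 EAX of 0x000306A9 the base
+// family is 6 and the base model is 0xA; because the family is 6, the
+// extended model bits (0x3) are folded in, giving Family = 6, Model = 0x3A.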
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = X86_MC::ParseX86Triple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+}
+
+static MCInstrInfo *createX86MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitX86MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitX86MCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+}
+
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoDarwin(TheTriple);
+ }
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
+}
+
+extern "C" void LLVMInitializeX86MCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+ RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 000000000000..89ea22b31be2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,60 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+namespace X86_MC {
+ std::string ParseX86Triple(StringRef TT);
+
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+ void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+ /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp b/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644
index 000000000000..13680c592e01
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp
@@ -0,0 +1,506 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
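+///
+/// For illustration: if AvailableDomains == 0b0110, the value is available in
+/// domains 1 and 2, and getFirstDomain() returns 1, the index of the lowest
+/// set bit.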
+namespace {
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+  // Bitmask of available domains. For an open DomainValue, it is the set of
+  // domains it may still collapse to. For a collapsed DomainValue it is the
+  // set of domains where the register is available for free.
+ unsigned AvailableDomains;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool isCollapsed() const { return Instrs.empty(); }
+
+ // Is domain available?
+ bool hasDomain(unsigned domain) const {
+ return AvailableDomains & (1u << domain);
+ }
+
+ // Mark domain as available.
+ void addDomain(unsigned domain) {
+ AvailableDomains |= 1u << domain;
+ }
+
+ // Restrict to a single domain available.
+ void setSingleDomain(unsigned domain) {
+ AvailableDomains = 1u << domain;
+ }
+
+ // Return bitmask of domains that are available and in mask.
+ unsigned getCommonDomains(unsigned mask) const {
+ return AvailableDomains & mask;
+ }
+
+ // First domain available.
+ unsigned getFirstDomain() const {
+ return CountTrailingZeros_32(AvailableDomains);
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {
+ Refs = AvailableDomains = Dist = 0;
+ Instrs.clear();
+ }
+};
+}
+
+static const unsigned NumRegs = 16;
+
+namespace {
+class SSEDomainFixPass : public MachineFunctionPass {
+ static char ID;
+ SpecificBumpPtrAllocator<DomainValue> Allocator;
+ SmallVector<DomainValue*,16> Avail;
+
+ MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineBasicBlock *MBB;
+ DomainValue **LiveRegs;
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts;
+ unsigned Distance;
+
+public:
+ SSEDomainFixPass() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // DomainValue allocation.
+ DomainValue *Alloc(int domain = -1);
+ void Recycle(DomainValue*);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock();
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
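+/// For example, RegIndex(X86::XMM5) returns 5; anything outside XMM0-XMM15
+/// maps to -1.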
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+ assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ reg -= X86::XMM0;
+ return reg < NumRegs ? (int) reg : -1;
+}
+
+DomainValue *SSEDomainFixPass::Alloc(int domain) {
+ DomainValue *dv = Avail.empty() ?
+ new(Allocator.Allocate()) DomainValue :
+ Avail.pop_back_val();
+ dv->Dist = Distance;
+ if (domain >= 0)
+ dv->addDomain(domain);
+ return dv;
+}
+
+void SSEDomainFixPass::Recycle(DomainValue *dv) {
+ assert(dv && "Cannot recycle NULL");
+ dv->clear();
+ Avail.push_back(dv);
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs) {
+ LiveRegs = new DomainValue*[NumRegs];
+ std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
+ }
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs || !LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
+ Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
+ else
+ SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ DomainValue *dv;
+ if (LiveRegs && (dv = LiveRegs[rx])) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ Collapse(dv, domain);
+ else {
+      // This is an incompatible open DomainValue. Collapse it to whatever
+      // and force the new value into domain. This costs a domain crossing.
+ Collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx] && "Not live after collapse?");
+ LiveRegs[rx]->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ SetLiveReg(rx, Alloc(domain));
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv)
+ SetLiveReg(rx, Alloc(domain));
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
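+/// For illustration: if A->AvailableDomains == 0b011 and
+/// B->AvailableDomains == 0b110, the merge succeeds and restricts A to 0b010;
+/// with no domain in common, Merge returns false and leaves A untouched.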
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);
+ return true;
+}
+
+void SSEDomainFixPass::enterBasicBlock() {
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = RegIndex(*i);
+ if (rx < 0) continue;
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) continue;
+ DomainValue *pdv = fi->second[rx];
+ if (!pdv) continue;
+ if (!LiveRegs || !LiveRegs[rx]) {
+ SetLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->isCollapsed()) {
+        // We are already collapsed, but the predecessor is not. Force it.
+ unsigned domain = LiveRegs[rx]->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(domain))
+ Collapse(pdv, domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ Merge(LiveRegs[rx], pdv);
+ else
+ Force(rx, pdv->getFirstDomain());
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common) available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = CountTrailingZeros_32(available);
+ TII->SetSSEDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!dv->getCommonDomains(available)) {
+ Kill(*i);
+ continue;
+ }
+ // sorted, uniqued insert.
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv) {
+ dv = doms.pop_back_val();
+ continue;
+ }
+
+ DomainValue *latest = doms.pop_back_val();
+ if (Merge(dv, latest)) continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i] == latest)
+ Kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Alloc();
+ dv->Dist = Distance;
+ dv->AvailableDomains = available;
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any XMM registers redefined.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ }
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0;
+ Distance = 0;
+ assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
+
+ // If no XMM registers are used in the function, we can skip it completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
+ E = X86::VR128RegClass.end(); I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance;
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+      if (domp.first) {
+        if (domp.second)
+          visitSoftInstr(mi, domp.second);
+        else
+          visitHardInstr(mi, domp.first);
+      } else if (LiveRegs)
+        visitGenericInstr(mi);
+ }
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0;
+ }
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ delete[] i->second;
+ LiveOuts.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+ return new SSEDomainFixPass();
+}
diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 000000000000..08d4d84f8a8a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+ RegisterTarget<Triple::x86, /*HasJIT=*/true>
+ X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+ Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
diff --git a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..3ad5f991c865
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Utils
+ X86ShuffleDecode.cpp
+ )
+add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/Utils/Makefile b/contrib/llvm/lib/Target/X86/Utils/Makefile
new file mode 100644
index 000000000000..1df6f0f561d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Utils
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
new file mode 100644
index 000000000000..cd06060748b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -0,0 +1,190 @@
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
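+/// For example (illustrative): DecodeINSERTPSMask(0x50, Mask) produces
+/// <0, 5, 2, 3>, i.e. source element 1 (CountS) is inserted into destination
+/// element 1 (CountD) and no lanes are zeroed (ZMask == 0).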
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+  // Default to copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
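+/// For example (illustrative): DecodePSHUFMask(4, 0x1B, Mask) produces the
+/// element-reversing mask <3, 2, 1, 0>.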
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+}
+
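+/// For example (illustrative): DecodePUNPCKHMask(4, Mask) produces the
+/// punpckhdq-style mask <2, 6, 3, 7>.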
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
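+/// For example (illustrative): DecodeSHUFPSMask(4, 0x1B, Mask) produces
+/// <3, 2, 5, 4>; the low half of the result reads from the destination and
+/// the high half from the source.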
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
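+/// For example (illustrative), a 4-element vector yields the unpcklps mask
+/// <0, 4, 1, 5>; wider vectors are decoded one 128-bit section at a time.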
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0) NumSections = 1;  // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
+ }
+}
+
+} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 000000000000..b18f67033096
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,87 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
new file mode 100644
index 000000000000..ec52dfb3e7d1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -0,0 +1,91 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class JITCodeEmitter;
+class MachineCodeEmitter;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class Target;
+class TargetAsmBackend;
+class X86TargetMachine;
+class formatted_raw_ostream;
+class raw_ostream;
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+/// createGlobalBaseRegPass - This pass initializes a global base
+/// register for PIC on x86-32.
+FunctionPass* createGlobalBaseRegPass();
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
+/// createX86JITCodeEmitterPass - Return a pass that emits the collected X86
+/// code to the specified JITCodeEmitter object.
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
+TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
+
+/// createEmitX86CodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
+/// which determines whether the frame pointer register should be
+/// reserved in case dynamic stack alignment is later required.
+///
+FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
new file mode 100644
index 000000000000..4ccb43fe18cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -0,0 +1,234 @@
+//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget state.
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+ "64-bit mode (x86_64)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+ "Enable conditional move instructions">;
+
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ // SSE codegen depends on cmovs, and all
+ // SSE1+ processors support them.
+ [FeatureMMX, FeatureCMOV]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41, FeaturePOPCNT]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
+// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
+// without disabling 64-bit mode.
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions",
+ [FeatureCMOV]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+ "IsUAMemFast", "true",
+ "Fast unaligned memory access">;
+def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
+
+def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
+ "Enable AVX instructions">;
+def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
+ "Enable carry-less multiplication instructions">;
+def FeatureFMA3  : SubtargetFeature<"fma3", "HasFMA3", "true",
+                                    "Enable three-operand fused multiply-add">;
+def FeatureFMA4  : SubtargetFeature<"fma4", "HasFMA4", "true",
+                                    "Enable four-operand fused multiply-add">;
+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
+ "HasVectorUAMem", "true",
+ "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
+ "Enable AES instructions">;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"i586", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", [FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+// "Arrandale" along with corei3 and corei5
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+// Westmere is a similar machine to nehalem with some additional features.
+// Westmere is the corei3/i5/i7 path from nehalem to sandybridge.
+def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit,
+ FeatureAES, FeatureCLMUL]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA]>;
+def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Parser
+//===----------------------------------------------------------------------===//
+
+// Currently the X86 assembly parser only supports ATT syntax.
+def ATTAsmParser : AsmParser {
+ string AsmParserClassName = "ATTAsmParser";
+ int Variant = 0;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = "#";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "%";
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel} flag.
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTInstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelInstPrinter";
+ int Variant = 1;
+ bit isMCAsmWriter = 1;
+}
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+
+ let AssemblyParsers = [ATTAsmParser];
+
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
new file mode 100644
index 000000000000..9b556a55efd9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
@@ -0,0 +1,453 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+         cl::desc("Disable relaxation of arithmetic instructions for X86"));
+
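+// Map a fixup kind to the log2 of its size in bytes; for example, FK_Data_4
+// and the rip-relative 4-byte relocations map to 2 (1 << 2 == 4 bytes).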
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_global_offset_table:
+ case FK_Data_4: return 2;
+ case FK_PCRel_8:
+ case FK_Data_8: return 3;
+ }
+}
+
+namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
+class X86AsmBackend : public TargetAsmBackend {
+public:
+ X86AsmBackend(const Target &T)
+ : TargetAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+ for (unsigned i = 0; i != Size; ++i)
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+};
+} // end anonymous namespace
+
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ case X86::JAE_1: return X86::JAE_4;
+ case X86::JA_1: return X86::JA_4;
+ case X86::JBE_1: return X86::JBE_4;
+ case X86::JB_1: return X86::JB_4;
+ case X86::JE_1: return X86::JE_4;
+ case X86::JGE_1: return X86::JGE_4;
+ case X86::JG_1: return X86::JG_4;
+ case X86::JLE_1: return X86::JLE_4;
+ case X86::JL_1: return X86::JL_4;
+ case X86::JMP_1: return X86::JMP_4;
+ case X86::JNE_1: return X86::JNE_4;
+ case X86::JNO_1: return X86::JNO_4;
+ case X86::JNP_1: return X86::JNP_4;
+ case X86::JNS_1: return X86::JNS_4;
+ case X86::JO_1: return X86::JO_4;
+ case X86::JP_1: return X86::JP_4;
+ case X86::JS_1: return X86::JS_4;
+ }
+}
+
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ // IMUL
+ case X86::IMUL16rri8: return X86::IMUL16rri;
+ case X86::IMUL16rmi8: return X86::IMUL16rmi;
+ case X86::IMUL32rri8: return X86::IMUL32rri;
+ case X86::IMUL32rmi8: return X86::IMUL32rmi;
+ case X86::IMUL64rri8: return X86::IMUL64rri32;
+ case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+ // AND
+ case X86::AND16ri8: return X86::AND16ri;
+ case X86::AND16mi8: return X86::AND16mi;
+ case X86::AND32ri8: return X86::AND32ri;
+ case X86::AND32mi8: return X86::AND32mi;
+ case X86::AND64ri8: return X86::AND64ri32;
+ case X86::AND64mi8: return X86::AND64mi32;
+
+ // OR
+ case X86::OR16ri8: return X86::OR16ri;
+ case X86::OR16mi8: return X86::OR16mi;
+ case X86::OR32ri8: return X86::OR32ri;
+ case X86::OR32mi8: return X86::OR32mi;
+ case X86::OR64ri8: return X86::OR64ri32;
+ case X86::OR64mi8: return X86::OR64mi32;
+
+ // XOR
+ case X86::XOR16ri8: return X86::XOR16ri;
+ case X86::XOR16mi8: return X86::XOR16mi;
+ case X86::XOR32ri8: return X86::XOR32ri;
+ case X86::XOR32mi8: return X86::XOR32mi;
+ case X86::XOR64ri8: return X86::XOR64ri32;
+ case X86::XOR64mi8: return X86::XOR64mi32;
+
+ // ADD
+ case X86::ADD16ri8: return X86::ADD16ri;
+ case X86::ADD16mi8: return X86::ADD16mi;
+ case X86::ADD32ri8: return X86::ADD32ri;
+ case X86::ADD32mi8: return X86::ADD32mi;
+ case X86::ADD64ri8: return X86::ADD64ri32;
+ case X86::ADD64mi8: return X86::ADD64mi32;
+
+ // SUB
+ case X86::SUB16ri8: return X86::SUB16ri;
+ case X86::SUB16mi8: return X86::SUB16mi;
+ case X86::SUB32ri8: return X86::SUB32ri;
+ case X86::SUB32mi8: return X86::SUB32mi;
+ case X86::SUB64ri8: return X86::SUB64ri32;
+ case X86::SUB64mi8: return X86::SUB64mi32;
+
+ // CMP
+ case X86::CMP16ri8: return X86::CMP16ri;
+ case X86::CMP16mi8: return X86::CMP16mi;
+ case X86::CMP32ri8: return X86::CMP32ri;
+ case X86::CMP32mi8: return X86::CMP32mi;
+ case X86::CMP64ri8: return X86::CMP64ri32;
+ case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
+ case X86::PUSHi16: return X86::PUSHi32;
+ case X86::PUSH64i8: return X86::PUSH64i32;
+ case X86::PUSH64i16: return X86::PUSH64i32;
+ }
+}
+
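+// Map an opcode to its relaxed form with a 4-byte immediate or displacement,
+// e.g. JE_1 -> JE_4 and ADD32ri8 -> ADD32ri; opcodes with no relaxed form are
+// returned unchanged.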
+static unsigned getRelaxedOpcode(unsigned Op) {
+ unsigned R = getRelaxedOpcodeArith(Op);
+ if (R != Op)
+ return R;
+ return getRelaxedOpcodeBranch(Op);
+}
+
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Branches can always be relaxed.
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+
+ if (MCDisableArithRelaxation)
+ return false;
+
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+
+ // Check if it has an expression and is not RIP relative.
+ bool hasExp = false;
+ bool hasRIP = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &Op = Inst.getOperand(i);
+ if (Op.isExpr())
+ hasExp = true;
+
+ if (Op.isReg() && Op.getReg() == X86::RIP)
+ hasRIP = true;
+ }
+
+ // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+ // how we do relaxations?
+ return hasExp && !hasRIP;
+}
+
+// FIXME: Can tblgen help at all here to verify there aren't other instructions
+// we can relax?
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
+}
+
+/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// bytes. An optimal nop sequence covers the first 15 bytes; any remaining
+/// bytes are padded with single-byte nops. Returns true on success.
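+///
+/// For illustration: Count == 12 is emitted as two 0x66 prefixes followed by
+/// the 10-byte nop, and Count == 20 as a 15-byte optimal sequence followed by
+/// five single-byte 0x90 nops.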
+bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ static const uint8_t Nops[10][10] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ };
+
+ // Write an optimal sequence for the first 15 bytes.
+ const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+ for (uint64_t i = 0, e = Prefixes; i != e; i++)
+ OW->Write8(0x66);
+ const uint64_t Rest = OptimalCount - Prefixes;
+ for (uint64_t i = 0, e = Rest; i != e; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+
+ // Finish with single byte nops.
+ for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
+ OW->Write8(0x90);
+
+ return true;
+}
+
+/* *** */
+
+namespace {
+class ELFX86AsmBackend : public X86AsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+ : X86AsmBackend(T), OSType(_OSType) {
+ HasReliableSymbolDifference = true;
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+ return ES.getFlags() & ELF::SHF_MERGE;
+ }
+};
+
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ }
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit)
+ : X86AsmBackend(T)
+ , Is64Bit(is64Bit) {
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createWinCOFFObjectWriter(OS, Is64Bit);
+ }
+};
+
+class DarwinX86AsmBackend : public X86AsmBackend {
+public:
+ DarwinX86AsmBackend(const Target &T)
+ : X86AsmBackend(T) { }
+};
+
+class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_32AsmBackend(const Target &T)
+ : DarwinX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
+ }
+};
+
+class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_64AsmBackend(const Target &T)
+ : DarwinX86AsmBackend(T) {
+ HasReliableSymbolDifference = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ // Temporary labels in the string literals sections require symbols. The
+ // issue is that the x86_64 relocation format does not allow symbol +
+ // offset, and so the linker does not have enough information to resolve the
+ // access to the appropriate atom unless an external relocation is used. For
+ // non-cstring sections, we expect the compiler to use a non-temporary label
+ // for anything that could have an addend pointing outside the symbol.
+ //
+ // See <rdar://problem/4765733>.
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
+ }
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    // Fixed-size data sections are uniqued; they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
+};
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T);
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, false);
+
+ return new ELFX86_32AsmBackend(T, TheTriple.getOS());
+}
+
+TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T);
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, true);
+
+ return new ELFX86_64AsmBackend(T, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
new file mode 100644
index 000000000000..99b4479a9fc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -0,0 +1,728 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to X86 machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+/// runOnMachineFunction - Emit the function body.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Have common code print out the function header with linkage info etc.
+ EmitFunctionHeader();
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // We didn't modify anything.
+ return false;
+}
+
+/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// jump tables, constant pools, global addresses and external symbols, all of
+/// which print to a label with various suffixes for relocation types etc.
+void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown symbol type!");
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+
+ MCSymbol *GVSym;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ else
+ GVSym = Mang->getSymbol(GV);
+
+ // Handle dllimport linkage.
+ if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
+ GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (GVSym->getName()[0] != '$')
+ O << *GVSym;
+ else
+ O << '(' << *GVSym << ')';
+ printOffset(MO.getOffset(), O);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const MCSymbol *SymToPrint;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ SmallString<128> TempNameStr;
+ TempNameStr += StringRef(MO.getSymbolName());
+ TempNameStr += StringRef("$stub");
+
+ MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
+ true);
+ }
+ SymToPrint = StubSym.getPointer();
+ } else {
+ SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (SymToPrint->getName()[0] != '$')
+ O << *SymToPrint;
+ else
+      O << '(' << *SymToPrint << ')';
+ break;
+ }
+ }
+
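+  // Append any relocation-specifier suffix implied by the operand's target
+  // flag; e.g. a MO_GOTPCREL reference to a symbol foo is printed below as
+  // "foo@GOTPCREL".  (foo is just an illustrative symbol name.)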
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ break;
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ // These affect the name of the symbol, not any suffix.
+ break;
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+ O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ O << '-' << *MF->getPICBaseSymbol();
+ break;
+ case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOT: O << "@GOT"; break;
+ case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
+ case X86II::MO_PLT: O << "@PLT"; break;
+ case X86II::MO_TLVP: O << "@TLVP"; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
+ break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ printSymbolOperand(MO, O);
+ return;
+ }
+}
+
+
+void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ O << '$' << MO.getImm();
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol: {
+ O << '$';
+ printSymbolOperand(MO, O);
+ break;
+ }
+ }
+}
+
+void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
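+// Prints the AT&T memory-operand form "disp(base,index,scale)"; for example
+// (illustrative operands only) a reference with displacement 16, base %rax,
+// index %rcx and scale 4 comes out as "16(%rax,%rcx,4)".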
+void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
+ // HasParenPart - True if we will print out the () part of the mem ref.
+ bool HasParenPart = IndexReg.getReg() || HasBaseReg;
+
+ if (DispSpec.isImm()) {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || !HasParenPart)
+ O << DispVal;
+ } else {
+ assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI() || DispSpec.isSymbol());
+ printSymbolOperand(MI->getOperand(Op+3), O);
+ }
+
+ if (Modifier && strcmp(Modifier, "H") == 0)
+ O << "+8";
+
+ if (HasParenPart) {
+ assert(IndexReg.getReg() != X86::ESP &&
+ "X86 doesn't allow scaling by ESP");
+
+ O << '(';
+ if (HasBaseReg)
+ printOperand(MI, Op, O, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O, Modifier);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, O, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, O, Modifier);
+}
+
+void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O) {
+ O << *MF->getPICBaseSymbol() << '\n';
+ O << *MF->getPICBaseSymbol() << ':';
+}
+
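+// Maps an inline-asm register width modifier onto the matching sub- or
+// super-register before printing; e.g. applying 'k' to %rax prints "%eax"
+// and 'h' to %eax prints "%ah".  (Illustrative registers.)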
+bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
+ raw_ostream &O) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // This is an address. Currently only 'i' and 'r' are expected.
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
+ printSymbolOperand(MO, O);
+ if (Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+ return false;
+ }
+ if (MO.isReg()) {
+ O << '(';
+ printOperand(MI, OpNo, O);
+ O << ')';
+ return false;
+ }
+ return true;
+
+ case 'c': // Don't print "$" before a global var name or constant.
+ if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
+ printSymbolOperand(MO, O);
+ else
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'A': // Print '*' before a register (it must be a register)
+ if (MO.isReg()) {
+ O << '*';
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ return true;
+
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0], O);
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'P': // This is the operand of a call, treat specially.
+ print_pcrel_imm(MI, OpNo, O);
+ return false;
+
+ case 'n': // Negate the immediate or print a '-' before the operand.
+ // Note: this is a temporary solution. It should be handled target
+ // independently as part of the 'MC' work.
+ if (MO.isImm()) {
+ O << -MO.getImm();
+ return false;
+ }
+ O << '-';
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+    case 'q': // Print DImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'H':
+ printMemReference(MI, OpNo, O, "H");
+ return false;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, O, "no-rip");
+ return false;
+ }
+ }
+ printMemReference(MI, OpNo, O);
+ return false;
+}
+
+void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho())
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+
+void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho()) {
+    // All Darwin targets use Mach-O.
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs;
+
+ Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
+ MCSA_IndirectSymbol);
+        // hlt; hlt; hlt; hlt; hlt   (hlt = 0xf4 = -12).
+ const char HltInsts[] = { -12, -12, -12, -12, -12 };
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Output stubs for external and common global variables.
+ Stubs = MMIMacho.GetGVStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
+ MCSA_IndirectSymbol);
+ // .long 0
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs. However, sometimes the types are local to the file. So
+ // we need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+ MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ }
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+ // Emit type information for external functions
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name, 0);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name, 0);
+ }
+ }
+ }
+
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const TargetData *TD = TM.getTargetData();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmPrinter() {
+ RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+ RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
new file mode 100644
index 000000000000..3a50435d38ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -0,0 +1,87 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+ virtual void EmitStartOfAsmFile(Module &M);
+
+ virtual void EmitEndOfAsmFile(Module &M);
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+ void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 000000000000..4326814a7a96
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,20 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (Windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+using namespace llvm;
+
+
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 000000000000..98ab2a66a17f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,46 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (Windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "X86MachineFunctionInfo.h"
+
+namespace llvm {
+ class X86MachineFunctionInfo;
+ class TargetData;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+ DenseSet<MCSymbol const *> Externals;
+public:
+ X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
+ virtual ~X86COFFMachineModuleInfo();
+
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
+ }
+
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 000000000000..77b99056ae00
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,401 @@
+//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX. For i8, the ABI
+  // requires the values to be in AL and AH; however, this code uses AL and DL
+ // instead. This is because using AH for the second register conflicts with
+ // the way LLVM does multiple return values -- a return of {i16,i8} would end
+ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
+ // for functions that return two i8 values are currently expected to pack the
+ // values into an i16 (which uses AX, and thus AL:AH).
+ CCIfType<[i8] , CCAssignToReg<[AL, DL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
+ // can only be used by ABI non-compliant code. If the target doesn't have XMM
+ // registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+  // 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
+
+ // Long double types are always returned in ST0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+]>;
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0, unless marked
+ // with "inreg" (used here to distinguish one kind of reg from another,
+ // weirdly; this is really the sse-regparm calling convention) in which
+ // case they use XMM0, otherwise it is the same as the common X86 calling
+ // conv.
+ CCIfInReg<CCIfSubtarget<"hasXMMInt()",
+ CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+ CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
+ // SSE2.
+ // This can happen when a float, 2 x float, or 3 x float vector is split by
+ // target lowering, and is returned in 1-3 sse regs.
+ CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+
+ // MMX vector types are always returned in XMM0.
+ CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-Win64 C return-value convention.
+def RetCC_X86_Win64_C : CallingConv<[
+ // The X86-Win64 calling convention always returns __m64 values in RAX.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // Otherwise, everything is the same as 'normal' X86-64 C CC.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 MMX vector arguments are passed in XMM registers on Darwin.
+ CCIfType<[x86mmx],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCIfSubtarget<"hasXMMInt()",
+ CCPromoteToType<v2i64>>>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasXMM()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+  // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>
+]>;
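+
+// For illustration only: under CC_X86_64_C a call such as f(int, double, long)
+// takes EDI for the int, XMM0 for the double and RSI for the long, matching
+// the SysV AMD64 ABI register order.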
+
+// Calling convention used on Win64
+def CC_X86_Win64_C : CallingConv<[
+ // FIXME: Handle byval stuff.
+ // FIXME: Handle varargs.
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // 128 bit vectors are passed by pointer
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+  // Do not pass the sret argument in RCX; the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+  // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>
+]>;
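+
+// Note (illustrative): Win64 ties each of the first four parameter positions
+// to both a GPR and an XMM register, so g(int, double) passes the int in ECX
+// and the double in XMM1, leaving EDX and XMM0 unused for that call.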
+
+def CC_X86_64_GHC : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64],
+ CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+ // Pass in STG registers: F1, F2, F3, F4, D1, D2
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasXMM()",
+ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The first 3 float or double arguments, if marked 'inreg' and if the call
+ // is not a vararg call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasXMMInt()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+
+ // The first 3 __m64 vector arguments are passed in mmx registers if the
+ // call is not a vararg call.
+ CCIfNotVarArg<CCIfType<[x86mmx],
+ CCAssignToReg<[MM0, MM1, MM2]>>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // Long doubles get slots whose size depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 4>>,
+
+ // The first 4 SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in ECX.
+ CCIfNest<CCAssignToReg<[ECX]>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
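+
+// Illustrative example: with fastcall, f(int a, int b, int c) receives a in
+// ECX, b in EDX, and c on the stack via CC_X86_32_Common.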
+
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
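+
+// Illustrative example: for a thiscall method invocation obj->m(5), the
+// "this" pointer lands in ECX and the integer argument 5 goes on the stack.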
+
+def CC_X86_32_FastCC : CallingConv<[
+ // Handles byval parameters. Note that we can't rely on the delegation
+ // to CC_X86_32_Common for this because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // The first 3 float or double arguments, if the call is not a vararg
+ // call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasXMMInt()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
+ // Doubles get 8-byte slots that are 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_GHC : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1
+ CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+
+ // Otherwise, drop to normal X86-32 CC
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+ CCDelegateTo<CC_X86_32>
+]>;
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 000000000000..4b11db7c0331
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,1005 @@
+//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "X86Relocations.h"
+#include "X86.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+ template<class CodeEmitter>
+ class Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const TargetData *TD;
+ X86TargetMachine &TM;
+ CodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ intptr_t PICBaseOffset;
+ bool Is64BitMode;
+ bool IsPIC;
+ public:
+ static char ID;
+ explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(false),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(X86TargetMachine &tm, CodeEmitter &mce,
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp = 0, intptr_t PCAdj = 0,
+ bool Indirect = false);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
+ intptr_t PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ intptr_t Adj = 0, bool IsPCRel = true);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitRegModRMByte(unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo) const;
+ };
+
+template<class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+} // end anonymous namespace.
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified templated MachineCodeEmitter object.
+FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ II = TM.getInstrInfo();
+ TD = TM.getTargetData();
+ Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ do {
+ DEBUG(dbgs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MCInstrDesc &Desc = I->getDesc();
+ emitInstruction(*I, &Desc);
+ // MOVPC32r is basically a call plus a pop instruction.
+ if (Desc.getOpcode() == X86::MOVPC32r)
+ emitInstruction(*I, &II->get(X86::POP32r));
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ }
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with an X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
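+///
+/// The low four bits of the returned value follow the hardware REX layout:
+/// W in bit 3, R in bit 2, X in bit 1, B in bit 0.  As an illustration, a
+/// 64-bit instruction whose ModRM r/m operand is R8 needs W and B, i.e. the
+/// bits 0x9 (prefix byte 0x49 once the fixed 0x4 high nibble is added).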
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+ // Remember where this reference was and where it is to so we can
+ // deal with it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream assuming
+/// this is part of a "take the address of a global" instruction.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */,
+ bool Indirect /* = false */) {
+ intptr_t RelocCST = Disp;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ RelocCST, false)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), RelocCST, false);
+ MCE.addRelocation(MR);
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
+
+ // X86 never needs stubs because instruction selection will always pick
+ // an instruction sequence that is large enough to hold any address
+ // to a symbol.
+ // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
+ bool NeedStub = false;
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, RelocCST,
+ 0, NeedStub));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const {
+ return X86RegisterInfo::getX86RegNum(RegNo);
+}
+
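+// The ModR/M byte packs three fields: mod in bits 7-6, reg/opcode in bits
+// 5-3, and r/m in bits 2-0.  For example (purely illustrative values),
+// ModRMByte(3, 2, 1) yields 0b11'010'001 == 0xD1.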
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
+ unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+ unsigned Index,
+ unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
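+// Little-endian example (illustrative): emitConstant(0x12345678, 4) emits the
+// byte sequence 0x78, 0x56, 0x34, 0x12.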
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
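+///
+/// Equivalently, this holds exactly for values in [-128, 127]; e.g. a
+/// displacement of -100 can use the short disp8 form, while 300 cannot.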
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
+ const TargetMachine &TM) {
+ // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
+ // mechanism as 32-bit mode.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
+ return false;
+
+ // Return true if this is a reference to a stub containing the address of the
+ // global, not the global itself.
+ return isGlobalStubReference(GVOp.getTargetFlags());
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal,
+ intptr_t Adj /* = 0 */,
+ bool IsPCRel /* = true */) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ unsigned RelocType = Is64BitMode ?
+ (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (RelocOp->isGlobal()) {
+    // In the 64-bit static small code model we could potentially emit an
+    // absolute reference, but it's probably not beneficial. If the MCE
+    // supports using RIP directly, do it; otherwise fall back to absolute
+    // (this is determined by IsPCRel).
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+ emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+ Adj, Indirect);
+ } else if (RelocOp->isSymbol()) {
+ emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
+ } else if (RelocOp->isCPI()) {
+ emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+ RelocOp->getOffset(), Adj);
+ } else {
+ assert(RelocOp->isJTI() && "Unexpected machine operand!");
+ emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op,unsigned RegOpcodeField,
+ intptr_t PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isSymbol()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJTI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP ||
+ (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
+ assert(IndexReg.getReg() == 0 && Is64BitMode &&
+ "Invalid rip-relative address");
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+  // Indicate whether the displacement will use a pcrel or absolute reference
+  // by default. MCEs able to resolve addresses on-the-fly use pcrel by
+  // default, while others, unless explicitly asked to use RIP, use absolute
+  // references.
+  bool IsPCRel = MCE.earlyResolveAddresses();
+
+ // Is a SIB byte needed?
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+ unsigned BaseRegNo = -1U;
+ if (BaseReg != 0 && BaseReg != X86::RIP)
+ BaseRegNo = getX86RegNum(BaseReg);
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!Is64BitMode || BaseReg != 0)) {
+ if (BaseReg == 0 || // [disp32] in X86-32 mode
+ BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+ // If the base is not EBP/ESP and there is no displacement, use simple
+ // indirect register encoding, this handles addresses like [EAX]. The
+ // encoding for [EBP] with no displacement means [disp32] so we handle it
+ // by emitting a displacement of 0 below.
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (!DispForReloc && isDisp8(DispVal)) {
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ return;
+ }
+
+ // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
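+  // For example, a scale of 4 maps to SS == 2: the hardware multiplies the
+  // index register by 2^SS.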
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ emitSIBByte(SS, IndexRegNo, 5);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
+ const MCInstrDesc *Desc) {
+ DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+ case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+ case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+ case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+ case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+ case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+ case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+ case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+ case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ unsigned Opcode = Desc->Opcode;
+
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK)
+ MCE.emitByte(0xF0);
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ default: llvm_unreachable("Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
+ MCE.emitByte(0xF3);
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize)
+ MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::TF: // F2 0F 38
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: llvm_unreachable("Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ // Handle REX prefix.
+ if (Is64BitMode) {
+ if (unsigned REX = determineREX(MI))
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default:
+ llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ llvm_unreachable("pseudo instructions should be removed before code"
+ " emission");
+ break;
+    // Do nothing for Int_MemBarrier - it's just a comment. Emit a debug
+    // message to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
+ case TargetOpcode::INLINEASM:
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0])
+ report_fatal_error("JIT does not support inline asm!");
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ MCE.emitByte(BaseOpcode);
+ emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
+ // Remember PIC base.
+ PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
+ X86JITInfo *JTI = TM.getJITInfo();
+ JTI->setPICBase(MCE.getCurrentPCValue());
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm: {
+ MCE.emitByte(BaseOpcode);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ break;
+ }
+
+ if (MO.isGlobal()) {
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0);
+ break;
+ }
+
+ if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
+ if (MO.isJTI()) {
+ emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ assert(MO.isImm() && "Unknown RawFrm operand!");
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+ Opcode == X86::WINCALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
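+      // The extra 4 accounts for the rel32 field itself: the target is
+      // encoded relative to the end of the 4-byte immediate.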
+ emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
+ } else
+ emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::AddRegFrm: {
+ MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri64i32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp,
+ getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
+ .getReg()));
+ CurOp += X86::AddrNumOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+
+ intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+ PCAdj);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += X86::AddrNumOperands;
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO.isImm()) {
+ emitConstant(MO.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO, TM);
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+
+ case X86II::MRM_C1:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC1);
+ break;
+ case X86II::MRM_C8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC8);
+ break;
+ case X86II::MRM_C9:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC9);
+ break;
+ case X86II::MRM_E8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xE8);
+ break;
+ case X86II::MRM_F0:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xF0);
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+#ifndef NDEBUG
+ dbgs() << "Cannot encode all operands of: " << MI << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
new file mode 100644
index 000000000000..f1d7edea7210
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -0,0 +1,153 @@
+//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ELFWriterInfo.h"
+#include "X86Relocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the X86ELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+  : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
+  EMachine = is64Bit ? EM_X86_64 : EM_386;
+}
+
+X86ELFWriterInfo::~X86ELFWriterInfo() {}
+
+unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ if (is64Bit) {
+ switch(MachineRelTy) {
+ case X86::reloc_pcrel_word:
+ return ELF::R_X86_64_PC32;
+ case X86::reloc_absolute_word:
+ return ELF::R_X86_64_32;
+ case X86::reloc_absolute_word_sext:
+ return ELF::R_X86_64_32S;
+ case X86::reloc_absolute_dword:
+ return ELF::R_X86_64_64;
+ case X86::reloc_picrel_word:
+ default:
+ llvm_unreachable("unknown x86_64 machine relocation type");
+ }
+ } else {
+ switch(MachineRelTy) {
+ case X86::reloc_pcrel_word:
+ return ELF::R_386_PC32;
+ case X86::reloc_absolute_word:
+ return ELF::R_386_32;
+ case X86::reloc_absolute_word_sext:
+ case X86::reloc_absolute_dword:
+ case X86::reloc_picrel_word:
+ default:
+ llvm_unreachable("unknown x86 machine relocation type");
+ }
+ }
+ return 0;
+}
+
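+// Note: for the PC-relative 32-bit relocation types the default addend
+// subtracts 4 because the rel32 field is resolved relative to the end of
+// the 4-byte field (see computeRelocation below).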
+long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32: return Modifier - 4;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32: return Modifier - 4;
+ case ELF::R_386_32: return Modifier;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ return 32;
+ case ELF::R_X86_64_64:
+ return 64;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32:
+ case ELF::R_386_32:
+ return 32;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32:
+ return true;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
+ return false;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32:
+ return true;
+ case ELF::R_386_32:
+ return false;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return is64Bit ?
+ X86::reloc_absolute_dword : X86::reloc_absolute_word;
+}
+
+long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
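+  // With illustrative numbers: a symbol at offset 0x40 referenced from a
+  // rel32 field at offset 0x10 resolves to 0x40 - (0x10 + 4) = 0x2C.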
+ if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
+ return SymOffset - (RelOffset + 4);
+ else
+    assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
new file mode 100644
index 000000000000..a45b5bb66a07
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
@@ -0,0 +1,59 @@
+//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ELF_WRITER_INFO_H
+#define X86_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class X86ELFWriterInfo : public TargetELFWriterInfo {
+
+ public:
+ X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~X86ELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const { return is64Bit; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+    /// getRelocationTySize - Returns the size of the relocatable field in bits.
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+    /// used to reference an absolute label, e.g. a jump table entry.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields can be relocated directly,
+    /// avoiding relocation symbol emission; compute the final relocation
+    /// value for such a symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // X86_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
new file mode 100644
index 000000000000..21e163a30054
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -0,0 +1,2133 @@
+//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// StackPtr - Register used as the stack pointer.
+ ///
+ unsigned StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ }
+
+ virtual bool TargetSelectInstruction(const Instruction *I);
+
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+  /// try to fold the load as an operand to the instruction, returning true on
+  /// success.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
+
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
+
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(const Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
+
+ bool X86SelectLoad(const Instruction *I);
+
+ bool X86SelectStore(const Instruction *I);
+
+ bool X86SelectRet(const Instruction *I);
+
+ bool X86SelectCmp(const Instruction *I);
+
+ bool X86SelectZExt(const Instruction *I);
+
+ bool X86SelectBranch(const Instruction *I);
+
+ bool X86SelectShift(const Instruction *I);
+
+ bool X86SelectSelect(const Instruction *I);
+
+ bool X86SelectTrunc(const Instruction *I);
+
+ bool X86SelectFPExt(const Instruction *I);
+ bool X86SelectFPTrunc(const Instruction *I);
+
+ bool X86VisitIntrinsicCall(const IntrinsicInst &I);
+ bool X86SelectCall(const Instruction *I);
+
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ unsigned TargetMaterializeConstant(const Constant *C);
+
+ unsigned TargetMaterializeAlloca(const AllocaInst *C);
+
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
+           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
+ }
+
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
+};
+
+} // end anonymous namespace.
+
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ VT = evt.getSimpleVT();
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT
+/// from the address described by AM. Return true and the result register by
+/// reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
+ unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), ResultReg), AM);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT to the address described by AM (a base pointer plus displacement
+/// offset, or a GlobalAddress). Return true if it is possible.
+bool
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
+ Val = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ break;
+ }
+
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM).addReg(Val);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
+ const X86AddressMode &AM) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if ((int)CI->getSExtValue() == CI->getSExtValue())
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM)
+ .addImm(Signed ? (uint64_t) CI->getSExtValue() :
+ CI->getZExtValue());
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ return X86FastEmitStore(VT, ValReg, AM);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::Alloca: {
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt<32>(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ X86AddressMode SavedAM = AM;
+
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+      // An array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+ // Check for displacement overflow.
+ if (!isInt<32>(Disp))
+ break;
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ if (X86SelectAddress(U->getOperand(0), AM))
+ return true;
+
+ // If we couldn't merge the gep value into this addr mode, revert back to
+ // our address and just match the value instead of completely failing.
+ AM = SavedAM;
+ break;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models or TLS yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectCallAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle DLLImport.
+ if (GV->hasDLLImportLinkage())
+ return false;
+
+ // Can't handle TLS.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
+ }
+
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(1), AM))
+ return false;
+
+ return X86FastEmitStore(VT, I->getOperand(0), AM);
+}
+
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
+ ->getBytesToPopOnReturn() != 0)
+    return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ // Now emit the RET.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ return true;
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(0), AM))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (X86FastEmitLoad(VT, AM, ResultReg)) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ return false;
+}
+
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
+ case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHSC as the RHS
+/// of the comparison, return an opcode that works for the compare (e.g.
+/// CMP32ri); otherwise return 0.
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
+ EVT VT) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&X86::GR8RegClass);
+ unsigned SetCCOpc;
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OEQ: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
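+    // ucomiss/ucomisd sets ZF on equality and PF on an unordered (NaN)
+    // compare, so "ordered and equal" is computed as SETE AND SETNP.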
+ unsigned EReg = createResultReg(&X86::GR8RegClass);
+ unsigned NPReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNPr), NPReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_UNE: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned NEReg = createResultReg(&X86::GR8RegClass);
+ unsigned PReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
+ .addReg(PReg).addReg(NEReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of Op0/Op1.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(const Instruction *I) {
+ // Handle zero-extension from i1 to i8, which is common.
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+ }
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+
+bool X86FastISel::X86SelectBranch(const Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison
+  // in the same block (values defined in other blocks may not have
+ // initialized registers).
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
+
+ switch (Predicate) {
+ case CmpInst::FCMP_OEQ:
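+        // A single jcc cannot test ZF and PF together, so invert OEQ to UNE,
+        // swap the successors, and let the two-branch UNE lowering below
+        // handle it.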
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::FCMP_UNE;
+ // FALL THROUGH
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+
+ if (Predicate == CmpInst::FCMP_UNE) {
+ // X86 requires a second branch to handle UNE (and OEQ,
+ // which is mapped to UNE above).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
+ .addMBB(TrueMBB);
+ }
+
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
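+    // Only the low bit of the wider source carries the boolean value, so the
+    // test emitted below checks just bit 0.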
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+  // Note that i1 essentially gets ANY_EXTEND'ed to i8 when it isn't used in
+  // an explicit cast, so only the low bit is meaningful; test just that bit.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(const Instruction *I) {
+ unsigned CReg = 0, OpReg = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (I->getType()->isIntegerTy(8)) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(16)) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(32)) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(64)) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
+ if (CReg != X86::CL)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+  // We only use cmov here; if we don't have a cmov instruction, bail.
+ if (!Subtarget->hasCMov()) return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (VT == MVT::i16) {
+ Opc = X86::CMOVE16rr;
+ RC = &X86::GR16RegClass;
+ } else if (VT == MVT::i32) {
+ Opc = X86::CMOVE32rr;
+ RC = &X86::GR32RegClass;
+ } else if (VT == MVT::i64) {
+ Opc = X86::CMOVE64rr;
+ RC = &X86::GR64RegClass;
+ } else {
+ return false;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
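+  // TEST sets ZF when the condition is zero; the CMOVE below then replaces
+  // the true value (Op1) with the false value (Op2) in that case.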
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectFPExt(const Instruction *I) {
+ // fpext from float to double.
+ if (Subtarget->hasSSE2() &&
+ I->getType()->isDoubleTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isFloatTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
+ if (Subtarget->hasSSE2()) {
+ if (I->getType()->isFloatTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isDoubleTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(const Instruction *I) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ return false;
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
+
+ if (!Subtarget->is64Bit()) {
+    // If we're on x86-32, we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
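+    // (Only EAX/EBX/ECX/EDX have addressable 8-bit subregisters in 32-bit
+    // mode.)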
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
+ unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ InputReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
+
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
+
+ // We don't care about alignment here since we just emit integer accesses.
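+  // For example, a 13-byte copy on x86-64 becomes an i64, an i32, and an i8
+  // load/store pair.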
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+    // Don't handle volatile memcpys. Copies that aren't small constant
+    // lengths are lowered to a library call below.
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memset");
+ }
+ case Intrinsic::stackprotector: {
+    // Emit inline code to store the stack guard onto the stack.
+ EVT PtrTy = TLI.getPointerTy();
+
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ // Grab the frame index.
+ X86AddressMode AM;
+ if (!X86SelectAddress(Slot, AM)) return false;
+ if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
+ X86AddressMode AM;
+ assert(DI->getAddress() && "Null address should be checked earlier!");
+ if (!X86SelectAddress(DI->getAddress(), AM))
+ return false;
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ // FIXME may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
+ addImm(0).addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+ // by a seto/setc instruction.
+ const Function *Callee = I.getCalledFunction();
+ const Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+    // The call to CreateRegs builds two sequential registers, to store
+    // both of the returned values.
+ unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
+ .addReg(Reg1).addReg(Reg2);
+
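+    // Unsigned overflow is reported in CF (SETB); signed overflow is reported
+    // in OF (SETO).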
+ unsigned Opc = X86::SETBr;
+ if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
+ Opc = X86::SETOr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
+
+ UpdateValueMap(&I, ResultReg, 2);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::X86SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm yet.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Handle intrinsic calls.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ return X86VisitIntrinsicCall(*II);
+
+ return DoSelectCall(I, 0);
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Handle only C and fastcc calling conventions for now.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (isVarArg && Subtarget->isTargetWin64())
+ return false;
+
+ // Fast-isel doesn't know about callee-pop yet.
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ GuaranteedTailCallOpt))
+ return false;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
+ if (!CanLowerReturn)
+ return false;
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectCallAddress(Callee, CalleeAM))
+ return false;
+ unsigned CalleeOp = 0;
+ const GlobalValue *GV = 0;
+ if (CalleeAM.GV != 0) {
+ GV = CalleeAM.GV;
+ } else if (CalleeAM.Base.Reg != 0) {
+ CalleeOp = CalleeAM.Base.Reg;
+ } else
+ return false;
+
+ // Deal with call operands first.
+ SmallVector<const Value *, 8> ArgVals;
+ SmallVector<unsigned, 8> Args;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgVals.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+    // last two arguments, which should not be passed to the underlying
+    // functions.
+ if (MemIntName && e-i <= 2)
+ break;
+ Value *ArgVal = *i;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ const PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
+ const Type *ArgTy = ArgVal->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64())
+ CCInfo.AllocateStack(32, 8);
+
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
+ .addImm(NumBytes);
+
+  // Process the arguments: walk the register/memloc assignments, inserting
+ // copies / loads.
+ SmallVector<unsigned, 4> RegArgs;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = Args[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = StackPtr;
+ AM.Disp = LocMemOffset;
+ const Value *ArgVal = ArgVals[VA.getValNo()];
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+        // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
+ X86FastEmitStore(ArgVT, ArgVal, AM);
+ } else {
+ X86FastEmitStore(ArgVT, Arg, AM);
+ }
+ }
+ }
+
+  // ELF / PIC requires the GOT pointer to be in EBX before making function
+  // calls via the PLT.
+ if (Subtarget->isPICStyleGOT()) {
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
+ }
+
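+  // The x86-64 SysV calling convention expects AL to hold (an upper bound on)
+  // the number of XMM registers used by a varargs call.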
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
+ // Issue the call.
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addReg(CalleeOp);
+
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+    // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
+ }
+
+  // Add an implicit use of the GOT pointer in EBX.
+ if (Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX);
+
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ unsigned NumBytesCallee = 0;
+ if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ NumBytesCallee = 4;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(NumBytesCallee);
+
+ // Build info for return calling conv lowering code.
+ // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, I->getType(), RetTys);
+ for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
+ EVT VT = RetTys[i];
+ EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
+ for (unsigned j = 0; j != NumRegs; ++j) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = !CS.getInstruction()->use_empty();
+ if (CS.paramHasAttr(0, Attribute::SExt))
+ MyFlags.Flags.setSExt();
+ if (CS.paramHasAttr(0, Attribute::ZExt))
+ MyFlags.Flags.setZExt();
+ if (CS.paramHasAttr(0, Attribute::InReg))
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ // Now handle call return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
+ unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ unsigned CopyReg = ResultReg + i;
+
+ // If this is a call to a function that returns an fp value on the x87 fp
+ // stack, but where we prefer to use the value in xmm registers, copy it
+ // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(X86::RFP80RegisterClass);
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+ CopyReg);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
+ }
+
+ if (CopyVT != RVLocs[i].getValVT()) {
+      // Round the F80 value to the right size, which also moves it to the
+      // appropriate xmm register. This is accomplished by storing the F80
+      // value in memory and
+ // then loading it back. Ewww...
+ EVT ResVT = RVLocs[i].getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc)), FI)
+ .addReg(CopyReg);
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg + i), FI);
+ }
+ }
+
+ if (RVLocs.size())
+ UpdateValueMap(I, ResultReg, RVLocs.size());
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::Call:
+ return X86SelectCall(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
+ MVT VT;
+ if (!isTypeLegal(C->getType(), VT))
+ return false;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ // Materialize addresses with LEA instructions.
+ if (isa<GlobalValue>(C)) {
+ X86AddressMode AM;
+ if (X86SelectAddress(C, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+ }
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ unsigned char OpFlag = 0;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
+ }
+
+ // Create the load from the constant pool.
+ unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned ResultReg = createResultReg(RC);
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg),
+ MCPOffset, PICBase, OpFlag);
+
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but TargetMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+  // X86SelectAddress, and TargetMaterializeAlloca.
+ if (!FuncInfo.StaticAllocaMap.count(C))
+ return 0;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM))
+ return 0;
+ unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return false;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::FsFLD0SS;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::FsFLD0SD;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true on
+/// success.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
+namespace llvm {
+ llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ return new X86FastISel(funcInfo);
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/X86FixupKinds.h
new file mode 100644
index 000000000000..17d242ab761e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FixupKinds.h
@@ -0,0 +1,33 @@
+//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_X86_X86FIXUPKINDS_H
+#define LLVM_X86_X86FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace X86 {
+enum Fixups {
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+ reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
+ reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
+ // this will be sign extended at
+ // runtime.
+ reloc_global_offset_table, // 32-bit, relative to the start
+ // of the instruction. Used only
+ // for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 000000000000..6eed6abd43e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1709 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// pseudo registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
+//
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
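+    //
+    // For example, if FP0 and FP2 are live across a bundle's edges, Mask is
+    // 0b101; once the order is fixed, FixStack[0] and FixStack[1] record
+    // which of them sits in st(0) and st(1).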
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle() : Mask(0), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
+ MachineBasicBlock *MBB; // Current basic block
+
+ // The hardware keeps track of how many FP registers are live, so we have
+ // to model that exactly. Usually, each live register corresponds to an
+ // FP<n> register, but when dealing with calls, returns, and inline
+    // assembly, it is sometimes necessary to have live scratch registers.
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned StackTop; // The current top of the FP stack.
+
+ enum {
+ NumFPRegs = 16 // Including scratch pseudo-registers.
+ };
+
+ // For each live FP<n> register, point to its Stack[] entry.
+ // The first entries correspond to FP0-FP6, the rest are scratch registers
+ // used when we need slightly different live registers than what the
+ // register allocator thinks.
+ unsigned RegMap[NumFPRegs];
+
+ // Pending fixed registers - Inline assembly needs FP registers to appear
+ // in fixed stack slot positions. This is handled by copying FP registers
+ // to ST registers before the instruction, and copying back after the
+ // instruction.
+ //
+ // This is modeled with pending ST registers. NumPendingSTs is the number
+ // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
+ // register that holds the ST value. The ST registers are not moved into
+ // place until immediately before the instruction that needs them.
+ //
+ // It can happen that we need an ST register to be live when no FP register
+ // holds the value:
+ //
+ // %ST0 = COPY %FP4<kill>
+ //
+ // When that happens, we allocate a scratch FP register to hold the ST
+ // value. That means every register in PendingST must be live.
+
+ unsigned NumPendingSTs;
+ unsigned char PendingST[8];
+
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
+ void dumpStack() const {
+ dbgs() << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ dbgs() << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ for (unsigned i = 0; i != NumPendingSTs; ++i)
+ dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
+ dbgs() << "\n";
+ }
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < NumFPRegs && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() {
+ for (int i = NumFPRegs - 1; i >= 8; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+    /// isScratchReg - Returns true if RegNo is a scratch FP register.
+ bool isScratchReg(unsigned RegNo) {
+      return RegNo >= 8 && RegNo < NumFPRegs;
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
+ unsigned getStackEntry(unsigned STi) const {
+ if (STi >= StackTop)
+ report_fatal_error("Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
+ unsigned getSTReg(unsigned RegNo) const {
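+      // st(i) counts down from the top of the stack: the register in slot
+      // StackTop-1 is st(0).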
+ return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack.
+ void pushReg(unsigned Reg) {
+ assert(Reg < NumFPRegs && "Register number out of range!");
+ if (StackTop >= 8)
+ report_fatal_error("Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ if (isAtTop(RegNo)) return;
+
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+      // Emit an fxch to update the runtime processor's version of the state.
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
+ ++NumFXCH;
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
+ BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+    /// currently at the top of the stack, we just pop it; otherwise we store
+    /// the current top-of-stack into the specified slot and
+ /// then pop the top of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+ /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ // Check if a COPY instruction is using FP registers.
+ bool isFPCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ return X86::RFP80RegClass.contains(DstReg) ||
+ X86::RFP80RegClass.contains(SrcReg);
+ }
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// getFPReg - Return the X86::FPx register number for the specified operand.
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isReg() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ Bundles = &getAnalysis<EdgeBundles>();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ // Process any unreachable blocks in arbitrary order now.
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ LiveBundles.clear();
+
+ return Changed;
+}
+
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ LiveBundles.resize(Bundles->getNumBundles());
+
+ // Gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+    // Update the MBB's incoming bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
+ }
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+ NumPendingSTs = 0;
+
+ setupBlockStack();
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ uint64_t Flags = MI->getDesc().TSFlags;
+
+ unsigned FPInstClass = Flags & X86II::FPTypeMask;
+ if (MI->isInlineAsm())
+ FPInstClass = X86II::SpecialFP;
+
+ if (MI->isCopy() && isFPCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
+ if (FPInstClass == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (FPInstClass) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: llvm_unreachable("Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+    // Print out all of the instructions expanded to, if -debug is enabled.
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ dbgs() << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ dbgs() << "Inserted instructions:\n\t";
+ Start->print(dbgs(), &MF.getTarget());
+ while (++Start != llvm::next(I)) {}
+ }
+ dumpStack();
+ );
+
+ Changed = true;
+ }
+
+ finishBlockStack();
+
+ return Changed;
+}
+
+/// setupBlockStack - Use the live bundles to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the expected fixed stack of any
+/// predecessors, and ensure that all predecessors are expecting the same
+/// stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
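+ // TableEntry - A single mapping from a pseudo opcode ('from') to its concrete
+ // replacement ('to'). Tables of these entries are kept sorted by 'from' so
+ // they can be searched with std::lower_bound.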
+ struct TableEntry {
+ unsigned from;
+ unsigned to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+ const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+#ifndef NDEBUG
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+#endif
+
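+// Lookup - Binary search a sorted TableEntry array for Opcode. Returns the
+// mapped value, or -1 if Opcode is not present.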
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is a register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ABS_Fp80 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m64 , X86::ADD_F64m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m80 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m80 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CHS_Fp80 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVB_Fp80 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVE_Fp80 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::CMOVP_Fp80 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::COS_Fp80 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m64 , X86::DIV_F64m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m80 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m80 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp16m80 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp32m80 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ILD_Fp64m80 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp16m80 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp32m80 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::IST_Fp64m80 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp080 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp180 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp32m64 , X86::LD_F32m },
+ { X86::LD_Fp32m80 , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::LD_Fp64m80 , X86::LD_F64m },
+ { X86::LD_Fp80m , X86::LD_F80m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m64 , X86::MUL_F64m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m80 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m80 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SIN_Fp80 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::SQRT_Fp80 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::ST_Fp80m32 , X86::ST_F32m },
+ { X86::ST_Fp80m64 , X86::ST_F64m },
+ { X86::ST_FpP80m , X86::ST_FP80m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m64 , X86::SUB_F64m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m80 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m80 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::TST_Fp80 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr80 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr80 , X86::UCOM_Fr },
+};
+
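+// getConcreteOpcode - Translate a register-file FP pseudo opcode into the
+// concrete stack-form opcode using OpcodeTable above.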
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ MachineInstr* MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ ASSERT_SORTED(PopTable);
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setDesc(TII->get(Opcode));
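+ // The popping FUCOMPP implicitly compares ST(0) with ST(1) and takes no
+ // operands, so drop the leftover register operand.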
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop the value after the current instruction; otherwise
+/// we store the current top-of-stack into the specified slot, then pop the top
+/// of stack.
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ while (StackTop) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ if (FixCount > 0)
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ // FISTP64m is strange because there isn't a non-popping version.
+ // If we have one _and_ we don't want to pop the operand, duplicate the value
+ // on the stack instead of moving it. This ensures that popping the value is
+ // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64 ||
+ MI->getOpcode() == X86::IST_Fp64m80 ||
+ MI->getOpcode() == X86::ISTT_Fp16m80 ||
+ MI->getOpcode() == X86::ISTT_Fp32m80 ||
+ MI->getOpcode() == X86::ISTT_Fp64m80 ||
+ MI->getOpcode() == X86::ST_FpP80m)) {
+ duplicateToTop(Reg, getScratchReg(), I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m ||
+ MI->getOpcode() == X86::ST_FP80m) {
+ if (StackTop == 0)
+ report_fatal_error("Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW: Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+#ifndef NDEBUG
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+#endif
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc) {
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ if (StackTop == 0)
+ report_fatal_error("Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::ADD_Fp80 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::DIV_Fp80 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::MUL_Fp80 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+ { X86::SUB_Fp80 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::DIV_Fp80 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+ { X86::SUB_Fp80 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp80 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp80 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::ADD_Fp80 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::DIV_Fp80 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::MUL_Fp80 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+ { X86::SUB_Fp80 , X86::SUB_FrST0 },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
+/// instructions which need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
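+ // All four instruction tables have the same number of entries, so the length
+ // of ForwardST0Table is valid for whichever table was selected above.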
+ int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
+ MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // Make sure the first operand is on the top of the stack; the other one can
+ // be anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SpecialFP instruction!");
+ case TargetOpcode::COPY: {
+ // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
+ const MachineOperand &MO1 = MI->getOperand(1);
+ const MachineOperand &MO0 = MI->getOperand(0);
+ unsigned DstST = MO0.getReg() - X86::ST0;
+ unsigned SrcST = MO1.getReg() - X86::ST0;
+ bool KillsSrc = MI->killsRegister(MO1.getReg());
+
+ // ST = COPY FP. Set up a pending ST register.
+ if (DstST < 8) {
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ assert(!MO0.isDead() && "Cannot copy to dead ST register");
+
+ // Unallocated STs are marked as the nonexistent FP255.
+ while (NumPendingSTs <= DstST)
+ PendingST[NumPendingSTs++] = NumFPRegs;
+
+ // STi could still be live from a previous inline asm.
+ if (isScratchReg(PendingST[DstST])) {
+ DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
+ << '\n');
+ freeStackSlotBefore(MI, PendingST[DstST]);
+ }
+
+ // When the source is killed, allocate a scratch FP register.
+ if (KillsSrc) {
+ unsigned Slot = getSlot(SrcFP);
+ unsigned SR = getScratchReg();
+ PendingST[DstST] = SR;
+ Stack[Slot] = SR;
+ RegMap[SR] = Slot;
+ } else
+ PendingST[DstST] = SrcFP;
+ break;
+ }
+
+ // FP = COPY ST. Extract fixed stack value.
+ // Any instruction defining ST registers must have assigned them to a
+ // scratch register.
+ if (SrcST < 8) {
+ unsigned DstFP = getFPReg(MO0);
+ assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
+ assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
+ unsigned SrcFP = PendingST[SrcST];
+ assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
+ assert(isLive(SrcFP) && "Scratch holding ST is dead");
+
+ // DstFP steals the stack slot from SrcFP.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+
+ // Always treat the ST as killed.
+ PendingST[SrcST] = NumFPRegs;
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+ break;
+ }
+
+ // FP <- FP copy.
+ unsigned DstFP = getFPReg(MO0);
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ if (KillsSrc) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+ } else {
+ // For COPY we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcFP, DstFP, I);
+ }
+ break;
+ }
+
+ case X86::FpPOP_RETVAL: {
+ // The FpPOP_RETVAL instruction is used after calls that return a value on
+ // the floating point stack. We cannot model this with ST defs since CALL
+ // instructions have fixed clobber lists. This instruction is interpreted
+ // to mean that there is one more live register on the stack than we
+ // thought.
+ //
+ // This means that StackTop does not match the hardware stack between a
+ // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
+ // between CALL and FpPOP_RETVAL as long as they don't overflow the
+ // hardware stack.
+ unsigned DstFP = getFPReg(MI->getOperand(0));
+
+ // Move existing stack elements up to reflect reality.
+ assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+ if (StackTop) {
+ std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ ++RegMap[i];
+ }
+ ++StackTop;
+
+ // DstFP is the new bottom of the stack.
+ Stack[0] = DstFP;
+ RegMap[DstFP] = 0;
+
+ // DstFP will be killed by processBasicBlock if this was a dead def.
+ break;
+ }
+
+ case TargetOpcode::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr.
+ //
+ // There are special rules for x87 inline assembly. The compiler must know
+ // exactly how many registers are popped and pushed implicitly by the asm.
+ // Otherwise it is not possible to restore the stack state after the inline
+ // asm.
+ //
+ // There are 3 kinds of input operands:
+ //
+ // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+ // popped input operand must be in a fixed stack slot, and it is either
+ // tied to an output operand, or in the clobber list. The MI has ST use
+ // and def operands for these inputs.
+ //
+ // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+ // preserved by the inline asm. The fixed stack slots must be STn-STm
+ // following the popped inputs. A fixed input operand cannot be tied to
+ // an output or appear in the clobber list. The MI has ST use operands
+ // and no defs for these inputs.
+ //
+ // 3. Preserved inputs. These inputs use the "f" constraint which is
+ // represented as an FP register. The inline asm won't change these
+ // stack slots.
+ //
+ // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+ // registers do not count as output operands. The inline asm changes the
+ // stack as if it popped all the popped inputs and then pushed all the
+ // output operands.
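+ //
+ // For example, a GCC-style asm using the x87 register constraints:
+ //
+ //   asm("fsincos" : "=t"(c), "=u"(s) : "0"(x));
+ //
+ // has one popped input (x, tied to output 0) and two ST outputs (c in ST(0),
+ // s in ST(1)): the stack effect is as if the input were popped and both
+ // results pushed.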
+
+ // Scan the assembly for ST registers used, defined and clobbered. We can
+ // only tell clobbers from defs by looking at the asm descriptor.
+ unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+ unsigned NumOps = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+ unsigned Flags = MI->getOperand(i).getImm();
+ NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumOps != 1)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i + 1);
+ if (!MO.isReg())
+ continue;
+ unsigned STReg = MO.getReg() - X86::ST0;
+ if (STReg >= 8)
+ continue;
+
+ switch (InlineAsm::getKind(Flags)) {
+ case InlineAsm::Kind_RegUse:
+ STUses |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ STDefs |= (1u << STReg);
+ if (MO.isDead())
+ STDeadDefs |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_Clobber:
+ STClobbers |= (1u << STReg);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (STUses && !isMask_32(STUses))
+ MI->emitError("fixed input regs must be last on the x87 stack");
+ unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+ // Defs must be contiguous from the stack top. ST0-STn.
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
+ unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+ // So must the clobbered stack slots. ST0-STm, m >= n.
+ if (STClobbers && !isMask_32(STDefs | STClobbers))
+ MI->emitError("clobbers must be last on the x87 stack");
+
+ // Popped inputs are the ones that are also clobbered or defined.
+ unsigned STPopped = STUses & (STDefs | STClobbers);
+ if (STPopped && !isMask_32(STPopped))
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
+ unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+ DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+ << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+ // Scan the instruction for FP uses corresponding to "f" constraints.
+ // Collect FP registers to kill after the instruction.
+ // Always kill all the scratch regs.
+ unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+ unsigned FPUsed = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ if (!Op.isUse())
+ MI->emitError("illegal \"f\" output constraint");
+ unsigned FPReg = getFPReg(Op);
+ FPUsed |= 1U << FPReg;
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ FPKills |= 1U << FPReg;
+ }
+
+ // The popped inputs will be killed by the instruction, so duplicate them
+ // if the FP register needs to be live after the instruction, or if it is
+ // used in the instruction itself. We effectively treat the popped inputs
+ // as early clobbers.
+ for (unsigned i = 0; i < NumSTPopped; ++i) {
+ if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+ continue;
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+ PendingST[i] = SR;
+ }
+
+ // Make sure we have a unique live register for every fixed use. Some of
+ // them could be undef uses, and we need to emit LD_F0 instructions.
+ for (unsigned i = 0; i < NumSTUses; ++i) {
+ if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+ // Check for shared assignments.
+ for (unsigned j = 0; j < i; ++j) {
+ if (PendingST[j] != PendingST[i])
+ continue;
+ // STi and STj are in the same register, create a copy.
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i])
+ << " to avoid collision with ST" << j << '\n');
+ PendingST[i] = SR;
+ }
+ continue;
+ }
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(SR);
+ PendingST[i] = SR;
+ if (NumPendingSTs == i)
+ ++NumPendingSTs;
+ }
+ assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+ // Now we can rearrange the live registers to match what was requested.
+ shuffleStackTop(PendingST, NumPendingSTs, I);
+ DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+ // With the stack layout fixed, rewrite the FP registers.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+ }
+
+ // Simulate the inline asm popping its inputs and pushing its outputs.
+ StackTop -= NumSTPopped;
+
+ // Hold the fixed output registers in scratch FP registers. They will be
+ // transferred to real FP registers by copies.
+ NumPendingSTs = 0;
+ for (unsigned i = 0; i < NumSTDefs; ++i) {
+ unsigned SR = getScratchReg();
+ pushReg(SR);
+ FPKills &= ~(1u << SR);
+ }
+ for (unsigned i = 0; i < NumSTDefs; ++i)
+ PendingST[NumPendingSTs++] = getStackEntry(i);
+ DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+ // If any of the ST defs were dead, pop them immediately. Our caller only
+ // handles dead FP defs.
+ MachineBasicBlock::iterator InsertPt = MI;
+ for (unsigned i = 0; STDefs & (1u << i); ++i) {
+ if (!(STDeadDefs & (1u << i)))
+ continue;
+ freeStackSlotAfter(InsertPt, PendingST[i]);
+ PendingST[i] = NumFPRegs;
+ }
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+
+ // If this asm kills any FP registers (is the last use of them) we must
+ // explicitly emit pop instructions for them. Do this now after the asm has
+ // executed so that the ST(x) numbers are not off (which would happen if we
+ // did this inline with operand rewriting).
+ //
+ // Note: this might be a non-optimal pop sequence. We might be able to do
+ // better by trying to pop in stack order or something.
+ while (FPKills) {
+ unsigned FPReg = CountTrailingZeros_32(FPKills);
+ if (isLive(FPReg))
+ freeStackSlotAfter(InsertPt, FPReg);
+ FPKills &= ~(1U << FPReg);
+ }
+ // Don't delete the inline asm!
+ return;
+ }
+
+ case X86::RET:
+ case X86::RETI:
+ // If RET has an FP register use operand, pass the first one in ST(0) and
+ // the second one in ST(1).
+
+ // Find the register operands.
+ unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
+ unsigned LiveMask = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ // FP Register uses must be kills unless there are two uses of the same
+ // register, in which case only one will be a kill.
+ assert(Op.isUse() &&
+ (Op.isKill() || // Marked kill.
+ getFPReg(Op) == FirstFPRegOp || // Second instance.
+ MI->killsRegister(Op.getReg())) && // Later use is marked kill.
+ "Ret only defs operands, and values aren't live beyond it");
+
+ if (FirstFPRegOp == ~0U)
+ FirstFPRegOp = getFPReg(Op);
+ else {
+ assert(SecondFPRegOp == ~0U && "More than two fp operands!");
+ SecondFPRegOp = getFPReg(Op);
+ }
+ LiveMask |= (1 << getFPReg(Op));
+
+ // Remove the operand so that later passes don't see it.
+ MI->RemoveOperand(i);
+ --i, --e;
+ }
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
+ // There are only four possibilities here:
+ // 1) we are returning a single FP value. In this case, it has to be in
+ // ST(0) already, so just declare success by removing the value from the
+ // FP Stack.
+ if (SecondFPRegOp == ~0U) {
+ // Assert that the top of stack contains the right FP register.
+ assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
+ "Top of stack not the right register for RET!");
+
+ // Ok, everything is good, mark the value as not being on the stack
+ // anymore so that our assertion about the stack being empty at end of
+ // block doesn't fire.
+ StackTop = 0;
+ return;
+ }
+
+ // Otherwise, we are returning two values:
+ // 2) If returning the same value for both, we only have one thing in the FP
+ // stack. Consider: RET FP1, FP1
+ if (StackTop == 1) {
+ assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
+ "Stack misconfiguration for RET!");
+
+ // Duplicate the TOS so that we return it twice. Just pick some other FPx
+ // register to hold it.
+ unsigned NewReg = getScratchReg();
+ duplicateToTop(FirstFPRegOp, NewReg, MI);
+ FirstFPRegOp = NewReg;
+ }
+
+ /// Okay we know we have two different FPx operands now:
+ assert(StackTop == 2 && "Must have two values live!");
+
+ /// 3) SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently in
+ /// ST(1). In this case, emit an fxch.
+ if (getStackEntry(0) == SecondFPRegOp) {
+ assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
+ moveToTop(FirstFPRegOp, MI);
+ }
+
+ /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
+ /// ST(1). Just remove both from our understanding of the stack and return.
+ assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
+ assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
+ StackTop = 0;
+ return;
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 000000000000..ed45a9a4c1c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,1210 @@
+//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
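+/// hasReservedCallFrame - The outgoing argument area is treated as part of the
+/// fixed stack frame unless the function has variable sized objects.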
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
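+// getSUBriOpcode / getADDriOpcode - Pick the SUB/ADD-with-immediate opcode with
+// the smallest encoding (8-bit or 32-bit immediate) that can hold Imm.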
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worrying about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
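+ // Adjust the stack in chunks small enough to fit in a signed 32-bit immediate.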
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset -= ThisVal;
+ }
+}
+
+/// mergeSPUpdatesUp - If the instruction immediately before the iterator
+/// adjusts the stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - If the instruction immediately after the iterator
+/// adjusts the stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Check the instruction before/after the passed
+/// instruction. If it is an ADD/SUB of the stack pointer, it is deleted and the
+/// stack adjustment is returned as a positive value for ADD and a negative
+/// value for SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
+
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth = -TD->getPointerSize();
+
+ // FIXME: This is a dirty hack. The code itself is a mess right now.
+ // It should be rewritten from scratch and generalized at some point.
+
+ // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ Fn->needsUnwindTableEntry();
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
+ bool Is64Bit = STI.is64Bit();
+ bool IsWin64 = STI.isTargetWin64();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ DebugLoc DL;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info; we also need to know the ABI stack alignment in case we have a call
+ // out. Otherwise just make sure we have some alignment - we'll go with the
+ // minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+ // If this is x86-64, the Red Zone is not disabled, we are a leaf function
+ // using no more than 128 bytes of stack space, and we have no frame
+ // pointer, calls, or dynamic allocas, then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone).
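+ // For illustration (assuming no callee-saved registers and no frame
+ // pointer): a 120-byte frame shrinks to 0 bytes, i.e. it fits entirely in
+ // the red zone, while a 200-byte frame shrinks to 72 bytes.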
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ !RegInfo->needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64) { // Win64 has no Red Zone
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail call optimized functions where the callee argument stack
+ // size is bigger than the caller's.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
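+ //
+ // For example, the move emitted below with DST == VirtualFP and
+ // SRC == (VirtualFP, 2 * stackGrowth) falls under the first rule and is
+ // emitted as DW_CFA_def_cfa_offset.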
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+ uint64_t NumBytes = 0;
+ int stackGrowth = -TD->getPointerSize();
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP/RBP into the appropriate stack slot.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark the place where EBP/RBP was saved.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
+ if (RegInfo->needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
+ ++MBBI;
+
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ DL = MBB.findDebugLoc(MBBI);
+
+ // If there is a SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
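+ // For illustration, on a 32-bit MSVC-style target the code below emits
+ // roughly:
+ //   movl $NumBytes, %eax
+ //   calll _chkstk           # probes and adjusts %esp
+ // whereas 64-bit MSVC targets call __chkstk and adjust %rsp afterwards.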
+ if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ const char *StackProbeSymbol;
+ bool isSPUpdateNeeded = false;
+
+ if (Is64Bit) {
+ if (STI.isTargetCygMing())
+ StackProbeSymbol = "___chkstk";
+ else {
+ StackProbeSymbol = "__chkstk";
+ isSPUpdateNeeded = true;
+ }
+ } else if (STI.isTargetCygMing())
+ StackProbeSymbol = "_alloca";
+ else
+ StackProbeSymbol = "_chkstk";
+
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ if (isEAXAlive) {
+ // Sanity check: in the 64-bit case EAX must not be live-in, because RAX is
+ // clobbered below to pass the allocation size to __chkstk.
+ assert(!Is64Bit && "EAX is livein in x64 case!");
+
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+ }
+
+ if (Is64Bit) {
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes);
+ } else {
+ // If EAX is live, allocate NumBytes-4 bytes on the stack; together with
+ // the 4 bytes already used to push EAX this covers the full NumBytes.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // MSVC x64's __chkstk only probes the stack; we still need to adjust %rsp.
+ // FIXME: %rax preserves the offset and should be available.
+ if (isSPUpdateNeeded)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (isEAXAlive) {
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (((!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+ }
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ bool Is64Bit = STI.is64Bit();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ break; // These are ok
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info; we also need to know the ABI stack alignment in case we have a call
+ // out. Otherwise just make sure we have some alignment - we'll go with the
+ // minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->getDesc().isTerminator())
+ break;
+
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic allocas are used, reset ESP to point to the last callee-saved
+ // slot before popping the callee-saved registers off. The same applies when
+ // the stack was realigned.
+ if (RegInfo->needsStackRealignment(MF)) {
+ // We cannot use LEA here because the stack pointer was realigned; we need
+ // to deallocate the local frame first.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ }
+
+ // We're returning from the function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+ // Incorporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+ // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Calculate the number of bytes used to store the return address.
+ int stackGrowth = (STI.is64Bit() ? -8 : -4);
+ const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(RI->getStackRegister(), stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+ MachineLocation CSSrc(RI->getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
+ assert((-(Offset + StackSize)) % Align == 0);
+ Align = 0;
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ // Push GPRs. This increases the frame size.
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+ // X86FrameLowering::emitPrologue will handle spilling of the frame register.
+ continue;
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Spill the XMM registers. X86 has no push/pop instructions for XMM
+ // registers, so they are spilled to the stack frame instead.
+ // Note that only the Win64 ABI might spill XMMs here.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
+ return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ if (Reg == FPReg)
+ // X86FrameLowering::emitEpilogue will handle restoring the frame register.
+ continue;
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
+
+/// permuteEncode - Create the permutation encoding used with frameless
+/// stacks. It is passed the number of registers to be saved and an array of the
+/// registers saved.
+static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ bool Used[7] = { false, false, false, false, false, false, false };
+ uint32_t RenumRegs[6];
+ for (unsigned I = 0; I < SavedCount; ++I) {
+ uint32_t Renum = 0;
+ for (unsigned U = 1; U < 7; ++U) {
+ if (U == Registers[I])
+ break;
+ if (!Used[U])
+ ++Renum;
+ }
+
+ Used[Registers[I]] = true;
+ RenumRegs[I] = Renum;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (SavedCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
+ + 3 * RenumRegs[2] + RenumRegs[3];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
+ + RenumRegs[2];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[0];
+ break;
+ }
+
+ return permutationEncoding;
+}
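+// For illustration, tracing the loop above on the {6, 2, 4, 5} example: each
+// register is assigned the count of still-unused lower-numbered registers,
+// giving RenumRegs = {5, 1, 2, 2}; with SavedCount == 4 the resulting
+// encoding is 60*5 + 12*1 + 3*2 + 2 == 320.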
+
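+// As computed below, the returned encoding packs the stack size (in 4-byte
+// units) into bits 16-23 and then either sets 0x01000000 for an EBP/RBP-based
+// frame, with the saved registers in 3-bit fields starting at bit 0, or sets
+// 0x02000000 for a frameless frame, with the register count in bits 10-12 and
+// the permutation encoding from permuteEncode in bits 0-9. A return value of
+// 0 means the frame cannot be described compactly.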
+uint32_t X86FrameLowering::
+getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const {
+ uint32_t Encoding = 0;
+ int CFAOffset = 0;
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
+ int FramePointerReg = -1;
+
+ for (ArrayRef<MCCFIInstruction>::const_iterator
+ I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+ const MCCFIInstruction &Inst = *I;
+ MCSymbol *Label = Inst.getLabel();
+
+ // Ignore invalid labels.
+ if (Label && !Label->isDefined()) continue;
+
+ unsigned Operation = Inst.getOperation();
+ if (Operation != MCCFIInstruction::Move &&
+ Operation != MCCFIInstruction::RelMove)
+ // FIXME: We can't handle this frame just yet.
+ return 0;
+
+ const MachineLocation &Dst = Inst.getDestination();
+ const MachineLocation &Src = Inst.getSource();
+ const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
+
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() != MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa
+ assert(FramePointerReg == -1 &&"Defining more than one frame pointer?");
+ if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+ FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
+ } // else DW_CFA_def_cfa_offset
+
+ if (IsRelative)
+ CFAOffset += Src.getOffset();
+ else
+ CFAOffset -= Src.getOffset();
+
+ continue;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa_register
+ assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+
+ if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+
+ FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
+ if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
+ return 0;
+
+ SavedRegs[0] = 0;
+ SavedRegIdx = 0;
+ continue;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset /= DataAlignmentFactor;
+
+ if (Offset < 0) {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended_sf
+ return 0;
+ } else if (Reg < 64) {
+ // DW_CFA_offset + Reg
+ if (SavedRegIdx >= 6) return 0;
+ int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
+ if (CURegNum == -1) return 0;
+ SavedRegs[SavedRegIdx++] = CURegNum;
+ } else {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended
+ return 0;
+ }
+ }
+
+ // Bail if there are too many registers to encode.
+ if (SavedRegIdx > 6) return 0;
+
+ // Check if the offset is too big.
+ CFAOffset /= 4;
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ return 0;
+ Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
+
+ if (FramePointerReg != -1) {
+ Encoding |= 0x01000000; // EBP/RBP Unwind Frame
+ for (unsigned I = 0; I != SavedRegIdx; ++I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == unsigned(FramePointerReg)) continue;
+ Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
+ }
+ } else {
+ Encoding |= 0x02000000; // Frameless unwind with small stack
+ Encoding |= (SavedRegIdx & 0x7) << 10;
+ Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+ }
+
+ return Encoding;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 000000000000..14c31ed47cf1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,69 @@
+//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the X86-specific bits of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
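+ // The TargetFrameLowering base class is configured here with a
+ // downward-growing stack, the subtarget's stack alignment, and a local area
+ // offset of -8 (-4 on 32-bit) to account for the pushed return address.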
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2b0f283bec75
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,2253 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized DAG to an X86 DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+ /// SDValues instead of register numbers for the leaves of the matched
+ /// tree.
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ // This is really a union, discriminated by BaseType!
+ SDValue Base_Reg;
+ int Base_FrameIndex;
+
+ unsigned Scale;
+ SDValue IndexReg;
+ int32_t Disp;
+ SDValue Segment;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+ unsigned char SymbolFlags; // X86II::MO_*
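+
+ // Taken together, these fields describe an x86 memory operand of the form
+ // Segment:[Base_Reg/Base_FrameIndex + Scale*IndexReg + Disp], where the
+ // displacement may be a constant or a symbolic reference (GV, CP, ES, JT
+ // or BlockAddr).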
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
+ SymbolFlags(X86II::MO_NO_FLAG) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
+ }
+
+ bool hasBaseOrIndexReg() const {
+ return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ }
+
+ /// isRIPRelative - Return true if this addressing mode is already RIP
+ /// relative.
+ bool isRIPRelative() const {
+ if (BaseType != RegBase) return false;
+ if (RegisterSDNode *RegNode =
+ dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
+ return RegNode->getReg() == X86::RIP;
+ return false;
+ }
+
+ void setBaseReg(SDValue Reg) {
+ BaseType = RegBase;
+ Base_Reg = Reg;
+ }
+
+ void dump() {
+ dbgs() << "X86ISelAddressMode " << this << '\n';
+ dbgs() << "Base_Reg ";
+ if (Base_Reg.getNode() != 0)
+ Base_Reg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
+ << " Scale" << Scale << '\n'
+ << "IndexReg ";
+ if (IndexReg.getNode() != 0)
+ IndexReg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Disp " << Disp << '\n'
+ << "GV ";
+ if (GV)
+ GV->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " CP ";
+ if (CP)
+ CP->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << '\n'
+ << "ES ";
+ if (ES)
+ dbgs() << ES;
+ else
+ dbgs() << "nul";
+ dbgs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class X86DAGToDAGISel : public SelectionDAGISel {
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ const X86TargetLowering &X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
+ public:
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ X86Lowering(*tm.getTargetLowering()),
+ Subtarget(&tm.getSubtarget<X86Subtarget>()),
+ OptForSize(false) {}
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ virtual void EmitFunctionEntryCode();
+
+ virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
+
+ virtual void PreprocessISelDAG();
+
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+
+ bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
+ bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment,
+ SDValue &NodeWithChain);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+ AM.Base_Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
+ AM.SymbolFlags);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+ AM.Align, AM.Disp, AM.SymbolFlags);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, AM.SymbolFlags);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getGlobalBaseReg - Return an SDNode that returns the value of
+ /// the global base register. Output instructions required to
+ /// initialize the global base register, if necessary.
+ ///
+ SDNode *getGlobalBaseReg();
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const X86TargetMachine &getTargetMachine() {
+ return static_cast<const X86TargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const X86InstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+ };
+}
+
+
+bool
+X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ if (!N.hasOneUse())
+ return false;
+
+ if (N.getOpcode() != ISD::LOAD)
+ return true;
+
+ // If N is a load, do additional profitability checks.
+ if (U == Root) {
+ switch (U->getOpcode()) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::AND:
+ case X86ISD::XOR:
+ case X86ISD::OR:
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
+ // If the other operand is an 8-bit immediate we should fold the immediate
+ // instead. This reduces code size.
+ // e.g.
+ // movl 4(%esp), %eax
+ // addl $4, %eax
+ // vs.
+ // movl $4, %eax
+ // addl 4(%esp), %eax
+ // The former is 2 bytes shorter. In the case where the increment is 1,
+ // the saving can be 4 bytes (by using incl %eax).
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+ if (Imm->getAPIntValue().isSignedIntN(8))
+ return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // If the block also has an access to a second TLS address, this will save
+ // a load.
+ // FIXME: This is probably also true for non-TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// MoveBelowOrigChain - Replace the original chain operand of the call with
+/// the load's chain operand and move the load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue OrigChain) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Chain = OrigChain.getOperand(0);
+ if (Chain.getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else {
+ assert(Chain.getOpcode() == ISD::TokenFactor &&
+ "Unexpected chain operand");
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+ if (Chain.getOperand(i).getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(Chain.getOperand(i));
+ SDValue NewChain =
+ CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ Ops.clear();
+ Ops.push_back(NewChain);
+ }
+ for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+ Ops.push_back(OrigChain.getOperand(i));
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
+ Load.getOperand(1), Load.getOperand(2));
+ Ops.clear();
+ Ops.push_back(SDValue(Load.getNode(), 1));
+ for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ Ops.push_back(Call.getOperand(i));
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+ if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+ return false;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+ if (!LD ||
+ LD->isVolatile() ||
+ LD->getAddressingMode() != ISD::UNINDEXED ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ // Now let's find the callseq_start.
+ while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
+ if (!Chain.hasOneUse())
+ return false;
+ Chain = Chain.getOperand(0);
+ }
+
+ if (!Chain.getNumOperands())
+ return false;
+ if (Chain.getOperand(0).getNode() == Callee.getNode())
+ return true;
+ if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+ Callee.getValue(1).hasOneUse())
+ return true;
+ return false;
+}
+
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ // OptForSize is used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None &&
+ (N->getOpcode() == X86ISD::CALL ||
+ N->getOpcode() == X86ISD::TC_RETURN)) {
+ /// Also try moving call address load from outside callseq_start to just
+ /// before the call to allow it to be folded.
+ ///
+ /// [Load chain]
+ /// ^
+ /// |
+ /// [Load]
+ /// ^ ^
+ /// | |
+ /// / \--
+ /// / |
+ ///[CALLSEQ_START] |
+ /// ^ |
+ /// | |
+ /// [LOAD/C2Reg] |
+ /// | |
+ /// \ /
+ /// \ /
+ /// [CALL]
+ bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
+ if (!isCalleeLoad(Load, Chain, HasCallSeq))
+ continue;
+ MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
+ ++NumLoadMoved;
+ continue;
+ }
+
+ // Lower fpround and fpextend nodes that target the FP stack to be a store
+ // and a load to/from the stack. This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
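+ // For example, an FP_ROUND from x87 f80 to f32 is rewritten below as a
+ // 4-byte truncating store to a stack temporary followed by a reload of the
+ // result.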
+ EVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+ N->getOperand(0),
+ MemTmp, MachinePointerInfo(), MemVT,
+ false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havoc on the DAG because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ CurDAG->DeleteNode(N);
+ }
+}
+
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ BuildMI(BB, DebugLoc(),
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode() {
+ // If this is main, emit special code for main.
+ if (const Function *Fn = MF->getFunction())
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main")
+ EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
+}
+
+static bool isDispSafeForFrameIndex(int64_t Val) {
+ // On 64-bit platforms, we can run into an issue where a frame index
+ // includes a displacement that, when added to the explicit displacement,
+ // will overflow the displacement field. Assuming that the frame index
+ // displacement fits into a 31-bit integer (which is only slightly more
+ // aggressive than the current fundamental assumption that it fits into
+ // a 32-bit integer), a 31-bit disp should always be safe.
+ return isInt<31>(Val);
+}
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+ X86ISelAddressMode &AM) {
+ int64_t Val = AM.Disp + Offset;
+ CodeModel::Model M = TM.getCodeModel();
+ if (Subtarget->is64Bit()) {
+ if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement()))
+ return true;
+ // In addition to the checks required for a register base, check that
+ // we do not try to use an unsafe Disp with a frame index.
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+ !isDispSafeForFrameIndex(Val))
+ return true;
+ }
+ AM.Disp = Val;
+ return false;
+
+}
+
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
+ // This optimization is valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address.
+ // For more information see http://people.redhat.com/drepper/tls.pdf
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetELF())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
+/// into an addressing mode. These wrap things that will resolve down into a
+/// symbol reference. If no match is possible, this returns true, otherwise it
+/// returns false.
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+ CodeModel::Model M = TM.getCodeModel();
+
+ // Handle X86-64 rip-relative addresses. We check this before checking direct
+ // folding because RIP is preferable to non-RIP accesses.
+ if (Subtarget->is64Bit() &&
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
+ // they cannot be folded into immediate fields.
+ // FIXME: This can be improved for kernel and other models?
+ (M == CodeModel::Small || M == CodeModel::Kernel) &&
+ // Base and index reg must be 0 in order to use %rip as base and lowering
+ // must allow RIP.
+ !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.GV = G->getGlobal();
+ AM.SymbolFlags = G->getTargetFlags();
+ if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.SymbolFlags = CP->getTargetFlags();
+ if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+
+ if (N.getOpcode() == X86ISD::WrapperRIP)
+ AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
+ return false;
+ }
+
+ // Handle the case when globals fit in our immediate field: This is true for
+ // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
+ // mode, this results in a non-RIP-relative computation.
+ if (!Subtarget->is64Bit() ||
+ ((M == CodeModel::Small || M == CodeModel::Kernel) &&
+ TM.getRelocationModel() == Reloc::Static)) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (MatchAddressRecursively(N, AM, 0))
+ return true;
+
+ // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
+ // a smaller encoding and avoids a scaled-index.
+ if (AM.Scale == 2 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg = AM.IndexReg;
+ AM.Scale = 1;
+ }
+
+ // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
+ // because it has a smaller encoding.
+ // TODO: Which other code models can use this?
+ if (TM.getCodeModel() == CodeModel::Small &&
+ Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+
+ return false;
+}
+
+bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG({
+ dbgs() << "MatchAddress: ";
+ AM.dump();
+ });
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // If this is already a %rip relative address, we can only merge immediates
+ // into it. Instead of handling this in every case, we handle it here.
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRelative()) {
+ // FIXME: JumpTable and ExternalSymbol addresses currently don't like
+ // displacements. It isn't very important, but this should be fixed for
+ // consistency.
+ if (!AM.ES && AM.JT != -1) return true;
+
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+ if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+ return false;
+ return true;
+ }
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!FoldOffsetIntoAddress(Val, AM))
+ return false;
+ break;
+ }
+
+ case X86ISD::Wrapper:
+ case X86ISD::WrapperRIP:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::LOAD:
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
+ break;
+
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+ unsigned Val = CN->getZExtValue();
+ // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
+ // that the base operand remains free for further matching. If
+ // the base doesn't end up getting used, a post-processing step
+ // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDValue ShVal = N.getNode()->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
+ AM.IndexReg = ShVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() << Val;
+ if (!FoldOffsetIntoAddress(Disp, AM))
+ return false;
+ }
+
+ AM.IndexReg = ShVal;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.getResNo() != 0) break;
+ // FALL THROUGH
+ case ISD::MUL:
+ case X86ISD::MUL_IMM:
+ // X*[3,5,9] -> X+X*[2,4,8]
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0) {
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+ if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+ CN->getZExtValue() == 9) {
+ AM.Scale = unsigned(CN->getZExtValue())-1;
+
+ SDValue MulVal = N.getNode()->getOperand(0);
+ SDValue Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+ Reg = MulVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+ if (FoldOffsetIntoAddress(Disp, AM))
+ Reg = N.getNode()->getOperand(0);
+ } else {
+ Reg = N.getNode()->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base_Reg = Reg;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::SUB: {
+ // Given A-B, if A can be completely folded into the address and the
+ // index field is unused, use -B as the index. This is a win if A has
+ // multiple parts that can be folded into the address. It also saves a
+ // mov if the base register has other uses, since it avoids a two-address
+ // sub instruction; however, it costs an additional mov if the index
+ // register has other uses.
+
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ // Test if the LHS of the sub can be folded.
+ X86ISelAddressMode Backup = AM;
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
+ AM = Backup;
+ break;
+ }
+
+ int Cost = 0;
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+ // If the RHS involves a register with multiple uses, this
+ // transformation incurs an extra mov, due to the neg instruction
+ // clobbering its operand.
+ if (!RHS.getNode()->hasOneUse() ||
+ RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+ RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+ RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+ (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+ ++Cost;
+ // If the base is a register with multiple uses, this
+ // transformation may save a mov.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() &&
+ !AM.Base_Reg.getNode()->hasOneUse()) ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ --Cost;
+ // If the folded LHS was interesting, this transformation saves
+ // address arithmetic.
+ if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+ ((AM.Disp != 0) && (Backup.Disp == 0)) +
+ (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+ --Cost;
+ // If it doesn't look like it may be an overall win, don't do it.
+ if (Cost >= 0) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+ AM.Scale = 1;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
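+ // Try to fold both operands into the addressing mode, starting with the LHS.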
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base_Reg.getNode() &&
+ !AM.IndexReg.getNode()) {
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ N = Handle.getValue();
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ X86ISelAddressMode Backup = AM;
+ ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
+
+ // Start with the LHS as an addr mode.
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
+ return false;
+ AM = Backup;
+ }
+ break;
+
+ case ISD::AND: {
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getNumOperands() != 2) break;
+
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue X = Shift.getOperand(0);
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C1 || !C2) break;
+
+ // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
+ // allows us to convert the shift and and into an h-register extract and
+ // a scaled index.
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+ unsigned ScaleLog = 8 - C1->getZExtValue();
+ if (ScaleLog > 0 && ScaleLog < 4 &&
+ C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+ SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+ SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+ SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ X, Eight);
+ SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+ Srl, Mask);
+ SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ And, ShlCount);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Mask.getNode()->getNodeId() == -1 ||
+ Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+ Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (ShlCount.getNode()->getNodeId() == -1 ||
+ ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
+ ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Shl.getNode()->getNodeId() == -1 ||
+ Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Shl.getNode());
+ Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ CurDAG->ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+ }
+ }
+
+ // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
+ if (Shift.getOpcode() != ISD::SHL) break;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides, the
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ break;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftCst = C1->getZExtValue();
+ if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ break;
+
+ // Get the new AND mask, this folds to a constant.
+ SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ SDValue(C2, 0), SDValue(C1, 0));
+ SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
+ NewANDMask);
+ SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ NewAND, SDValue(C1, 0));
+
+ // Insert the new nodes into the topological ordering.
+ if (C1->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), C1);
+ C1->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewANDMask.getNode()->getNodeId() == -1 ||
+ NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
+ NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewAND.getNode()->getNodeId() == -1 ||
+ NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
+ NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (NewSHIFT.getNode()->getNodeId() == -1 ||
+ NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
+ NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+
+ AM.Scale = 1 << ShiftCst;
+ AM.IndexReg = NewAND;
+ return false;
+ }
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.getNode() == 0) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base_Reg = N;
+ return false;
+}
+
+/// SelectAddr - returns true if it is able to pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ if (Parent &&
+ // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
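+ // Note that MatchAddress returns true when it *fails* to match, in which
+ // case this address cannot be selected.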
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+///
+/// We also return:
+/// PatternNodeWithChain: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
+ SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment,
+ SDValue &PatternNodeWithChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getNode()->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ PatternNodeWithChain = SDValue(LD, 0);
+ return true;
+ }
+ return false;
+}
+
+
+/// SelectLEAAddr - Calls MatchAddress and determines whether the maximal
+/// addressing mode it matches can be cost-effectively emitted as an LEA
+/// instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
+ return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
+
+ EVT VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base_Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or to
+ // use a simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+ // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+ // to a LEA. This is determined with some experimentation but is by no means
+ // optimal (especially for code size consideration). LEA is nice because of
+ // its three-address nature. Tweak the cost function again when we can run
+ // convertToThreeAddress() at register allocation time.
+ if (AM.hasSymbolicDisplacement()) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity++;
+
+ // If it isn't worth using an LEA, reject it.
+ if (Complexity <= 2)
+ return false;
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+
+ X86ISelAddressMode AM;
+ AM.GV = GA->getGlobal();
+ AM.Disp += GA->getOffset();
+ AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
+ AM.SymbolFlags = GA->getTargetFlags();
+
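+ // In 32-bit mode the TLS address computation needs the GOT base register
+ // (EBX) as the index with scale 1; in 64-bit mode no index register is used.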
+ if (N.getValueType() == MVT::i32) {
+ AM.Scale = 1;
+ AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+ } else {
+ AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+ }
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+
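+/// TryFoldLoad - Return true if the load N can be folded into node P as a
+/// memory operand, filling in the address operands (Base, Scale, Index, Disp,
+/// Segment) for the folded access.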
+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P, OptLevel))
+ return false;
+
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
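+/// SelectAtomic64 - Select one of the ATOM*6432 pseudo instructions for a
+/// 64-bit atomic operation whose value operand is split into two 32-bit
+/// halves (In2L, In2H).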
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = Node->getOperand(2);
+ SDValue In2H = Node->getOperand(3);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_add_and_fetch and
+ // __sync_sub_and_fetch where the result is not used. This allows us to use
+ // the "lock" versions of the add, sub, inc, and dec instructions.
+ // FIXME: Do not use special instructions; instead add the "lock" prefix to
+ // the target node somehow. The extra information will then be transferred
+ // to the machine instruction, where it denotes the prefix.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ bool isInc = false, isDec = false, isSub = false, isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
+ isCN = true;
+ int64_t CNVal = CN->getSExtValue();
+ if (CNVal == 1)
+ isInc = true;
+ else if (CNVal == -1)
+ isDec = true;
+ else if (CNVal >= 0)
+ Val = CurDAG->getTargetConstant(CNVal, NVT);
+ else {
+ isSub = true;
+ Val = CurDAG->getTargetConstant(-CNVal, NVT);
+ }
+ } else if (Val.hasOneUse() &&
+ Val.getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0))) {
+ isSub = true;
+ Val = Val.getOperand(1);
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isInc)
+ Opc = X86::LOCK_INC8m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC8m;
+ else if (isSub) {
+ if (isCN)
+ Opc = X86::LOCK_SUB8mi;
+ else
+ Opc = X86::LOCK_SUB8mr;
+ } else {
+ if (isCN)
+ Opc = X86::LOCK_ADD8mi;
+ else
+ Opc = X86::LOCK_ADD8mr;
+ }
+ break;
+ case MVT::i16:
+ if (isInc)
+ Opc = X86::LOCK_INC16m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC16m;
+ else if (isSub) {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB16mi8;
+ else
+ Opc = X86::LOCK_SUB16mi;
+ } else
+ Opc = X86::LOCK_SUB16mr;
+ } else {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD16mi8;
+ else
+ Opc = X86::LOCK_ADD16mi;
+ } else
+ Opc = X86::LOCK_ADD16mr;
+ }
+ break;
+ case MVT::i32:
+ if (isInc)
+ Opc = X86::LOCK_INC32m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC32m;
+ else if (isSub) {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB32mi8;
+ else
+ Opc = X86::LOCK_SUB32mi;
+ } else
+ Opc = X86::LOCK_SUB32mr;
+ } else {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD32mi8;
+ else
+ Opc = X86::LOCK_ADD32mi;
+ } else
+ Opc = X86::LOCK_ADD32mr;
+ }
+ break;
+ case MVT::i64:
+ if (isInc)
+ Opc = X86::LOCK_INC64m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC64m;
+ else if (isSub) {
+ Opc = X86::LOCK_SUB64mr;
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi8;
+ else if (i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi32;
+ }
+ } else {
+ Opc = X86::LOCK_ADD64mr;
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi8;
+ else if (i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi32;
+ }
+ }
+ break;
+ }
+
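+ // The value result (result 0) is known to be unused (checked above), so
+ // return an IMPLICIT_DEF for it and pair it with the chain produced by the
+ // locked instruction.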
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ if (isInc || isDec) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ }
+}
+
+enum AtomicOpc {
+ OR,
+ AND,
+ XOR,
+ AtomicOpcEnd
+};
+
+enum AtomicSz {
+ ConstantI8,
+ I8,
+ SextConstantI16,
+ ConstantI16,
+ I16,
+ SextConstantI32,
+ ConstantI32,
+ I32,
+ SextConstantI64,
+ ConstantI64,
+ I64,
+ AtomicSzEnd
+};
+
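+// Opcode table used by SelectAtomicLoadArith: rows are indexed by AtomicOpc
+// and columns by AtomicSz, so the entries below must stay in the same order
+// as those enums.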
+static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_OR8mi,
+ X86::LOCK_OR8mr,
+ X86::LOCK_OR16mi8,
+ X86::LOCK_OR16mi,
+ X86::LOCK_OR16mr,
+ X86::LOCK_OR32mi8,
+ X86::LOCK_OR32mi,
+ X86::LOCK_OR32mr,
+ X86::LOCK_OR64mi8,
+ X86::LOCK_OR64mi32,
+ X86::LOCK_OR64mr
+ },
+ {
+ X86::LOCK_AND8mi,
+ X86::LOCK_AND8mr,
+ X86::LOCK_AND16mi8,
+ X86::LOCK_AND16mi,
+ X86::LOCK_AND16mr,
+ X86::LOCK_AND32mi8,
+ X86::LOCK_AND32mi,
+ X86::LOCK_AND32mr,
+ X86::LOCK_AND64mi8,
+ X86::LOCK_AND64mi32,
+ X86::LOCK_AND64mr
+ },
+ {
+ X86::LOCK_XOR8mi,
+ X86::LOCK_XOR8mr,
+ X86::LOCK_XOR16mi8,
+ X86::LOCK_XOR16mi,
+ X86::LOCK_XOR16mr,
+ X86::LOCK_XOR32mi8,
+ X86::LOCK_XOR32mi,
+ X86::LOCK_XOR32mr,
+ X86::LOCK_XOR64mi8,
+ X86::LOCK_XOR64mi32,
+ X86::LOCK_XOR64mr
+ }
+};
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_or_and_fetch and similar arith
+ // operations where the result is not used. This allows us to use the "lock"
+ // version of the arithmetic instruction.
+ // FIXME: Same as for 'add' and 'sub', try to merge those down here.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ // Which index into the table.
+ enum AtomicOpc Op;
+ switch (Node->getOpcode()) {
+ case ISD::ATOMIC_LOAD_OR:
+ Op = OR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Op = AND;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Op = XOR;
+ break;
+ default:
+ return 0;
+ }
+
+ bool isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
+ isCN = true;
+ Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isCN)
+ Opc = AtomicOpcTbl[Op][ConstantI8];
+ else
+ Opc = AtomicOpcTbl[Op][I8];
+ break;
+ case MVT::i16:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI16];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI16];
+ } else
+ Opc = AtomicOpcTbl[Op][I16];
+ break;
+ case MVT::i32:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI32];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI32];
+ } else
+ Opc = AtomicOpcTbl[Op][I32];
+ break;
+ case MVT::i64:
+ Opc = AtomicOpcTbl[Op][I64];
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI64];
+ else if (i64immSExt32(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][ConstantI64];
+ }
+ break;
+ }
+
+ assert(Opc != 0 && "Invalid arith lock transform!");
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
+/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
+/// any uses which require the SF or OF bits to be accurate.
+static bool HasNoSignedComparisonUses(SDNode *N) {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ // Only examine CopyToReg uses.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
+ X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1) continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the opcode of the user.
+ switch (FlagUI->getMachineOpcode()) {
+ // These comparisons don't treat the most significant bit specially.
+ case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
+ case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
+ case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
+ case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
+ case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
+ case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
+ case X86::CMOVA16rr: case X86::CMOVA16rm:
+ case X86::CMOVA32rr: case X86::CMOVA32rm:
+ case X86::CMOVA64rr: case X86::CMOVA64rm:
+ case X86::CMOVAE16rr: case X86::CMOVAE16rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE64rr: case X86::CMOVAE64rm:
+ case X86::CMOVB16rr: case X86::CMOVB16rm:
+ case X86::CMOVB32rr: case X86::CMOVB32rm:
+ case X86::CMOVB64rr: case X86::CMOVB64rm:
+ case X86::CMOVBE16rr: case X86::CMOVBE16rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE64rr: case X86::CMOVBE64rm:
+ case X86::CMOVE16rr: case X86::CMOVE16rm:
+ case X86::CMOVE32rr: case X86::CMOVE32rm:
+ case X86::CMOVE64rr: case X86::CMOVE64rm:
+ case X86::CMOVNE16rr: case X86::CMOVNE16rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE64rr: case X86::CMOVNE64rm:
+ case X86::CMOVNP16rr: case X86::CMOVNP16rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP64rr: case X86::CMOVNP64rm:
+ case X86::CMOVP16rr: case X86::CMOVP16rm:
+ case X86::CMOVP32rr: case X86::CMOVP32rm:
+ case X86::CMOVP64rr: case X86::CMOVP64rm:
+ continue;
+ // Anything else: assume conservatively.
+ default: return false;
+ }
+ }
+ }
+ return true;
+}
+
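+/// Select - Select instructions for the given node. Returns the selected
+/// machine node, or null if the node was already selected or its uses were
+/// replaced in place.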
+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR: {
+ SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable; i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR; AND is unaffected.
+ if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ break;
+
+ unsigned ShlOp, Op = 0;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ break;
+ }
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
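+ // MUL takes one operand implicitly in AL/AX/EAX/RAX, so copy N0 into the
+ // low register and glue it to the multiply.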
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+ InFlag = SDValue(CNode, 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the low half of the result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+
+ return NULL;
+ }
+
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg, ClrReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; ClrReg = HiReg = X86::AH;
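+ // No separate clear opcode is needed for i8: the paths below either
+ // zero-extend into AX with MOVZX or sign-extend with CBW, so ClrOpcode is
+ // never used for this type.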
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
+ ClrNode, InFlag).getValue(1);
+ }
+ }
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the division (low) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ return NULL;
+ }
+
+ case X86ISD::CMP: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
+ // Look past the truncate if CMP is the only use of it.
+ N0 = N0.getOperand(0);
+ if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ N0.getValueType() != MVT::i8 &&
+ X86::isZeroNode(N1)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
+ if (!C) break;
+
+ // For example, convert "testl %eax, $8" to "testb %al, $8"
+ if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
+ (!(C->getZExtValue() & 0x80) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // On x86-32, only the ABCD registers have 8-bit subregisters.
+ if (!Subtarget->is64Bit()) {
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+ }
+
+ // Extract the l-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb.
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testl %eax, $2048" to "testb %ah, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ // Shift the immediate right by 8 bits.
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
+ MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Put the value in an ABCD register.
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+
+ // Extract the h-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb. No special NOREX tricks are needed since there's
+ // only one GPR operand!
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, ShiftedImm);
+ }
+
+ // For example, "testl %eax, $32776" to "testw %ax, $32776".
+ if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
+ N0.getValueType() != MVT::i16 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 16-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
+ MVT::i16, Reg);
+
+ // Emit a testw.
+ return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
+ if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
+ N0.getValueType() == MVT::i64 &&
+ (!(C->getZExtValue() & 0x80000000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 32-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
+ MVT::i32, Reg);
+
+ // Emit a testl.
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ }
+ }
+ break;
+ }
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
+
+ return ResNode;
+}
+
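+/// SelectInlineAsmMemoryOperand - Select the address operands for an inline
+/// asm memory constraint. Returns false on success, with the five address
+/// operands pushed onto OutOps.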
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1, Op2, Op3, Op4;
+ switch (ConstraintCode) {
+ case 'o': // offsetable ??
+ case 'v': // not offsetable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ OutOps.push_back(Op4);
+ return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into an
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 000000000000..5096d9ae2edf
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,13157 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2);
+
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128 bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an index in the 128 bits we
+/// want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ int Factor = VT.getSizeInBits() / 128;
+
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+ ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Generate a DAG to put 128 bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Given two vectors, concat them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+ DebugLoc dl = Lower.getDebugLoc();
+
+ assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(),
+ Lower.getValueType().getVectorElementType(),
+ Lower.getValueType().getVectorNumElements() * 2);
+
+ // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+ assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+ // Insert the upper subvector.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+ DAG.getConstant(
+ // This is half the length of the result
+ // vector. Start inserting the upper 128
+ // bits here.
+ Lower.getValueType().getVectorNumElements(),
+ MVT::i32),
+ DAG, dl);
+
+ // Insert the lower subvector.
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Vec;
+}
+
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X8664_MachoTargetObjectFile();
+ return new TargetLoweringObjectFileMachO();
+ }
+
+ if (Subtarget->isTargetELF()) {
+ if (is64Bit)
+ return new X8664_ELFTargetObjectFile(TM);
+ return new X8632_ELFTargetObjectFile(TM);
+ }
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ return new TargetLoweringObjectFileCOFF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasXMMInt();
+ X86ScalarSSEf32 = Subtarget->hasXMM();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+ RegInfo = TM.getRegisterInfo();
+ TD = getTargetData();
+
+ // Set up the TargetLowering object.
+ static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
+
+ // X86 is weird, it always uses i8 for shift amounts and setcc results.
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // For 64-bit, since we have so many registers, use the ILP scheduler; for
+ // 32-bit code, use the register-pressure-specific scheduling.
+ if (Subtarget->is64Bit())
+ setSchedulingPreference(Sched::ILP);
+ else
+ setSchedulingPreference(Sched::RegPressure);
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+    // MS runtime is weird: it exports _setjmp, but longjmp (no underscore)!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
+ } else if (!UseSoftFloat) {
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ // We have an algorithm for SSE2, and we turn this into a 64-bit
+ // FILD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+
+ if (!UseSoftFloat) {
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
+ }
+
+  // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
+  // are Legal; f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else if (!UseSoftFloat) {
+ if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64; without
+ // SSE, we're stuck with a fistpll.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ }
+
+  // TODO: When we have SSE, these could be more efficient by using movd/movq.
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ }
+ }
+
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+    // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
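+  // For example, a function computing both x/y and x%y should end up with a
+  // single hardware divide, roughly:
+  //   int q = x / y;   // idivl %ecx -> quotient in EAX
+  //   int r = x % y;   //             remainder is already in EDX
+  // (illustrative C and register names; the exact instruction depends on
+  // operand width and signedness).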
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i8 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
+ }
+  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasXMM())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
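+  // Roughly speaking, a sequence such as
+  //   fence seq_cst; %old = atomicrmw add i32* %p, i32 1 seq_cst; fence seq_cst
+  // can be emitted as a single "lock addl"/"lock xaddl", since the locked
+  // instruction already provides the full barrier and the surrounding fences
+  // are simply dropped (illustrative IR).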
+
+ // Expand certain atomics
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ }
+
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
+
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC,
+ (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
+ (Subtarget->isTargetCOFF()
+ && !Subtarget->isTargetEnvMacho()
+ ? Custom : Expand));
+
+ if (!UseSoftFloat && X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // Lower this to FGETSIGNx86 plus an AND.
+ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
+ setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Special cases we handle for FP constants.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ } else if (!UseSoftFloat) {
+ // f32 and f64 in x87.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Long double always uses X87.
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ {
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ }
+
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+  // FIXME: In order to prevent SSE instructions from being expanded to MMX
+  // ones with -msoft-float, disable use of MMX as well.
+ if (!UseSoftFloat && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+    // No operations on x86mmx are supported; everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasXMM()) {
+ addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+
+    // FIXME: Unfortunately -soft-float and -no-implicit-float mean XMM
+    // registers cannot be used even for integer operations.
+ addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+ EVT VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+ // Do not attempt to custom lower non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+ setOperationAction(ISD::BUILD_VECTOR,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,
+ VT.getSimpleVT().SimpleTy, Custom);
+ }
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Do not attempt to promote non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ }
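+    // In practice the promotion above means, for example, that an "and" of
+    // two v4i32 values is bitcast to v2i64, performed there (a single pand),
+    // and bitcast back, so only the v2i64 patterns for the logic ops are
+    // needed (illustrative description of the Promote action for vectors).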
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ }
+
+ if (Subtarget->hasSSE41()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
+    // i8 and i16 vectors are custom because the source register and source
+    // memory operand types are not the same width. f32 vectors are custom
+    // since the immediate controlling the insert encodes additional
+    // information.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ }
+
+ if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ }
+
+ if (Subtarget->hasSSE42())
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+
+ if (!UseSoftFloat && Subtarget->hasAVX()) {
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+
+ setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
+ setOperationAction(ISD::FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+
+ // Custom lower several nodes for 256-bit types.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ }
+
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ }
+ }
+
+ // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+ // of this type with custom code.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+
+ // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
+ // handle type legalization for these operations here.
+ //
+ // FIXME: We really should do custom legalization for addition and
+ // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
+ // than generic legalization for 64-bit multiplication-with-overflow, though.
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
+ }
+
+ // There are no 8-bit 3-address imul/mul instructions
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::MUL);
+
+ computeRegisterProperties();
+
+  // On Darwin, -Os means optimize for size without hurting performance, so
+  // do not reduce the limit.
+ maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
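+  // The limits above cap how many scalar/vector stores an inlined memory
+  // intrinsic may expand to; e.g. with a 16-store memset budget and 16-byte
+  // SSE stores, a 64-byte @llvm.memset needs only 4 stores and can be emitted
+  // inline rather than as a libcall (illustrative sizes).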
+ setPrefLoopAlignment(16);
+ benefitFromCodePlacementOpt = true;
+
+ setPrefFunctionAlignment(4);
+}
+
+
+MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i8;
+}
+
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+ return;
+}
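+// For example, a 32-bit caller passing "struct S { <4 x float> V; int I; }"
+// by value gets MaxAlign bumped to 16 because of the 128-bit vector member,
+// while a struct of plain ints keeps the default 4-byte alignment
+// (illustrative type).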
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ if (Subtarget->is64Bit()) {
+ // Max of 8 and alignment of type.
+ unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ if (TyAlign > 8)
+ return TyAlign;
+ return 8;
+ }
+
+ unsigned Align = 4;
+ if (Subtarget->hasXMM())
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero, the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+/// against an alignment requirement, probably because the source does not
+/// need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ const Function *F = MF.getFunction();
+ if (NonScalarIntSafe &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16))) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (!MemcpyStrSrc && Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasXMMInt()) {
+      // Do not use f64 to lower memcpy if the source is a string constant.
+      // It's better to use i32 to avoid the loads.
+ return MVT::f64;
+ }
+ }
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
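+// As an illustrative case (assuming vector stores are permitted, i.e.
+// NonScalarIntSafe and no NoImplicitFloat attribute): a 32-byte memcpy on an
+// SSE2 subtarget with both pointers 16-byte aligned and a 16-byte aligned
+// stack returns MVT::v4i32, so the copy becomes two 16-byte vector
+// loads/stores; the same copy without those alignment guarantees on x86-64
+// falls back to MVT::i64.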
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned X86TargetLowering::getJumpTableEncoding() const {
+ // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
+ // symbol.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ return MachineJumpTableInfo::EK_Custom32;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *
+X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid,MCContext &Ctx) const{
+ assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT());
+ // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
+ // entries.
+ return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ MCSymbolRefExpr::VK_GOTOFF, Ctx);
+}
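+// With this encoding each table entry ends up as something like
+//   .long .LBB0_3@GOTOFF
+// and the dispatch adds the PIC base register back in, so the table itself
+// contains no absolute addresses (assumed assembler syntax, for illustration).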
+
+/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+/// jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget->is64Bit())
+ // This doesn't have DebugLoc associated with it, but is not really the
+ // same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *X86TargetLowering::
+getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
+ MCContext &Ctx) const {
+ // X86-64 uses RIP relative addressing based on the jump table label.
+ if (Subtarget->isPICStyleRIPRel())
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ // Otherwise, the reference is relative to the PIC base.
+ return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+}
+
+// FIXME: Why is this routine here? Move to RegInfo!
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = (Subtarget->is64Bit()
+ ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ break;
+ case MVT::x86mmx:
+ RRC = X86::VR64RegisterClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = X86::VR128RegisterClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
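+// Concretely, the stack protector then loads its canary with an instruction
+// such as "movq %fs:0x28, %rax" on 64-bit Linux or "movl %gs:0x14, %eax" on
+// i386 (illustrative assembly; address space 256 maps to %gs and 257 to %fs).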
+
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
+}
+
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ // Add the regs to the liveout set for the function.
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+
+ SDValue Flag;
+
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
+ MVT::i16));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget->is64Bit()) {
+ if (ValVT == MVT::x86mmx) {
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasSSE2())
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
+ }
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() &&
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ Flag = Chain.getValue(1);
+
+ // RAX now acts like a return value.
+ MRI.addLiveOut(X86::RAX);
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+}
+
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg &&
+ Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ return HasRet;
+}
+
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT;
+ // TODO: Is this also valid on 32-bit?
+ if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ ReturnMVT = MVT::i8;
+ else
+ ReturnMVT = MVT::i32;
+
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool Is64Bit = Subtarget->is64Bit();
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT CopyVT = VA.getValVT();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+
+ SDValue Val;
+
+ // If this is a call to a function that returns an fp value on the floating
+    // point stack, we must guarantee that the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpPOP_RETVAL instruction
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+ MVT::Other, MVT::Glue, Ops, 2), 1);
+ Val = Chain.getValue(0);
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
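+// For example, a caller of a function returning an f64 on an x87-only
+// subtarget sees the value in ST(0); FpPOP_RETVAL pops it even when the
+// result is unused, keeping the x87 register stack balanced (illustrative
+// scenario).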
+
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// The StdCall calling convention is the standard for many Windows API
+// routines. It differs from the C calling convention just a little: the
+// callee cleans up the stack rather than the caller, and symbols are also
+// decorated in some fancy way :) It doesn't support any vector arguments.
+// For info on the fast calling convention see the Fast Calling Convention
+// (tail call) implementation in LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a call uses struct return
+/// semantics.
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ if (Outs.empty())
+ return false;
+
+ return Outs[0].Flags.isSRet();
+}
+
+/// ArgsAreStructReturn - Determines whether a function uses struct
+/// return semantics.
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ if (Ins.empty())
+ return false;
+
+ return Ins[0].Flags.isSRet();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
+/// FuncIsMadeTailCallSafe - Return true if the function is being made into
+/// a tailcall target by changing its ABI.
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+ return GuaranteedTailCallOpt && IsTailCallConvention(CC);
+}
+
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+
+ // If the value is passed by pointer, the address is passed instead of the
+ // value itself.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can be
+ // changed with more analysis.
+ // In case of tail call optimization, mark all arguments mutable, since they
+ // could be overwritten by the lowering of arguments for a tail call.
+ if (Flags.isByVal()) {
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ return DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getLoad(ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+}
+
+SDValue
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
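+ // The Win64 ABI reserves a 32-byte shadow area (four 8-byte home slots for
+ // RCX, RDX, R8 and R9) ahead of any stack arguments; it must be allocated
+ // even when the callee takes fewer than four register arguments.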
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
+
+ unsigned LastVal = ~0U;
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = NULL;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ RC = X86::VR256RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ RC = X86::VR128RegisterClass;
+ else if (RegVT == MVT::x86mmx)
+ RC = X86::VR64RegisterClass;
+ else
+ llvm_unreachable("Unknown argument type!");
+
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector()) {
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+ ArgValue);
+ } else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+ } else {
+ assert(VA.isMemLoc());
+ ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
+ }
+
+ // If the value is passed via a pointer, do a load.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, 0);
+
+ InVals.push_back(ArgValue);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ // Align stack specially for tail calls.
+ if (FuncIsMadeTailCallSafe(CallConv))
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+ // If the function takes a variable number of arguments, make a frame index
+ // for the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
+ }
+ if (Is64Bit) {
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const unsigned GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const unsigned GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ const unsigned *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
+
+ if (IsWin64) {
+ // The XMM registers which might contain vararg parameters are shadowed by
+ // their paired GPRs, so we only need to save the GPRs to their home
+ // slots.
+ TotalNumIntRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+
+ bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
+ "SSE register cannot be used when SSE is disabled!");
+ assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ // SSE is disabled (e.g. soft-float or no-implicit-float, as in kernel
+ // code), so don't push the XMM registers on the stack.
+ TotalNumXMMRegs = 0;
+
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by dereferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ false));
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
+ unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getRegSaveFrameIndex()));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getVarArgsFPOffset()));
+
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+ SaveXMMOps.push_back(Val);
+ }
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size()));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ // Some CCs need callee pop.
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else {
+ FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
+ FuncInfo->setBytesToPopOnReturn(4);
+ }
+
+ if (!Is64Bit) {
+ // RegSaveFrameIndex is X86-64 only.
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
+ // fastcall and thiscall functions can't have varargs.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
+ }
+
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal())
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDValue &OutRetAddr, SDValue Chain,
+ bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const {
+ // Adjust the Return address stack slot.
+ EVT VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, 0);
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ bool Is64Bit, int FPDiff, DebugLoc dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
+ false, false, 0);
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsStructRet = CallIsStructReturn(Outs);
+ bool IsSibcall = false;
+
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // Sibcalls are automatically detected tailcalls which do not require
+ // ABI changes.
+ if (!GuaranteedTailCallOpt && isTailCall)
+ IsSibcall = true;
+
+ if (isTailCall)
+ ++NumTailCalls;
+ }
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ if (IsSibcall)
+ // This is a sibcall. The memory operands are available in the caller's
+ // own stack frame.
+ NumBytes = 0;
+ else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (isTailCall && !IsSibcall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the return-address stack slot, but only if
+ // the new delta is greater (in magnitude) than the previous delta.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ if (!IsSibcall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue RetAddrFrIdx;
+ // Load return address for tail calls.
+ if (isTailCall && FPDiff)
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
+ Is64Bit, FPDiff, dl);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ Arg = SpillSlot;
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && IsWin64) {
+ // The Win64 ABI requires an argument passed in an XMM reg to be copied to
+ // the corresponding shadow reg if the callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
+ } else if (!IsSibcall && (!isTailCall || isByVal)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (Subtarget->isPICStyleGOT()) {
+ // ELF / PIC requires the GOT pointer to be in the EBX register before
+ // function calls via the PLT.
+ if (!isTailCall) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // If we are tail calling and generating PIC/GOT style code load the
+ // address of the callee into ECX. The value in ecx is used as target of
+ // the tail jump. This is done to circumvent the ebx/callee-saved problem
+ // for tail calls on PIC/GOT architectures. Normally we would just put the
+ // address of GOT into ebx and then call target@PLT. But for tail calls
+ // ebx would be restored (since ebx is callee saved) before jumping to the
+ // target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
+ }
+
+ if (Is64Bit && isVarArg && !IsWin64) {
+ // From the AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration), %al is used as a hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an upper bound on the number of SSE
+ // registers used, in the range 0 - 8 inclusive.
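+ // Illustration (not part of the lowering): for a call such as
+ //   printf("%f %f\n", x, y);   // x and y are doubles passed in XMM0/XMM1
+ // two XMM registers are allocated, so %al is set to 2 here (any upper
+ // bound up to 8 would also satisfy the ABI).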
+
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasXMM() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ if (GuaranteedTailCallOpt) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc())
+ continue;
+ assert(VA.isMemLoc());
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Copy arguments to their registers.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff, dl);
+ }
+
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
+ // We should use extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ const GlobalValue *GV = G->getGlobal();
+ if (!GV->hasDLLImportLinkage()) {
+ unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the Leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
+ }
+
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the Leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlags);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+
+ if (!IsSibcall && isTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!isTailCall && Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+ if (Is64Bit && isVarArg && !IsWin64)
+ Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (isTailCall) {
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns not tail calls. Consider a void
+ // function making a tail call to a function returning int.
+ return DAG.getNode(X86ISD::TC_RETURN, dl,
+ NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
+ // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
+ // Returns a flag for retval copy to use.
+ if (!IsSibcall) {
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like StdCall, the callee cleans up the arguments, except that ECX is
+// reserved for storing the address of the tail-called function. Only 2
+// registers are free for argument passing (inreg). Tail call optimization is
+// performed provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// On the X86_64 architecture with GOT-style position-independent code, only
+// local (within-module) calls are supported at the moment.
+// To keep the stack aligned according to the platform ABI, the function
+// GetAlignedArgumentStackSize ensures that the argument delta is always a
+// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
+// for example.)
+// If the tail-called callee has more arguments than the caller, the caller
+// needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original RETADDR, but before the saved frame pointer or the spilled
+// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
+// Stack layout (a worked FPDiff example follows the diagram):
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
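+// Worked example (illustration, 32-bit, 4-byte slots): if the caller's own
+// incoming arguments occupy 8 bytes and the tail-called callee needs 16 bytes
+// of arguments, FPDiff = 8 - 16 = -8, so an 8-byte move area is reserved and
+// the return address is re-stored there before the tail jump.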
+
+/// GetAlignedArgumentStackSize - Round the argument stack size so that, once
+/// the return-address slot is accounted for, the stack stays aligned; e.g.
+/// produce 16n + 12 bytes for a 16-byte alignment requirement with a 4-byte
+/// return-address slot.
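+/// Worked example (assuming StackAlignment = 16 and SlotSize = 4):
+///   StackSize = 20: 20 & 15 = 4  <= 12, so 20 + (12 - 4)        = 28 = 16*1 + 12
+///   StackSize = 30: 30 & 15 = 14 >  12, so (30 & ~15) + 16 + 12 = 44 = 16*2 + 12
+/// Both results are congruent to 12 mod 16, so pushing the 4-byte return
+/// address restores 16-byte alignment.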
+unsigned
+X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ uint64_t SlotSize = TD->getPointerSize();
+ if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
+ // The remainder is no greater than StackAlignment - SlotSize (e.g. 12), so
+ // just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+ // Mask out the lower bits, then add one full stack alignment plus
+ // StackAlignment - SlotSize (e.g. 12 bytes).
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ return Offset;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
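+/// For illustration: if f(int a, int b) sibcalls g(a, b) and both arguments
+/// are forwarded unchanged, each outgoing Arg is just a load from (or, for
+/// byval, the frame index of) the caller's own fixed stack slot at the same
+/// offset, so no store into the outgoing argument area is needed.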
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const X86InstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ unsigned Opcode = Def->getOpcode();
+ if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
+ Def->getOperand(1).isFI()) {
+ FI = Def->getOperand(1).getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+ FI = FINode->getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!IsTailCallConvention(CalleeCC) &&
+ CalleeCC != CallingConv::C)
+ return false;
+
+ // If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ if (GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // A stdcall caller is expected to clean up its arguments; a callee with a
+ // different calling convention isn't going to do that.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ if (isVarArg && !Outs.empty()) {
+
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87
+ // stack. If the result is unused, nothing will pop it, so it is not safe to
+ // optimize this into a sibcall.
+ bool Unused = false;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (!Ins[i].Used) {
+ Unused = true;
+ break;
+ }
+ }
+ if (Unused) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+ }
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
+ return false;
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ !isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
+ unsigned NumInRegs = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == 3)
+ return false;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return X86::createFastISel(funcInfo);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
+ case X86ISD::SHUFPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return true;
+ }
+ return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGN:
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ false);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement) {
+ // Offset should fit into 32 bit immediate field.
+ if (!isInt<32>(Offset))
+ return false;
+
+ // If we don't have a symbolic displacement - we don't have any extra
+ // restrictions.
+ if (!hasSymbolicDisplacement)
+ return true;
+
+ // FIXME: Some tweaks might be needed for medium code model.
+ if (M != CodeModel::Small && M != CodeModel::Kernel)
+ return false;
+
+ // For the small code model, we assume that the last object ends at least
+ // 16MB before the 31-bit boundary. We may also accept fairly large negative
+ // constants, knowing that all objects are in the positive half of the
+ // address space.
+ if (M == CodeModel::Small && Offset < 16*1024*1024)
+ return true;
+
+ // For the kernel code model, we know that all objects reside in the negative
+ // half of the 32-bit address space. We may not accept negative offsets, since
+ // they may be just off the object, but we may accept fairly large positive
+ // ones.
+ if (M == CodeModel::Kernel && Offset > 0)
+ return true;
+
+ return false;
+}
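+// Examples: in the small code model an offset of 8*1024*1024 (8MB) with a
+// symbolic displacement is accepted, while 32*1024*1024 is not; in the kernel
+// code model a positive offset such as 4096 is accepted but -4096 is rejected.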
+
+/// isCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !is64Bit;
+ case CallingConv::X86_FastCall:
+ return !is64Bit;
+ case CallingConv::X86_ThisCall:
+ return !is64Bit;
+ case CallingConv::Fast:
+ return TailCallOpt;
+ case CallingConv::GHC:
+ return TailCallOpt;
+ }
+}
+
+/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
+/// X86-specific condition code, returning the condition code and the LHS/RHS
+/// of the comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_NS;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ return X86::COND_S;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_LE;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+ }
+
+ // First determine if it is required or is profitable to flip the operands.
+
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Condcode should be pre-legalized away");
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: return X86::COND_A;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: return X86::COND_AE;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: return X86::COND_B;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: return X86::COND_BE;
+ case ISD::SETONE:
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETUO: return X86::COND_P;
+ case ISD::SETO: return X86::COND_NP;
+ case ISD::SETOEQ:
+ case ISD::SETUNE: return X86::COND_INVALID;
+ }
+}
+
+/// hasFPCMov - is there a floating point cmov for the specific X86 condition
+/// code. The current x86 ISA includes the following FP cmov instructions:
+/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
+ if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+ return true;
+ }
+ return false;
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls within
+/// the specified half-open range [Low, Hi).
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+ return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+ if (Val < 0 || Val == CmpVal)
+ return true;
+ return false;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
+/// the second operand.
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 )
+ return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return (Mask[0] < 2 && Mask[1] < 2);
+ return false;
+}
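+// Example: for v4i32, the mask <2,1,0,3> is a valid PSHUFD mask (every index
+// refers to the first operand), whereas <0,4,1,5> is not, since indices >= 4
+// reference the second operand.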
+
+bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFDMask(M, N->getValueType(0));
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFHW.
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Lower quadword copied in order or undef.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Upper quadword shuffled.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ return false;
+
+ return true;
+}
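+// Example: for v8i16, <0,1,2,3,7,6,5,4> is a valid PSHUFHW mask (lower
+// quadword in order, upper quadword permuted within 4..7), while
+// <1,0,2,3,4,5,6,7> is not, since it permutes the lower quadword.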
+
+bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFHWMask(M, N->getValueType(0));
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFLW.
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Upper quadword copied in order.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Lower quadword shuffled.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 4)
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFLWMask(M, N->getValueType(0));
+}
+
+/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PALIGNR.
+static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool hasSSSE3) {
+ int i, e = VT.getVectorNumElements();
+
+ // Do not handle v2i64 / v2f64 shuffles with palignr.
+ if (e < 4 || !hasSSSE3)
+ return false;
+
+ for (i = 0; i != e; ++i)
+ if (Mask[i] >= 0)
+ break;
+
+ // All undef, not a palignr.
+ if (i == e)
+ return false;
+
+ // Determine if it's ok to perform a palignr with only the LHS, since we
+ // don't have access to the actual shuffle elements to see if RHS is undef.
+ bool Unary = Mask[i] < (int)e;
+ bool NeedsUnary = false;
+
+ int s = Mask[i] - i;
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != e; ++i) {
+ int m = Mask[i];
+ if (m < 0)
+ continue;
+
+ Unary = Unary && (m < (int)e);
+ NeedsUnary = NeedsUnary || (m < s);
+
+ if (NeedsUnary && !Unary)
+ return false;
+ if (Unary && m != ((s+i) & (e-1)))
+ return false;
+ if (!Unary && m != (s+i))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPALIGNRMask(M, N->getValueType(0), true);
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+
+ return true;
+}
+
+bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isSHUFPMask(M, N->getValueType(0));
+}
+
+/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
+/// the reverse of what x86 shuffles want. x86 shuffles require the lower
+/// half elements to come from vector 1 (which would equal the dest.) and
+/// the upper half to come from vector 2.
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ return true;
+}
+
+static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedSHUFPMask(M, N->getValueType(0));
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(N->getMaskElt(0), 6) &&
+ isUndefOrEqual(N->getMaskElt(1), 7) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if ((NumElems != 2 && NumElems != 4)
+ || N->getValueType(0).getSizeInBits() > 128)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ return false;
+
+ return true;
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start, j = s * NumSectionElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j + NumElts/2))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
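+ // Illustrative example: for a 256-bit v8i32 the accepted mask is
+ // <0,0,1,1,4,4,5,5>, again treating each 128-bit half independently.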
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElems / NumSections;
+
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+ i != NumSectionElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT.getVectorElementType().getSizeInBits() < 32)
+ return false;
+
+ int NumElts = VT.getVectorNumElements();
+
+ if (!isUndefOrEqual(Mask[0], NumElts))
+ return false;
+
+ for (int i = 1; i < NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isMOVLMask(M, N->getValueType(0));
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
+/// x86 movss wants: movss requires the lowest element to be the lowest element
+/// of vector 2 and the other elements to come from vector 1 in order.
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false, bool V2IsUndef = false) {
+ int NumOps = VT.getVectorNumElements();
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], 0))
+ return false;
+
+ for (int i = 1; i < NumOps; ++i)
+ if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+ return false;
+
+ return true;
+}
+
+static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 1, 1, 3, 3
+ for (unsigned i = 0; i < 2; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 1)
+ return false;
+ }
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 3)
+ return false;
+ if (Elt == 3)
+ HasHi = true;
+ }
+ // Don't use movshdup if it can be done with a shufps.
+ // FIXME: verify that matching u, u, 3, 3 is what we want.
+ return HasHi;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 0, 0, 2, 2
+ for (unsigned i = 0; i < 2; ++i)
+ if (N->getMaskElt(i) > 0)
+ return false;
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 2)
+ return false;
+ if (Elt == 2)
+ HasHi = true;
+ }
+ // Don't use movsldup if it can be done with a shufps.
+ return HasHi;
+}
+
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
+ int e = N->getValueType(0).getVectorNumElements() / 2;
+
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ return false;
+ return true;
+}
+
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
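+/// For illustration (derived from the loop below): a 4-element mask <3,2,1,0>
+/// yields 0x1B (0b00011011); undef elements encode as 0 and indices into the
+/// second operand are reduced modulo the element count.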
+unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+
+ unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ unsigned Mask = 0;
+ for (int i = 0; i < NumOperands; ++i) {
+ int Val = SVOp->getMaskElt(NumOperands-i-1);
+ if (Val < 0) Val = 0;
+ if (Val >= NumOperands) Val -= NumOperands;
+ Mask |= Val;
+ if (i != NumOperands - 1)
+ Mask <<= Shift;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
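+/// For illustration: each of mask elements 4-7 contributes a 2-bit field
+/// (value - 4), so the identity mask <u,u,u,u,4,5,6,7> encodes as 0xE4, with
+/// undef elements contributing 0.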
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
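+/// For illustration: the byte rotation is the first defined mask element,
+/// adjusted by its position, times the element size; e.g. a v8i16 mask whose
+/// first defined element is 3 (at position 0) gives (3 - 0) * 2 = 6 bytes.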
+unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VVT = N->getValueType(0);
+ unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
+ int Val = 0;
+
+ unsigned i, e;
+ for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ break;
+ }
+ return (Val - i) * EltSize;
+}
+
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
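+/// For illustration: extracting a 128-bit subvector at element index 4 from a
+/// v8f32 gives NumElemsPerChunk = 128 / 32 = 4 and an immediate of 4 / 4 = 1,
+/// i.e. the upper 128-bit lane.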
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ EVT VecVT = N->getOperand(0).getValueType();
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->isNullValue()) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
+/// their permute mask.
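+/// For illustration: with 4 elements, the mask <0,1,4,5> becomes <4,5,0,1>
+/// after the operands are swapped; undef (negative) entries stay unchanged.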
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = SVOp->getMaskElt(i);
+ if (idx < 0)
+ MaskVec.push_back(idx);
+ else if (idx < (int)NumElems)
+ MaskVec.push_back(idx + NumElems);
+ else
+ MaskVec.push_back(idx - NumElems);
+ }
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
+ SVOp->getOperand(0), &MaskVec[0]);
+}
+
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
+ if (Op->getValueType(0).getVectorNumElements() != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+ ShuffleVectorSDNode *Op) {
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // If V2 is a vector load, don't do this transformation. We will try to use
+ // a load-folded shufps instead.
+ if (ISD::isNON_EXTLoad(V2))
+ return false;
+
+ unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ return false;
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDValue SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to a zero vector.
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
+static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx >= (int)NumElems) {
+ unsigned Opc = V2.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
+ return false;
+ } else if (Idx >= 0) {
+ unsigned Opc = V1.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build SSE zero vectors as <4 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
+ SDValue Vec;
+ if (VT.getSizeInBits() == 128) { // SSE
+ if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.getSizeInBits() == 256) { // AVX
+ // 256-bit logic and arithmetic instructions in AVX are all
+ // floating-point; there is no support for integer ops. Default
+ // to emitting fp zero vectors in that case.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ assert((VT.is128BitVector() || VT.is256BitVector())
+ && "Expected a 128-bit or 256-bit vector type");
+
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
+ SDValue Vec;
+ if (VT.is256BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
+/// that point to V2 point to its first element.
+static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> MaskVec;
+ SVOp->getMask(MaskVec);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (MaskVec[i] > (int)NumElems) {
+ MaskVec[i] = NumElems;
+ Changed = true;
+ }
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
+ SVOp->getOperand(1), &MaskVec[0]);
+ return SDValue(SVOp, 0);
+}
+
+/// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
+/// operation of the specified width.
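+/// For illustration: with 4 elements the mask built below is <4,1,2,3>, i.e.
+/// element 0 is taken from V2 and the remaining elements from V1 in order.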
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
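+/// For illustration: with 4 elements the mask built below is <0,4,1,5>.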
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ Mask.push_back(i);
+ Mask.push_back(i + NumElems);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
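+/// For illustration: with 4 elements the mask built below is <2,6,3,7>.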
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Half = NumElems/2;
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != Half; ++i) {
+ Mask.push_back(i + Half);
+ Mask.push_back(i + NumElems + Half);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT PVT = MVT::v4f32;
+ EVT VT = SV->getValueType(0);
+ DebugLoc dl = SV->getDebugLoc();
+ SDValue V1 = SV->getOperand(0);
+ int NumElems = VT.getVectorNumElements();
+ int EltNo = SV->getSplatIndex();
+
+ // unpack elements to the correct location
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ } else {
+ V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ EltNo -= NumElems/2;
+ }
+ NumElems >>= 1;
+ }
+
+ // Perform the splat.
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
+ V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V1);
+}
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
+ bool isZero, bool HasSSE2,
+ SelectionDAG &DAG) {
+ EVT VT = V2.getValueType();
+ SDValue V1 = isZero
+ ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(i == Idx ? NumElems : i);
+ return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+}
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ Index = SV->getMaskElt(Index);
+
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ int NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ int NumElems = VT.getVectorNumElements();
+ SmallVector<unsigned, 16> ShuffleMask;
+ SDValue ImmN;
+
+ switch(Opcode) {
+ case X86ISD::SHUFPS:
+ case X86ISD::SHUFPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPSMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ DecodePUNPCKHMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ DecodePUNPCKLMask(VT, ShuffleMask);
+ break;
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PSHUFD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // Index 0 always comes from the first element of the second source;
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the remaining positions of the first source vector.
+ unsigned OpNum = (Index == 0) ? 1 : 0;
+ return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+ Depth+1);
+ }
+ default:
+ assert("not implemented for target shuffle node");
+ return SDValue();
+ }
+
+ Index = ShuffleMask[Index];
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ Depth+1);
+ }
+
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BITCAST) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation that consecutively come from a zero (or undef). The
+/// search can start from either end of the vector (left or right).
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ int i = 0;
+
+ while (i < NumElems) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ ++i;
+ }
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also tell OpNum which source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+ int OpIdx, int NumElems, unsigned &OpNum) {
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+
+ for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+ // Ignore undef indices
+ if (Idx < 0)
+ continue;
+
+ if (Idx < NumElems)
+ SeenV1 = true;
+ else
+ SeenV2 = true;
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
+ return false;
+ }
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical right shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros-1, // Mask End Index
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems-1, // Mask End Index
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = true;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
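+/// For illustration (matches the loop below): adjacent bytes are packed into
+/// 16-bit lanes, e.g. nonzero bytes b0 and b1 end up in lane 0 of a v8i16 as
+/// (zext(b1) << 8) | zext(b0), and the v8i16 is bitcast back to v16i8 at the end.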
+static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDValue ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.getNode())
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i/2));
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v8i16, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+}
+
+/// getVShift - Return a vector logical shift node.
+///
+static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
+ EVT ShVT = MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(Opc, dl, ShVT, SrcOp,
+ DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(SrcOp.getValueType()))));
+}
+
+SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst" see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. FIXME: It's possible to compute
+ // the exact stack offset and reference FI + adjusted offset instead,
+ // if someone *really* cares about this. That's the way to implement it.
+ return SDValue();
+ } else {
+ MFI->setObjectAlignment(FI, 16);
+ }
+ }
+
+ // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
+ false, false, 0);
+ // Canonicalize it to a v4i32 shuffle.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32),&Mask[0]));
+ }
+
+ return SDValue();
+}
+
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &DL, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+ Ops, 2, MVT::i32,
+ LDBase->getMemOperand());
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+
+ unsigned NumElems = Op.getNumOperands();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and
+ // use shuffles to put them in place.
+ if (VT.getSizeInBits() == 256 &&
+ Subtarget->hasAVX() &&
+ !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ V[i] = Op.getOperand(i);
+ }
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build the lower subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ // Build the upper subvector.
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ return ConcatVectors(Lower, Upper, DAG);
+ }
+
+ // All zeros:
+ // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+ // All ones:
+ // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
+ if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+ ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // Canonicalize this to <4 x i32> (SSE) or <8 x i32> (AVX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
+ return Op;
+
+ if (ISD::isBuildVectorAllOnes(Op.getNode()))
+ return getOnesVector(Op.getValueType(), DAG, dl);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ }
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ bool IsAllConstants = true;
+ SmallSet<SDValue, 8> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ IsAllConstants = false;
+ if (X86::isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
+ return DAG.getUNDEF(VT);
+
+ // Special case for single non-zero, non-undef, element.
+ if (NumNonZero == 1) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SmallVector<int, 4> Mask;
+ Mask.push_back(Idx);
+ for (unsigned i = 1; i != VecElts; ++i)
+ Mask.push_back(i);
+ Item = DAG.getVectorShuffle(VecVT, dl, Item,
+ DAG.getUNDEF(Item.getValueType()),
+ &Mask[0]);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
+ }
+ }
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is.
+ if (Idx == 0) {
+ if (NumZero == 0) {
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+ (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+ DAG);
+ } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ EVT MiddleVT = MVT::v4i32;
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ }
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this, dl);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
+ return SDValue();
+
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
+ if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(i == Idx ? 0 : 1);
+ return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ }
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr)), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
+ return SDValue();
+ }
+
+ // A vector full of immediates; various special cases are already
+ // handled, so this is best done with a single constant-pool load.
+ if (IsAllConstants)
+ return SDValue();
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasSSE2(), DAG);
+ }
+ return SDValue();
+ }
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ break;
+ case 2:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ case 3:
+ V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ }
+ }
+
+ SmallVector<int, 8> MaskVec;
+ bool Reverse = (NonZeros & 0x3) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i : i);
+ Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ }
+
+ if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // For SSE 4.1, use insertps to insert each element into the result in turn.
+ if (getSubtarget()->hasSSE41()) {
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
+ Op.getOperand(i), DAG.getIntPtrConstant(i));
+ }
+ return Result;
+ }
+
+ // Otherwise, expand into a number of unpckl*, start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+ // If V[i+EltStride] is undef and this is the first round of mixing,
+ // then it is safe to just drop this shuffle: V[i] is already in the
+ // right place, the one element (since it's the first round) being
+ // inserted as undef can be dropped. This isn't safe for successive
+ // rounds because they will permute elements within both vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
+ }
+ return V[0];
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ // We support concatenating two MMX registers and placing the result in
+ // an XMM register. This is better than doing a stack convert.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+ assert(Op.getNumOperands() == 2);
+ assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
+ ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
+ int Mask[2];
+ SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ InVec = Op.getOperand(1);
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ unsigned NumElts = ResVT.getVectorNumElements();
+ VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
+ InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
+ } else {
+ InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
+ SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ Mask[0] = 0; Mask[1] = 2;
+ VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
+}
+
+// v8i16 shuffles - Prefer shuffles in the following order:
+// 1. [all] pshuflw, pshufhw, optional move
+// 2. [ssse3] 1 x pshufb
+// 3. [ssse3] 2 x pshufb + 1 x por
+// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 8> MaskVals;
+
+ // Determine if more than 1 of the words in each of the low and high quadwords
+ // of the result come from the same quadword of one of the two inputs. Undef
+ // mask values count as coming from any quadword, for better codegen.
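+ // Illustrative example: for the mask <0,1,2,3,8,9,10,11> every low word comes
+ // from input quad 0 (V1's low quadword) and every high word from input quad 2
+ // (V2's low quadword), so BestLoQuad ends up 0 and BestHiQuad ends up 2.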
+ SmallVector<unsigned, 4> LoQuad(4);
+ SmallVector<unsigned, 4> HiQuad(4);
+ BitVector InputQuads(4);
+ for (unsigned i = 0; i < 8; ++i) {
+ SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ int EltIdx = SVOp->getMaskElt(i);
+ MaskVals.push_back(EltIdx);
+ if (EltIdx < 0) {
+ ++Quad[0];
+ ++Quad[1];
+ ++Quad[2];
+ ++Quad[3];
+ continue;
+ }
+ ++Quad[EltIdx / 4];
+ InputQuads.set(EltIdx / 4);
+ }
+
+ int BestLoQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LoQuad[i] > MaxQuad) {
+ BestLoQuad = i;
+ MaxQuad = LoQuad[i];
+ }
+ }
+
+ int BestHiQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HiQuad[i] > MaxQuad) {
+ BestHiQuad = i;
+ MaxQuad = HiQuad[i];
+ }
+ }
+
+ // For SSSE3, if all 8 words of the result come from only 1 quadword of each
+ // of the two input vectors, shuffle them into one input vector so only a
+ // single pshufb instruction is necessary. If there are more than 2 input
+ // quads, disable the next transformation since it does not help SSSE3.
+ bool V1Used = InputQuads[0] || InputQuads[1];
+ bool V2Used = InputQuads[2] || InputQuads[3];
+ if (Subtarget->hasSSSE3()) {
+ if (InputQuads.count() == 2 && V1Used && V2Used) {
+ BestLoQuad = InputQuads.find_first();
+ BestHiQuad = InputQuads.find_next(BestLoQuad);
+ }
+ if (InputQuads.count() > 2) {
+ BestLoQuad = -1;
+ BestHiQuad = -1;
+ }
+ }
+
+ // If BestLoQuad or BestHiQuad is set, shuffle the quads together and update
+ // the shuffle mask. A best-quad value of -1 means that half of the result
+ // contains words from all 4 input quadwords.
+ SDValue NewV;
+ if (BestLoQuad >= 0 || BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
+ MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
+
+ // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
+ // source words for the shuffle, to aid later transformations.
+ bool AllWordsInNewV = true;
+ bool InOrder[2] = { true, true };
+ for (unsigned i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx != (int)i)
+ InOrder[i/4] = false;
+ if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
+ continue;
+ AllWordsInNewV = false;
+ break;
+ }
+
+ bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
+ if (AllWordsInNewV) {
+ for (int i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0)
+ continue;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ if ((idx != i) && idx < 4)
+ pshufhw = false;
+ if ((idx != i) && idx > 3)
+ pshuflw = false;
+ }
+ V1 = NewV;
+ V2Used = false;
+ BestLoQuad = 0;
+ BestHiQuad = 1;
+ }
+
+ // If we've eliminated the use of V2, and the new mask is a pshuflw or
+ // pshufhw, that's as cheap as it gets. Return the new shuffle.
+ if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+ X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
+ }
+ }
+
+ // If we have SSSE3, and all words of the result are from 1 input vector,
+ // case 2 is generated, otherwise case 3 is generated. If no SSSE3
+ // is present, fall back to case 4.
+ if (Subtarget->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If we have elements from both input vectors, set the high bit of the
+ // shuffle mask element to zero out elements that come from V2 in the V1
+ // mask, and elements that come from V1 in the V2 mask, so that the two
+ // results can be OR'd together.
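+ // Illustrative example: a word index of 3 becomes the byte selectors 6 and 7,
+ // while a word taken from the other input becomes the pair 0x80, 0x80 so those
+ // result bytes are zeroed here and filled in by the other shuffled input.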
+ bool TwoInputs = V1Used && V2Used;
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (TwoInputs && (EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
+ }
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+ }
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ }
+
+ // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+ // and update MaskVals with new element order.
+ BitVector InOrder(8);
+ if (BestLoQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (int i = 0; i != 4; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestLoQuad) {
+ MaskV.push_back(idx & 3);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ MaskV.push_back(i);
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+ DAG);
+ }
+
+ // If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
+ // and update MaskVals with the new element order.
+ if (BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskV.push_back(i);
+ for (unsigned i = 4; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestHiQuad) {
+ MaskV.push_back((idx & 3) + 4);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+ DAG);
+ }
+
+ // In case BestHiQuad and BestLoQuad were both -1, which means each quadword has a word
+ // from each of the four input quadwords, calculate the InOrder bitvector now
+ // before falling through to the insert/extract cleanup.
+ if (BestLoQuad == -1 && BestHiQuad == -1) {
+ NewV = V1;
+ for (int i = 0; i != 8; ++i)
+ if (MaskVals[i] < 0 || MaskVals[i] == i)
+ InOrder.set(i);
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ SDValue ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ DAG.getIntPtrConstant(EltIdx - 8));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+ DAG.getIntPtrConstant(i));
+ }
+ return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 16> MaskVals;
+ SVOp->getMask(MaskVals);
+
+ // If we have SSSE3, case 1 is generated when all result bytes come from
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // present, fall back to case 3.
+ // FIXME: kill V2Only once shuffles are canonicalized by getNode.
+ bool V1Only = true;
+ bool V2Only = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ if (EltIdx < 16)
+ V2Only = false;
+ else
+ V1Only = false;
+ }
+
+ // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If all result elements are from one input vector, then only translate
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ //
+ // Otherwise, we have elements from both input vectors, and must zero out
+ // elements that come from V2 in the first mask, and V1 in the second mask
+ // so that we can OR them together.
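+ // Illustrative example: with mask <0, 16, -1, ...>, V1's pshufb mask begins
+ // 0x00,0x80,0x80,... and V2's begins 0x80,0x00,0x80,...; the OR merges them
+ // and the undef lane becomes zero.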
+ bool TwoInputs = !(V1Only || V2Only);
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ // If all the elements are from V2, assign it to V1 and return after
+ // building the first pshufb.
+ if (V2Only)
+ V1 = V2;
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return V1;
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ }
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ }
+
+ // No SSSE3 - Calculate the in-place words and then fix all out-of-place words
+ // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
+ // the 16 different words that comprise the two doublequadword input vectors.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+ SDValue NewV = V2Only ? V2 : V1;
+ for (int i = 0; i != 8; ++i) {
+ int Elt0 = MaskVals[i*2];
+ int Elt1 = MaskVals[i*2+1];
+
+ // This word of the result is all undef, skip it.
+ if (Elt0 < 0 && Elt1 < 0)
+ continue;
+
+ // This word of the result is already in the correct place, skip it.
+ if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
+ continue;
+ if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+ continue;
+
+ SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+ SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+ SDValue InsElt;
+
+ // If Elt0 and Elt1 are defined, are consecutive, and can be loaded
+ // together using a single extract, extract the word and insert it.
+ if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ continue;
+ }
+
+ // If Elt1 is defined, extract it from the appropriate source. If the
+ // source byte is not also odd, shift the extracted word left 8 bits;
+ // otherwise clear the bottom 8 bits if we need to do an OR.
+ if (Elt1 >= 0) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ if ((Elt1 & 1) == 0)
+ InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt.getValueType())));
+ else if (Elt0 >= 0)
+ InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+ DAG.getConstant(0xFF00, MVT::i16));
+ }
+ // If Elt0 is defined, extract it from the appropriate source. If the
+ // source byte is not also even, shift the extracted word right 8 bits. If
+ // Elt1 was also defined, OR the extracted values together before
+ // inserting them in the result.
+ if (Elt0 >= 0) {
+ SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+ if ((Elt0 & 1) != 0)
+ InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt0.getValueType())));
+ else if (Elt1 >= 0)
+ InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+ DAG.getConstant(0x00FF, MVT::i16));
+ InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+ : InsElt0;
+ }
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements points to elements in
+/// the right sequence. e.g.
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
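+/// In that (v8i16) example the mask is rewritten as <1, 5, 0, 7> on v4i32,
+/// with the operands bitcast to the narrower type.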
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ EVT NewVT;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Unexpected!");
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ }
+
+ int Scale = NumElems / NewWidth;
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ int StartIdx = -1;
+ for (int j = 0; j < Scale; ++j) {
+ int EltIdx = SVOp->getMaskElt(i+j);
+ if (EltIdx < 0)
+ continue;
+ if (StartIdx == -1)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDValue();
+ }
+ if (StartIdx == -1)
+ MaskVec.push_back(-1);
+ else
+ MaskVec.push_back(StartIdx / Scale);
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
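+/// (X86ISD::VZEXT_MOVL copies the low element of the source into the result
+/// and zeroes the remaining elements.)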
+static SDValue getVZextMovL(EVT VT, EVT OpVT,
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ OpVT,
+ SrcOp.getOperand(0)
+ .getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ OpVT, SrcOp)));
+}
+
+/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
+/// shuffles.
+static SDValue
+LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = SVOp->getValueType(0);
+
+ SmallVector<std::pair<int, int>, 8> Locs;
+ Locs.resize(4);
+ SmallVector<int, 8> Mask1(4U, -1);
+ SmallVector<int, 8> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Idx < 4) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Idx;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < 4)
+ Mask1[2+NumHi] = Idx;
+ NumHi++;
+ }
+ }
+ }
+
+ if (NumLo <= 2 && NumHi <= 2) {
+ // No more than two elements come from either vector, so this can be
+ // implemented with two shuffles. The first shuffle gathers the elements.
+ // The second shuffle, which takes the first shuffle as both of its
+ // vector operands, puts the elements into the right order.
+ V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ SmallVector<int, 8> Mask2(4U, -1);
+
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1)
+ continue;
+ else {
+ unsigned Idx = (i < 2) ? 0 : 4;
+ Idx += Locs[i].first * 2 + Locs[i].second;
+ Mask2[i] = Idx;
+ }
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ } else if (NumLo == 3 || NumHi == 3) {
+ // Otherwise, we must have three elements from one vector, call it X, and
+ // one element from the other, call it Y. First, use a shufps to build an
+ // intermediate vector with the one element from Y and the element from X
+ // that will be in the same half in the final destination (the indexes don't
+ // matter). Then, use a shufps to build the final vector, taking the half
+ // containing the element from Y from the intermediate, and the other half
+ // from X.
+ if (NumHi == 3) {
+ // Normalize it so the 3 elements come from V1.
+ CommuteVectorShuffleMask(PermMask, VT);
+ std::swap(V1, V2);
+ }
+
+ // Find the element from V2.
+ unsigned HiIndex;
+ for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+ int Val = PermMask[HiIndex];
+ if (Val < 0)
+ continue;
+ if (Val >= 4)
+ break;
+ }
+
+ Mask1[0] = PermMask[HiIndex];
+ Mask1[1] = -1;
+ Mask1[2] = PermMask[HiIndex^1];
+ Mask1[3] = -1;
+ V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ if (HiIndex >= 2) {
+ Mask1[0] = PermMask[0];
+ Mask1[1] = PermMask[1];
+ Mask1[2] = HiIndex & 1 ? 6 : 4;
+ Mask1[3] = HiIndex & 1 ? 4 : 6;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ } else {
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ }
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
+ Locs.clear();
+ Locs.resize(4);
+ SmallVector<int,8> LoMask(4U, -1);
+ SmallVector<int,8> HiMask(4U, -1);
+
+ SmallVector<int,8> *MaskPtr = &LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = 2;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (i == 2) {
+ MaskPtr = &HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = 2;
+ }
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (Idx < 4) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ (*MaskPtr)[LoIdx] = Idx;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ (*MaskPtr)[HiIdx] = Idx;
+ HiIdx++;
+ }
+ }
+
+ SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+ SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+ SmallVector<int, 8> MaskOps;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1) {
+ MaskOps.push_back(-1);
+ } else {
+ unsigned Idx = Locs[i].first * 4 + Locs[i].second;
+ MaskOps.push_back(Idx);
+ }
+ }
+ return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
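+// MayFoldVectorLoad - Return true if V looks like a vector load (possibly
+// behind a single-use bitcast and/or scalar_to_vector) that may be folded
+// into the shuffle's memory operand.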
+static bool MayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (MayFoldLoad(V))
+ return true;
+ return false;
+}
+
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug gets fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (ISD::isNormalLoad(V.getNode()))
+ return true;
+ return false;
+}
+
+/// CanXFormVExtractWithShuffleIntoLoad - Check if the current shuffle is used
+/// by a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization is not done.
+/// FIXME: This is probably not the best approach, but it fixes the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+ ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+ // Be sure that the vector shuffle is present in a pattern like this:
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+ if (!V.hasOneUse())
+ return false;
+
+ SDNode *N = *V.getNode()->use_begin();
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ SDValue EltNo = N->getOperand(1);
+ if (!isa<ConstantSDNode>(EltNo))
+ return false;
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ bool HasShuffleIntoBitcast = false;
+ if (V.getOpcode() == ISD::BITCAST) {
+ EVT SrcVT = V.getOperand(0).getValueType();
+ if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+ return false;
+ V = V.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ // Select the input vector, guarding against an out of range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+ V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+ // Skip one more bit_convert if necessary
+ if (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (ISD::isNormalLoad(V.getNode())) {
+ // Is the original load suitable?
+ LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+ // FIXME: avoid the multi-use bug that is preventing lots of
+ // foldings from being detected; this is still wrong of course, but
+ // it gives the temporarily desired behavior, and if it happens that
+ // the load really has more uses, it will not fold during isel and
+ // will generate poor code.
+ if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+ return false;
+
+ if (!HasShuffleIntoBitcast)
+ return true;
+
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return false;
+ }
+
+ return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonicalize to v2f64.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasSSE2 && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+ // potential load folding here; otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ // Both of them can't be memory operations though.
+ if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+ CanFoldLoad = false;
+
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ // movl and movlp will both match v2i64, but v2i64 is never matched by
+ // movl earlier because we make it strict to avoid messing with the movlp load
+ // folding logic (see the code above the getMOVLP call). So match it here;
+ // this is horrible, but it will stay like this until we move all shuffle
+ // matching to x86 specific nodes. Note that for the 1st condition all
+ // types are matched with movsd.
+ if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ else if (HasSSE2)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v4f32:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
+ case MVT::v2f64:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4f64: return X86ISD::VUNPCKLPDY;
+ case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ default:
+ llvm_unreachable("Unknown type for unpckl");
+ }
+ return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ default:
+ llvm_unreachable("Unknown type for unpckh");
+ }
+ return 0;
+}
+
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const X86Subtarget *Subtarget) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+
+ if (isZeroShuffle(SVOp))
+ return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+
+ // Handle splat operations
+ if (SVOp->isSplat()) {
+ // Special case, this is the only place now where it's
+ // allowed to return a vector_shuffle operation without
+ // using a target specific node, because *hopefully* it
+ // will be optimized away by the dag combiner.
+ if (VT.getVectorNumElements() <= 4 &&
+ CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
+ return Op;
+
+ // Handle splats by matching through known masks
+ if (VT.getVectorNumElements() <= 4)
+ return SDValue();
+
+ // Canonicalize all of the remaining to v4f32.
+ return PromoteSplat(SVOp, DAG);
+ }
+
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode()) {
+ if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ DAG, Subtarget, dl);
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ // Shuffle operations on MMX not supported.
+ if (isMMX)
+ return Op;
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+ // The general idea is that no vector_shuffle operation should be left to
+ // be matched during isel, all of them must be converted to a target specific
+ // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+ // doesn't include all of those, work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ if (NewOp.getNode())
+ return NewOp;
+
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+ RelaxedMayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ // Used to match splats
+ if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isPSHUFDMask(SVOp)) {
+ // The actual implementation will match the mask in the if above and then
+ // during isel it can match several different instructions, not only pshufd
+ // as its name says. Sad but true; emulate the behavior for now...
+ if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+ TargetMask, DAG);
+
+ if (VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
+
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDValue ShVal;
+ bool isShift = getSubtarget()->hasSSE2() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ if (X86::isMOVLMask(SVOp)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+ if (!X86::isMOVLPMask(SVOp)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
+ }
+
+ // FIXME: fold these into legal mask.
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
+
+ if (ShouldXformToMOVHLPS(SVOp) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
+ V1IsSplat = isSplatVector(V1.getNode());
+ V2IsSplat = isSplatVector(V2.getNode());
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
+ if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ Op = CommuteVectorShuffle(SVOp, DAG);
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = SVOp->getOperand(0);
+ V2 = SVOp->getOperand(1);
+ std::swap(V1IsSplat, V2IsSplat);
+ std::swap(V1IsUndef, V2IsUndef);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ // Shuffling low element of v1 into undef, just return v1.
+ if (V2IsUndef)
+ return V1;
+ // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+ // the instruction selector will not match, so get a canonical MOVL with
+ // swapped operands to undo the commute.
+ return getMOVL(DAG, dl, VT, V2, V1);
+ }
+
+ if (X86::isUNPCKLMask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V2, DAG);
+
+ if (X86::isUNPCKHMask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (V2IsSplat) {
+ // Normalize the mask so all entries that point to V2 point to its first
+ // element, then try to match unpck{h|l} again. If there is a match, return
+ // a new vector_shuffle with the corrected mask.
+ SDValue NewMask = NormalizeMask(SVOp, DAG);
+ ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
+ if (NSVOp != SVOp) {
+ if (X86::isUNPCKLMask(NSVOp, true)) {
+ return NewMask;
+ } else if (X86::isUNPCKHMask(NSVOp, true)) {
+ return NewMask;
+ }
+ }
+ }
+
+ if (Commuted) {
+ // Commute it back and try unpck* again.
+ // FIXME: this seems wrong.
+ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
+ ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+
+ if (X86::isUNPCKLMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V2, V1, DAG);
+
+ if (X86::isUNPCKHMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ }
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // The checks below are all present in isShuffleMaskLegal, but they are
+ // inlined here right now to enable us to directly emit target specific
+ // nodes, and remove one by one until they don't return Op anymore.
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+
+ if (isPALIGNRMask(M, VT, HasSSSE3))
+ return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ X86::getShufflePALIGNRImmediate(SVOp),
+ DAG);
+
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64) {
+ X86ISD::NodeType Opcode =
+ getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
+ }
+ if (VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ }
+
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT)) {
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (VT == MVT::v4f32 || VT == MVT::v4i32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+ TargetMask, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+ TargetMask, DAG);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v16i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4)
+ return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ if (VT.getSizeInBits() == 8) {
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 16) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1)));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
+ if (!Op.hasOneUse())
+ return SDValue();
+ SDNode *User = *Op.getNode()->use_begin();
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ (User->getOpcode() != ISD::BITCAST ||
+ User->getValueType(0) != MVT::i32))
+ return SDValue();
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
+ } else if (VT == MVT::i32) {
+ // ExtractPS works with constant index.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return Op;
+ }
+ return SDValue();
+}
+
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+
+ // If this is a 256-bit vector result, first extract the 128-bit
+ // vector and then extract from the 128-bit vector.
+ if (VecVT.getSizeInBits() > 128) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ bool Upper = IdxVal >= ExtractNumElems;
+ Vec = Extract128BitVector(Vec, Idx, DAG, dl);
+
+ // Extract from it.
+ SDValue ScaledIdx = Idx;
+ if (Upper)
+ ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(ExtractNumElems,
+ Idx.getValueType()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ ScaledIdx);
+ }
+
+ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
+ if (Subtarget->hasSSE41()) {
+ SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ // TODO: handle v16i8.
+ if (VT.getSizeInBits() == 16) {
+ SDValue Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, Vec),
+ Op.getOperand(1)));
+ // Transform it so it matches pextrw, which produces a 32-bit result.
+ EVT EltVT = MVT::i32;
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 32) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // SHUFPS the element to the lowest double word, then movss.
+ int Mask[4] = { Idx, -1, -1, -1 };
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ } else if (VT.getSizeInBits() == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
+ // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ int Mask[2] = { 1, -1 };
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
+ isa<ConstantSDNode>(N2)) {
+ unsigned Opc;
+ if (VT == MVT::v8i16)
+ Opc = X86ISD::PINSRW;
+ else if (VT == MVT::v16i8)
+ Opc = X86ISD::PINSRB;
+ else
+ Opc = X86ISD::PINSRB;
+
+ // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
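+ // For example, inserting into destination lane 2 with no zero mask yields
+ // the immediate 2 << 4 == 0x20.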
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+ // Create this as a scalar to vector.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ // PINSR* works with constant index.
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (VT.getSizeInBits() > 128) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the 128-bit vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ bool Upper = IdxVal >= NumElems / 2;
+
+ SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+
+ // Insert into it.
+ SDValue ScaledN2 = N2;
+ if (Upper)
+ ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
+ DAG.getConstant(NumElems /
+ (VT.getSizeInBits() / 128),
+ N2.getValueType()));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
+ N1, ScaledN2);
+
+ // Insert the 128-bit vector
+ // FIXME: Why UNDEF?
+ return Insert128BitVector(N0, Op, N2, DAG, dl);
+ }
+
+ if (Subtarget->hasSSE41())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+ if (EltVT == MVT::i8)
+ return SDValue();
+
+ if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
+ // as its second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT OpVT = Op.getValueType();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (OpVT.getSizeInBits() > 128) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+ DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ }
+
+ if (Op.getValueType() == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
+
+ SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+ assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+ "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+ && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 256
+ && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form an addressing mode. These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset(), OpFlag);
+ DebugLoc DL = CP->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ OpFlag);
+ DebugLoc DL = JT->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag)
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+
+ DebugLoc DL = Op.getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit()) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ // Create the TargetBlockAddressAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+ /*isTarget=*/true, OpFlags);
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset,
+ SelectionDAG &DAG) const {
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ SDValue Result;
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ // A direct static reference to a global.
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ // For globals that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlags))
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
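+// GetTLSADDR - Emit the X86ISD::TLSADDR pseudo for the given global (with the
+// given operand flags) and return the value that the TLS runtime call leaves
+// in ReturnReg.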
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+ SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(),
+ OperandFlags);
+ if (InFlag) {
+ SDValue Ops[] = { Chain, TGA, *InFlag };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ } else {
+ SDValue Ops[] = { Chain, TGA };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ }
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
+ MFI->setAdjustsStack(true);
+
+ SDValue Flag = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ SDValue InFlag;
+ DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+ X86::RAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT, TLSModel::Model model,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
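+ // (In the x86 backend, address space 256 selects %gs and 257 selects %fs.)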
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, 0);
+
+ unsigned char OperandFlags = 0;
+ // Most TLS accesses are not RIP relative, even on x86-64. One exception is
+ // initial exec.
+ unsigned WrapperKind = X86ISD::Wrapper;
+ if (model == TLSModel::LocalExec) {
+ OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
+ } else if (is64Bit) {
+ assert(model == TLSModel::InitialExec);
+ OperandFlags = X86II::MO_GOTTPOFF;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else {
+ assert(model == TLSModel::InitialExec);
+ OperandFlags = X86II::MO_INDNTPOFF;
+ }
+
+ // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
+ // exec)
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
+ SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
+
+ if (model == TLSModel::InitialExec)
+ Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GA->getGlobal();
+
+ if (Subtarget->isTargetELF()) {
+ // TODO: implement the "local dynamic" model
+ // TODO: implement the "initial exec"model for pic executables
+
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+
+ TLSModel::Model model
+ = getTLSModel(GV, getTargetMachine().getRelocationModel());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ if (Subtarget->is64Bit())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+ Subtarget->is64Bit());
+ }
+ } else if (Subtarget->isTargetDarwin()) {
+ // Darwin only has one model of TLS. Lower to that.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ X86ISD::WrapperRIP : X86ISD::Wrapper;
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
+ if (PIC32)
+ OpFlag = X86II::MO_TLVP_PIC_BASE;
+ else
+ OpFlag = X86II::MO_TLVP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ GA->getValueType(0),
+ GA->getOffset(), OpFlag);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC32, the address is actually $g + Offset.
+ if (PIC32)
+ Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Offset);
+
+ // Lowering the machine ISD node will make sure everything ends up in the
+ // right location.
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
+ // TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // And our return value (tls address) is in the standard call return value
+ // location.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ }
+
+ llvm_unreachable("TLS not implemented for this target.");
+ return SDValue();
+}
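A minimal C++ illustration (not part of the patch; which model applies depends
on the relocation model and target) of the kind of source that reaches this
lowering: a thread_local access becomes an ISD::GlobalTLSAddress node, and
under the exec models it is lowered to the ThreadPointer-plus-Offset ADD built
above, with the thread pointer read from %gs:0 (32-bit) or %fs:0 (64-bit).

    #include <cstdio>

    // A plain thread-local variable; its address is what the TLS lowering
    // above computes, using whichever model the target picks.
    thread_local int tls_counter = 0;

    int bump() { return ++tls_counter; }

    int main() { std::printf("%d\n", bump()); }  // prints 1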
+
+
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8))
+ : DAG.getConstant(0, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ }
+
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDValue Hi, Lo;
+ SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ } else {
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ }
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
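As an illustration only (names invented for the sketch, not part of the patch),
a scalar C++ model of what the SHL_PARTS lowering above computes: Tmp2 is the
SHLD of the two halves, Tmp3 the plain SHL of the low half, and the CMOV keyed
on bit 5 of the shift amount (ShAmt & VTBits) picks between them when the
amount is 32 or more.

    #include <cstdint>
    #include <cstdio>

    // Scalar model of SHL_PARTS on a 64-bit value split into 32-bit halves.
    void shlParts(uint32_t lo, uint32_t hi, unsigned amt,
                  uint32_t &outLo, uint32_t &outHi) {
      unsigned s = amt & 31;                                  // hardware masks the count
      uint32_t shld = s ? (hi << s) | (lo >> (32 - s)) : hi;  // "Tmp2"
      uint32_t shl  = lo << s;                                // "Tmp3"
      if (amt & 32) { outHi = shl;  outLo = 0;   }            // CMOV path: amount >= 32
      else          { outHi = shld; outLo = shl; }
    }

    int main() {
      uint32_t lo, hi;
      shlParts(0x80000001u, 0x00000000u, 1, lo, hi);
      std::printf("%08x:%08x\n", hi, lo);  // 00000001:00000002
    }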
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT.isVector())
+ return SDValue();
+
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; return the operand so the caller accepts it as
+ // Legal.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Op;
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ Subtarget->is64Bit()) {
+ return Op;
+ }
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Size = SrcVT.getSizeInBits()/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) const {
+ // Build the FILD
+ DebugLoc DL = Op.getDebugLoc();
+ SDVTList Tys;
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
+ MachineMemOperand *MMO;
+ if (FI) {
+ int SSFI = FI->getIndex();
+ MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ } else {
+ MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ StackSlot = StackSlot.getOperand(1);
+ }
+ SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
+
+ if (useSSE) {
+ Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When the stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {
+ Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
+ };
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ }
+
+ return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
+ SelectionDAG &DAG) const {
+ // This algorithm is not obvious. Here it is in C code, more or less:
+ /*
+ double uint64_to_double( uint32_t hi, uint32_t lo ) {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // Copy ints to xmm registers.
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // Combine into low half of a single xmm register.
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // Merge in appropriate exponents to give the integer bits the right
+ // magnitude.
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // Subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1.
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+ // All conversions up to here are exact. The correctly rounded result is
+ // calculated using the current rounding mode with the following
+ // horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
+ // store doesn't really need to be here (except
+ // maybe to zero the other double)
+ return sd;
+ }
+ */
+
+ DebugLoc dl = Op.getDebugLoc();
+ LLVMContext *Context = DAG.getContext();
+
+ // Build some magic constants.
+ std::vector<Constant*> CV0;
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ Constant *C0 = ConstantVector::get(CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+ std::vector<Constant*> CV1;
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(1)));
+ SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+
+ // Add the halves; easiest way is to swap them into another reg first.
+ int ShufMask[2] = { 1, -1 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
+ DAG.getUNDEF(MVT::v2f64), ShufMask);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ DAG.getIntPtrConstant(0));
+}
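The C sketch in the comment uses SSE intrinsics; purely as a cross-check (not
part of the patch), the same bias trick can be written with scalar code: drop
each 32-bit half into the mantissa of a double whose exponent encodes 2^84 or
2^52, subtract those biases, and add the two halves.

    #include <cstdint>
    #include <cstring>
    #include <cmath>
    #include <cstdio>

    static double bitsToDouble(uint64_t bits) {  // reinterpret raw IEEE-754 bits
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    double uint64ToDouble(uint64_t v) {
      uint32_t hi = uint32_t(v >> 32), lo = uint32_t(v);
      // 0x45300000 and 0x43300000 are the high words of 2^84 and 2^52.
      double dHi = bitsToDouble((uint64_t(0x45300000) << 32) | hi) - std::ldexp(1.0, 84);
      double dLo = bitsToDouble((uint64_t(0x43300000) << 32) | lo) - std::ldexp(1.0, 52);
      return dHi + dLo;  // the only step that rounds
    }

    int main() {
      std::printf("%.17g\n", uint64ToDouble(~0ULL));  // ~1.8446744073709552e+19
    }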
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // FP constant to bias correct the final result.
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Load)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Bias)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
+ EVT DestVT = Op.getValueType();
+
+ if (DestVT.bitsLT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+
+ // Handle final rounding.
+ return Sub;
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+ EVT SrcVT = N0.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i32(Op, DAG);
+
+ // Make a 64-bit buffer, and use it to build an FILD.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+ // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
+}
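A scalar model (illustrative only; the helper name is invented) of the i64
path above: load the value as a signed integer the way FILD does, then add
2^64 back if the sign bit was set; the 0x5F800000 constant in the code is
2^64 encoded as an f32.

    #include <cstdint>
    #include <cstdio>

    long double uint64ViaSignedLoad(uint64_t v) {
      long double d = (long double)(int64_t)v;  // what FILD of the 64-bit slot yields
      if ((int64_t)v < 0)
        d += 18446744073709551616.0L;           // add the 2^64 fudge, in x87 precision
      return d;
    }

    int main() {
      std::printf("%.0Lf\n", uint64ViaSignedLoad(~0ULL));  // 18446744073709551615
    }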
+
+std::pair<SDValue,SDValue> X86TargetLowering::
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
+ DebugLoc DL = Op.getDebugLoc();
+
+ EVT DstTy = Op.getValueType();
+
+ if (!IsSigned) {
+ assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+ DstTy = MVT::i64;
+ }
+
+ assert(DstTy.getSimpleVT() <= MVT::i64 &&
+ DstTy.getSimpleVT() >= MVT::i16 &&
+ "Unknown FP_TO_SINT to lower!");
+
+ // These are really Legal.
+ if (DstTy == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+ if (Subtarget->is64Bit() &&
+ DstTy == MVT::i64 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = DstTy.getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ unsigned Opc;
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Value = Op.getOperand(0);
+ EVT TheVT = Op.getOperand(0).getValueType();
+ if (isScalarFPTypeInSSEReg(TheVT)) {
+ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ Chain, StackSlot, DAG.getValueType(TheVT)
+ };
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return SDValue();
+
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (FIST.getNode() == 0) return Op;
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ assert(FIST.getNode() && "Unexpected failure");
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op,
+ SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ if (VT.isVector()) {
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::XOR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ } else {
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ }
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Op1.getValueType();
+
+ // If second operand is smaller, extend it first.
+ if (SrcVT.bitsLT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+ SrcVT = VT;
+ }
+ // And if it is bigger, shrink it first.
+ if (SrcVT.bitsGT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
+
+ // First get the sign bit of second operand.
+ std::vector<Constant*> CV;
+ if (SrcVT == MVT::f64) {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (SrcVT.bitsGT(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ }
+ C = ConstantVector::get(CV);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
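For reference only (a scalar restatement, not the patch's SSE form), the same
mask-and-or copysign: clear the sign bit of the first operand, isolate the
sign bit of the second, and OR the two. The (1ULL << 63) and ~(1ULL << 63)
masks are the same constants the code above places in the constant pool.

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    double copySignViaMasks(double mag, double sgn) {
      uint64_t m, s;
      std::memcpy(&m, &mag, sizeof m);
      std::memcpy(&s, &sgn, sizeof s);
      uint64_t bits = (m & ~(1ULL << 63))    // clear the first operand's sign bit
                    | (s &  (1ULL << 63));   // keep only the second operand's sign bit
      double out;
      std::memcpy(&out, &bits, sizeof out);
      return out;
    }

    int main() { std::printf("%g\n", copySignViaMasks(3.5, -0.0)); }  // -3.5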
+
+SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
+ SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CF and OF aren't always set the way we want. Determine which
+ // of these we need.
+ bool NeedCF = false;
+ bool NeedOF = false;
+ switch (X86CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ NeedOF = true;
+ break;
+ }
+
+ // See if we can use the EFLAGS value from the operand instead of
+ // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+ // we prove that the arithmetic won't overflow, we can't use OF or CF.
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none of our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
+ }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary result of the 'and' isn't used, don't bother using
+ // X86ISD::AND, because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
+ break;
+ }
+ }
+
+ if (!NonFlagUse)
+ break;
+ }
+ // FALL THROUGH
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
+ }
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
+ }
+
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
+}
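A hypothetical source pattern (not from the patch; whether a given compiler
keeps the add at all depends on earlier IR canonicalization) of the DAG-level
shape this routine targets: a value produced by adding or subtracting one that
is immediately compared against zero, so the comparison can reuse the EFLAGS
set by INC/DEC instead of a separate TEST, subject to the use restrictions in
the comments above.

    #include <cstdio>

    // Comparisons of an increment/decrement result against zero are the
    // pattern EmitTest can turn into INC/DEC with a reused flags output.
    bool incrementsToZero(int x) { return x + 1 == 0; }
    bool decrementsToZero(int x) { return x - 1 == 0; }

    int main() {
      std::printf("%d %d\n", incrementsToZero(-1), decrementsToZero(1));  // 1 1
    }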
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ if (C->getAPIntValue() == 0)
+ return EmitTest(Op0, X86CC, DAG);
+
+ DebugLoc dl = Op0.getDebugLoc();
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ SDValue Op0 = And.getOperand(0);
+ SDValue Op1 = And.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
+ SDValue LHS, RHS;
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
+ if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
+ LHS = Op1;
+ RHS = Op0.getOperand(1);
+ }
+ } else if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ SDValue AndLHS = Op0;
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (LHS.getValueType() == MVT::i8 ||
+ LHS.getValueType() == MVT::i16)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
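Hypothetical C++ forms (not from the patch) of the patterns listed in the
comment above; on typical x86 these are the setcc shapes LowerToBT converts
into a BT followed by SETAE or SETB.

    #include <cstdio>

    bool bitClear(unsigned x, unsigned n) { return (x & (1u << n)) == 0; }  // BT + SETAE
    bool bitSet(unsigned x, unsigned n)   { return ((x >> n) & 1u) != 0; }  // BT + SETB

    int main() {
      std::printf("%d %d\n", bitClear(4u, 1), bitSet(4u, 2));  // prints 1 1
    }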
+
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (isFP) {
+ unsigned SSECC = 8;
+ EVT VT0 = Op0.getValueType();
+ assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
+ unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ bool Swap = false;
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // In the two special cases we can't handle, emit two comparisons.
+ if (SSECC == 8) {
+ if (SetCCOpcode == ISD::SETUEQ) {
+ SDValue UNORD, EQ;
+ UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
+ }
+ else if (SetCCOpcode == ISD::SETONE) {
+ SDValue ORD, NEQ;
+ ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ }
+ llvm_unreachable("Illegal FP comparison");
+ }
+ // Handle all other FP comparisons here.
+ return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ }
+
+ // We are handling one of the integer comparisons here. Since SSE only has
+ // GT and EQ comparisons for integers, swapping operands and multiple
+ // operations may be required for some comparisons.
+ unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ bool Swap = false, Invert = false, FlipSigns = false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+ case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGE: Swap = true;
+ case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGE: Swap = true;
+ case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
+ if (FlipSigns) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
+ SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+ // If the logical-not of the result is required, perform that now.
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
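A scalar illustration (not part of the patch) of the sign-flip used above:
XORing both operands with the sign bit turns an unsigned comparison into a
signed one, which is all that SSE's PCMPGT family provides.

    #include <cstdint>
    #include <cstdio>

    bool unsignedGtViaSignedCompare(uint32_t a, uint32_t b) {
      int32_t sa = int32_t(a ^ 0x80000000u);  // flip the sign bit of each operand
      int32_t sb = int32_t(b ^ 0x80000000u);
      return sa > sb;                         // equals (a > b) as unsigned
    }

    int main() {
      std::printf("%d %d\n",
                  unsignedGtViaSignedCompare(0xFFFFFFFFu, 1u),   // 1
                  unsignedGtViaSignedCompare(1u, 2u));           // 0
    }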
+
+// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+ unsigned Opc = Op.getNode()->getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ return true;
+ if (Op.getResNo() == 1 &&
+ (Opc == X86ISD::ADD ||
+ Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
+ Opc == X86ISD::SMUL ||
+ Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC ||
+ Opc == X86ISD::DEC ||
+ Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR ||
+ Opc == X86ISD::AND))
+ return true;
+
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
+ return false;
+}
+
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Cond = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+ if (Cond.getOpcode() == X86ISD::SETCC &&
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
+ SDValue Cmp = Cond.getOperand(1);
+
+ unsigned CondCode = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
+ }
+ }
+
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If the condition flag is set by an X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ EVT VT = Op.getValueType();
+
+ bool IllegalFPCMov = false;
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::CMP) {
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
+
+ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+ // condition is true.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = { Op2, Op1, CC, Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
+}
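A scalar restatement (illustrative only, not the patch itself) of the
(select (x == 0), -1, y) -> (sign_bit (x - 1)) | y identity handled above:
comparing x against 1 sets the carry flag exactly when x is 0, and
SETCC_CARRY (an sbb of a register with itself) broadcasts that carry into 0
or all-ones, which is then ORed with y.

    #include <cstdint>
    #include <cstdio>

    uint32_t selectMinusOneIfZero(uint32_t x, uint32_t y) {
      // What SETCC_CARRY materializes: all-ones iff the compare x < 1 carried,
      // i.e. iff x == 0.
      uint32_t allOnesIfZero = (x == 0) ? 0xFFFFFFFFu : 0u;
      return allOnesIfZero | y;               // (select (x == 0), -1, y)
    }

    int main() {
      std::printf("%08x %08x\n",
                  selectMinusOneIfZero(0u, 5u),   // ffffffff
                  selectMinusOneIfZero(3u, 5u));  // 00000005
    }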
+
+// isAndOrOfSetCCs - Return true if node is an ISD::AND or ISD::OR of two
+// X86ISD::SETCC nodes, each of which has no other use apart from the AND/OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of an X86ISD::SETCC and
+// 1 and that the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+ if (Op.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (N1C && N1C->getAPIntValue() == 1) {
+ return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse();
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+#if 0
+ // FIXME: LowerXALUO doesn't handle these!!
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
+#endif
+
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If the condition flag is set by an X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+ Cond = Cmp;
+ addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_B:
+ // These can only come from an arithmetic instruction with overflow,
+ // e.g. SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
+ } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+ // Recognize xorb (setcc), 1 patterns. The xor inverts the condition. It
+ // should be transformed by the DAG combiner except when the condition is
+ // set by an arithmetic-with-overflow node.
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cond.getOperand(0).getOperand(1);
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+ return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cond);
+}
+
+
+// Lower dynamic stack allocation to an _alloca call for Windows, Cygwin and
+// MinGW targets. Calls to _alloca are needed to probe the stack when
+// allocating more than 4K bytes in one go. Touching the stack at 4K increments
+// is necessary to ensure that the guard pages used by the OS virtual memory
+// manager are allocated in the correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+ "This should be used only on Windows targets");
+ assert(!Subtarget->isTargetEnvMacho());
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
+
+ SDValue Flag;
+
+ EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+ // overflow_arg_area (points to parameters passed in memory).
+ // reg_save_area
+ SmallVector<SDValue, 8> MemOps;
+ SDValue FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(8));
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOps[0], MemOps.size());
+}
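For reference (an annotation, not part of the patch), the structure being
filled in above is the x86-64 SysV __va_list_tag; the four stores at offsets
0, 4, 8 and 16 correspond to its fields, matching the "{ i32, i32, i8*, i8* }"
noted in LowerVACOPY below.

    // x86-64 SysV va_list element as described by the comments above.
    struct VaListTag {
      unsigned int gp_offset;         // offset 0:  0 .. 6*8, into reg_save_area (GPRs)
      unsigned int fp_offset;         // offset 4:  48 .. 48 + 8*16 (XMM registers)
      void        *overflow_arg_area; // offset 8:  parameters passed on the stack
      void        *reg_save_area;     // offset 16: spilled register-argument area
    };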
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasXMM());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc = 0;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
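+    // COMI/UCOMI compare the lowest elements of the two operands and set
+    // EFLAGS; SETCC then materializes the requested condition as an i8 which
+    // is zero-extended to the i32 the intrinsic is defined to return.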
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
+ SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+  // ptest and testp intrinsics. The intrinsics these come from are designed to
+  // return an integer value, not just an instruction, so lower it to the ptest
+  // or testp pattern and a setcc for the result.
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
+ unsigned X86CC = 0;
+ switch (IntNo) {
+ default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
+ // ZF = 1
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
+ // CF = 1
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ // ZF and CF = 0
+ X86CC = X86::COND_A;
+ break;
+ }
+
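+    // COND_E tests ZF, COND_B tests CF, and COND_A tests CF==0 && ZF==0,
+    // mirroring the flag definitions of the ptest/vtest variants above.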
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = Op.getOperand(2);
+ if (isa<ConstantSDNode>(ShAmt))
+ return SDValue();
+
+ unsigned NewIntNo = 0;
+ EVT ShAmtVT = MVT::v4i32;
+ switch (IntNo) {
+ case Intrinsic::x86_sse2_pslli_w:
+ NewIntNo = Intrinsic::x86_sse2_psll_w;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ NewIntNo = Intrinsic::x86_sse2_psll_d;
+ break;
+ case Intrinsic::x86_sse2_pslli_q:
+ NewIntNo = Intrinsic::x86_sse2_psll_q;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ break;
+ case Intrinsic::x86_sse2_psrli_q:
+ NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ NewIntNo = Intrinsic::x86_sse2_psra_w;
+ break;
+ case Intrinsic::x86_sse2_psrai_d:
+ NewIntNo = Intrinsic::x86_sse2_psra_d;
+ break;
+ default: {
+ ShAmtVT = MVT::v2i32;
+ switch (IntNo) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+ break;
+ }
+ }
+
+    // The vector shift intrinsics with scalar shift amounts use a 32-bit value,
+    // but the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+    // to zero.
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ if (ShAmtVT == MVT::v4i32) {
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
+ } else {
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+      // FIXME this must be lowered to get rid of the invalid type.
+ }
+
+ EVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(NewIntNo, MVT::i32),
+ Op.getOperand(1), ShAmt);
+ }
+ }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
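+  // For Depth > 0, compute the frame pointer of the requested frame via
+  // LowerFRAMEADDR and load the return address, which sits one pointer-size
+  // slot above the saved frame pointer.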
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(),
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+ SelectionDAG &DAG) const {
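+  // The incoming argument area starts two pointer-size slots above the frame
+  // pointer: past the saved frame pointer and the saved return address.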
+ return DAG.getIntPtrConstant(2*TD->getPointerSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
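+  // Store the handler's address into the parent frame's return-address slot
+  // (frame pointer + pointer size), adjusted by Offset, and hand that slot's
+  // address to EH_RETURN in ECX/RCX.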
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
+ unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(TD->getPointerSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+ MF.getRegInfo().addLiveOut(StoreAddrReg);
+
+ return DAG.getNode(X86ISD::EH_RETURN, dl,
+ MVT::Other,
+ Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Root = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ if (Subtarget->is64Bit()) {
+ SDValue OutChains[6];
+
+ // Large code-model.
+ const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
+ const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
+
+ const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+ const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
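+    // The trampoline written below is, byte for byte:
+    //    0: 49 BB <fptr:8>   movabsq $fptr, %r11
+    //   10: 49 BA <nest:8>   movabsq $nest, %r10
+    //   20: 49 FF E3         jmpq *%r11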
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDValue Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
+ false, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
+ false, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ CallingConv::ID CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::X86_StdCall: {
+ // Pass 'nest' parameter in ECX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::ECX;
+
+ // Check that ECX wasn't needed by an 'inreg' parameter.
+ const FunctionType *FTy = Func->getFunctionType();
+ const AttrListPtr &Attrs = Func->getAttributes();
+
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
+ unsigned InRegCount = 0;
+ unsigned Idx = 1;
+
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I, ++Idx)
+ if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ // FIXME: should only count parameters that are lowered to integers.
+ InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+ if (InRegCount > 2) {
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
+ }
+ }
+ break;
+ }
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::Fast:
+ // Pass 'nest' parameter in EAX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::EAX;
+ break;
+ }
+
+ SDValue OutChains[4];
+ SDValue Addr, Disp;
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(10, MVT::i32));
+ Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
+
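+    // The 10-byte trampoline being written is:
+    //   0: B8+r <nest:4>   movl $nest, %eax/%ecx
+    //   5: E9 <disp:4>     jmp fptr, disp relative to the end of the jmp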
+ // This is storing the opcode for MOV32ri.
+ const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
+ const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+ OutChains[0] = DAG.getStore(Root, dl,
+ DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(1, MVT::i32));
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
+ false, false, 1);
+
+ const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(5, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(6, MVT::i32));
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
+ false, false, 1);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ /*
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ */
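+  // For example, FPSR RC = 01 (round down): bit 11 = 0, bit 10 = 1, so
+  // ((0 | (1 << 1)) + 1) & 3 = 3, which FLT_ROUNDS defines as round to -inf.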
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
+
+ // Load FP Control Word from stack slot
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
+}
+
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+    // Zero extend to i32 since there is no i8 bsr instruction.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), returns NumBits.
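+  // Selecting 2*NumBits-1 for the zero case makes the final XOR with NumBits-1
+  // below produce NumBits, since (2*NumBits-1) ^ (NumBits-1) == NumBits when
+  // NumBits is a power of two.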
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits+NumBits-1, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ //
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
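+  //
+  // i.e. A*B mod 2^64 = Alo*Blo + ((Alo*Bhi + Ahi*Blo) << 32); the Ahi*Bhi
+  // partial product is shifted out of the low 64 bits entirely.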
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ return Res;
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ LLVMContext *Context = DAG.getContext();
+
+ // Must have SSE2.
+ if (!Subtarget->hasSSE2()) return SDValue();
+
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ // Lower SHL with variable shift amount.
+ // Cannot lower SHL without SSE2 or later.
+ if (!Subtarget->hasSSE2()) return SDValue();
+
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
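+    // Build 2^amt per lane: shift the amount into the float exponent field
+    // (<< 23), add the exponent bias 0x3f800000 (1.0f), bitcast to v4f32 and
+    // convert back to integer, then multiply R by the result.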
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
+
+ std::vector<Constant*> CV(4, CI);
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
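+    // Only the low three bits of a byte shift amount matter, so move them up
+    // to the sign-bit position (a << 5) and let PBLENDVB, which selects on the
+    // sign bit, apply the shift one bit at a time (by 4, then 2, then 1),
+    // doubling 'a' between steps.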
+ // a = a << 5;
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+
+ ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
+ ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+
+ std::vector<Constant*> CVM1(16, CM1);
+ std::vector<Constant*> CVM2(16, CM2);
+ Constant *C = ConstantVector::get(CVM1);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)15, 4), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(4, MVT::i32));
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ C = ConstantVector::get(CVM2);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(2, MVT::i32));
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ // return pblendv(r, r+r, a);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ return R;
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+    // An add of one will be selected as an INC. Note that INC doesn't
+ // set CF, so we can't do this for UADDO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ // A subtract of one will be selected as a DEC. Note that DEC doesn't
+ // set CF, so we can't do this for USUBO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+ }
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+}
+
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode* Node = Op.getNode();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ if (Subtarget->hasSSE2() && VT.isVector()) {
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
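+    // sign_extend_inreg of a vector is lowered as a shift left by the number
+    // of unused upper bits followed by an arithmetic shift right by the same
+    // amount, which replicates the sign bit of the ExtraVT value.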
+
+ unsigned SHLIntrinsicsID = 0;
+ unsigned SRAIntrinsicsID = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i64: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
+ SRAIntrinsicsID = 0;
+ break;
+ }
+ case MVT::v4i32: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+ break;
+ }
+ case MVT::v8i16: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+ break;
+ }
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+ Node->getOperand(0), ShAmt);
+
+ // In case of 1 bit sext, no need to shr
+ if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+ if (SRAIntrinsicsID) {
+ Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
+ }
+ return Tmp1;
+ }
+
+ return SDValue();
+}
+
+
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
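+    // Without SSE2 there is no mfence instruction, so use a locked OR of zero
+    // into the word at the top of the stack as a full barrier instead.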
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
+ EVT T = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned Reg = 0;
+ unsigned size = 0;
+ switch(T.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: Reg = X86::AL; size = 1; break;
+ case MVT::i16: Reg = X86::AX; size = 2; break;
+ case MVT::i32: Reg = X86::EAX; size = 4; break;
+ case MVT::i64:
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
+ break;
+ }
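+  // CMPXCHG expects the comparand in the accumulator (AL/AX/EAX/RAX) and
+  // leaves the previous memory value there, which is copied out below as the
+  // node's result.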
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
+ Op.getOperand(2), SDValue());
+ SDValue Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
+ SDValue cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+ rax.getValue(2));
+ SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BITCAST");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT T = Node->getValueType(0);
+ SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment());
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: return LowerShift(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ }
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) const {
+ EVT T = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
+ case ISD::FP_TO_SINT: {
+ std::pair<SDValue,SDValue> Vals =
+ FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ EVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(), false, false, 0));
+ }
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+ rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ EVT T = N->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
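+    // CMPXCHG8B takes the comparand in EDX:EAX and the replacement value in
+    // ECX:EBX, and returns the previous memory value in EDX:EAX; split the two
+    // i64 operands into 32-bit halves and route them accordingly.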
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(0, MVT::i32));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(1, MVT::i32));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
+ cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(0, MVT::i32));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(1, MVT::i32));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
+ cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
+ swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ Ops, 3, T, MMO);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
+ MVT::i32, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
+ MVT::i32, cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ return;
+ }
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
+ case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
+ case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
+ case X86ISD::CMPPD: return "X86ISD::CMPPD";
+ case X86ISD::CMPPS: return "X86ISD::CMPPS";
+ case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
+ case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
+ case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
+ case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
+ case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
+ case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
+ case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
+ case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::INC: return "X86ISD::INC";
+ case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::OR: return "X86ISD::OR";
+ case X86ISD::XOR: return "X86ISD::XOR";
+ case X86ISD::AND: return "X86ISD::AND";
+ case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
+ case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
+ case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
+ case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
+ case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS";
+ case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD";
+ case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
+ case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
+ case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
+ case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
+ case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
+ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
+ case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
+ case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
+ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
+ case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
+ case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
+ }
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
+ return false;
+
+ if (AM.BaseGV) {
+ unsigned GVFlags =
+ Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+
+ // If a reference to this global requires an extra load, we can't fold it.
+ if (isGlobalStubReference(GVFlags))
+ return false;
+
+ // If BaseGV requires a register for the PIC base, we cannot also have a
+ // BaseReg specified.
+ if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
+ return false;
+
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ return false;
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
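+    // (A scale of 3, 5 or 9 is encoded as scalereg + {2,4,8}*scalereg, so the
+    // scaled register must also occupy the base-register slot.)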
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());
+
+ // FIXME: pshufb, blends, shifts.
+ return (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isMOVLMask(M, VT) ||
+ isSHUFPMask(M, VT) ||
+ isPSHUFDMask(M, VT) ||
+ isPSHUFHWMask(M, VT) ||
+ isPSHUFLWMask(M, VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
+ isUNPCKLMask(M, VT) ||
+ isUNPCKHMask(M, VT) ||
+ isUNPCKL_v_undef_Mask(M, VT) ||
+ isUNPCKH_v_undef_Mask(M, VT));
+}
+
+bool
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+ // FIXME: This collection of masks seems suspect.
+ if (NumElts == 2)
+ return true;
+ if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ return (isMOVLMask(Mask, VT) ||
+ isCommutedMOVLMask(Mask, VT, true) ||
+ isSHUFPMask(Mask, VT) ||
+ isCommutedSHUFPMask(Mask, VT));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned LoadOpc,
+ unsigned CXchgOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+  //   bnz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+  // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
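+  // The back edge is taken when the compare-exchange fails because another
+  // thread modified the location in the meantime; the load/op/cmpxchg sequence
+  // is retried until it succeeds.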
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ DebugLoc dl = bInstr->getDebugLoc();
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned tt = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
+ }
+ else
+ tt = t1;
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
+ MIB.addReg(tt);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
+ MIB.addReg(EAXreg);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function: 64-bit atomics on a 32-bit host.
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // mov ECX, EBX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // bz newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
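+ // A note on the register pairing (descriptive only): CMPXCHG8B compares
+ // EDX:EAX against the 8-byte memory operand and, on success, stores
+ // ECX:EBX into it, so the current value is staged in EAX (low) / EDX (high)
+ // and the desired new value in EBX (low) / ECX (high) by the copies
+ // emitted below.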
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = bInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
+ "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // We use some of the operands multiple times, so conservatively just
+ // clear any kill flags that might be present.
+ if (argOpers[i]->isReg() && argOpers[i]->isUse())
+ argOpers[i]->setIsKill(false);
+ }
+
+ // x86 address has 5 operands: base, index, scale, displacement, and segment.
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
+ // add 4 to displacement.
+ for (int i=0; i <= lastAddrIndx-2; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = *(argOpers[3]);
+ if (newOp3.isImm())
+ newOp3.setImm(newOp3.getImm()+4);
+ else
+ newOp3.setOffset(newOp3.getOffset()+4);
+ (*MIB).addOperand(newOp3);
+ (*MIB).addOperand(*argOpers[lastAddrIndx]);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ // The subsequent operations should use the destination registers of
+ // the PHI instructions.
+ if (invSrc) {
+ t1 = F->getRegInfo().createVirtualRegister(RC);
+ t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
+ } else {
+ t1 = dest1Oper.getReg();
+ t2 = dest2Oper.getReg();
+ }
+
+ int valArgIndx = lastAddrIndx + 1;
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
+ if (regOpcL != X86::MOV32rr)
+ MIB.addReg(t1);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+ assert(argOpers[valArgIndx + 1]->isReg() ==
+ argOpers[valArgIndx]->isReg());
+ assert(argOpers[valArgIndx + 1]->isImm() ==
+ argOpers[valArgIndx]->isImm());
+ if (argOpers[valArgIndx + 1]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
+ if (regOpcH != X86::MOV32rr)
+ MIB.addReg(t2);
+ (*MIB).addOperand(*argOpers[valArgIndx + 1]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) const {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [min/max.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ //
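+ // As a concrete sketch, ATOMMIN32 passes cmovOpc = CMOVL32rr, so after the
+ // CMP below t3 receives t1 when t1 < t2 (signed) and t2 otherwise, i.e.
+ // the signed minimum, which LCMPXCHG32 then tries to store back.
+ //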
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(mInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = mInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+ // An x86 address has 5 operands: base, index, scale, displacement, and segment.
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+ // Generate the conditional move (cmov).
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, dl, TII->get(cmovOpc), t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
+
+ // Compare and exchange if no other thread has modified the memory location.
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+ assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).setMemRefs(mInstr->memoperands_begin(),
+ mInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ mInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// FIXME: When we get size-specific XMM0 registers, e.g. XMM0_V16I8
+// or XMM0_V32I8 in AVX, all of this code can be replaced with that
+// in the .td file.
+MachineBasicBlock *
+X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned numArgs, bool memArg) const {
+ assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ "Target must have SSE4.2 or AVX features enabled");
+
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned Opc;
+ if (!Subtarget->hasAVX()) {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+ } else {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ }
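+ // (The numArgs distinction mirrors the underlying instructions: the
+ // implicit-length PCMPISTRM forms take two vectors plus an immediate,
+ // while the explicit-length PCMPESTRM forms additionally involve the two
+ // string lengths, which the hardware reads from EAX and EDX; hence the
+ // extra operands and the implicit-register filtering below.)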
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
+ for (unsigned i = 0; i < numArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i+1);
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+ BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // First arg in ECX, the second in EAX.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
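+ // With the six integer registers alone MaxOffset is 48; when fp_offset is
+ // in use it becomes 48 + 8*16 = 176, matching the layout of the X86-64
+ // register save area (48 bytes of GPRs followed by the XMM slots).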
+
+ // Align ArgSize to a multiple of 8.
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ //   |     .
+ //   |        .
+ //   offsetMBB   overflowMBB
+ //   |        .
+ //   |     .
+ //        endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
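+ // Worked example: an 8-byte integer argument has ArgSizeA8 = 8 and
+ // MaxOffset = 48, so this compares gp_offset against 48; values of 48 or
+ // more mean every GPR slot is consumed and the overflow path is taken.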
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
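+ // (Worked example of the rounding above: with Align = 16 and an overflow
+ // address of 0x1008, adding 15 gives 0x1017 and masking with ~15 yields
+ // 0x1010, the next 16-byte boundary.)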
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them;
+ // however, this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ MachineMemOperand *MMO =
+ F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
+ /*Size=*/16, /*Align=*/16);
+ BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MMO);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ const MachineFunction *MF = BB->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ BitVector ReservedRegs = TRI->getReservedRegs(*MF);
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS) continue;
+ copy0MBB->addLiveIn(Reg);
+ sinkMBB->addLiveIn(Reg);
+ }
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ copy0MBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(!Subtarget->isTargetEnvMacho());
+
+ // The lowering is pretty easy: we're just emitting the call to _alloca. The
+ // non-trivial part is the implicit def of ESP.
+
+ if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetCygMing()) {
+ // ___chkstk(Mingw64):
+ // Clobbers R10, R11, RAX and EFLAGS.
+ // Updates RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("___chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::RSP, RegState::Implicit)
+ .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // __chkstk(MSVCRT): does not update stack pointer.
+ // Clobbers R10, R11 and EFLAGS.
+ // FIXME: RAX(allocated size) might be reused and not killed.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("__chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ // RAX holds the offset to be subtracted from RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
+ } else {
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This is pretty easy. We're taking the value that we received from
+ // our load from the relocation, sticking it in either RDI (x86-64)
+ // or EAX and doing an indirect call. The return value will then
+ // be in the normal return register.
+ const X86InstrInfo *TII
+ = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
+ assert(MI->getOperand(3).isGlobal() && "This should be a global");
+
+ if (Subtarget->is64Bit()) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
+ } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(0)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(TII->getGlobalBaseReg(F))
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ assert(!"TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ // The defs of TCRETURNxx64 include Win64's callee-saved registers as a subset.
+ // On AMD64, additional defs should be added before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
+ case X86::TLSCall_32:
+ case X86::TLSCall_64:
+ return EmitLoweredTLSCall(MI, BB);
+ case X86::CMOV_GR8:
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
+ return EmitLoweredSelect(MI, BB);
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM:
+ case X86::FP80_TO_INT16_IN_MEM:
+ case X86::FP80_TO_INT32_IN_MEM:
+ case X86::FP80_TO_INT64_IN_MEM: {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Load the old value of the control word...
+ unsigned OldCW =
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
+ CWFrameIdx);
+
+ // Set the rounding mode to round toward zero (truncation)...
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
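+ // (0xC7F sets the rounding-control field, bits 10-11 of the x87 control
+ // word, to 11b = round toward zero, and leaves all FP exceptions masked.)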
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of the control word to its original value
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+ case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+ case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isReg()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImm())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImm())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobal()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ // String/text processing lowering.
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ return EmitPCMP(MI, BB, 3, false /* memArg */);
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ return EmitPCMP(MI, BB, 3, true /* memArg */);
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ return EmitPCMP(MI, BB, 5, false /* memArg */);
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ return EmitPCMP(MI, BB, 5, true /* memArg */);
+
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB);
+ case X86::MWAIT:
+ return EmitMwait(MI, BB);
+
+ // Atomic Lowering.
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMNAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass, true);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+
+ case X86::ATOMAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
+ X86::OR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMXOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
+ X86::XOR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMNAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass, true);
+ case X86::ATOMMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
+ case X86::ATOMMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
+ case X86::ATOMUMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
+ case X86::ATOMUMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+
+ case X86::ATOMAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
+ X86::OR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMXOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
+ X86::XOR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMNAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass, true);
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ // This group is for 64-bit host.
+ case X86::ATOMAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
+ X86::OR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMXOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
+ X86::XOR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMNAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass, true);
+ case X86::ATOMMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
+ case X86::ATOMMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
+ case X86::ATOMUMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
+ case X86::ATOMUMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
+ case X86::ATOMSWAP6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::MOV32rr, X86::MOV32rr,
+ X86::MOV32ri, X86::MOV32ri,
+ false);
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ // These nodes' second result is a boolean.
+ if (Op.getResNo() == 0)
+ break;
+ // Fallthrough
+ case X86ISD::SETCC:
+ KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ break;
+ }
+}
+
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ const GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
+ return true;
+ }
+ }
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
+}
+
+/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
+/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
+/// if the load addresses are consecutive, non-overlapping, and in the right
+/// order.
+static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ if (VT.getSizeInBits() != 128)
+ return SDValue();
+
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+}
+
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
+static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue InputVector = N->getOperand(0);
+
+ // Only operate on vectors of 4 elements, where the alternative shuffling
+ // gets to be more expensive.
+ if (InputVector.getValueType() != MVT::v4i32)
+ return SDValue();
+
+ // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
+ // single use which is a sign-extend or zero-extend, and all elements are
+ // used.
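+ //
+ // (Illustrative outcome: a v4i32 whose only uses are four extract +
+ // sext/zext pairs is stored once to a stack slot, and each lane is then
+ // re-loaded as a scalar i32 from slot + 4*index.)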
+ SmallVector<SDNode *, 4> Uses;
+ unsigned ExtractedElements = 0;
+ for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
+ UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != InputVector.getResNo())
+ return SDValue();
+
+ SDNode *Extract = *UI;
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ if (Extract->getValueType(0) != MVT::i32)
+ return SDValue();
+ if (!Extract->hasOneUse())
+ return SDValue();
+ if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
+ Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ if (!isa<ConstantSDNode>(Extract->getOperand(1)))
+ return SDValue();
+
+ // Record which element was extracted.
+ ExtractedElements |=
+ 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
+
+ Uses.push_back(Extract);
+ }
+
+ // If not all the elements were used, this may not be worthwhile.
+ if (ExtractedElements != 15)
+ return SDValue();
+
+ // Ok, we've now decided to do the transformation.
+ DebugLoc dl = InputVector.getDebugLoc();
+
+ // Store the value to a temporary stack slot.
+ SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Replace each use (extract) with a load of the appropriate element.
+ for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+ UE = Uses.end(); UI != UE; ++UI) {
+ SDNode *Extract = *UI;
+
+ // Compute the element's address.
+ SDValue Idx = Extract->getOperand(1);
+ unsigned EltSize =
+ InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
+ uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ StackPtr, OffsetVal);
+
+ // Load the scalar.
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Replace the extract with the load.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
+ }
+
+ // The replacement was made in place; don't return anything.
+ return SDValue();
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Cond = N->getOperand(0);
+ // Get the LHS/RHS of the select.
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
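+ // (Background: MINSS/MAXSS return their second operand whenever the
+ // comparison is false, including the NaN and +0.0 vs -0.0 cases, which is
+ // why the NaN and signed-zero checks in the cases below are needed.)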
+ if (Subtarget->hasSSE2() &&
+ (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
+ Cond.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETULE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETULT:
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETULE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+ // If this is efficiently invertible, canonicalize the TrueC/FalseC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ SDValue FalseOp = N->getOperand(0);
+ SDValue TrueOp = N->getOperand(1);
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+ SDValue Cond = N->getOperand(3);
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ switch (Cond.getOpcode()) {
+ default: break;
+ case X86ISD::BSR:
+ case X86ISD::BSF:
+ // If the operand of BSR/BSF is proven to never be zero, then ZF cannot be set.
+ if (DAG.isKnownNeverZero(Cond.getOperand(0)))
+ return (CC == X86::COND_E) ? FalseOp : TrueOp;
+ }
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst. This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
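+
+// Editorial sketch (not part of the original combine): the integer identity
+// behind the pow2/0 case above. With TrueC == 8 and FalseC == 0, the CMOV
+// collapses to a zero-extended SETCC shifted left by log2(8). The helper
+// name below is hypothetical and only restates that equivalence in C++.
+static unsigned SelectPow2Reference(bool Cond) {
+ return (unsigned)Cond << 3; // == Cond ? 8 : 0
+}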
+
+
+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ uint64_t MulAmt = C->getZExtValue();
+ if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+ return SDValue();
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = MulAmt / 9;
+ } else if ((MulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = MulAmt / 5;
+ } else if ((MulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = MulAmt / 3;
+ }
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ DebugLoc DL = N->getDebugLoc();
+
+ if (isPowerOf2_64(MulAmt2) &&
+ !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ // If the second multiplier is pow2, issue it first. We want the multiply by
+ // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+ // is an add.
+ std::swap(MulAmt1, MulAmt2);
+
+ SDValue NewMul;
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, VT));
+
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, VT));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, NewMul, false);
+ }
+ return SDValue();
+}
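+
+// Editorial sketch (illustration only): the factor split PerformMulCombine
+// relies on, extracted as a standalone helper with a hypothetical name. For
+// example, x*45 splits into factors 9 and 5 (two LEAs) and x*40 splits into
+// factors 5 and 8 (an LEA plus a shift by 3).
+static bool DecomposeMulForLEA(uint64_t MulAmt, uint64_t &MulAmt1,
+ uint64_t &MulAmt2) {
+ MulAmt1 = MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) { MulAmt1 = 9; MulAmt2 = MulAmt / 9; }
+ else if ((MulAmt % 5) == 0) { MulAmt1 = 5; MulAmt2 = MulAmt / 5; }
+ else if ((MulAmt % 3) == 0) { MulAmt1 = 3; MulAmt2 = MulAmt / 3; }
+ // Both factors must be cheap: a power of two or one of 3, 5, 9.
+ return MulAmt2 != 0 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9);
+}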
+
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zero's or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
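+
+// Editorial sketch: why the (shl (and setcc_c, c1), c2) fold above is sound.
+// When v is either 0 or all-ones (which SETCC_CARRY guarantees), shifting the
+// masked value is the same as masking with the pre-shifted constant, i.e.
+// (v & c1) << c2 == v & (c1 << c2). The helper name is hypothetical.
+static uint32_t ShlOfMaskedAllOnes(uint32_t v /* 0 or ~0u */, uint32_t c1,
+ uint32_t c2) {
+ return v & (c1 << c2); // equals (v & c1) << c2 for v in {0, ~0u}
+}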
+
+/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
+/// when possible.
+static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
+ // On X86 with SSE2 support, we can transform this to a vector shift if
+ // all elements are shifted by the same amount. We can't do this in legalize
+ // because a constant vector is typically transformed to a constant pool,
+ // so we have no knowledge of the shift amount.
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+
+ SDValue ShAmtOp = N->getOperand(1);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+ SDValue BaseShAmt = SDValue();
+ if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != BaseShAmt) {
+ return SDValue();
+ }
+ }
+ } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
+ SDValue InVec = ShAmtOp.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
+ } else
+ return SDValue();
+
+ // The shift amount is an i32.
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
+
+ // The shift amount is identical so we can do a vector shift.
+ SDValue ValOp = N->getOperand(0);
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ break;
+ case ISD::SHL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRA:
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ }
+ return SDValue();
+}
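+
+// Editorial sketch: the semantics this combine targets, as a plain C++
+// reference with a hypothetical name. The DAG-level pattern is a vector
+// shift whose amount operand is a splat; when every lane is shifted by the
+// same amount, the whole operation maps onto one immediate-form SSE2 shift
+// (e.g. pslld for v4i32) instead of per-lane work.
+static void ShiftAllLanesLeft(uint32_t (&V)[4], unsigned Amt) {
+ for (unsigned i = 0; i != 4; ++i)
+ V[i] <<= Amt; // same splatted amount in every lane
+}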
+
+
+// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
+// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
+// and friends. Likewise for OR -> CMPNEQSS.
+static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ unsigned opcode;
+
+ // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
+ // we're requiring SSE2 for both.
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CMP0 = N0->getOperand(1);
+ SDValue CMP1 = N1->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // The SETCCs should both refer to the same CMP.
+ if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ return SDValue();
+
+ SDValue CMP00 = CMP0->getOperand(0);
+ SDValue CMP01 = CMP0->getOperand(1);
+ EVT VT = CMP00.getValueType();
+
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ bool ExpectingFlags = false;
+ // Check for any users that want flags:
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end();
+ !ExpectingFlags && UI != UE; ++UI)
+ switch (UI->getOpcode()) {
+ default:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::SELECT:
+ ExpectingFlags = true;
+ break;
+ case ISD::CopyToReg:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ break;
+ }
+
+ if (!ExpectingFlags) {
+ enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+
+ if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
+ X86::CondCode tmp = cc0;
+ cc0 = cc1;
+ cc1 = tmp;
+ }
+
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ X86ISD::NodeType NTOperator = is64BitFP ?
+ X86ISD::FSETCCsd : X86ISD::FSETCCss;
+ // FIXME: need symbolic constants for these magic numbers.
+ // See X86ATTInstPrinter.cpp:printSSECC().
+ unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ DAG.getConstant(x86cc, MVT::i8));
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
+ DAG.getConstant(1, MVT::i32));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ return OneBitOfTruth;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
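+
+// Editorial sketch: the C-level pattern CMPEQCombine is aimed at. An ordered
+// double equality is typically lowered to one UCOMISD feeding two SETCCs
+// (COND_E and COND_NP) that are ANDed together; the combine rewrites that
+// into a single CMPEQSD followed by AND 1. Hypothetical reference helper:
+static bool OrderedEqualReference(double A, double B) {
+ return A == B; // typically ucomisd + sete + setnp + and before the combine
+}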
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and the ANDNP.
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ unsigned Bits = VT.getSizeInBits();
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
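+
+// Editorial sketch: the scalar identity behind the SHLD fold above, valid
+// for shift amounts with 0 < C < 64 (the pattern requires the two amounts
+// to sum to the bit width). The helper name is hypothetical.
+static uint64_t Shld64Reference(uint64_t X, uint64_t Y, unsigned C) {
+ return (X << C) | (Y >> (64 - C)); // == SHLD64 X, Y, C for 0 < C < 64
+}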
+
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
+
+ // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT VT = St->getValue().getValueType();
+ if (VT.getSizeInBits() != 64)
+ return SDValue();
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
+ if ((VT.isVector() ||
+ (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().getNode();
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDValue, 8> Ops;
+ SDNode* ChainVal = St->getChain().getNode();
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).getNode() == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+
+ if (!Ld || !ISD::isNormalLoad(Ld))
+ return SDValue();
+
+ // If this is not the MMX case, i.e. we are just turning i64 load/store
+ // into f64 load/store, avoid the transformation if there are multiple
+ // uses of the loaded value.
+ if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ return SDValue();
+
+ DebugLoc LdDL = Ld->getDebugLoc();
+ DebugLoc StDL = N->getDebugLoc();
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+ // pair instead.
+ if (Subtarget->is64Bit() || F64IsLegal) {
+ EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->getAlignment());
+ SDValue NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ }
+
+ // Otherwise, lower to two pairs of 32-bit loads / stores.
+ SDValue LoAddr = Ld->getBasePtr();
+ SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+ SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDValue NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+ St->getPointerInfo().getWithOffset(4),
+ St->isVolatile(),
+ St->isNonTemporal(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ }
+ return SDValue();
+}
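+
+// Editorial sketch: the kind of source the i64 case above improves on 32-bit
+// x86 with SSE2. A plain 64-bit copy is a load feeding a store; lowering it
+// through an f64 keeps it to one load/store pair instead of two 32-bit GPR
+// pairs. The helper name is hypothetical.
+static void Copy64Reference(uint64_t *Dst, const uint64_t *Src) {
+ *Dst = *Src; // i64 load -> store; combined into an f64/SSE load + store
+}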
+
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes.
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x
+ // F[X]OR(x, 0.0) -> x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ return SDValue();
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0
+ // FAND(x, 0.0) -> 0.0
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ return SDValue();
+}
+
+static SDValue PerformBTCombine(SDNode *N,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // BT ignores high bits in the bit index operand.
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.hasOneUse()) {
+ unsigned BitWidth = Op1.getValueSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return SDValue();
+}
+
+static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ EVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
+ VT.getVectorElementType().getSizeInBits() ==
+ OpVT.getVectorElementType().getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ }
+ return SDValue();
+}
+
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+ // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
+ // (and (i32 x86isd::setcc_carry), 1)
+ // This eliminates the zext. This transformation is necessary because
+ // ISD::SETCC is always legalized to i8.
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() == ISD::AND &&
+ N0.hasOneUse() &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() != X86ISD::SETCC_CARRY)
+ return SDValue();
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
+
+ return SDValue();
+}
+
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned X86CC = N->getConstantOperandVal(0);
+ SDValue EFLAG = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+ // a zext and produces an all-ones bit which is more useful than 0/1 in some
+ // cases.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(1, MVT::i8));
+
+ return SDValue();
+}
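+
+// Editorial sketch: what the SETCC_CARRY materialization above computes.
+// "sbb reg,reg" yields 0 or ~0 from the carry flag, and the trailing AND 1
+// recovers the same 0/1 value setb would have produced, without needing a
+// later zext. The helper name is hypothetical.
+static unsigned SetccCarryReference(bool CarryFlag) {
+ unsigned AllOnesOrZero = CarryFlag ? ~0u : 0u; // sbb reg,reg
+ return AllOnesOrZero & 1; // == (unsigned)CarryFlag
+}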
+
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
+ SDValue Op0 = N->getOperand(0);
+ // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
+ // a 32-bit target where SSE doesn't support i64->FP operations.
+ if (Op0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
+ EVT VT = Ld->getValueType(0);
+ if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
+ !XTLI->getSubtarget()->is64Bit() &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+ Ld->getChain(), Op0, DAG);
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
+ return FILDChain;
+ }
+ }
+ return SDValue();
+}
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+ // We don't have a good way to replace an EFLAGS use, so only do this
+ // when the flag result is dead right now.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
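+
+// Editorial sketch: the scalar semantics the adc/sbb folds above preserve.
+// CMP X, 1 sets CF exactly when X == 0, so "sbb -1, Y" computes
+// Y + 1 - CF == Y + (X != 0) and "adc 0, Y" computes Y + CF == Y + (X == 0).
+// The reference helpers below are hypothetical names.
+static uint64_t AddSetneReference(uint64_t X, uint64_t Y) {
+ return Y + (X != 0 ? 1 : 0); // matches the sbb -1, Y form
+}
+static uint64_t AddSeteReference(uint64_t X, uint64_t Y) {
+ return Y + (X == 0 ? 1 : 0); // matches the adc 0, Y form
+}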
+
+SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::ADD:
+ case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
+ case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::SHUFPS: // Handle all target specific shuffles
+ case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
+ }
+
+ return SDValue();
+}
+
+/// isTypeDesirableForOp - Return true if the target has native support for
+/// the specified value type and it is 'desirable' to use the type for the
+/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+/// instruction encodings are longer and some i16 instructions are slow.
+bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+ if (!isTypeLegal(VT))
+ return false;
+ if (VT != MVT::i16)
+ return true;
+
+ switch (Opc) {
+ default:
+ return true;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
+}
+
+/// IsDesirableToPromoteOp - This method queries the target whether it is
+/// beneficial for dag combiner to promote the specified node. If true, it
+/// should return the desired promotion type by reference.
+bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+ EVT VT = Op.getValueType();
+ if (VT != MVT::i16)
+ return false;
+
+ bool Promote = false;
+ bool Commute = false;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If the non-extending load has a single use and it's not live out, then it
+ // might be folded.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
+ Op.hasOneUse()*/) {
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ // The only case where we'd want to promote LOAD (rather than having it
+ // promoted as an operand) is when its only use is a liveout.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ }
+ }
+ Promote = true;
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Promote = true;
+ break;
+ case ISD::SHL:
+ case ISD::SRL: {
+ SDValue N0 = Op.getOperand(0);
+ // Look out for (store (shl (load), x)).
+ if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
+ return false;
+ Promote = true;
+ break;
+ }
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ Commute = true;
+ // fallthrough
+ case ISD::SUB: {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (!Commute && MayFoldLoad(N1))
+ return false;
+ // Avoid disabling potential load folding opportunities.
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
+ return false;
+ if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
+ return false;
+ Promote = true;
+ }
+ }
+
+ PVT = MVT::i32;
+ return Promote;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType()->isIntegerTy(16) &&
+ AsmPieces.size() == 3 &&
+ (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ break;
+ case 3:
+ if (CI->getType()->isIntegerTy(32) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
+ Words[2] == "$0") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
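+
+// Editorial sketch: an inline-asm form that the single-statement "bswap $0"
+// match above is meant to accept, assuming the front end lowers the "+r"
+// constraint to the tied "=r,0" string and the %0 placeholder to $0.
+// Illustrative only; the helper name is hypothetical.
+static unsigned Bswap32ViaAsm(unsigned X) {
+ asm("bswap %0" : "+r"(X)); // reaches the matcher as asm "bswap $0", "=r,0"
+ return X; // ExpandInlineAsm rewrites the call to llvm.bswap.i32
+}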
+
+
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'f':
+ case 't':
+ case 'u':
+ case 'y':
+ case 'x':
+ case 'Y':
+ case 'l':
+ return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
+ case 'e':
+ case 'Z':
+ return C_Other;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *X86TargetLowering::
+LowerXConstraint(EVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
+ if (ConstraintVT.isFloatingPoint()) {
+ if (Subtarget->hasXMMInt())
+ return "Y";
+ if (Subtarget->hasXMM())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'I':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 31) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'J':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'K':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'N':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 255) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'e': {
+ // 32-bit signed value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ break;
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ }
+ return;
+ }
+ case 'Z': {
+ // 32-bit unsigned value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ return;
+ }
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ break;
+ }
+
+ // In any sort of PIC mode addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup. These can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ return;
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = 0;
+ int64_t Offset = 0;
+
+ // Match either (GA), (GA+C), (GA+C1+C2), etc.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
+ Offset += GA->getOffset();
+ break;
+ } else if (Op.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += -C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ }
+
+ // Otherwise, this isn't something we can handle, reject it.
+ return;
+ }
+
+ const GlobalValue *GV = GA->getGlobal();
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+ getTargetMachine())))
+ return;
+
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ // TODO: Slight differences here in allocation order and leaving
+ // RIP in the class. Do they matter any more here than they do
+ // in the normal allocation?
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ else if (VT == MVT::i64 || VT == MVT::f64)
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ break;
+ }
+ // 32-bit fallthrough
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
+ break;
+ case 'r': // GENERAL_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'R': // LEGACY_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
+ return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP32RegisterClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP64RegisterClass);
+ return std::make_pair(0U, X86::RFP80RegisterClass);
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, X86::VR64RegisterClass);
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasXMMInt()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed
+ if (!Subtarget->hasXMM()) break;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, X86::FR32RegisterClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, X86::FR64RegisterClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, X86::VR128RegisterClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // Map st(0) .. st(7) to the ST0 .. ST7 registers.
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' &&
+ tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' &&
+ Constraint[6] == '}') {
+
+ Res.first = X86::ST0+Constraint[4]-'0';
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
+ if (StringRef("{st}").equals_lower(Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // flags -> EFLAGS
+ if (StringRef("{flags}").equals_lower(Constraint)) {
+ Res.first = X86::EFLAGS;
+ Res.second = X86::CCRRegisterClass;
+ return Res;
+ }
+
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = X86::GR32_ADRegisterClass;
+ return Res;
+ }
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
+ if (Res.second == X86::GR16RegisterClass) {
+ if (VT == MVT::i8) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR8RegisterClass;
+ }
+ } else if (VT == MVT::i32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR32RegisterClass;
+ }
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR64RegisterClass;
+ }
+ }
+ } else if (Res.second == X86::FR32RegisterClass ||
+ Res.second == X86::FR64RegisterClass ||
+ Res.second == X86::VR128RegisterClass) {
+ // Handle references to XMM physical registers that got mapped into the
+ // wrong class. This can happen with constraints like {xmm0} where the
+ // target independent register mapper will just pick the first match it can
+ // find, ignoring the required type.
+ if (VT == MVT::f32)
+ Res.second = X86::FR32RegisterClass;
+ else if (VT == MVT::f64)
+ Res.second = X86::FR64RegisterClass;
+ else if (X86::VR128RegisterClass->hasType(VT))
+ Res.second = X86::VR128RegisterClass;
+ }
+
+ return Res;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 000000000000..b6036782b865
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,955 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86Subtarget.h"
+#include "X86RegisterInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// BSF - Bit scan forward.
+ /// BSR - Bit scan reverse.
+ BSF,
+ BSR,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+ /// FSRL - Bitwise logical right shift of floating point values. This
+ /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// CALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these nodes are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ CALL,
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, COMI, UCOMI,
+
+ /// X86 bit-test instructions.
+ BT,
+
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
+ SETCC,
+
+ // Same as SETCC except it's materialized with an sbb and the value is all
+ // ones or all zeros.
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
+
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCCss, FSETCCsd,
+
+ /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
+ /// result in an integer GPR. Needs masking for scalar result.
+ FGETSIGNx86,
+
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction. It also writes a
+ /// flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
+ MOVQ2DQ,
+
+ /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector. If you think this is too close to the previous
+ /// mnemonic, so do I; blame Intel.
+ MOVDQ2Q,
+
+ /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// INSERTPS - Insert any element of a 4 x float vector into any element
+    /// of a destination 4 x float vector.
+ INSERTPS,
+
+ /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW, MMX_PINSRW,
+
+ /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
+ /// PBLENDVB - Variable blend
+ PBLENDVB,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // TLSADDR - Thread Local Storage.
+ TLSADDR,
+
+    // TLSCALL - Thread Local Storage. A call to an OS-provided thunk at
+    // the address from an earlier relocation.
+ TLSCALL,
+
+ // EH_RETURN - Exception Handling helpers.
+ EH_RETURN,
+
+ /// TC_RETURN - Tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL,
+
+    // CMPPD, CMPPS - Vector double/float comparison.
+ CMPPD, CMPPS,
+
+ // PCMP* - Vector integer comparisons.
+ PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
+ PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
+ INC, DEC, OR, XOR, AND,
+
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+
+ // MUL_IMM - X86 specific multiply by immediate.
+ MUL_IMM,
+
+ // PTEST - Vector bitwise comparisons
+ PTEST,
+
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ PSHUFHW_LD,
+ PSHUFLW_LD,
+ SHUFPD,
+ SHUFPS,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVSHDUP_LD,
+ MOVSLDUP_LD,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVHLPD,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKLPS,
+ UNPCKLPD,
+ VUNPCKLPS,
+ VUNPCKLPD,
+ VUNPCKLPSY,
+ VUNPCKLPDY,
+ UNPCKHPS,
+ UNPCKHPD,
+ PUNPCKLBW,
+ PUNPCKLWD,
+ PUNPCKLDQ,
+ PUNPCKLQDQ,
+ PUNPCKHBW,
+ PUNPCKHWD,
+ PUNPCKHDQ,
+ PUNPCKHQDQ,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+    // FNSTCW16m - Store FP control word into i16 memory.
+ FNSTCW16m,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+    /// also produces a flag.
+ FILD,
+ FILD_FLAG,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// VAARG_64 - This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64
+
+    // WARNING: Do not add anything at the end unless you want the node to
+    // have a memop! In fact, starting from ATOMADD64_DAG all opcodes will
+    // be treated as target memory ops!
+ };
+ }
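As a rough illustration of how these target opcodes get used during lowering, here is a condensed sketch; it is not the code this patch adds, and the real logic in X86ISelLowering.cpp handles many more cases:

  // Sketch: materialize "LHS == RHS" as an i8 0/1 value using X86ISD::CMP and
  // X86ISD::SETCC, following the operand layout documented above.
  // X86::COND_E is the condition-code enumerator from X86InstrInfo.h.
  SDValue LowerEqSketch(SDValue LHS, SDValue RHS, DebugLoc dl,
                        SelectionDAG &DAG) {
    SDValue Flags = DAG.getNode(X86ISD::CMP, dl, MVT::i32, LHS, RHS);
    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                       DAG.getConstant(X86::COND_E, MVT::i8), Flags);
  }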
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFDMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+ bool isPSHUFHWMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+ bool isPSHUFLWMask(ShuffleVectorSDNode *N);
+
+ /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+ bool isSHUFPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+ bool isMOVHLPSMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+ /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+ /// <2, 3, 2, 3>
+ bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
+ bool isMOVLPMask(ShuffleVectorSDNode *N);
+
+  /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for MOVHP{S|D}
+  /// as well as MOVLHPS.
+ bool isMOVLHPSMask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+ /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+ /// <0, 0, 1, 1>
+ bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+ /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+ /// <2, 2, 3, 3>
+ bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+ bool isMOVDDUPMask(ShuffleVectorSDNode *N);
+
+ /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PALIGNR.
+ bool isPALIGNRMask(ShuffleVectorSDNode *N);
+
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
+ /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+ /// instructions.
+ unsigned getShuffleSHUFImmediate(SDNode *N);
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+ /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+ /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
+ unsigned getShufflePALIGNRImmediate(SDNode *N);
+
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
+
+  /// isOffsetSuitableForCodeModel - Returns true if the given offset can
+  /// fit into the displacement field of the instruction.
+ bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement = true);
+
+
+ /// isCalleePop - Determines whether the callee is required to pop its
+ /// own arguments. Callee pop is necessary to support tail calls.
+ bool isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt);
+ }
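These predicates are typically queried while lowering VECTOR_SHUFFLE nodes; a condensed example of the usual pattern (the surrounding call site and the Op value are assumed, not taken from this patch):

  // Sketch: pick a PSHUFD-style lowering when the shuffle mask qualifies.
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op.getNode());
  if (X86::isPSHUFDMask(SVOp)) {
    unsigned Imm = X86::getShuffleSHUFImmediate(SVOp);
    // ... build an X86ISD::PSHUFD node with Imm as its immediate operand ...
  }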
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ public:
+ explicit X86TargetLowering(X86TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ virtual const MCExpr *
+ LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned uid,
+ MCContext &Ctx) const;
+
+    /// getPICJumpTableRelocBase - Returns the relocation base for the given
+    /// PIC jumptable.
+ virtual SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+ virtual const MCExpr *
+ getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI, MCContext &Ctx) const;
+
+ /// getStackPtrReg - Return the stack pointer register we are using: either
+ /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+    /// function arguments in the caller parameter area. For X86, aggregates
+    /// that contain SSE vectors are placed at 16-byte boundaries while the
+    /// rest are at 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+    /// lowering. If DstAlign is zero, the destination alignment can satisfy
+    /// any constraint. Similarly, if SrcAlign is zero there is no need to
+    /// check it against an alignment requirement, probably because the
+    /// source does not need to be loaded. If
+ /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// non-scalar-integer type, e.g. empty string source, constant, or loaded
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses of the specified type.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+ return true;
+ }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
+
+    /// IsDesirableToPromoteOp - Return true if it is profitable for the DAG
+    /// combiner to promote the specified node, returning the desired
+    /// promotion type in PVT. e.g. On x86 i16 is legal, but promoting i16
+    /// operations to i32 is often desirable since i16 instruction encodings
+    /// are longer and some i16 instructions are slow.
+ virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+ // operation that are sign bits.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+    /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+    /// true it means one of the asm constraints of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a
+ /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in
+ /// unknown ways, such as incoming arguments, or copies from unknown
+ /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+ /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+ /// all instructions that define 32-bit values implicit zero-extend the
+ /// result out to 64 bits.
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can
+    /// use this to indicate if there is a suitable VECTOR_SHUFFLE that can
+    /// be used to replace a VAND with a constant
+ /// pool entry.
+ virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+ /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// seek to shrink the FP constant of the specified type to a smaller type
+ /// in order to save space and / or reduce runtime.
+ virtual bool ShouldShrinkFPConstant(EVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !X86ScalarSSEf64 || VT == MVT::f80;
+ }
+
+ const X86Subtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
+             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
+ }
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ /// getStackCookieLocation - Return true if the target stores stack
+ /// protector cookies at a fixed offset in some non-standard address
+ /// space, and populates the address space and offset as
+ /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG) const;
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const X86RegisterInfo *RegInfo;
+ const TargetData *TD;
+
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
+
+ /// LegalFPImmediates - A list of legal fp immediates.
+ std::vector<APFloat> LegalFPImmediates;
+
+ /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// select the specified FP immediate natively.
+ void addLegalFPImmediate(const APFloat& Imm) {
+ LegalFPImmediates.push_back(Imm);
+ }
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ // Call lowering helpers.
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
+ SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
+ SDValue Chain, bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const;
+
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG &DAG) const;
+
+ std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool isSigned) const;
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE
+ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ virtual EVT
+ getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned NewOp) const;
+
+ /// Utility function to emit string processing sse4.2 instructions
+ /// that return in xmm0.
+ /// This takes the instruction to expand, the associated machine basic
+    /// block, the number of args, and whether or not the second arg is
+    /// in memory.
+ MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
+ unsigned argNum, bool inMem) const;
+
+ /// Utility functions to emit monitor and mwait instructions. These
+ /// need to make sure that the arguments to the intrinsic are in the
+ /// correct registers.
+ MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ /// Utility function to emit atomic bitwise operations (and, or, xor).
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+ MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned loadOpc,
+ unsigned cxchgOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc = false) const;
+
+ MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc = false) const;
+
+ /// Utility function to emit atomic min and max. It takes the min/max
+ /// instruction to expand, the associated basic block, and the associated
+ /// cmov opcode for moving the min or max value.
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned cmovOpc) const;
+
+ // Utility function to emit the low-level va_arg code for X86-64.
+ MachineBasicBlock *EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ /// Emit nodes that will be selected as "test Op0,Op0", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
+
+ /// Emit nodes that will be selected as "cmp Op0,Op1", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const;
+ };
+
+ namespace X86 {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // X86ISELLOWERING_H
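To give a feel for a few of the hooks declared above, here is a small sketch of how client code might query them; the expected results follow from the inline definitions in this header, and the wrapper function is assumed for illustration:

  void QueryHooksSketch(const X86TargetLowering &TLI) {
    // X86 always reports unaligned accesses as allowed.
    bool Unaligned = TLI.allowsUnalignedMemoryAccesses(MVT::v4f32);
    // f80 constants are always worth shrinking, since fldt is very slow.
    bool ShrinkF80 = TLI.ShouldShrinkFPConstant(MVT::f80);
    // f32 is kept in SSE registers whenever SSE1 is enabled.
    bool ScalarSSE = TLI.isScalarFPTypeInSSEReg(MVT::f32);
    (void)Unaligned; (void)ShrinkF80; (void)ScalarSSE;
  }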
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 000000000000..dd4f6a5a85a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,102 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
+}
+
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+}
+
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
+
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
+}
+
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+ "prefetch $addr", []>;
+
+// FIXME: Disassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+ "prefetchw $addr", []>;
+
+// "3DNowA" instructions
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
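For reference, the int_x86_3dnow_* intrinsics matched above are what the C-level 3DNow! intrinsics lower to. A small example, assuming clang's mm3dnow.h header and compilation with -m3dnow (neither is part of this change):

  #include <mm3dnow.h>

  // _m_pfadd compiles to the int_x86_3dnow_pfadd intrinsic, which the
  // PFADDrr/PFADDrm patterns defined above then select.
  __m64 add_packed_floats(__m64 a, __m64 b) {
    return _m_pfadd(a, b);
  }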
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 000000000000..9f7a4b06dc6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1125 @@
+//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins i32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions.
+//
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
+ // RAX,RDX = RAX*GR64
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+ "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+ "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+ "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+ "idiv{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+ "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
+ (implicit EFLAGS)]>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src1)),
+ (implicit EFLAGS)]>;
+} // Constraints = "$src1 = $dst"
+
+def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst" in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src1))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst"
+
+let CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // CodeSize = 2
+
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This collects the relevant X86 information about a value
+/// type. For example, it can tell you which register class and preferred
+/// load node to use for a given type.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ Operand imm8operand, SDPatternOperator imm8operator,
+ bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+  /// since the immediate field of i64 instructions is a 32-bit sign-extended
+ /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+  /// i64i32imm, since the immediate field of i64 instructions is a 32-bit
+  /// sign-extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+  /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+  /// opposed to even) opcode. Operations on i8 are usually even; operations on
+ /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// HasOpSizePrefix - This bit is set to true if the instruction should have
+ /// the 0x66 operand size prefix. This is set for i16 types.
+ bit HasOpSizePrefix = hasOpSizePrefix;
+
+ /// HasREX_WPrefix - This bit is set to true if the instruction should have
+  /// the REX.W prefix (a REX prefix byte with the W bit set). This is set
+  /// for i64 types.
+ bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+ Imm8 , i8imm , imm, i8imm , invalid_node,
+ 0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+ Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ 1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+ Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ 1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+ Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ 1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have the REX.W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+///    or 1 (for i16, i32, and i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+ let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+}
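+
+// Illustrative note (not part of the original file): with typeinfo = Xi16 and
+// opcode 0x00, an ITy instance ends up with encoded opcode 0x01 (the low bit
+// is replaced by HasOddOpcode), an asm string carrying the "{w}" suffix, and
+// the 0x66 operand-size prefix; with Xi64 the REX.W prefix is inferred
+// instead.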
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, reg", where the pattern has
+// just EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f = MRMDestReg>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))]>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo,
+ (outs typeinfo.RegClass:$dst),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ list<dag> pattern>
+ : ITy<opcode, MRMDestMem, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ : ITy<opcode, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src))],
+ opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern>
+ : ITy<0x82, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg>
+ : ITy<opcode, RawFrm, typeinfo,
+ (outs), (ins typeinfo.ImmOperand:$src),
+ mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
+ areg.AsmName, ", $src}"), []> {
+ let ImmT = typeinfo.ImmEncoding;
+ let Uses = [areg];
+ let Defs = [areg];
+}
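+
+// Illustrative note (not part of the original file): the multiclasses below
+// instantiate BinOpAI as, e.g., BinOpAI<0x04, "add", Xi32, EAX>; ITy then
+// forces the low opcode bit for i32, giving opcode 0x05, i.e. the short
+// "add{l} $src, %eax" form that implicitly uses and defines EAX.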
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnodeflag, SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...". It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+ X86and_flag, and, 1, 0>;
+defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+ X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+ X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+ X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+ X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
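+
+// Illustrative note (not part of the original file): tblgen substitutes each
+// defm name above for #NAME#, so "defm ADD" produces ADD8rr..ADD64rr, the
+// _REV and rm forms, ADD16ri8/ADD32ri8/ADD64ri8, ADD8ri/ADD16ri/ADD32ri/
+// ADD64ri32, the mr/mi memory forms, and the accumulator forms
+// ADD8i8..ADD64i32; CMP gets the same names via ArithBinOp_F, but with
+// EFLAGS-only patterns.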
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar to AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+let Defs = [EFLAGS] in {
+ let isCommutable = 1 in {
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ } // isCommutable
+
+ def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+ def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+ def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+ def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+ def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+ def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+ def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+ def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+ def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 000000000000..0245e5c09644
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,184 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The add* functions below may be used with the BuildMI function to add entire
+// memory references in a single, typed function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier-to-use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP, with Disp adjusted accordingly. The displacement may
+/// also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ int Disp;
+ const GlobalValue *GV;
+ unsigned GVOpFlags;
+
+ X86AddressMode()
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
+ Base.Reg = 0;
+ }
+
+
+ void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+ assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+ if (BaseType == X86AddressMode::RegBase)
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ false, false, false, 0, false));
+ else {
+ assert(BaseType == X86AddressMode::FrameIndexBase);
+ MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+ }
+
+ MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+ false, false, false, 0, false));
+
+ if (GV)
+ MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+ else
+ MO.push_back(MachineOperand::CreateImm(Disp));
+
+ MO.push_back(MachineOperand::CreateReg(0, false, false,
+ false, false, false, 0, false));
+ }
+};
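+
+// Hypothetical usage sketch (not part of the original header): build an
+// address mode for [EBP - 8] and append it with addFullAddress (defined
+// below) to a MachineInstrBuilder MIB:
+//   X86AddressMode AM;
+//   AM.Base.Reg = X86::EBP;  // BaseType defaults to RegBase, Scale to 1
+//   AM.Disp = -8;
+//   addFullAddress(MIB, AM);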
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with five
+ // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
+}
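+
+// Hypothetical usage sketch (not part of the original header; MBB, MI, DL and
+// TII are assumed to be in scope): load from the address held in EBX into
+// EAX, i.e. "movl (%ebx), %eax":
+//   addDirectMem(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), X86::EAX),
+//                X86::EBX);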
+
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase);
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ }
+
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ else
+ MIB.addImm(AM.Disp);
+
+ return MIB.addReg(0);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference uses the abstract FrameIndex as its base register until it is
+/// resolved to a real base register and offset. A constant offset can be
+/// specified as well.
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
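+
+// Hypothetical usage sketch (not part of the original header; MBB, MI, DL and
+// TII are assumed to be in scope): reload a value spilled at frame index FI
+// into EAX:
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), X86::EAX),
+//                     FI);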
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the abstract ConstantPoolIndex which is retained until
+/// either machine code emission or assembly output. In PIC mode on x86-32,
+/// the GlobalBaseReg parameter can be used to make this a
+/// GlobalBaseReg-relative reference.
+///
+static inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ unsigned GlobalBaseReg, unsigned char OpFlags) {
+ //FIXME: factor this
+ return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
+ .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 000000000000..3a43b22ddf3d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// CMOV instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1 in {
+ def #NAME#16rr
+ : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ def #NAME#32rr
+ : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ def #NAME#64rr
+ :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ }
+
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ def #NAME#16rm
+ : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ CondNode, EFLAGS))]>, TB, OpSize;
+ def #NAME#32rm
+ : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ def #NAME#64rm
+ :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
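+
+// Illustrative note (not part of the original file): each "defm ... : CMOV<...>"
+// below expands #NAME# into register and memory forms; e.g. defm CMOVE
+// produces CMOVE16rr, CMOVE32rr, CMOVE64rr and CMOVE16rm, CMOVE32rm,
+// CMOVE64rm.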
+
+
+// Conditional Moves.
+defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+ let Uses = [EFLAGS] in {
+ def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ } // Uses = [EFLAGS]
+}
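+
+// Illustrative note (not part of the original file): each "defm ... : SETCC<...>"
+// below yields a register and a memory form; e.g. defm SETB produces SETBr
+// and SETBm (SETBr is the instruction referenced by the setb patterns in
+// X86InstrCompiler.td).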
+
+defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
+defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
+defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
+defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
+defm SETS  : SETCC<0x98, "sets",  X86_COND_S>;  // is sign bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is sign bit not set
+defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
+defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
+defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 000000000000..adcc747eb4b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1675 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 8 bits.
+ return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+//      popl %destreg
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+ (outs GR64:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR64:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4K bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), such as the stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86WinAlloca)]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+ [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize an i64 constant whose top 32 bits are zero. This could
+// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
+// zero-extension; however, that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// this happens, it is great. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
+
+// (add OP, SETB) -> (adc OP, 0)
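+// (X86setcc_c X86_COND_B materializes CF as 0 or all-ones via sbb, so the
+// "and ..., 1" mask yields exactly CF; adding or subtracting that value is
+// therefore an adc or sbb with an immediate of 0, and subtracting the
+// unmasked 0/-1 value likewise folds to adc with 0, as in the patterns
+// below.)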
+def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
+ (ADC64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETB) -> (sbb OP, 0)
+def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETCC_CARRY) -> (adc OP, 0)
+def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC64ri8 GR64:$op, 0)>;
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addr32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_addr64",
+ [(X86tlsaddr tls64addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack; on return, the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+let Constraints = "$src1 = $dst" in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote;
+// however, that requires promoting the operands and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+ "#CMOV_GR32* PSEUDO!",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+ "#CMOV_GR16* PSEUDO!",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ "#CMOV_RFP32 PSEUDO!",
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ "#CMOV_RFP64 PSEUDO!",
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ "#CMOV_RFP80 PSEUDO!",
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
+} // Uses = [EFLAGS]
+
+} // Constraints = "$src1 = $dst"
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+
+
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
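+// A LOCK-prefixed read-modify-write (typically of the top of the stack) can
+// serve as a full memory fence on targets without SSE2's mfence.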
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1 in
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
+
+// RegOpc corresponds to the mr version of the instruction
+// ImmOpc corresponds to the mi version of the instruction
+// ImmOpc8 corresponds to the mi8 version of the instruction
+// ImmMod corresponds to the instruction format of the mi and mi8 versions
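+// For example, for 'add' these are RegOpc = 0x00 (add r/m, r), ImmOpc = 0x80,
+// ImmOpc8 = 0x83, and ImmMod = MRM0m (the /0 opcode extension).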
+multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
+ Format ImmMod, string mnemonic> {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+
+def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, OpSize, LOCK;
+def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+}
+
+}
+
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
+
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ isCodeGenOnly = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+
+let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+
+let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchgq\t$swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
+ "lock\n\t"
+ "xadd\t$val, $ptr",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
+// not in the small code model, should use 'movabs'. FIXME: This is really a
+// hack; the 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with the small code model, we can get the address of a
+// label into a register with 'movl'. FIXME: This is a hack; the 'imm'
+// predicate of MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In the kernel code model, we can get the address of a label into a
+// register with 'movq'. FIXME: This is a hack; the 'imm' predicate of
+// MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// With the small code model and -static mode, it is safe to store global
+// addresses directly as immediates. FIXME: This is really a hack; the 'imm'
+// predicate for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// TLS: thread-pointer-relative (@tpoff) address computations.
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// tailcall stuff
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+ (TCRETURNri GR32_TC:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi addr:$dst, imm:$off)>,
+ Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi64 addr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
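+// (e.g. 'testl %eax, %eax' encodes in 2 bytes, while 'cmpl $0, %eax' needs 3).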
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with a folded load, with the operands swapped and the
+// condition inverted.
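+// CMOV can only fold a load on its source operand, so the operands are
+// swapped and the inverted condition code is used to keep the same semantics.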
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+ Instruction Inst64> {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
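+// For example, an i8 extload feeding a 64-bit use becomes a zero-extending
+// byte load, so no later instruction ever depends on stale upper bits.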
+
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register
+// are defined after an extload: a 32-bit load implicitly zeroes bits 63:32.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+ sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+ (MOVZX32rr8 GR8 :$src), sub_16bit)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+
+// Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Almost any instruction that defines a 32-bit result zero-extends it up to
+// 64 bits. The exceptions that def32 must exclude: Truncate can be lowered to
+// EXTRACT_SUBREG, CopyFromReg may be copying from a truncate, and x86's cmov
+// doesn't write its destination if the condition is false. Any other 32-bit
+// operation is known to zero bits 63:32.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
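+// For example, (zext (i32 (add ...))) needs no separate zero-extend: the
+// 32-bit add already cleared bits 63:32, so SUBREG_TO_REG is sufficient.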
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
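+// For example, an 'or' with an immediate whose bits are known clear in the
+// other operand can be selected as ADD32ri8_DB; as an add it may later be
+// folded into an LEA, and otherwise it is emitted as a plain 'or'.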
+
+// Treat an 'or' node as an 'add' node if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting to OR
+
+let isConvertibleToThreeAddress = 1,
+ Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "", // orw/addw REG, REG
+ [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "", // orl/addl REG, REG
+ [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "", // orq/addq REG, REG
+ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order-specific: we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "", // orw/addw REG, imm8
+ [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "", // orw/addw REG, imm
+ [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "", // orl/addl REG, imm8
+ [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "", // orl/addl REG, imm
+ [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "", // orq/addq REG, imm8
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "", // orq/addq REG, imm
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
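+// For example, 'addl $128, %eax' needs a 32-bit immediate, while
+// 'subl $-128, %eax' fits the sign-extended 8-bit immediate form.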
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit
+// 'and' with implicit zero-extension instead of a 64-bit 'and' if the
+// immediate has at least 32 bits of leading zeros. If, in addition, the low
+// 32 bits can be represented with a sign extension of an 8-bit constant, use
+// that form.
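+// For example, a 64-bit 'and' with 0x00000000ffff0000 cannot use a
+// sign-extended 32-bit immediate, but 'andl $0xffff0000, %eax' gives the same
+// result because the 32-bit operation zeroes bits 63:32.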
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri8
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo8XForm imm:$imm))),
+ sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+ (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (i8
+ (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+ (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (MOVSX32rr8
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+ (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks (x86-64).
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
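+// (With a REX prefix the AH/BH/CH/DH encodings select SPL/BPL/SIL/DIL instead,
+// which is why the _NOREX instruction forms and the GR*_ABCD register classes
+// are used below.)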
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
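+// (In 64-bit mode the one-byte 0x40-0x4F inc/dec encodings are reused as REX
+// prefixes, so the INC64_*/DEC64_* forms use the longer ModRM encoding.)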
+def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 000000000000..c228a0aed59c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,304 @@
+//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag timm:$amt)]>;
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "retw\t$amt",
+ []>, OpSize;
+ def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ "lretl", []>;
+ def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ "lretq", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
+ def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lretw\t$amt", []>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", []>;
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp{q}\t$dst", []>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ }
+}
+
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcxz/jecxz/jrcxz instructions.
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+  // These are the 32-bit versions of this instruction for the asmparser. In
+  // 32-bit mode, the form with the address-size prefix is jcxz and the
+  // unprefixed form is jecxz.
+ let Uses = [CX] in
+ def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ let Uses = [ECX] in
+ def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+  // J*CXZ instructions: 64-bit versions for the asmparser. In 64-bit mode,
+  // the form with the address-size prefix is jecxz and the unprefixed form
+  // is jrcxz.
+ let Uses = [ECX] in
+ def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ let Uses = [RCX] in
+ def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions (32-bit)...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ Requires<[In32BitMode]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ Requires<[In32BitMode]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
+
+ // callw for 16-bit code, for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
+ }
+
+
+// Tail call pseudo instructions and tail jumps.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ // FIXME: These should be pseudo instructions that are lowered when going to
+ // MCInst.
+ def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL",
+ []>;
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ "", []>; // FIXME: Remove encoding when JIT is dead.
+ let mayLoad = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+ "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Call Instructions (64-bit)...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[In64BitMode, NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
+ }
+
+ // FIXME: We need to teach codegen about a single list of call-clobbered
+ // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs),
+ (ins i64mem:$dst,variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>,
+ Requires<[IsWin64]>;
+ }
+
+let isCall = 1, isCodeGenOnly = 1 in
+ // __chkstk(MSVC): clobbers R10, R11 and EFLAGS.
+ // ___chkstk(Mingw64): clobbers R10, R11, RAX and EFLAGS, and updates RSP.
+ let Defs = [RAX, R10, R11, RSP, EFLAGS],
+ Uses = [RSP] in {
+ def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ }
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>;
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 000000000000..2e1d523ea16d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,151 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+
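+// Illustrative example of why GR32_NOREX matters: "movzbl %ah, %ecx" is legal,
+// but "movzbl %ah, %r8d" is not, because %ah is only addressable without a REX
+// prefix while %r8d requires one. Restricting the destination class keeps the
+// register allocator from producing the unencodable form.
+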
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension; however, this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
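+
+// A rough sketch of the Pat-pattern form the FIXME above alludes to, assuming
+// the sub_32bit sub-register index from X86RegisterInfo.td; shown here only as
+// commented-out TableGen:
+//   def : Pat<(i64 (zext GR8:$src)),
+//             (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
+//   def : Pat<(i64 (zext GR32:$src)),
+//             (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;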
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 000000000000..d868773d2d69
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,60 @@
+//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Fused Multiply-Add
+ defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+
+ // Fused Negative Multiply-Add
+ defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
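+
+// As a rough guide to the nested multiclass expansion above: the line
+// "defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">" produces
+// register and memory forms named roughly VFMADDPSr132r, VFMADDPSr132m and the
+// 256-bit VFMADDPSr132rY / VFMADDPSr132mY (plus the 213/231 variants), with
+// mnemonics vfmadd132ps, vfmadd213ps and vfmadd231ps.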
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 000000000000..7cb870fabd62
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,648 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+ SDTCisVT<1, f80>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
+ [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
+ [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
+ [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+}
+
+// All FP Stack operations are represented with four instructions here. The
+// first three instructions, generated by the instruction selector, use "RFP32"
+// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
+// copied to each other without losing information. These instructions are all
+// pseudo instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of different
+// register sizes.
+// The fourth instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 80 bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
+
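+// For instance, the fadd node is selected to the pseudos ADD_Fp32, ADD_Fp64
+// and ADD_Fp80 (defined via FPBinary_rr below), and the FP stackifier later
+// rewrites them to real FPI forms such as ADD_FST0r, ADD_FrST0 or ADD_F32m.
+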
+// Pseudo Instruction for FP stack return values.
+def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+ [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst),
+ (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
+def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">;
+def COMP_FST0r : FPST0rInst <0xD8, "fcomp\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+
+// Versions of FP instructions that take a single memory operand. Added for the
+// disassembler; to be removed once they are covered by patterns elsewhere.
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
+
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">;
+
+def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
+def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
+
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
+
+def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
+def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
+
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
+
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+
+// Floating point cmovs.
+class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
+class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
+
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+ CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc, EFLAGS))]>;
+ def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+ CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc, EFLAGS))]>;
+ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ CondMovFP,
+ [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+ cc, EFLAGS))]>,
+ Requires<[HasCMov]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
+
+let Predicates = [HasCMov] in {
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+} // Predicates = [HasCMov]
+
+// Floating point loads & stores.
+let canFoldAsLoad = 1 in {
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+let isReMaterializable = 1 in
+ def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (loadf80 addr:$src))]>;
+}
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
+def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
+def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref32 RFP80:$src, addr:$op)]>;
+def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref64 RFP80:$src, addr:$op)]>;
+// FST does not support 80-bit memory target; FSTP must be used.
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
+ [(store RFP80:$src, addr:$op)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+
+let mayLoad = 1 in {
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+}
+let mayStore = 1 in {
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
+}
+
+// FISTTP requires SSE3 even though it's a FPStack op.
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+
+let mayStore = 1 in {
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
+ "fisttp{ll}\t$dst">;
+}
+
+// FP Stack manipulation instructions.
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm0)]>;
+def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm1)]>;
+}
+
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
+
+
+// Floating point compares.
+let Defs = [EFLAGS] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+
+// CC = ST(0) cmp ST(i)
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+}
+
+let Defs = [EFLAGS], Uses = [ST0] in {
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucom\t$reg">, DD;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomp\t$reg">, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (outs), (ins),
+ "fucompp">, DA;
+
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucomi\t$reg">, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucompi\t$reg">, DF;
+}
+
+def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcomi\t$reg">, DB;
+def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcompi\t$reg">, DF;
+
+// Floating point flag ops.
+let Defs = [AX] in
+def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw %ax", []>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
+ (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+ [(X86fp_cwd_get16 addr:$dst)]>;
+
+let mayLoad = 1 in
+def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
+def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
+ "ffree\t$reg">, DD;
+
+// Clear exceptions
+
+def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
+
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+
+def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
+def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
+def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", []>, D9;
+def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", []>, D9;
+def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", []>, D9;
+def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", []>, D9;
+def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", []>, D9;
+def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", []>, D9;
+def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", []>, D9;
+def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", []>, D9;
+def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", []>, D9;
+def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", []>, D9;
+def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", []>, D9;
+def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", []>, D9;
+def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", []>, D9;
+def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", []>, D9;
+def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", []>, D9;
+def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", []>, D9;
+def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", []>, D9;
+def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", []>, D9;
+def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
+
+def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALL which return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
+ RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
+ RFP80:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to convert i64 to f64 since there isn't an SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+ Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+ Requires<[FPStackf64]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 000000000000..6d89bcc29e7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,535 @@
+//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+def MRM_C1 : Format<33>;
+def MRM_C2 : Format<34>;
+def MRM_C3 : Format<35>;
+def MRM_C4 : Format<36>;
+def MRM_C8 : Format<37>;
+def MRM_C9 : Format<38>;
+def MRM_E8 : Format<39>;
+def MRM_F0 : Format<40>;
+def MRM_F8 : Format<41>;
+def MRM_F9 : Format<42>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm8PCRel : ImmType<2>;
+def Imm16 : ImmType<3>;
+def Imm16PCRel : ImmType<4>;
+def Imm32 : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64 : ImmType<7>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class SegFS { bits<2> SegOvrBits = 1; }
+class SegGS { bits<2> SegOvrBits = 2; }
+class TB { bits<5> Prefix = 1; }
+class REP { bits<5> Prefix = 2; }
+class D8 { bits<5> Prefix = 3; }
+class D9 { bits<5> Prefix = 4; }
+class DA { bits<5> Prefix = 5; }
+class DB { bits<5> Prefix = 6; }
+class DC { bits<5> Prefix = 7; }
+class DD { bits<5> Prefix = 8; }
+class DE { bits<5> Prefix = 9; }
+class DF { bits<5> Prefix = 10; }
+class XD { bits<5> Prefix = 11; }
+class XS { bits<5> Prefix = 12; }
+class T8 { bits<5> Prefix = 13; }
+class TA { bits<5> Prefix = 14; }
+class A6 { bits<5> Prefix = 15; }
+class A7 { bits<5> Prefix = 16; }
+class TF { bits<5> Prefix = 17; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+ string AsmStr, Domain d = GenericDomain>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ string AsmString = AsmStr;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<5> Prefix = 0; // Which prefix byte does this inst have?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
+ FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
+ bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
+ bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
+ // to be encoded in an immediate field?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit has3DNow0F0FOpcode = 0; // Wacky 3DNow! encoding?
+
+ // TSFlags layout should be kept in sync with X86InstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+ let TSFlags{6} = hasOpSizePrefix;
+ let TSFlags{7} = hasAdSizePrefix;
+ let TSFlags{12-8} = Prefix;
+ let TSFlags{13} = hasREX_WPrefix;
+ let TSFlags{16-14} = ImmT.Value;
+ let TSFlags{19-17} = FPForm.Value;
+ let TSFlags{20} = hasLockPrefix;
+ let TSFlags{22-21} = SegOvrBits;
+ let TSFlags{24-23} = ExeDomain.Value;
+ let TSFlags{32-25} = Opcode;
+ let TSFlags{33} = hasVEXPrefix;
+ let TSFlags{34} = hasVEX_WPrefix;
+ let TSFlags{35} = hasVEX_4VPrefix;
+ let TSFlags{36} = hasVEX_i8ImmReg;
+ let TSFlags{37} = hasVEX_L;
+ let TSFlags{38} = has3DNow0F0FOpcode;
+}
+
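+// As an illustration of the packing above: an instruction tagged with the TB
+// and OpSize prefix classes ends up with TSFlags{6} = 1, TSFlags{12-8} = 1
+// (the TB prefix value) and its opcode byte in TSFlags{32-25}; the ad-hoc
+// machine code emitter reads these fields back when encoding.
+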
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+ : I<o, F, outs, ins, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+ let FPForm = fp;
+ let Pattern = pattern;
+}
+
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
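+
+// For illustration (hypothetical instantiation, not defined in this file): an
+// SI-derived scalar op carrying the XS prefix and no VEX bit resolves the !if
+// chain above to Predicates = [HasSSE1]; any other prefix yields [HasSSE2];
+// and hasVEXPrefix = 1 yields [HasAVX] together with a "v"-prefixed AsmString.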
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+}
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with TB prefix in AVX form.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
+ Requires<[HasAVX]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+// VSDI - SSE2 instructions with XD prefix in AVX form.
+// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
+ OpSize, Requires<[HasAVX]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
+// uses the MMX registers. We put those instructions here because they fit
+// better into the SSSE3 instruction category than into the MMX category.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
+
+// SS42FI - SSE 4.2 instructions with TF prefix.
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
+
+// SS42AI - SSE 4.2 instructions with TA prefix.
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
+
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefixes.
+// AVXAIi8 - AVX instructions with TA and OpSize prefixes and ImmT == Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
+// AES Instruction Templates:
+//
+// AES8I - AES instructions with T8 prefix.
+// AESAI - AES instructions with TA prefix and ImmT == Imm8.
+// These use the same encodings as the SSE4.2 T8 and TA templates.
+class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasAES]>;
+
+class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasAES]>;
+
+// CLMUL Instruction Templates
+class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, Requires<[HasCLMUL]>;
+
+class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ OpSize, VEX_4V, Requires<[HasFMA3]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXRI - MMX instructions with TB prefix and REX.W.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
new file mode 100644
index 000000000000..b00109c9fa4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -0,0 +1,467 @@
+//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides pattern fragments useful for SIMD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86", SDTFPToIntOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
+def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86andnp : SDNode<"X86ISD::ANDNP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pblendv : SDNode<"X86ISD::PBLENDVB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
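+
+// Note on the MIOperandInfo above: the five sub-operands mirror the standard
+// five-part x86 memory reference used throughout this backend -- base
+// register, scale immediate, index register, displacement, and segment --
+// and the same five-part shape is what the '5' in the sse_load_f32/f64
+// ComplexPatterns above refers to.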
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+// 128-bit load pattern fragments
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// 256-bit load pattern fragments
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+ (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+ (f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+ (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+ (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+ (v2i64 (alignedload node:$ptr))>;
+
+// 256-bit aligned load pattern fragments
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
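+
+// Usage sketch (hypothetical pattern, shown only as a comment): a packed
+// instruction can fold a memory source through one of the typed fragments
+// defined below, e.g. something of the form
+//   (v4f32 (X86fmin VR128:$src1, (memopv4f32 addr:$src2)))
+// which matches either a 16-byte-aligned load or, on subtargets reporting
+// hasVectorUAMem(), an unaligned one.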
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions, and those memory operands
+// aren't required to be aligned on a 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
+
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
+// 128-bit bitconvert pattern fragments
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
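+
+// Worked example: a bit index of 19 becomes the byte immediate 2 (19 >> 3),
+// i.e. the addressed bit lives in the third byte of the operand.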
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
+// a PALIGNR imm.
+def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePALIGNRImmediate(N));
+}]>;
+
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
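+
+// Worked example: for a v8f32 vector, element index 4 names the upper 128-bit
+// half (each 128-bit lane holds four f32 elements), so both xforms above turn
+// that index into the immediate 1, while index 0 becomes immediate 0.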
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+def palign : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 000000000000..55b5835f52a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,3240 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include <limits>
+
+#define GET_INSTRINFO_CTOR
+#include "X86GenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32)),
+ TM(tm), RI(tm, *this) {
+ enum {
+ TB_NOT_REVERSABLE = 1U << 31,
+ TB_FLAGS = TB_NOT_REVERSABLE
+ };
+
+ static const unsigned OpTbl2Addr[][2] = {
+ { X86::ADC32ri, X86::ADC32mi },
+ { X86::ADC32ri8, X86::ADC32mi8 },
+ { X86::ADC32rr, X86::ADC32mr },
+ { X86::ADC64ri32, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64mi8 },
+ { X86::ADC64rr, X86::ADC64mr },
+ { X86::ADD16ri, X86::ADD16mi },
+ { X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
+ { X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
+ { X86::ADD32ri, X86::ADD32mi },
+ { X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
+ { X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
+ { X86::ADD64ri32, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
+ { X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
+ { X86::ADD8ri, X86::ADD8mi },
+ { X86::ADD8rr, X86::ADD8mr },
+ { X86::AND16ri, X86::AND16mi },
+ { X86::AND16ri8, X86::AND16mi8 },
+ { X86::AND16rr, X86::AND16mr },
+ { X86::AND32ri, X86::AND32mi },
+ { X86::AND32ri8, X86::AND32mi8 },
+ { X86::AND32rr, X86::AND32mr },
+ { X86::AND64ri32, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64mi8 },
+ { X86::AND64rr, X86::AND64mr },
+ { X86::AND8ri, X86::AND8mi },
+ { X86::AND8rr, X86::AND8mr },
+ { X86::DEC16r, X86::DEC16m },
+ { X86::DEC32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC64_16m },
+ { X86::DEC64_32r, X86::DEC64_32m },
+ { X86::DEC64r, X86::DEC64m },
+ { X86::DEC8r, X86::DEC8m },
+ { X86::INC16r, X86::INC16m },
+ { X86::INC32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC64_16m },
+ { X86::INC64_32r, X86::INC64_32m },
+ { X86::INC64r, X86::INC64m },
+ { X86::INC8r, X86::INC8m },
+ { X86::NEG16r, X86::NEG16m },
+ { X86::NEG32r, X86::NEG32m },
+ { X86::NEG64r, X86::NEG64m },
+ { X86::NEG8r, X86::NEG8m },
+ { X86::NOT16r, X86::NOT16m },
+ { X86::NOT32r, X86::NOT32m },
+ { X86::NOT64r, X86::NOT64m },
+ { X86::NOT8r, X86::NOT8m },
+ { X86::OR16ri, X86::OR16mi },
+ { X86::OR16ri8, X86::OR16mi8 },
+ { X86::OR16rr, X86::OR16mr },
+ { X86::OR32ri, X86::OR32mi },
+ { X86::OR32ri8, X86::OR32mi8 },
+ { X86::OR32rr, X86::OR32mr },
+ { X86::OR64ri32, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64mi8 },
+ { X86::OR64rr, X86::OR64mr },
+ { X86::OR8ri, X86::OR8mi },
+ { X86::OR8rr, X86::OR8mr },
+ { X86::ROL16r1, X86::ROL16m1 },
+ { X86::ROL16rCL, X86::ROL16mCL },
+ { X86::ROL16ri, X86::ROL16mi },
+ { X86::ROL32r1, X86::ROL32m1 },
+ { X86::ROL32rCL, X86::ROL32mCL },
+ { X86::ROL32ri, X86::ROL32mi },
+ { X86::ROL64r1, X86::ROL64m1 },
+ { X86::ROL64rCL, X86::ROL64mCL },
+ { X86::ROL64ri, X86::ROL64mi },
+ { X86::ROL8r1, X86::ROL8m1 },
+ { X86::ROL8rCL, X86::ROL8mCL },
+ { X86::ROL8ri, X86::ROL8mi },
+ { X86::ROR16r1, X86::ROR16m1 },
+ { X86::ROR16rCL, X86::ROR16mCL },
+ { X86::ROR16ri, X86::ROR16mi },
+ { X86::ROR32r1, X86::ROR32m1 },
+ { X86::ROR32rCL, X86::ROR32mCL },
+ { X86::ROR32ri, X86::ROR32mi },
+ { X86::ROR64r1, X86::ROR64m1 },
+ { X86::ROR64rCL, X86::ROR64mCL },
+ { X86::ROR64ri, X86::ROR64mi },
+ { X86::ROR8r1, X86::ROR8m1 },
+ { X86::ROR8rCL, X86::ROR8mCL },
+ { X86::ROR8ri, X86::ROR8mi },
+ { X86::SAR16r1, X86::SAR16m1 },
+ { X86::SAR16rCL, X86::SAR16mCL },
+ { X86::SAR16ri, X86::SAR16mi },
+ { X86::SAR32r1, X86::SAR32m1 },
+ { X86::SAR32rCL, X86::SAR32mCL },
+ { X86::SAR32ri, X86::SAR32mi },
+ { X86::SAR64r1, X86::SAR64m1 },
+ { X86::SAR64rCL, X86::SAR64mCL },
+ { X86::SAR64ri, X86::SAR64mi },
+ { X86::SAR8r1, X86::SAR8m1 },
+ { X86::SAR8rCL, X86::SAR8mCL },
+ { X86::SAR8ri, X86::SAR8mi },
+ { X86::SBB32ri, X86::SBB32mi },
+ { X86::SBB32ri8, X86::SBB32mi8 },
+ { X86::SBB32rr, X86::SBB32mr },
+ { X86::SBB64ri32, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64mi8 },
+ { X86::SBB64rr, X86::SBB64mr },
+ { X86::SHL16rCL, X86::SHL16mCL },
+ { X86::SHL16ri, X86::SHL16mi },
+ { X86::SHL32rCL, X86::SHL32mCL },
+ { X86::SHL32ri, X86::SHL32mi },
+ { X86::SHL64rCL, X86::SHL64mCL },
+ { X86::SHL64ri, X86::SHL64mi },
+ { X86::SHL8rCL, X86::SHL8mCL },
+ { X86::SHL8ri, X86::SHL8mi },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL },
+ { X86::SHLD16rri8, X86::SHLD16mri8 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL },
+ { X86::SHLD32rri8, X86::SHLD32mri8 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL },
+ { X86::SHLD64rri8, X86::SHLD64mri8 },
+ { X86::SHR16r1, X86::SHR16m1 },
+ { X86::SHR16rCL, X86::SHR16mCL },
+ { X86::SHR16ri, X86::SHR16mi },
+ { X86::SHR32r1, X86::SHR32m1 },
+ { X86::SHR32rCL, X86::SHR32mCL },
+ { X86::SHR32ri, X86::SHR32mi },
+ { X86::SHR64r1, X86::SHR64m1 },
+ { X86::SHR64rCL, X86::SHR64mCL },
+ { X86::SHR64ri, X86::SHR64mi },
+ { X86::SHR8r1, X86::SHR8m1 },
+ { X86::SHR8rCL, X86::SHR8mCL },
+ { X86::SHR8ri, X86::SHR8mi },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL },
+ { X86::SHRD16rri8, X86::SHRD16mri8 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL },
+ { X86::SHRD32rri8, X86::SHRD32mri8 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL },
+ { X86::SHRD64rri8, X86::SHRD64mri8 },
+ { X86::SUB16ri, X86::SUB16mi },
+ { X86::SUB16ri8, X86::SUB16mi8 },
+ { X86::SUB16rr, X86::SUB16mr },
+ { X86::SUB32ri, X86::SUB32mi },
+ { X86::SUB32ri8, X86::SUB32mi8 },
+ { X86::SUB32rr, X86::SUB32mr },
+ { X86::SUB64ri32, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64mi8 },
+ { X86::SUB64rr, X86::SUB64mr },
+ { X86::SUB8ri, X86::SUB8mi },
+ { X86::SUB8rr, X86::SUB8mr },
+ { X86::XOR16ri, X86::XOR16mi },
+ { X86::XOR16ri8, X86::XOR16mi8 },
+ { X86::XOR16rr, X86::XOR16mr },
+ { X86::XOR32ri, X86::XOR32mi },
+ { X86::XOR32ri8, X86::XOR32mi8 },
+ { X86::XOR32rr, X86::XOR32mr },
+ { X86::XOR64ri32, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64mi8 },
+ { X86::XOR64rr, X86::XOR64mr },
+ { X86::XOR8ri, X86::XOR8mi },
+ { X86::XOR8rr, X86::XOR8mr }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+ unsigned RegOp = OpTbl2Addr[i][0];
+ unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+ assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 0, folded load and store, no alignment requirement.
+ unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
+
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
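+
+ // Illustrative decode of AuxInfo (consumer-side sketch; the variable names
+ // here are for exposition only):
+ //   unsigned OpNum   =  AuxInfo & 0xf;      // operand index being folded
+ //   bool FoldedLoad  = (AuxInfo >> 4) & 1;  // bit 4: a load was folded
+ //   bool FoldedStore = (AuxInfo >> 5) & 1;  // bit 5: a store was folded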
+
+ // The third value is 1 when the folded memory operand is a load and 0 when
+ // it is a store; the fourth value is the required memory-operand alignment.
+ static const unsigned OpTbl0[][4] = {
+ { X86::BT16ri8, X86::BT16mi8, 1, 0 },
+ { X86::BT32ri8, X86::BT32mi8, 1, 0 },
+ { X86::BT64ri8, X86::BT64mi8, 1, 0 },
+ { X86::CALL32r, X86::CALL32m, 1, 0 },
+ { X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
+ { X86::CMP16ri, X86::CMP16mi, 1, 0 },
+ { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
+ { X86::CMP16rr, X86::CMP16mr, 1, 0 },
+ { X86::CMP32ri, X86::CMP32mi, 1, 0 },
+ { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
+ { X86::CMP32rr, X86::CMP32mr, 1, 0 },
+ { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
+ { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
+ { X86::CMP64rr, X86::CMP64mr, 1, 0 },
+ { X86::CMP8ri, X86::CMP8mi, 1, 0 },
+ { X86::CMP8rr, X86::CMP8mr, 1, 0 },
+ { X86::DIV16r, X86::DIV16m, 1, 0 },
+ { X86::DIV32r, X86::DIV32m, 1, 0 },
+ { X86::DIV64r, X86::DIV64m, 1, 0 },
+ { X86::DIV8r, X86::DIV8m, 1, 0 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::IDIV16r, X86::IDIV16m, 1, 0 },
+ { X86::IDIV32r, X86::IDIV32m, 1, 0 },
+ { X86::IDIV64r, X86::IDIV64m, 1, 0 },
+ { X86::IDIV8r, X86::IDIV8m, 1, 0 },
+ { X86::IMUL16r, X86::IMUL16m, 1, 0 },
+ { X86::IMUL32r, X86::IMUL32m, 1, 0 },
+ { X86::IMUL64r, X86::IMUL64m, 1, 0 },
+ { X86::IMUL8r, X86::IMUL8m, 1, 0 },
+ { X86::JMP32r, X86::JMP32m, 1, 0 },
+ { X86::JMP64r, X86::JMP64m, 1, 0 },
+ { X86::MOV16ri, X86::MOV16mi, 0, 0 },
+ { X86::MOV16rr, X86::MOV16mr, 0, 0 },
+ { X86::MOV32ri, X86::MOV32mi, 0, 0 },
+ { X86::MOV32rr, X86::MOV32mr, 0, 0 },
+ { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
+ { X86::MOV64rr, X86::MOV64mr, 0, 0 },
+ { X86::MOV8ri, X86::MOV8mi, 0, 0 },
+ { X86::MOV8rr, X86::MOV8mr, 0, 0 },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
+ { X86::MUL16r, X86::MUL16m, 1, 0 },
+ { X86::MUL32r, X86::MUL32m, 1, 0 },
+ { X86::MUL64r, X86::MUL64m, 1, 0 },
+ { X86::MUL8r, X86::MUL8m, 1, 0 },
+ { X86::SETAEr, X86::SETAEm, 0, 0 },
+ { X86::SETAr, X86::SETAm, 0, 0 },
+ { X86::SETBEr, X86::SETBEm, 0, 0 },
+ { X86::SETBr, X86::SETBm, 0, 0 },
+ { X86::SETEr, X86::SETEm, 0, 0 },
+ { X86::SETGEr, X86::SETGEm, 0, 0 },
+ { X86::SETGr, X86::SETGm, 0, 0 },
+ { X86::SETLEr, X86::SETLEm, 0, 0 },
+ { X86::SETLr, X86::SETLm, 0, 0 },
+ { X86::SETNEr, X86::SETNEm, 0, 0 },
+ { X86::SETNOr, X86::SETNOm, 0, 0 },
+ { X86::SETNPr, X86::SETNPm, 0, 0 },
+ { X86::SETNSr, X86::SETNSm, 0, 0 },
+ { X86::SETOr, X86::SETOm, 0, 0 },
+ { X86::SETPr, X86::SETPm, 0, 0 },
+ { X86::SETSr, X86::SETSm, 0, 0 },
+ { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
+ { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
+ { X86::TEST16ri, X86::TEST16mi, 1, 0 },
+ { X86::TEST32ri, X86::TEST32mi, 1, 0 },
+ { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
+ { X86::TEST8ri, X86::TEST8mi, 1, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
+ unsigned FoldedLoad = OpTbl0[i][2];
+ unsigned Align = OpTbl0[i][3];
+ assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 0, folded load or store.
+ unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
+
+ static const unsigned OpTbl1[][3] = {
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, 16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, 16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
+ { X86::RCPPSr, X86::RCPPSm, 16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, 16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
+ { X86::SQRTPSr, X86::SQRTPSm, 16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
+ unsigned RegOp = OpTbl1[i][0];
+ unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
+ unsigned Align = OpTbl1[i][2];
+ assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+ RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 1, folded load
+ unsigned AuxInfo = 1 | (1 << 4);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
+
+ static const unsigned OpTbl2[][3] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, 16 },
+ { X86::ADDPSrr, X86::ADDPSrm, 16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
+ { X86::ANDPDrr, X86::ANDPDrm, 16 },
+ { X86::ANDPSrr, X86::ANDPSrm, 16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, 16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, 16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, 16 },
+ { X86::DIVPSrr, X86::DIVPSrm, 16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
+ { X86::FsORPDrr, X86::FsORPDrm, 16 },
+ { X86::FsORPSrr, X86::FsORPSrm, 16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
+ { X86::HADDPDrr, X86::HADDPDrm, 16 },
+ { X86::HADDPSrr, X86::HADDPSrm, 16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, 16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
+ { X86::MAXPSrr, X86::MAXPSrm, 16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, 16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
+ { X86::MINPSrr, X86::MINPSrm, 16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, 16 },
+ { X86::MULPSrr, X86::MULPSrm, 16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, 16 },
+ { X86::ORPSrr, X86::ORPSrm, 16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
+ { X86::PADDBrr, X86::PADDBrm, 16 },
+ { X86::PADDDrr, X86::PADDDrm, 16 },
+ { X86::PADDQrr, X86::PADDQrm, 16 },
+ { X86::PADDSBrr, X86::PADDSBrm, 16 },
+ { X86::PADDSWrr, X86::PADDSWrm, 16 },
+ { X86::PADDWrr, X86::PADDWrm, 16 },
+ { X86::PANDNrr, X86::PANDNrm, 16 },
+ { X86::PANDrr, X86::PANDrm, 16 },
+ { X86::PAVGBrr, X86::PAVGBrm, 16 },
+ { X86::PAVGWrr, X86::PAVGWrm, 16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, 16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
+ { X86::PMINSWrr, X86::PMINSWrm, 16 },
+ { X86::PMINUBrr, X86::PMINUBrm, 16 },
+ { X86::PMULDQrr, X86::PMULDQrm, 16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
+ { X86::PMULHWrr, X86::PMULHWrm, 16 },
+ { X86::PMULLDrr, X86::PMULLDrm, 16 },
+ { X86::PMULLWrr, X86::PMULLWrm, 16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
+ { X86::PORrr, X86::PORrm, 16 },
+ { X86::PSADBWrr, X86::PSADBWrm, 16 },
+ { X86::PSLLDrr, X86::PSLLDrm, 16 },
+ { X86::PSLLQrr, X86::PSLLQrm, 16 },
+ { X86::PSLLWrr, X86::PSLLWrm, 16 },
+ { X86::PSRADrr, X86::PSRADrm, 16 },
+ { X86::PSRAWrr, X86::PSRAWrm, 16 },
+ { X86::PSRLDrr, X86::PSRLDrm, 16 },
+ { X86::PSRLQrr, X86::PSRLQrm, 16 },
+ { X86::PSRLWrr, X86::PSRLWrm, 16 },
+ { X86::PSUBBrr, X86::PSUBBrm, 16 },
+ { X86::PSUBDrr, X86::PSUBDrm, 16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
+ { X86::PSUBWrr, X86::PSUBWrm, 16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
+ { X86::PXORrr, X86::PXORrm, 16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, 16 },
+ { X86::SUBPSrr, X86::SUBPSrm, 16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, 16 },
+ { X86::XORPSrr, X86::XORPSrm, 16 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+ unsigned RegOp = OpTbl2[i][0];
+ unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
+ unsigned Align = OpTbl2[i][2];
+
+ assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+ RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 2, folded load
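+ // (The low bits of AuxInfo hold the index of the folded operand; bit 4
+ // marks that the folded operand is a load.)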
+ unsigned AuxInfo = 2 | (1 << 4);
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
+}
+
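+/// isCoalescableExtInstr - Return true if MI is a register-to-register
+/// sign- or zero-extension whose result can be coalesced with its source,
+/// reporting the source register, the destination register, and the
+/// sub-register index of the source value within the destination.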
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ // It's not always legal to reference the low 8 bits of the larger
+ // register in 32-bit mode.
+ return false;
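+ // Fall through.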
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = X86::sub_8bit;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = X86::sub_16bit;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = X86::sub_32bit;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and following operands form a reference to the stack frame.
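+/// The reference must be a simple frame index: scale 1, no index register,
+/// and zero displacement.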
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MOVDQAmr:
+ case X86::VMOVAPSYmr:
+ case X86::VMOVAPDYmr:
+ case X86::VMOVDQAYmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
+ return MI->getOperand(X86::AddrNumOperands).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// regIsPICBase - Return true if the register is a PIC base register, i.e.
+/// it is defined by X86::MOVPC32r.
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+ }
+ return false;
+ }
+
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+ }
+
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
+/// that would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// a few instructions in each direction it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
+ // It's always safe to clobber EFLAGS at the end of a block.
+ if (I == E)
+ return true;
+
+ // To keep compile time reasonable, if we are not able to determine the
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ bool SeenDef = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
+
+ // If we make it to the end of the block, it's safe to clobber EFLAGS.
+ if (Iter == E)
+ return true;
+ }
+
+ MachineBasicBlock::iterator B = MBB.begin();
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == B)
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
+ return true;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ DebugLoc DL = Orig->getDebugLoc();
+
+ // MOV32r0 etc. are implemented with xor, which clobbers the condition
+ // codes. Re-materialize them as movri instructions to avoid side effects.
+ bool Clone = true;
+ unsigned Opc = Orig->getOpcode();
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
+ }
+ Clone = false;
+ }
+ break;
+ }
+ }
+
+ if (Clone) {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MBB.insert(I, MI);
+ } else {
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+}
+
+/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
+/// is not marked dead.
+static bool hasLiveCondCodeDef(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled; it uses a 32-bit LEA to form 3-address code by
+/// promoting to a 32-bit superregister and then truncating back down to a
+/// 16-bit subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ // This has the potential to cause a partial register stall, e.g.
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028: the two source operands are the same,
+ // so only a single insert_subreg is needed.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ // All instructions handled here are two-addr instructions. Get the known operands.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEAs are really slow on Athlons, but not bad on P4s. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
+ bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ unsigned MIOpc = MI->getOpcode();
+ switch (MIOpc) {
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+ // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ return 0;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+ // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ return 0;
+
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+ // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0).addReg(0);
+ break;
+ }
+ default: {
+ // The following opcodes also set the condition code register(s). Only
+ // convert them to an equivalent LEA if the condition code register defs
+ // are dead!
+ if (hasLiveCondCodeDef(MI))
+ return 0;
+
+ switch (MIOpc) {
+ default: return 0;
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ }
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = X86::GR64_NOSPRegisterClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = X86::GR32_NOSPRegisterClass;
+ }
+
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ }
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ }
+ }
+ }
+
+ if (!NewMI) return 0;
+
+ if (LV) { // Update live variables
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, NewMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, NewMI);
+ }
+
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+}
+
+/// commuteInstruction - A few instructions need special handling in order
+/// to be commuted.
+///
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
+ case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImm();
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMOVB16rr:
+ case X86::CMOVB32rr:
+ case X86::CMOVB64rr:
+ case X86::CMOVAE16rr:
+ case X86::CMOVAE32rr:
+ case X86::CMOVAE64rr:
+ case X86::CMOVE16rr:
+ case X86::CMOVE32rr:
+ case X86::CMOVE64rr:
+ case X86::CMOVNE16rr:
+ case X86::CMOVNE32rr:
+ case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr:
+ case X86::CMOVBE32rr:
+ case X86::CMOVBE64rr:
+ case X86::CMOVA16rr:
+ case X86::CMOVA32rr:
+ case X86::CMOVA64rr:
+ case X86::CMOVL16rr:
+ case X86::CMOVL32rr:
+ case X86::CMOVL64rr:
+ case X86::CMOVGE16rr:
+ case X86::CMOVGE32rr:
+ case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr:
+ case X86::CMOVLE32rr:
+ case X86::CMOVLE64rr:
+ case X86::CMOVG16rr:
+ case X86::CMOVG32rr:
+ case X86::CMOVG64rr:
+ case X86::CMOVS16rr:
+ case X86::CMOVS32rr:
+ case X86::CMOVS64rr:
+ case X86::CMOVNS16rr:
+ case X86::CMOVNS32rr:
+ case X86::CMOVNS64rr:
+ case X86::CMOVP16rr:
+ case X86::CMOVP32rr:
+ case X86::CMOVP64rr:
+ case X86::CMOVNP16rr:
+ case X86::CMOVNP32rr:
+ case X86::CMOVNP64rr:
+ case X86::CMOVO16rr:
+ case X86::CMOVO32rr:
+ case X86::CMOVO64rr:
+ case X86::CMOVNO16rr:
+ case X86::CMOVNO32rr:
+ case X86::CMOVNO64rr: {
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
+ case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
+ case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
+ case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
+ case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
+ case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
+ case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
+ case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
+ case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
+ case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
+ case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
+ case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
+ case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
+ case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
+ case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
+ case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
+ case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
+ case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
+ case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
+ case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
+ case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
+ case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
+ case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
+ case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
+ case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
+ case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
+ case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
+ case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
+ case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
+ case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
+ case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
+ case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
+ case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
+ case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
+ case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
+ case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
+ case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
+ case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
+ case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
+ case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
+ case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
+ case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ // Fallthrough intended.
+ }
+ default:
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+}
+
+static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE_4: return X86::COND_E;
+ case X86::JNE_4: return X86::COND_NE;
+ case X86::JL_4: return X86::COND_L;
+ case X86::JLE_4: return X86::COND_LE;
+ case X86::JG_4: return X86::COND_G;
+ case X86::JGE_4: return X86::COND_GE;
+ case X86::JB_4: return X86::COND_B;
+ case X86::JBE_4: return X86::COND_BE;
+ case X86::JA_4: return X86::COND_A;
+ case X86::JAE_4: return X86::COND_AE;
+ case X86::JS_4: return X86::COND_S;
+ case X86::JNS_4: return X86::COND_NS;
+ case X86::JP_4: return X86::COND_P;
+ case X86::JNP_4: return X86::COND_NP;
+ case X86::JO_4: return X86::COND_O;
+ case X86::JNO_4: return X86::COND_NO;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::JE_4;
+ case X86::COND_NE: return X86::JNE_4;
+ case X86::COND_L: return X86::JL_4;
+ case X86::COND_LE: return X86::JLE_4;
+ case X86::COND_G: return X86::JG_4;
+ case X86::COND_GE: return X86::JGE_4;
+ case X86::COND_B: return X86::JB_4;
+ case X86::COND_BE: return X86::JBE_4;
+ case X86::COND_A: return X86::JA_4;
+ case X86::COND_AE: return X86::JAE_4;
+ case X86::COND_S: return X86::JS_4;
+ case X86::COND_NS: return X86::JNS_4;
+ case X86::COND_P: return X86::JP_4;
+ case X86::COND_NP: return X86::JNP_4;
+ case X86::COND_O: return X86::JO_4;
+ case X86::COND_NO: return X86::JNO_4;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == X86::JMP_4) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+ // If we can modify the code and it ends in something like:
+ //
+ // jCC L1
+ // jmp L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Then we can change this to:
+ //
+ // jnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Which is a bit more efficient.
+ // We conditionally jump to the fall-through block.
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ unsigned JNCC = GetCondBranchFromCond(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ .addMBB(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ // Restart the analysis.
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ // If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
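+ // (The COND_NP_OR_E / COND_NE_OR_P pairs arise from floating-point
+ // equality comparisons, where both ZF and PF have to be checked.)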
+ if ((OldBranchCode == X86::COND_NP &&
+ BranchCode == X86::COND_E) ||
+ (OldBranchCode == X86::COND_E &&
+ BranchCode == X86::COND_NP))
+ BranchCode = X86::COND_NP_OR_E;
+ else if ((OldBranchCode == X86::COND_P &&
+ BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE &&
+ BranchCode == X86::COND_P))
+ BranchCode = X86::COND_NE_OR_P;
+ else
+ return true;
+
+ // Update the MachineOperand.
+ Cond[0].setImm(BranchCode);
+ }
+
+ return false;
+}
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != X86::JMP_4 &&
+ GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+ switch (CC) {
+ case X86::COND_NP_OR_E:
+ // Synthesize NP_OR_E with two branches.
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
+ ++Count;
+ break;
+ case X86::COND_NE_OR_P:
+ // Synthesize NE_OR_P with two branches.
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
+ ++Count;
+ break;
+ default: {
+ unsigned Opc = GetCondBranchFromCond(CC);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ ++Count;
+ }
+ }
+ if (FBB) {
+ // Two-way conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// isHReg - Test if the given register is a physical h register.
+static bool isHReg(unsigned Reg) {
+ return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
+// Try to copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ return 0;
+}
+
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ unsigned Opc = 0;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
+ Opc = X86::MOV8rr;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOVAPSrr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
+ BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ return;
+ } else if (X86::GR32RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
+ BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
+ return;
+ }
+ }
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
+ return;
+ } else if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
+}
+
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ switch (RC->getSize()) {
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 1:
+ assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case 2:
+ assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case 4:
+ if (X86::GR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::FR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ if (X86::RFP32RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ llvm_unreachable("Unknown 4-byte regclass");
+ case 8:
+ if (X86::GR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::FR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ if (X86::VR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+ if (X86::RFP64RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ llvm_unreachable("Unknown 8-byte regclass");
+ case 10:
+ assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case 16:
+ assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
+ // If the stack is realigned we can use aligned loads and stores.
+ if (isStackAligned)
+ return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ else
+ return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case 32:
+ assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ // If the stack is realigned we can use aligned loads and stores.
+ if (isStackAligned)
+ return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+ else
+ return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+ }
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
+
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
+}
+
+void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = FrameIx;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+ addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ // Create the base instruction with the memory operand as the first part.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+
+ // Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = MI->getDesc().getNumOperands()-2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ MIB.addOperand(MO);
+ }
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB.addOperand(MO);
+ }
+ return MIB;
+}
+
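+// FuseInst - Clone MI with the new opcode Opcode, replacing register operand
+// OpNo with the address operands in MOs.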
+static MachineInstr *FuseInst(MachineFunction &MF,
+ unsigned Opcode, unsigned OpNo,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ return MIB;
+}
+
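+// MakeM0Inst - Build a memory form of a "move 0" instruction: the address
+// operands in MOs followed by an immediate 0 operand.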
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
+
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI, unsigned i,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Align) const {
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isTwoAddrFold = false;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
+ MachineInstr *NewMI = NULL;
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it elsewhere. It requires
+ // replacing the *two* registers with the memory location.
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV8r0)
+ NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ if (NewMI)
+ return NewMI;
+
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (i == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (i == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ // If table selected...
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
+ if (I != OpcodeTablePtr->end()) {
+ unsigned Opcode = I->second.first;
+ unsigned MinAlign = I->second.second;
+ if (Align < MinAlign)
+ return NULL;
+ bool NarrowToMOV32rm = false;
+ if (Size) {
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
+ if (Size < RCSize) {
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return NULL;
+ // If this is a 64-bit load, but the spill slot is only 32 bits, then we
+ // can do a 32-bit load which is implicitly zero-extended. This is likely
+ // due to live interval analysis rematerializing a load from a stack slot.
+ if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+ return NULL;
+ Opcode = X86::MOV32rm;
+ NarrowToMOV32rm = true;
+ }
+ }
+
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+ else
+ NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+ if (NarrowToMOV32rm) {
+ // This is the special case where we use a MOV32rm to load a 32-bit
+ // value and zero-extend the top bits; change the destination register
+ // to a 32-bit one.
+ unsigned DstReg = NewMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+ X86::sub_32bit));
+ else
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
+ }
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing && !MI->isCopy())
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
+ return NULL;
+}
+
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ unsigned RCSize = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
+ }
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Size < RCSize)
+ return NULL;
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,4> MOs;
+ MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
+ // Determine the alignment of the load.
+ unsigned Alignment = 0;
+ if (LoadMI->hasOneMemOperand())
+ Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+ else
+ switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
+ case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ Alignment = 16;
+ break;
+ case X86::FsFLD0SD:
+ case X86::VFsFLD0SD:
+ Alignment = 8;
+ break;
+ case X86::FsFLD0SS:
+ case X86::VFsFLD0SS:
+ Alignment = 4;
+ break;
+ default:
+ return 0;
+ }
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
+ case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS: {
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // The medium and large code models can't fold loads this way.
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Kernel)
+ return NULL;
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = X86::RIP;
+ else
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+ }
+
+ // Create a constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ const Type *Ty;
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ Ty = Type::getFloatTy(MF.getFunction()->getContext());
+ else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+ const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+ Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ break;
+ }
+ default: {
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ break;
+ }
+ }
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+}
+
+
+bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // Check switch flag
+ if (NoFusing) return 0;
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
+ }
+ }
+
+ if (Ops.size() != 1)
+ return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it anywhere else. It requires
+ // replacing *both* registers with the memory location.
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: return true;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+}
+
+bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
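+ // The table value packs the register-form opcode in the first element and,
+ // in the second, the memory operand index in the low four bits plus
+ // folded-load (bit 4) and folded-store (bit 5) flags.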
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return false;
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
+ return false;
+ UnfoldStore &= FoldedStore;
+
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToAddr will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
+ SmallVector<MachineOperand,2> BeforeOps;
+ SmallVector<MachineOperand,2> AfterOps;
+ SmallVector<MachineOperand,4> ImpOps;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (i >= Index && i < Index + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (Op.isReg() && Op.isImplicit())
+ ImpOps.push_back(Op);
+ else if (i < Index)
+ BeforeOps.push_back(Op);
+ else if (i > Index)
+ AfterOps.push_back(Op);
+ }
+
+ // Emit the load instruction.
+ if (UnfoldLoad) {
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ if (UnfoldStore) {
+ // Address operands cannot be marked isKill.
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
+ MachineOperand &MO = NewMIs[0]->getOperand(i);
+ if (MO.isReg())
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ // Emit the data processing instruction.
+ MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(DataMI);
+
+ if (FoldedStore)
+ MIB.addReg(Reg, RegState::Define);
+ for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
+ MIB.addOperand(BeforeOps[i]);
+ if (FoldedLoad)
+ MIB.addReg(Reg);
+ for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
+ MIB.addOperand(AfterOps[i]);
+ for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
+ MachineOperand &MO = ImpOps[i];
+ MIB.addReg(MO.getReg(),
+ getDefRegState(MO.isDef()) |
+ RegState::Implicit |
+ getKillRegState(MO.isKill()) |
+ getDeadRegState(MO.isDead()) |
+ getUndefRegState(MO.isUndef()));
+ }
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ unsigned NewOpc = 0;
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri8:
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setDesc(get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
+
+ // Emit the store instruction.
+ if (UnfoldStore) {
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ }
+
+ return true;
+}
+
+bool
+X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ if (!N->isMachineOpcode())
+ return false;
+
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ unsigned NumDefs = MCID.NumDefs;
+ std::vector<SDValue> AddrOps;
+ std::vector<SDValue> BeforeOps;
+ std::vector<SDValue> AfterOps;
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumOps = N->getNumOperands();
+ for (unsigned i = 0; i != NumOps-1; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (i < Index-NumDefs)
+ BeforeOps.push_back(Op);
+ else if (i > Index-NumDefs)
+ AfterOps.push_back(Op);
+ }
+ SDValue Chain = N->getOperand(NumOps-1);
+ AddrOps.push_back(Chain);
+
+ // Emit the load instruction.
+ SDNode *Load = 0;
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (FoldedLoad) {
+ EVT VT = *RC->vt_begin();
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Load);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ // Emit the data processing instruction.
+ std::vector<EVT> VTs;
+ const TargetRegisterClass *DstRC = 0;
+ if (MCID.getNumDefs() > 0) {
+ DstRC = getRegClass(MCID, 0, &RI);
+ VTs.push_back(*DstRC->vt_begin());
+ }
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
+ VTs.push_back(VT);
+ }
+ if (Load)
+ BeforeOps.push_back(SDValue(Load, 0));
+ std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
+ NewNodes.push_back(NewNode);
+
+ // Emit the store instruction.
+ if (FoldedStore) {
+ AddrOps.pop_back();
+ AddrOps.push_back(SDValue(NewNode, 0));
+ AddrOps.push_back(Chain);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Store);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
+ if (I == MemOp2RegOpTable.end())
+ return 0;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return 0;
+ if (UnfoldStore && !FoldedStore)
+ return 0;
+ if (LoadRegIndex)
+ *LoadRegIndex = I->second.second & 0xf;
+ return I->second.first;
+}
+
+bool
+X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const {
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ switch (Opc1) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+ switch (Opc2) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+
+ // Check if chain operands and base addresses match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(5) != Load2->getOperand(5))
+ return false;
+ // Segment operands should match as well.
+ if (Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+ // Scale should be 1, Index should be Reg0.
+ if (Load1->getOperand(1) == Load2->getOperand(1) &&
+ Load1->getOperand(2) == Load2->getOperand(2)) {
+ if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
+ return false;
+
+ // Now let's examine the displacements.
+ if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
+ isa<ConstantSDNode>(Load2->getOperand(3))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
+ return true;
+ }
+ }
+ return false;
+}
+
+bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1);
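+ // Only cluster loads whose displacements differ by roughly 512 bytes or
+ // less (the check below rejects gaps larger than 64 eight-byte chunks).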
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ if (Opc1 != Opc2)
+ return false; // FIXME: overly conservative?
+
+ switch (Opc1) {
+ default: break;
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return false;
+ }
+
+ EVT VT = Load1->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ // XMM registers. In 64-bit mode we can be a bit more aggressive since we
+ // have 16 of them to play with.
+ if (TM.getSubtargetImpl()->is64Bit()) {
+ if (NumLoads >= 3)
+ return false;
+ } else if (NumLoads) {
+ return false;
+ }
+ break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ if (NumLoads)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+
+bool X86InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
+ if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
+ return true;
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+bool X86InstrInfo::
+isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // FIXME: Return false for x87 stack register classes for now. We can't
+ // allow any loads of these registers before FpGet_ST0_80.
+ return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
+ RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+}
+
+
+/// isX86_64ExtendedReg - Is the given register an x86-64 extended (r8 or
+/// higher) register? e.g. r8, xmm8, xmm13, etc.
+bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
+ switch (RegNo) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ return true;
+ }
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ // AVX 256-bit support
+ { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
+ { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
+ { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
+ { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
+ { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
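+ // 0xe == 0b1110: if the opcode has an entry in ReplaceableInstrs it may be
+ // reassigned to any of the three domains (bits 1-3); otherwise no
+ // alternative domains are available.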
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(X86::NOOP);
+}
+
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
+ }
+}
+
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
+ // Insert the instruction that sets GlobalBaseReg into the first MBB of the
+ // function.
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ else
+ PC = GlobalBaseReg;
+
+ // The operand of MovePCtoStack is completely ignored by the asm printer.
+ // It's only used in JIT code emission as a displacement to the pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to the pc, but to the external symbol _GLOBAL_OFFSET_TABLE_.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
+ }
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 000000000000..5f2eba34ac45
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,886 @@
+//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define GET_INSTRINFO_HEADER
+#include "X86GenInstrInfo.inc"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5 operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
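+
+ // For illustration, a minimal sketch of reading a memory reference from a
+ // MachineInstr MI whose first address operand index is MemOp (a
+ // hypothetical index):
+ //   MI->getOperand(MemOp + X86::AddrBaseReg)    // base register
+ //   MI->getOperand(MemOp + X86::AddrScaleAmt)   // scale immediate
+ //   MI->getOperand(MemOp + X86::AddrIndexReg)   // index register
+ //   MI->getOperand(MemOp + X86::AddrDisp)       // displacement
+ //   MI->getOperand(MemOp + X86::AddrSegmentReg) // segment register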
+
+
+ // X86 specific condition codes. These correspond to X86_*_COND in
+ // X86InstrInfo.td. They must be kept in sync.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches to
+ // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs.
+ COND_NE_OR_P,
+ COND_NP_OR_E,
+
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB,
+
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+ /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
+ };
+}
+
+/// isGlobalStubReference - Return true if the specified TargetFlag operand is
+/// a reference to a stub for a global, not the global itself.
+inline static bool isGlobalStubReference(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_DLLIMPORT: // dllimport stub.
+ case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOT: // normal GOT reference.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref.
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isGlobalRelativeToPICBase - Return true if the specified global value
+/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this
+/// is true, the addressing mode has the PIC base register added in (e.g. EBX).
+inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_GOTOFF: // isPICStyleGOT: local global.
+ case X86II::MO_GOT: // isPICStyleGOT: other global.
+ case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ case X86II::MO_TLVP: // ??? Pretty sure..
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ //// MRM_C1 - A mod/rm byte of exactly 0xC1.
+ MRM_C1 = 33,
+ MRM_C2 = 34,
+ MRM_C3 = 35,
+ MRM_C4 = 36,
+ MRM_C8 = 37,
+ MRM_C9 = 38,
+ MRM_E8 = 39,
+ MRM_F0 = 40,
+ MRM_F8 = 41,
+ MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 44,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AdSize - Set if this instruction requires an address size prefix (0x67),
+ // which most often indicates that the instruction uses 16 bit addresses
+ // instead of 32 bit addresses (or 32 bit addresses in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0x1F << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 17 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only care about the REX.W and REX.R bits and only the former is
+ // statically determined.
+ //
+ REXShift = Op0Shift + 5,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 3,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+
+ // Segment override prefixes. Currently we just need ability to address
+ // stuff in gs and fs segments.
+ SegOvrShift = LOCKShift + 1,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
+
+ OpcodeShift = SSEDomainShift + 2,
+
+ //===------------------------------------------------------------------===//
+ /// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
+ VEX = 1U << 0,
+
+ /// VEX_W - Has an opcode specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
+ VEX_W = 1U << 1,
+
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in the VEX_VVVV prefix.
+ VEX_4V = 1U << 2,
+
+ /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
+ VEX_I8IMM = 1U << 3,
+
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+ /// field marked when using f256 memory references.
+ VEX_L = 1U << 4,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+ /// we handle this by storing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 5
+ };
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+ }
+
+ static inline bool hasImm(uint64_t TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
+ /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
+ /// of the specified instruction.
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
+ }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ return -1;
+ }
+ }
+}
+
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImm() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isReg() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isReg() &&
+ (MI->getOperand(Op+3).isImm() ||
+ MI->getOperand(Op+3).isGlobal() ||
+ MI->getOperand(Op+3).isCPI() ||
+ MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+5 <= MI->getNumOperands() &&
+ MI->getOperand(Op+4).isReg() &&
+ isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public X86GenInstrInfo {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+
+ /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+ ///
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
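+ // Each entry maps a register-form opcode to a (memory-form opcode,
+ // minimum operand alignment required to fold) pair.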
+
+ /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ ///
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
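+ // Each entry maps a memory-form opcode to a (register-form opcode, packed
+ // value) pair, where the packed value holds the memory operand index in its
+ // low four bits and folded-load / folded-store flags in bits 4 and 5.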
+
+public:
+ explicit X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasLoadFromStackSlot - If the specified machine instruction has
+ /// a load from a stack slot, return true along with the FrameIndex
+ /// of the loaded stack slot and the machine mem operand containing
+ /// the reference. If not, return false. Unlike
+ /// isLoadFromStackSlot, this returns true for any instruction that
+ /// loads from the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasStoreToStackSlot - If the specified machine instruction has a
+ /// store to a stack slot, return true along with the FrameIndex of
+ /// the stack slot being stored to and the machine mem operand containing
+ /// the reference. If not, return false. Unlike isStoreToStackSlot,
+ /// this returns true for any instruction that stores to the
+ /// stack. This is a hint only and may not catch all cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ virtual
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand(s). If this is possible, the target should perform the
+ /// folding and return the new instruction, otherwise it should return null.
+ /// If it folds the instruction, it is likely that the MachineInstr the
+ /// iterator references has been changed.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ /// foldMemoryOperand - Same as the previous version except it allows folding
+ /// of any load and store from / to any address, not just from a specific
+ /// stack slot.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+ /// canFoldMemoryOperand - Returns true if folding of the specified load /
+ /// store is possible.
+ virtual bool canFoldMemoryOperand(const MachineInstr*,
+ const SmallVectorImpl<unsigned> &) const;
+
+ /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+ /// a store or a load and a store into two or more instructions. If this is
+ /// possible, returns true as well as the new instructions by reference.
+ virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const;
+
+ /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+ /// instruction after the load / store is unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible. If LoadRegIndex is non-null, it is filled in with the index of
+ /// the operand which will hold the register containing the loaded value.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
+ /// to determine if two loads are loading from the same base address. It
+ /// should only return true if the base pointers are the same and the
+ /// only difference between the two addresses is the offset. It also returns
+ /// the offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const;
+
+ /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled together. On some targets, if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
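The two load-clustering hooks above are meant to be used together; a minimal sketch of that pairing follows, assuming the surrounding pre-regalloc scheduler code, with shouldCluster and NumClustered being hypothetical names.

// Hypothetical scheduler fragment (not in-tree code): cluster two load SDNodes
// only when both hooks agree. NumClustered is whatever count of already
// clustered loads the caller tracks.
static bool shouldCluster(const X86InstrInfo *TII, SDNode *Load1,
                          SDNode *Load2, unsigned NumClustered) {
  int64_t Off1 = 0, Off2 = 0;
  return TII->areLoadsFromSameBasePtr(Load1, Load2, Off1, Off2) &&
         TII->shouldScheduleLoadsNear(Load1, Load2, Off1, Off2, NumClustered);
}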
+
+ virtual void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+ /// instruction that defines the specified register class.
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ static bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+
+ static bool isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isReg()) return false;
+ return isX86_64ExtendedReg(MO.getReg());
+ }
+
+ /// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended (r8 or
+ /// higher) register? e.g. r8, xmm8, xmm13, etc.
+ static bool isX86_64ExtendedReg(unsigned RegNo);
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+ /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+ /// and a bitmask of possible arguments to SetSSEDomain as the second.
+ std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+ /// SetSSEDomain - Set the SSEDomain of MI.
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
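A hedged sketch of how the two domain accessors above might be driven; the reading of the second element as a bit-per-domain mask follows the comment above but is an assumption, and trySwitchDomain / Desired are hypothetical names.

// Hypothetical use (not in-tree code): switch MI into execution domain Desired
// when the returned mask indicates an equivalent opcode exists there
// (assuming bit i set means SetSSEDomain(MI, i) is legal).
static void trySwitchDomain(const X86InstrInfo *TII, MachineInstr *MI,
                            unsigned Desired) {
  std::pair<uint16_t, uint16_t> Dom = TII->GetSSEDomain(MI);
  if (Dom.first != Desired && (Dom.second & (1u << Desired)))
    TII->SetSSEDomain(MI, Desired);
}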
+
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
+
+ bool isHighLatencyDef(int opc) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
+private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and the following operands form a reference to the stack frame.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 000000000000..7eb07b0a97bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,1648 @@
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
+
+def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86SetCC_C : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86Void : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain, SDNPVariadic]>;
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
+def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
+def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86MemAsmOperand];
+}
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+let OperandType = "OPERAND_MEMORY" in {
+def opaque32mem : X86MemOperand<"printopaquemem">;
+def opaque48mem : X86MemOperand<"printopaquemem">;
+def opaque80mem : X86MemOperand<"printopaquemem">;
+def opaque512mem : X86MemOperand<"printopaquemem">;
+
+def i8mem : X86MemOperand<"printi8mem">;
+def i16mem : X86MemOperand<"printi16mem">;
+def i32mem : X86MemOperand<"printi32mem">;
+def i64mem : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
+def i256mem : X86MemOperand<"printi256mem">;
+def f32mem : X86MemOperand<"printf32mem">;
+def f64mem : X86MemOperand<"printf64mem">;
+def f80mem : X86MemOperand<"printf80mem">;
+def f128mem : X86MemOperand<"printf128mem">;
+def f256mem : X86MemOperand<"printf256mem">;
+}
+
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i32mem_TC : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+let OperandType = "OPERAND_PCREL",
+ ParserMatchClass = X86AbsMemAsmOperand,
+ PrintMethod = "print_pcrel_imm" in {
+def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
+
+def offset8 : Operand<i64>;
+def offset16 : Operand<i64>;
+def offset32 : Operand<i64>;
+def offset64 : Operand<i64>;
+
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT>;
+def brtarget8 : Operand<OtherVT>;
+
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be -1ULL), and "0xFFFF" (-1 truncated to 16 bits).
+
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
+}
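To make the documented ranges concrete, here is a standalone check equivalent to the ImmSExti16i8 ranges listed above; it is an illustration only (fitsImmSExti16i8 is a hypothetical helper, not how the assembler matcher is actually written).

#include <cstdint>

// Illustrative only: which 64-bit parsed values qualify as ImmSExti16i8,
// per the ranges documented above.
static bool fitsImmSExti16i8(uint64_t Imm) {
  if (Imm <= 0x7fULL)                        // [0, 0x7F]
    return true;
  if (Imm >= 0xff80ULL && Imm <= 0xffffULL)  // negatives already truncated to 16 bits
    return true;
  return Imm >= 0xffffffffffffff80ULL;       // negatives written as 64-bit values
}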
+
+// A couple of more-descriptive operand definitions.
+// 16 bits, but only 8 bits are significant.
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32 bits, but only 8 bits are significant.
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64 bits, but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64 bits, but only 32 bits are significant, and those bits are treated as
+// being PC-relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64 bits, but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ []>;
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasCMov : Predicate<"Subtarget->hasCMov()">;
+def NoCMov : Predicate<"!Subtarget->hasCMov()">;
+
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
+def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<"!Mode64Bit">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<"Mode64Bit">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
+ "TM.getCodeModel() != CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
+def OptForSize : Predicate<"OptForSize">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments.
+//
+
+// X86 specific condition code. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in synch.
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S : PatLeaf<(i8 15)>;
+
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
+
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
+}]>;
+
+// Helper fragments for loads.
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// A 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list.
+//
+
+// Nop
+let neverHasSideEffects = 1 in {
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
+ "nop{w}\t$zero", []>, TB, OpSize;
+ def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
+ "nop{l}\t$zero", []>, TB;
+}
+
+
+// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
+
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+def LEAVE : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
+
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
+ OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let mayStore = 1 in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
+ OpSize;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
+
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", []>, OpSize;
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", []>;
+
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+ Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
+
+
+
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad=1, neverHasSideEffects=1 in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB,
+ OpSize;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB,
+ OpSize;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
+} // Defs = [EFLAGS]
+
+
+// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+}
+
+// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
+let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
+let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
+
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+}
+
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
+
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
+/// absolute 32-bit memory offset (a direct address, not PC-relative). These
+/// are only valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{b}\t{$src, %al|%al, $src}", []>,
+ Requires<[In32BitMode]>;
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
+ "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ Requires<[In32BitMode]>;
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ Requires<[In32BitMode]>;
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ Requires<[In32BitMode]>;
+
+// FIXME: These definitions are utterly broken.
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
+
+let isCodeGenOnly = 1 in {
+def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+}
+
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>;
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
+let neverHasSideEffects = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayStore = 1 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayLoad = 1,
+ canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+}
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
+
+
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
+
+let Defs = [EFLAGS] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Make these instructions disassembly
+// only for now.
+
+def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi16 addr:$src1), GR16:$src2),
+// (implicit EFLAGS)]
+ []
+ >, OpSize, TB, Requires<[FastBTMem]>;
+def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi32 addr:$src1), GR32:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB;
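The register/memory asymmetry noted in the comment above can be sketched in C++; this is an assumption about the ISA behaviour for illustration (btRegForm16 / btMemForm16 are hypothetical helpers, not LLVM code): with a register destination the bit index is reduced modulo the operand width, while with a memory destination the upper bits of the index also select which word is accessed.

#include <cstdint>

// Illustrative sketch only.
static bool btRegForm16(uint16_t Val, int64_t Index) {
  return (Val >> (Index & 15)) & 1;           // index taken modulo 16
}
static bool btMemForm16(const uint16_t *Base, int64_t Index) {
  const uint16_t *Word = Base + (Index >> 4); // upper bits pick the word
  return (*Word >> (Index & 15)) & 1;         // may touch memory far from Base
}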
+
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
+ ]>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
+ ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))]>, TB;
+
+
+def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // Defs = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+
+// Atomic swap. These are just normal xchg instructions, but since a memory
+// operand is referenced, the processor asserts the LOCK signal implicitly and
+// atomicity is ensured.
+let Constraints = "$val = $dst" in {
+def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
+ "xchg{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
+
+def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
+ "xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", []>;
+}
+
+def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
+ "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
+ "xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|%rax, $src}", []>;
+
+
+
+def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
+def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+}
+
+def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
+def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+}
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
+def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
+ "cmpxchg8b\t$dst", []>, TB;
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>, TB;
+
+
+
+// Lock instruction prefix
+def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
+// Repeat string operation instruction prefixes
+// These use the DF flag in the EFLAGS register to inc or dec ECX
+let Defs = [ECX], Uses = [ECX,EFLAGS] in {
+// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
+def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
+// Repeat while not equal (used with CMPS and SCAS)
+def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
+}
+
+
+// String manipulation instructions
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+
+def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
+def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
+def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
+
+
+// Flag instructions
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
+def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
+def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
+def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>;
+def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
+
+def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
+
+// Table lookup instructions
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
+
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AL After Subtraction
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
+
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In32BitMode]>;
+
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Subsystems.
+//===----------------------------------------------------------------------===//
+
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
+
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
+
+// SIMD support (SSE, MMX and AVX)
+include "X86InstrFragmentsSIMD.td"
+
+// FMA - Fused Multiply-Add support (requires FMA)
+include "X86InstrFMA.td"
+
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler Mnemonic Aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwd", "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cdqe", "cltq">;
+
+// lret maps to lretl; it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl">;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However, "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similarly for "pop %bx".
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove">;
+def : MnemonicAlias<"fcmova", "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna", "fcmovbe">;
+def : MnemonicAlias<"fcmovae", "fcmovnb">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fldcww", "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a set of
+/// MnemonicAliases that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
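+// As a concrete reading of the expansion above: with Prefix="j" and Suffix=""
+// the Z member becomes MnemonicAlias<"jz", "je">, and with Prefix="cmov" and
+// Suffix="w" it becomes MnemonicAlias<"cmovzw", "cmovew">.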
+
+
+//===----------------------------------------------------------------------===//
+// Assembler Instruction Aliases
+//===----------------------------------------------------------------------===//
+
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
+def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
+def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+def : InstAlias<"fcompi", (COM_FIPr ST1)>;
+def : InstAlias<"fucom", (UCOM_Fr ST1)>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1)>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1)>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+
+// Force a suffix-less mov between a segment register and memory to prefer the
+// 'l' form of the move. All segment/mem forms are equivalent; this one has the
+// shortest encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl (as in "rep; movsd").
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, 1
+def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
+def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
+
+def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+
+/* FIXME: This is disabled because the asm matcher is currently incapable of
+ * matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 000000000000..b2d9fca97b02
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,454 @@
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+ (ins VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+ }
+}
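+// For reference, these multiclasses are instantiated further down in this
+// file, e.g.
+//   defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+// which expands into a register form (MMX_PADDBirr) and a memory form
+// (MMX_PADDBirm).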
+
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ let isCommutable = 0 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
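+// These two conversion multiclasses are instantiated below for the MMX/SSE
+// conversions (cvtps2pi, cvtpd2pi, cvttps2pi, cvttpd2pi, cvtpi2pd, cvtpi2ps);
+// each instantiation yields a register (irr) and a memory (irm) form.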
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS Instruction
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
+ [(int_x86_mmx_emms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ []>;
+
+// These are 64-bit moves, but since the OS X assembler doesn't
+// recognize a register-to-register movq, we write them as
+// movd.
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
+ (outs GR64:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (bitconvert GR64:$src))]>;
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (x86mmx VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))]>;
+
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movntq\t{$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx
+ (scalar_to_vector (loadi32 addr:$src))))))]>;
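+// Note: the AddedComplexity values above bias instruction selection toward
+// these zero-extending movd patterns over the plain MMX_MOVD64rr/rm patterns
+// whenever both would match.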
+
+// Arithmetic Instructions
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw>;
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
+
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>;
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+
+// -- Unpack Instructions
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq>;
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+
+// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
+
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
+ (iPTR imm:$src2)))]>;
+let Constraints = "$src1 = $dst" in {
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
+ (outs VR64:$dst),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
+ (outs VR64:$dst),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
+// MMX to XMM for vector types
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
+ (v2i64 (MOVQI2PQIrm addr:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
+ (v2i64 (MOVDI2PDIrm addr:$src))>;
+
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
+
+// Misc.
+let Uses = [EDI] in
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+let Uses = [RDI] in
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+
+// 64-bit bit convert.
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 000000000000..fe11d776804c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,5865 @@
+//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 Instructions Classes
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
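+// For illustration (hypothetical instantiation): with OpcodeStr "addss", the
+// !if(Is2Addr, ...) above yields the two-operand SSE form
+//   "addss\t{$src2, $dst|$dst, $src2}"
+// when Is2Addr is 1, and the three-operand AVX-style form
+//   "addss\t{$src2, $src1, $dst|$dst, $src1, $src2}"
+// when it is 0.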
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat,
+ bit Is2Addr = 1> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, mem_cpat:$src2))]>;
+}
+
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ let mayLoad = 1 in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rr, d>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))], d>;
+ def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+ SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+ [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory automatically zeroes the upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> :
+ SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and as just mentioned, we don't use movss/movsd for copies.
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+}
+
+let Constraints = "$src1 = $dst" in {
+ def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $dst|$dst, $src2}">, XS;
+ def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+ let AddedComplexity = 20 in
+ def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+
+let AddedComplexity = 15 in {
+// Extract the low 32-bit value from one vector and insert it into another.
+def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// Extract the low 64-bit value from one vector and insert it into another.
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+}
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+
+let AddedComplexity = 20 in {
+// MOVSSrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+}
+
+// Store scalar value to memory.
+def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX;
+
+// Extract and store.
+def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Move Aligned/Unaligned floating point values
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ bit IsReMaterializable = 1> {
+let neverHasSideEffects = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>;
+}
+
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize;
+
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPS/D load and store
+def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
+
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
+
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
+ PatFrag mov_frag, string base_opc,
+ string asm_opr> {
+ def PSrm : PI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(base_opc, "s", asm_opr),
+ [(set RC:$dst,
+ (mov_frag RC:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+ SSEPackedSingle>, TB;
+
+ def PDrm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ !strconcat(base_opc, "d", asm_opr),
+ [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))],
+ SSEPackedDouble>, TB, OpSize;
+}
+
+let AddedComplexity = 20 in {
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0, so the non-store version isn't too horrible.
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>,
+ VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>,
+ VEX;
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
+
+let AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
+
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
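+// Note on naming (TableGen mechanics, for reference): instantiating this
+// multiclass via e.g. "defm CVTTSS2SI : sse12_cvt_s<...>" below yields two
+// records, CVTTSS2SIrr (register source) and CVTTSS2SIrm (memory source,
+// with the load folded through ld_frag).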
+
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ []>;
+}
+
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+}
+
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX, VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing an rxx
+// register, but the same isn't true when only using memory operands.
+// Provide the alternative assembly "l" and "q" forms to address this
+// explicitly where appropriate.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+ VEX_4V;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
+ VEX_4V, VEX_W;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+ VEX_4V;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
+ VEX_4V;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
+ VEX_4V, VEX_W;
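+// For illustration (hedged sketch, AT&T syntax): with a register source the
+// operand width is implied by the register, e.g.
+//   vcvtsi2ss  %eax, %xmm1, %xmm0     32-bit source, implied by %eax
+//   vcvtsi2ss  %rax, %xmm1, %xmm0     64-bit source, implied by %rax
+// but with a memory source the width is ambiguous, so the explicit forms help:
+//   vcvtsi2ssl (%rdi), %xmm1, %xmm0
+//   vcvtsi2ssq (%rdi), %xmm1, %xmm0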
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+ def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+
+// Conversion Instructions Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, bit Is2Addr = 1> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+}
+
+defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS, VEX;
+defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// prefixes. Get rid of this hack or rename the intrinsics; there are several
+// instructions that only match with the intrinsic form, so why create
+// duplicates just to let them be recognized by the assembler?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS;
+defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ f32mem, load, "cvtss2si{q}">, XS, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si{q}">, XD, REX_W;
+
+
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
+
+let Constraints = "$src1 = $dst" in {
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss">, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}">, XS, REX_W;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+ "cvtsi2sd">, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd">, XD, REX_W;
+}
+
+/// SSE 1 Only
+
+// Aliases for intrinsics
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttsd2si">, XD, VEX, VEX_W;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si{q}">, XS, REX_W;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttsd2si{q}">, XD, REX_W;
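+// Background note (hedged, general x86 semantics): the "tt" variants truncate
+// toward zero, matching a C float-to-int cast, while the plain forms round
+// according to the current MXCSR rounding mode, e.g.
+//   cvttss2si %xmm0, %eax     (int)x, truncated
+//   cvtss2si  %xmm0, %eax     rounded per MXCSR (round-to-nearest by default)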
+
+let Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+ "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+}
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
+}
+
+/// SSE 2 Only
+
+// Convert scalar double to scalar single
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ XS, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
+
+// Convert scalar single to scalar double
+// SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, Requires<[HasAVX]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
+def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
+ Requires<[HasAVX]>;
+
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ Requires<[HasSSE2]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ Requires<[HasSSE2, OptForSize]>;
+
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
+}
+
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
+
+// Convert doubleword to packed single/double fp
+// SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, Requires<[HasSSE2]>;
+
+// FIXME: why is the non-intrinsic version described as SSE3?
+// SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>, VEX;
+def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>;
+
+// SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword with truncation
+// SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>;
+def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
+
+
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
+
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
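+// For illustration (hedged, AT&T syntax): the destination is an xmm register
+// in both cases, so a memory source leaves the width ambiguous:
+//   vcvttpd2dqx (%rdi), %xmm0    two doubles from a 128-bit load
+//   vcvttpd2dqy (%rdi), %xmm0    four doubles from a 256-bit load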
+
+// Convert packed single to packed double
+let Predicates = [HasAVX] in {
+ // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ VEX, Requires<[HasAVX]>;
+def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, Requires<[HasSSE2]>;
+
+// Convert packed double to packed single
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+
+
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsic matching to use patterns like the
+// ones below whenever possible, to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ string asm, string asm_alt> {
+ let isAsmParserOnly = 1 in {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
+ let mayLoad = 1 in
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
+}
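+// For illustration (hedged, AT&T syntax): the two asm forms above accept
+//   cmpltss %xmm1, %xmm0        condition code folded into the mnemonic
+//   cmpss $1, %xmm1, %xmm0      same comparison via an explicit immediate
+// where immediate 1 is the "lt" predicate.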
+
+let neverHasSideEffects = 1 in {
+ defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XS, VEX_4V;
+ defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+def CMPSSrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
+def CMPSSrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
+def CMPSDrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
+def CMPSDrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
+}
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+}
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))], d>;
+}
+
+let Defs = [EFLAGS] in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
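+// Usage sketch (hedged, AT&T syntax): these compares only write EFLAGS and
+// are consumed through flag users, e.g.
+//   ucomiss %xmm1, %xmm0
+//   ja      .Lgt                taken when xmm0 > xmm1 and the operands
+//                               compared ordered (label name is illustrative)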
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm, string asm_alt,
+ Domain d> {
+ let isAsmParserOnly = 1 in {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (shufp:$src3
+ RC:$src1, (mem_frag addr:$src2))))], d>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst,
+ (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
+}
+
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
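+// For illustration (hedged): the shuffle immediate is four 2-bit selectors.
+// For shufps the low two result elements come from $src1 via imm[1:0] and
+// imm[3:2], and the high two from $src2 via imm[5:4] and imm[7:6], e.g.
+//   shufps $0x4e, %xmm1, %xmm0   picks elements 2,3 of %xmm0 and 0,1 of %xmm1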
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ } // Constraints = "$src1 = $dst"
+} // AddedComplexity
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 packed FP sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], d>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
+}
+
+// Mask creation
+defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
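+// Usage sketch (hedged, AT&T syntax): the sign-mask instructions pack the
+// element sign bits into the low bits of a GPR, e.g.
+//   movmskps %xmm0, %eax     eax[3:0] = sign bits of the four floats
+//   movmskpd %xmm0, %eax     eax[1:0] = sign bits of the two doubles
+// with the remaining destination bits cleared.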
+
+// X86fgetsign
+def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
+def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
+
+// Assembler Only
+def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+}
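+// Illustrative note (hedged): materializing +0.0 this way emits a self-XOR
+// such as "pxor %xmm0, %xmm0", a recognized zeroing idiom that also breaks
+// any dependence on the register's previous value.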
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let mayLoad = 0 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+}
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
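+// Illustrative sketch (hedged): these scalar-typed aliases are what lowered
+// fabs/fneg-style operations use, masking or flipping the sign bit, e.g.
+//   andps <abs-mask constant>, %xmm0     clear bit 31 (fabs)
+//   xorps <sign-bit constant>, %xmm0     flip bit 31  (fneg)
+// (the constant operands here are placeholders, not real symbols).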
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let Pattern = []<dag> in {
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
+ }
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
+ }
+}
+
+/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
+///
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, VEX_4V;
+
+ defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
+}
+
+// AVX 256-bit packed logical ops forms
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+
+/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
+/// classes below
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, Is2Addr>, XD;
+}
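+// For illustration (hedged, AT&T syntax): in the scalar forms only the low
+// element is computed and the destination's upper elements pass through,
+// e.g.
+//   addss %xmm1, %xmm0      xmm0[0] += xmm1[0]; xmm0[127:32] unchanged
+// which is why the vector-typed intrinsic variants noted above cannot be
+// commuted.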
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ let mayLoad = 0 in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let mayLoad = 0 in {
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
+ SSEPackedSingle, Is2Addr>, TB;
+
+ defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
+ SSEPackedDouble, Is2Addr>, TB, OpSize;
+}
+
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
+// Binary Arithmetic instructions
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd>,
+ basic_sse12_fp_binop_s_int<0x58, "add">;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_s_int<0x59, "mul">;
+
+ let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub">;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_s_int<0x5E, "div">;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_s_int<0x5F, "max">,
+ basic_sse12_fp_binop_p_int<0x5F, "max">;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_s_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_int<0x5D, "min">;
+ }
+}
+
+/// Unop Arithmetic
+/// Like the binops above, unops come in both scalar and packed forms. We
+/// also have a special variant of the scalar form to represent the
+/// associated intrinsic operation. This form is unlike the plain scalar
+/// form, in that it takes an entire vector (instead of a scalar) and leaves
+/// the top elements undefined.
+///
+/// And we have a special variant form for a full-vector intrinsic form.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, XS, Requires<[HasAVX, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))]>;
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src))]>;
+ def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
+let Predicates = [HasAVX] in {
+ // Square root.
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ VEX_4V;
+
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
+ VEX;
+
+ // Reciprocal approximations. Note that these typically require refinement
+ // in order to obtain suitable precision.
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
+
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
+ VEX_4V;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
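+
+// Illustrative refinement step, not part of this patch (a sketch assuming
+// the standard <xmmintrin.h> intrinsics): one Newton-Raphson iteration
+// raises the ~12-bit rcpps estimate to roughly single-precision accuracy.
+//   __m128 x0 = _mm_rcp_ps(a);                      // ~1/a, 12-bit estimate
+//   __m128 x1 = _mm_sub_ps(_mm_add_ps(x0, x0),      // x1 = x0*(2 - a*x0)
+//                          _mm_mul_ps(a, _mm_mul_ps(x0, x0)));
+// The rsqrtps estimate can be refined the same way with
+// x1 = 0.5*x0*(3 - a*x0*x0).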
+
+// There is no f64 version of the reciprocal approximation instructions.
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+}
+
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti{l}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti{q}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+}
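+
+// Illustrative usage, not part of this patch (assumes the standard SSE/SSE2
+// intrinsic headers): non-temporal stores bypass the cache and are weakly
+// ordered, so they are typically followed by an sfence.
+//   void fill(float *dst /* 16-byte aligned */, __m128 v, int n) {
+//     for (int i = 0; i < n; i += 4)
+//       _mm_stream_ps(dst + i, v);                  // MOVNTPS
+//     _mm_sfence();                                 // order the NT stores
+//   }
+//   _mm_stream_si128((__m128i *)p16, x);            // MOVNTDQ
+//   _mm_stream_si32(ip, 42);                        // MOVNTI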
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
+
+// Alias instructions that map a zero vector to pxor / xorp* for SSE.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
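+
+// Illustrative usage, not part of this patch: this zeroing idiom is what
+// _mm_setzero_ps() / _mm_setzero_pd() / _mm_setzero_si128() normally
+// compile down to (xorps / xorpd / pxor of a register with itself).
+//   __m128  zf = _mm_setzero_ps();     // V_SET0PS
+//   __m128i zi = _mm_setzero_si128();  // V_SET0PI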
+
+// The same as above but for AVX. The 128-bit versions are the same, but
+// re-encoded. The 256-bit form has no PI version and does not need one:
+// on Sandy Bridge the register is set to zero at the rename stage without
+// using any execution unit, so SET0PSY and SET0PDY can be used for vector
+// int instructions without penalty.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction with something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
+ Requires<[HasAVX, OptForSpeed]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
+
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let neverHasSideEffects = 1 in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
+
+let neverHasSideEffects = 1 in
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+// Intrinsic forms of the MOVDQU store
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, Requires<[HasSSE2]>;
+
+} // ExeDomain = SSEPackedInt
+
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
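+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): the
+// aligned forms require a 16-byte-aligned address, the unaligned forms
+// accept any address.
+//   __m128i a = _mm_load_si128((const __m128i *)p16);  // MOVDQA, p16 aligned
+//   __m128i u = _mm_loadu_si128((const __m128i *)p);   // MOVDQU, any address
+//   _mm_storeu_si128((__m128i *)q, u);                 // MOVDQU store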
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+}
+
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+}
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
+}
+
+/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
+///
+/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
+/// to collapse (bitconvert VT to VT) into its operand.
+///
+multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
+}
+
+} // ExeDomain = SSEPackedInt
+
+// 128-bit Integer Arithmetic
+
+let Predicates = [HasAVX] in {
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+ VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+ VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+ VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+ VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+ VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+ VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+ VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+ VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+ VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+ VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+ VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+ VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+ VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+ VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+ VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+ VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+ VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+ VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+ VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
+defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+
+// Intrinsic forms
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
+defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+} // Constraints = "$src1 = $dst"
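+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): the
+// saturating forms clamp instead of wrapping.
+//   __m128i a = _mm_set1_epi8((char)200), b = _mm_set1_epi8(100);
+//   __m128i w = _mm_add_epi8(a, b);    // PADDB: wraps, every byte = 44
+//   __m128i s = _mm_adds_epu8(a, b);   // PADDUSB: saturates, every byte = 255
+//   __m128i d = _mm_sad_epu8(a, b);    // PSADBW: sum of absolute byte
+//                                      //         differences per 64-bit half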
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+ VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+ VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+ VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+ VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+ VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+ VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+ VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+ VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def VPANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>, VEX_4V;
+
+ def VPANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>,
+ VEX_4V;
+}
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
+
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+}
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
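+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; x below
+// is an arbitrary __m128i): note that pslldq/psrldq shift the whole register
+// by a byte count, while the element shifts take a bit count.
+//   __m128i a = _mm_slli_epi32(x, 4);      // PSLLD:  each dword <<= 4 bits
+//   __m128i b = _mm_slli_si128(x, 4);      // PSLLDQ: whole register <<= 4 bytes
+//   __m128i c = _mm_andnot_si128(a, b);    // PANDN:  (~a) & b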
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+ defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+ 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+ defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+ defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+ defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+ defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
+
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
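+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; a, b,
+// x, y are arbitrary __m128i values): the compares produce all-ones in each
+// matching lane and zero otherwise, which works directly as a select mask.
+//   __m128i eq  = _mm_cmpeq_epi8(a, b);                // PCMPEQB
+//   __m128i sel = _mm_or_si128(_mm_and_si128(eq, x),   // lane-wise eq ? x : y
+//                              _mm_andnot_si128(eq, y));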
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+ 0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+ 0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+ 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
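+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): pack
+// narrows two word vectors into one byte vector with saturation.
+//   __m128i w0 = _mm_set1_epi16(300), w1 = _mm_set1_epi16(-5);
+//   __m128i sb = _mm_packs_epi16(w0, w1);   // PACKSSWB: 300 -> 127, -5 -> -5
+//   __m128i ub = _mm_packus_epi16(w0, w1);  // PACKUSWB: 300 -> 255, -5 -> 0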
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+ PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+ (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2
+ (bc_frag (memopv2i64 addr:$src1)),
+ (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+ VEX;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
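+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; v is an
+// arbitrary __m128i): the immediate selects which source element feeds each
+// result lane.
+//   __m128i bc  = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));   // PSHUFD:
+//                                                   // broadcast dword 0
+//   __m128i rev = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));   // reverse dwords
+//   __m128i hw  = _mm_shufflehi_epi16(v, _MM_SHUFFLE(2, 3, 0, 1)); // PSHUFHW:
+//                                                   // permute upper four words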
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+ PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (unp_frag VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+}
+
+} // ExeDomain = SSEPackedInt
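+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; v is an
+// arbitrary __m128i): a common idiom is widening bytes to words by
+// interleaving with zero.
+//   __m128i z  = _mm_setzero_si128();
+//   __m128i lo = _mm_unpacklo_epi8(v, z);   // PUNPCKLBW: low 8 bytes -> 8 words
+//   __m128i hi = _mm_unpackhi_epi8(v, z);   // PUNPCKHBW: high 8 bytes -> 8 words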
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
+// Extract
+let Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, OpSize, VEX;
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>;
+
+// Insert
+let Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
+
+} // ExeDomain = SSEPackedInt
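+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; v is an
+// arbitrary __m128i):
+//   __m128i v2 = _mm_insert_epi16(v, 0x1234, 3);   // PINSRW: replace word 3
+//   int     w  = _mm_extract_epi16(v2, 3);         // PEXTRW: w == 0x1234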
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+
+} // ExeDomain = SSEPackedInt
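+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): a
+// typical pattern is testing 16 bytes at once for a match.
+//   __m128i eq   = _mm_cmpeq_epi8(block, _mm_set1_epi8(ch));
+//   int     mask = _mm_movemask_epi8(eq);  // PMOVMSKB: bit i set if byte i matched
+//   if (mask != 0) { /* first match at index __builtin_ctz(mask) */ }
+// (block and ch above are placeholders for the data being scanned.)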
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
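+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>):
+// maskmovdqu stores only the bytes whose mask byte has its high bit set;
+// the destination address travels in (E/R)DI, which the intrinsic hides.
+//   _mm_maskmoveu_si128(data, mask, (char *)dst);   // MASKMOVDQU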
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+
+// Move Int Doubleword to Packed Double Int
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ VEX;
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+
+
+// Move Int Doubleword to Single Scalar
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX;
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+
+// Move Packed Doubleword Int to Packed Double Int
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+// Move Scalar Single to Double Int
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+
+// movd / movq to XMM register zero-extends
+let AddedComplexity = 15 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>,
+ VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ VEX, VEX_W;
+}
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>,
+ VEX;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
+
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
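+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; x is an
+// int, lx a 64-bit integer):
+//   __m128i v = _mm_cvtsi32_si128(x);    // MOVD GR32->XMM, upper 96 bits zeroed
+//   int     y = _mm_cvtsi128_si32(v);    // MOVD XMM->GR32, y == x
+//   __m128i q = _mm_cvtsi64_si128(lx);   // MOVQ GR64->XMM (x86-64 only)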
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Store / copy lower 64-bits of a XMM register.
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 20 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+let AddedComplexity = 20 in {
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in
+// the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 20 in {
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+}
+
+// Instructions to match in the assembler
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with a GR64 destination, but encode it as a "movq".
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+
+// Instructions for the disassembler
+// xr = XMM register
+// xm = mem64
+
+let Predicates = [HasAVX] in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
+def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, XS;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
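+// (The resulting encoding is the byte sequence F3 90, which pre-SSE2
+// processors simply execute as a nop.)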
+
+// Alias instruction that maps the all-ones vector to pcmpeqd for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ // FIXME: Change encoding to pseudo.
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true for memory operands. Provide additional
+// rr and rm assembly forms to address this explicitly.
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
+
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (rep_frag
+ VR128:$src, (undef))))]>;
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (rep_frag
+ (memopv4f32 addr:$src), (undef)))]>;
+}
+
+multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
+ string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+}
+
+let Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
+
+// Replicate Double FP
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
+ (undef))))]>;
+}
+
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+}
+
+let Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+}
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+
+// Move Unaligned Integer
+let Predicates = [HasAVX] in {
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+}
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+
+def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// Several Move patterns
+let AddedComplexity = 5 in {
+def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (memopv2i64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+}
+
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
+}
+
+let Predicates = [HasAVX],
+ ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, TB, OpSize, VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem>, TB, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem>, TB, OpSize;
+}
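+// For reference, addsubps computes { a0-b0, a1+b1, a2-b2, a3+b3 } and
+// addsubpd computes { a0-b0, a1+b1 } for sources a and b.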
+
+//===---------------------------------------------------------------------===//
+// SSE3 Instructions
+//===---------------------------------------------------------------------===//
+
+// Horizontal ops
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ int_x86_avx_hsub_pd_256, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd>;
+}
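+// For reference, haddps produces { a0+a1, a2+a3, b0+b1, b2+b3 } and hsubps
+// produces { a0-a1, a2-a3, b0-b1, b2-b3 } for sources a and b.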
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Absolute Instructions
+//===---------------------------------------------------------------------===//
+
+
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128> {
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>, VEX;
+}
+
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>;
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
+
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+let isCommutable = 0 in {
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ int_x86_ssse3_psign_b_128, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ int_x86_ssse3_psign_w_128, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ int_x86_ssse3_psign_d_128, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
+}
+
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ int_x86_ssse3_psign_b_128>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ int_x86_ssse3_psign_w_128>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128>;
+}
+
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
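+// Note: pshufb selects each result byte from the first operand using the low
+// four bits of the corresponding mask byte, or zeroes it when the mask byte's
+// high bit is set.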
+
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
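+// Note: psign{b,w,d} negates, keeps, or zeroes each element of the first
+// operand depending on whether the matching element of the second operand is
+// negative, positive, or zero.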
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+ def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PALIGN : ssse3_palign<"palignr">;
+
+let AddedComplexity = 5 in {
+def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+}
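+// For reference, palignr concatenates its two operands (first operand in the
+// upper bytes), shifts the 32-byte result right by the immediate number of
+// bytes, and keeps the low 16 bytes.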
+
+//===---------------------------------------------------------------------===//
+// SSSE3 Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Thread synchronization
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+ Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+ Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
+
+//===---------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after DAG
+// combine. Since these loads aren't folded into the fextend, we have to
+// match them explicitly here.
+let Predicates = [HasSSE2] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>;
+
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction with something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+let Predicates = [HasAVX] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
+ addr:$src)>;
+
+// bit_convert
+let Predicates = [HasXMMInt] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+}
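+// The bitconvert patterns above generate no code; they simply retype the
+// value already sitting in the XMM/YMM register.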
+
+// Move scalar to XMM, zero-extended: movd to an XMM register zero-extends
+// the upper elements.
+let AddedComplexity = 15 in {
+// Zero a VR128, then do a MOVS{S|D} into the lower bits.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0PS)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0PI)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+}
+
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
+ (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
+ (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// Special unary SHUFPSrri case.
+def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+let AddedComplexity = 5 in
+def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Unary v4f32 shuffle with PSHUF* in order to fold a load.
+def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+
+// Special binary v4i32 shuffle cases with SHUFPS.
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special binary v2i64 shuffle cases using SHUFPDrri.
+def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+
+// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
+ (UNPCKLPSrr VR128:$src, VR128:$src)>;
+def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>;
+def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>;
+def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>;
+}
+
+// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
+ (UNPCKHPSrr VR128:$src, VR128:$src)>;
+def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>;
+def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>;
+def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+}
+
+// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+
+let AddedComplexity = 15 in {
+// Setting the lowest element in the vector.
+def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
+ Requires<[HasSSE2]>;
+}
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+// fall back to this for SSE1)
+def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+
+// Set lowest element and zero upper elements.
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// vector -> vector casts
+def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX integer load/store.
+let Predicates = [HasAVX] in {
+ // 128-bit load/store
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Move with Sign/Zero Extend
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
+}
+
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
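+// For reference, pmovsxbw sign-extends the low eight bytes of the source to
+// eight words, while pmovzxbw zero-extends them; the wd and dq forms widen
+// words to doublewords and doublewords to quadwords in the same way.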
+
+// Common patterns involving scalar load.
+def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
+
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+  // Expecting an i16 load any-extended to an i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
+}
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in {
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
+
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+
+
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
+
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+}
+defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+let Constraints = "$src1 = $dst" in
+ defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
+
+// insertps has a few different modes; the first two below are optimized
+// inserts that won't zero arbitrary elements in the destination vector. The
+// next one matches the intrinsic and may zero arbitrary elements in the
+// target vector.
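+// For reference, the immediate's bits 7:6 select the source element (register
+// form only), bits 5:4 select the destination element, and bits 3:0 form a
+// zero mask applied to the result.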
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasAVX]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
+  // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm : Ii8<opcps, MRMSrcMem,
+                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ TA, OpSize,
+ Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+ RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
+  // Vector intrinsic operation, reg
+ def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_AVX : Ii8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TA, OpSize, Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+}
+
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+ // Intrinsic operation, reg.
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+let Predicates = [HasAVX] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V;
+
+ // Instructions for the assembler
+ defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+ VEX;
+ defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
+}
+
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
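+
+// The round* immediate selects the rounding behaviour: bits [1:0] give the
+// rounding mode (00 = nearest, 01 = down, 10 = up, 11 = truncate), bit 2, when
+// set, uses the mode from MXCSR.RC instead, and bit 3 suppresses the precision
+// exception. E.g. "roundps $0x1, %xmm1, %xmm0" is effectively a packed floor.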
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction: we lower to this in X86ISelLowering, primarily from the
+// Intel intrinsic that corresponds to it.
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize;
+}
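+
+// ptest does not modify its destination; it only sets flags: ZF is set when
+// (src1 AND src2) is all zeros, and CF is set when ((NOT src1) AND src2) is
+// all zeros.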
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
+
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+
+
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+ 0>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+ 0>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+ 0>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+ 0>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+ 0>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+ 0>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+ 0>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+ 0>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+ 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
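+
+// pmulld multiplies the packed 32-bit elements and keeps the low 32 bits of
+// each product, which is why it maps directly onto the generic "mul" node at
+// type v4i32.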
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ }
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem>;
+ }
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem>;
+}
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
+let Predicates = [HasAVX] in {
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvpd>;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_pd_256>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_ps_256>;
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+ (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
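+
+// The non-AVX blendv forms use XMM0 as an implicit mask: the most significant
+// bit of each element (each byte for pblendvb) of XMM0 selects whether the
+// corresponding result element comes from $src2 (bit set) or from $src1/$dst.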
+
+let Predicates = [HasAVX] in
+def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize, VEX;
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize;
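+
+// movntdqa is a non-temporal (streaming) aligned 16-byte load, mainly useful
+// for reading from write-combining memory without polluting the caches.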
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
+ 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - String/text Processing Instructions
+//===----------------------------------------------------------------------===//
+
+// Packed Compare Implicit Length Strings, Return Mask
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
+ VR128:$src1, (load addr:$src2), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
+}
+
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+ def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+ def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+}
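+
+// For all of the pcmpXstrX forms the imm8 is a control byte: roughly, bits
+// [1:0] select the element format (unsigned/signed bytes or words), bits [3:2]
+// the aggregation operation, bits [5:4] the polarity, and bit 6 whether the
+// *_m forms produce a bit or byte mask (or, for the *_i forms, whether the
+// least or most significant matching index is returned in ECX).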
+
+// Packed Compare Explicit Length Strings, Return Mask
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
+}
+
+let Predicates = [HasAVX],
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let Predicates = [HasAVX] in {
+defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
+ VEX;
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+// Packed Compare Explicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let Predicates = [HasAVX] in {
+defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
+ VEX;
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - CRC Instructions
+//===----------------------------------------------------------------------===//
+
+// No CRC instructions have AVX equivalents
+
+// CRC intrinsic instructions.
+// This set of instructions is rm only; the only difference between the forms
+// is the size of r and m.
+let Constraints = "$src1 = $dst" in {
+ def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i16mem:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+ def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR16:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
+ OpSize;
+ def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
+ REX_W;
+ def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
+ REX_W;
+}
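+
+// crc32 accumulates a CRC-32C checksum (the Castagnoli polynomial, as used by
+// iSCSI and SCTP, not the CRC-32 of zlib) of the source operand into the
+// destination register; the forms differ only in the source width.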
+
+//===----------------------------------------------------------------------===//
+// AES-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+// Perform One Round of an AES Encryption/Decryption Flow
+let Predicates = [HasAVX, HasAES] in {
+ defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
+ int_x86_aesni_aesenc, 0>, VEX_4V;
+ defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
+ int_x86_aesni_aesenclast, 0>, VEX_4V;
+ defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
+ int_x86_aesni_aesdec, 0>, VEX_4V;
+ defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
+ int_x86_aesni_aesdeclast, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+ defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+ defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+ defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
+}
+
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
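+
+// aesenc performs one full AES encryption round (ShiftRows, SubBytes,
+// MixColumns, then XOR with the round key in $src2); aesenclast is the final
+// round, which omits MixColumns. aesdec/aesdeclast are the corresponding
+// rounds of the inverse cipher, using the key schedule produced by aesimc.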
+
+// Perform the AES InvMixColumn Transformation
+let Predicates = [HasAVX, HasAES] in {
+ def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize, VEX;
+ def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize, VEX;
+}
+def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize;
+def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize;
+
+// AES Round Key Generation Assist
+let Predicates = [HasAVX, HasAES] in {
+ def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize, VEX;
+ def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize, VEX;
+}
+def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize;
+def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize;
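+
+// aeskeygenassist computes the SubWord/RotWord terms used during AES key
+// expansion; the immediate supplies the round constant (RCON) XORed into the
+// result.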
+
+//===----------------------------------------------------------------------===//
+// CLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// Carry-less Multiplication instructions
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+}
+
+// AVX Carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+
+multiclass pclmul_alias<string asm, int immop> {
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>;
+defm : pclmul_alias<"hqlq", 0x01>;
+defm : pclmul_alias<"lqhq", 0x10>;
+defm : pclmul_alias<"lqlq", 0x00>;
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+
+// Load from memory and broadcast to all elements of the destination operand
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
+
+// Insert packed floating-point values
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Extract packed floating-point values
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+
+// Conditional SIMD Packed Loads and Stores
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
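+
+// For vmaskmovps/pd the first source operand is a mask: the sign bit of each
+// element selects whether the corresponding element is loaded or stored;
+// unselected elements are zeroed on loads and left untouched on stores.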
+
+// Permute Floating-Point Values
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
+}
+
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Zero All YMM registers
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
+
+// Zero Upper bits of YMM registers
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
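+
+// vzeroupper avoids the SSE/AVX transition penalty by clearing the upper 128
+// bits of every YMM register before legacy SSE code runs; vzeroall clears the
+// registers entirely.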
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles will be translated into target-specific nodes and matched
+// directly by the patterns below (which can be changed along the way). The
+// AVX versions of some, but not all, of them are described here; more should
+// come in the near future.
+
+// Shuffle with PSHUFD instruction, folding loads. The first two patterns match
+// SSE2 loads, which are always promoted to v2i64. The last one should match
+// the SSE1 case, where the only legal load is v4f32, but since PSHUFD requires
+// SSE2 it is unclear how this ever worked. Anyway, the pattern will remain
+// here until we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+ (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+ (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+ (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+ (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+ (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of an X86Movss there should be an X86Movlps here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+ (bc_v4i32 (v2i64 (load addr:$src2)))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+ (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+ (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+
+def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 000000000000..8278568184ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,746 @@
+//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
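+// Illustrative sketch only (not part of the original change): the FIXME above
+// is asking for the rCL/ri/r1 forms below to be generated from a multiclass
+// instead of being spelled out per width, along these (hypothetical) lines:
+//   multiclass ShiftRot32<bits<8> opcCL, bits<8> opcImm, Format f,
+//                         string mnem, SDNode node> {
+//     let Uses = [CL] in
+//     def rCL : I<opcCL, f, (outs GR32:$dst), (ins GR32:$src1),
+//                 !strconcat(mnem, "{l}\t{%cl, $dst|$dst, CL}"),
+//                 [(set GR32:$dst, (node GR32:$src1, CL))]>;
+//     def ri  : Ii8<opcImm, f, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+//                   !strconcat(mnem, "{l}\t{$src2, $dst|$dst, $src2}"),
+//                   [(set GR32:$dst, (node GR32:$src1, (i8 imm:$src2)))]>;
+//   }
+// Every name in the sketch is hypothetical; the hand-written definitions below
+// are what this change actually adds.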
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", []>;
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src1 = $dst"
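+
+// Illustrative note (not part of the original change): the shift-by-one Pat
+// the NOTE above refers to is expected to live with the generic compiler
+// patterns and to look roughly like
+//   def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+// ADD32rr is an assumed name from the arithmetic instruction definitions.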
+
+
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift right by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+
+let Uses = [CL] in {
+def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+
+let Uses = [CL] in {
+def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t$dst", []>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t$dst", []>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t$dst", []>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t$dst", []>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t$dst", []>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t$dst", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+
+} // Constraints = "$src = $dst"
+
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t$dst", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t$dst", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t$dst", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t$dst", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t$dst", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t$dst", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+
+let Constraints = "$src1 = $dst" in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+ "rol{b}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+ "rol{w}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ OpSize;
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+ "rol{l}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+ "rol{q}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+
+// Rotate by 1
+def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Rotate by 1
+def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
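+// Informal reminder of the semantics (added for clarity, not authoritative):
+// for an N-bit operand, "shld dst, src, cnt" computes
+//   dst = (dst << cnt) | (src >> (N - cnt))
+// and "shrd dst, src, cnt" computes
+//   dst = (dst >> cnt) | (src << (N - cnt))
+// i.e. the destination is shifted while the vacated bits are filled from the
+// source, which is why these generalize the rotates above (shld/shrd of a
+// register with itself is a rotate).
+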
+let Constraints = "$src1 = $dst" in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+}
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+} // Defs = [EFLAGS]
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 000000000000..31de878343ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,429 @@
+//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes. These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, RDX] in
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+
+let Defs = [RAX, RCX, RDX] in
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+ def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
+
+
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
+def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
+def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+ Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In32BitMode]>;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions.
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+ "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+ "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+ "ltr{w}\t{$src}", []>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+ "ltr{w}\t{$src}", []>, TB;
+
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+ "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+ "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+ "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+ "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+ "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+ "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+ "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+ "push{l}\t%es", []>, Requires<[In32BitMode]>;
+
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+ "push{w}\t%fs", []>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+ "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+ "push{w}\t%gs", []>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+ "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+ "push{q}\t%fs", []>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+ "push{q}\t%gs", []>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+ "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+ "pop{l}\t%ss", []> , Requires<[In32BitMode]>;
+
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{l}\t%ds", []> , Requires<[In32BitMode]>;
+
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+ "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+ "pop{l}\t%es", []> , Requires<[In32BitMode]>;
+
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{w}\t%fs", []>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{q}\t%fs", []>, TB;
+
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{w}\t%gs", []>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{q}\t%gs", []>, TB;
+
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lds{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "les{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+ "verr\t$seg", []>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+ "verw\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+ "verw\t$seg", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt\t$dst", []>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+ "sldt{l}\t$dst", []>, TB;
+
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+// extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt\t$src", []>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt\t$src", []>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+ "lldt{w}\t$src", []>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+ "lldt{w}\t$src", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Specialized register support
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+ "smsw{w}\t$dst", []>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+ "smsw{l}\t$dst", []>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+ "smsw{q}\t$dst", []>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+ "smsw{w}\t$dst", []>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+ "lmsw{w}\t$src", []>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+ "lmsw{w}\t$src", []>, TB;
+
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+
+let Defs = [RDX, RAX], Uses = [RCX] in
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+ def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+def : InstAlias<"xstorerng", (XSTORE)>;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+ def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+ def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+ def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+ def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+ def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+ def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+ def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+ def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 000000000000..daf61e4625d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,54 @@
+//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+// 66 0F 38 81
+def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+ "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
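+// F3 0F C7 /6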
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+ "vmxon\t{$vmxon}", []>, XS;
+
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 000000000000..3f88fa69d0ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,574 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Valgrind.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
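+  // The patched region is 5 bytes: the JMP opcode plus a 32-bit displacement.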
+ sys::ValgrindDiscardTranslations(Old, 5);
+}
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// For ELF targets, use a .size and .type directive, to let tools
+// know the extent of functions defined in assembler.
+#if defined(__ELF__)
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+#else
+# define SIZE(sym)
+# define TYPE_FUNCTION(sym)
+#endif
+
+// Provide a convenient way to disable the use of CFI directives.
+// This is needed for old/broken assemblers (for example, gas on
+// Darwin is pretty old and doesn't support these directives)
+#if defined(__APPLE__)
+# define CFI(x)
+#else
+// FIXME: Disable this until we really want to use it. Also, we will
+// need to add some workarounds for compilers, which support
+// only a subset of these directives.
+# define CFI(x)
+#endif
+
+// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// callee saved registers, for the fastcc calling convention.
+extern "C" {
+#if defined(X86_64_JIT)
+# ifndef _MSC_VER
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ // Save RBP
+ "pushq %rbp\n"
+ CFI(".cfi_def_cfa_offset 16\n")
+ CFI(".cfi_offset %rbp, -16\n")
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ CFI(".cfi_def_cfa_register %rbp\n")
+ // Save all int arg registers
+ "pushq %rdi\n"
+ CFI(".cfi_rel_offset %rdi, 0\n")
+ "pushq %rsi\n"
+ CFI(".cfi_rel_offset %rsi, 8\n")
+ "pushq %rdx\n"
+ CFI(".cfi_rel_offset %rdx, 16\n")
+ "pushq %rcx\n"
+ CFI(".cfi_rel_offset %rcx, 24\n")
+ "pushq %r8\n"
+ CFI(".cfi_rel_offset %r8, 32\n")
+ "pushq %r9\n"
+ CFI(".cfi_rel_offset %r9, 40\n")
+    // Align the stack on a 16-byte boundary. RSP might only be 8-byte
+    // aligned if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+#endif
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ CFI(".cfi_def_cfa_register %rsp\n")
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ CFI(".cfi_adjust_cfa_offset 48\n")
+ "popq %r9\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r9\n")
+ "popq %r8\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r8\n")
+ "popq %rcx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rcx\n")
+ "popq %rdx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdx\n")
+ "popq %rsi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rsi\n")
+ "popq %rdi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdi\n")
+ // Restore RBP
+ "popq %rbp\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rbp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+# else
+  // No inline assembler support on this platform. The routine is in an
+  // external file.
+ void X86CompilationCallback();
+
+# endif
+#elif defined (X86_32_JIT)
+# ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+# if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+# endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+# else
+ void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push ebp
+ mov ebp, esp
+ push eax
+ push edx
+ push ecx
+ and esp, -16
+ sub esp, 16
+ mov eax, dword ptr [ebp+4]
+ mov dword ptr [esp+4], eax
+ mov dword ptr [esp], ebp
+ call X86CompilationCallback2
+ mov esp, ebp
+ sub esp, 12
+ pop ecx
+ pop edx
+ pop eax
+ pop ebp
+ ret
+ }
+ }
+
+# endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
+ }
+#endif
+}
+
+/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" {
+#if !(defined (X86_64_JIT) && defined(_MSC_VER))
+  // The following function is called only from this translation unit,
+  // unless we are under 64-bit Windows with MSC, where there is
+  // no support for inline assembly.
+static
+#endif
+void LLVM_ATTRIBUTE_USED
+X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
+
+  // The call instruction should have pushed the return address onto the stack...
+#if defined (X86_64_JIT)
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#if defined (X86_64_JIT)
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+#if defined (X86_64_JIT)
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+    // 0xCE byte (interrupt) dead, so the marker doesn't affect anything.
+#if defined (X86_64_JIT)
+ // If the target address is within 32-bit range of the stub, use a
+ // PC-relative branch instead of loading the actual address. (This is
+ // considerably shorter than the 64-bit immediate load already there.)
+ // We assume here intptr_t is 64 bits.
+ intptr_t diff = NewVal-RetAddr+7;
+ if (diff >= -2147483648LL && diff <= 2147483647LL) {
+ *(unsigned char*)(RetAddr-0xc) = 0xE9;
+ *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
+ } else {
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
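+      // Rewrite "callq *%r10" into "jmpq *%r10": change the ModRM reg field
+      // from 2 (/2 = CALL) to 4 (/4 = JMP); mod=3 and rm=2 still select %r10
+      // together with the existing REX.B prefix.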
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+ }
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
+#if defined (X86_64_JIT)
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
+}
+
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ if (Subtarget->hasSSE1())
+ return X86CompilationCallback_SSE;
+#endif
+
+ return X86CompilationCallback;
+}
+
+X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ useGOT = 0;
+ TLSOffset = 0;
+}
+
+void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE) {
+#if defined (X86_64_JIT)
+ const unsigned Alignment = 8;
+ uint8_t Buffer[8];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr);
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32));
+#else
+ const unsigned Alignment = 4;
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr);
+#endif
+ return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment);
+}
+
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+ // The 64-bit stub contains:
+ // movabs r10 <- 8-byte-target-address # 10 bytes
+ // call|jmp *r10 # 3 bytes
+ // The 32-bit stub contains a 5-byte call|jmp.
+ // If the stub is a call to the compilation callback, an extra byte is added
+ // to mark it as a stub.
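+  // 10-byte movabs + 3-byte indirect call/jmp + 1 marker byte = 14 bytes,
+  // aligned to 4 bytes.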
+ StubLayout Result = {14, 4};
+ return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
+ Target != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ JCE.emitAlignment(4);
+ void *Result = (void*)JCE.getCurrentPCValue();
+ if (NotCC) {
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // jmpq *r10
+    JCE.emitByte(2 | (4 << 3) | (3 << 6)); // ModRM: mod=3, /4 (JMP), rm=2 (%r10)
+#else
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+ return Result;
+ }
+
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // callq *r10
+  JCE.emitByte(2 | (2 << 3) | (3 << 6)); // ModRM: mod=3, /2 (CALL), rm=2 (%r10)
+#else
+ JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+
+ // This used to use 0xCD, but that value is used by JITMemoryManager to
+ // initialize the buffer with garbage, which means it may follow a
+ // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
+ return Result;
+}
+
+/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+/// specific basic block.
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
+#if defined(X86_64_JIT)
+ return BB - Entry;
+#else
+ return BB - PICBase;
+#endif
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
+#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
+ TLSOffset -= size;
+ return TLSOffset;
+#else
+ llvm_unreachable("Cannot allocate thread local storage on this arch!");
+ return 0;
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.h b/contrib/llvm/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 000000000000..238420c236b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,81 @@
+//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+ class X86Subtarget;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ const X86Subtarget *Subtarget;
+ uintptr_t PICBase;
+ char* TLSOffset;
+ public:
+ explicit X86JITInfo(X86TargetMachine &tm);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+    /// getStubLayout - Returns the size and alignment of the largest call
+    /// stub on X86.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE);
+
+ /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+ /// specific basic block.
+ virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// allocateThreadLocalMemory - Each target has its own way of
+ /// handling thread local variables. This method returns a value only
+ /// meaningful to the target.
+ virtual char* allocateThreadLocalMemory(size_t size);
+
+ /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+ /// PIC jumptable entry.
+ void setPICBase(uintptr_t Base) { PICBase = Base; }
+ uintptr_t getPICBase() const { return PICBase; }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
new file mode 100644
index 000000000000..ce8ef495c001
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -0,0 +1,1044 @@
+//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86FixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class X86MCCodeEmitter : public MCCodeEmitter {
+ X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+public:
+ X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
+ }
+
+ ~X86MCCodeEmitter() {}
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
+ static unsigned GetX86RegNum(const MCOperand &MO) {
+ return X86RegisterInfo::getX86RegNum(MO.getReg());
+ }
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7 and the difference between the 2 groups is given by the REX prefix.
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form, for example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of Intel AVX Programming Reference for details.
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+ (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+ SrcRegNum += 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitImmediate(const MCOperand &Disp,
+ unsigned ImmSize, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset = 0) const;
+
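+  // A ModRM byte packs mod in bits [7:6], reg/opcode in bits [5:3], and
+  // r/m in bits [2:0].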
+ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+ }
+
+ void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
+ unsigned &CurByte, raw_ostream &OS) const {
+ EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
+ }
+
+ void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ unsigned &CurByte, raw_ostream &OS) const {
+ // SIB byte is in the same format as the ModRMByte.
+ EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
+ }
+
+
+ void EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+
+ void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const;
+
+ void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
+/// in an instruction with the specified TSFlags.
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
+ unsigned Size = X86II::getSizeOfImm(TSFlags);
+ bool isPCRel = X86II::isImmPCRel(TSFlags);
+
+ return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support
+/// PIC on ELF i386, as that symbol is magic. We check only the simple cases
+/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return false;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
+
+void X86MCCodeEmitter::
+EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
+ const MCExpr *Expr = NULL;
+ if (DispOp.isImm()) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
+ }
+
+  // If the immediate starts with _GLOBAL_OFFSET_TABLE_, use the special GOT
+  // relocation and bias the expression by this fixup's offset within the
+  // instruction.
+ if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ ImmOffset = CurByte;
+ }
+
+ // If the fixup is pc-relative, we need to bias the value to be relative to
+ // the start of the field, not the end of the field.
+ if (FixupKind == FK_PCRel_4 ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
+ ImmOffset -= 4;
+ if (FixupKind == FK_PCRel_2)
+ ImmOffset -= 2;
+ if (FixupKind == FK_PCRel_1)
+ ImmOffset -= 1;
+
+ if (ImmOffset)
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Ctx);
+
+  // Emit a symbolic constant as a fixup followed by 'Size' bytes of zeros.
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ EmitConstant(0, Size, CurByte, OS);
+}
+
+void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte,
+ raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const{
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
+ assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+
+ unsigned FixupKind = X86::reloc_riprel_4byte;
+
+ // movq loads are handled with a special relocation form which allows the
+ // linker to eliminate some loads for GOT references which end up in the
+ // same linkage unit.
+ if (MI.getOpcode() == X86::MOV64rm)
+ FixupKind = X86::reloc_riprel_4byte_movq_load;
+
+ // rip-relative addressing is actually relative to the *next* instruction.
+ // Since an immediate can follow the mod/rm byte for an instruction, this
+ // means that we need to bias the immediate field of the instruction with
+ // the size of the immediate field. If we have this case, add it into the
+ // expression to emit.
+ int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
+
+ EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
+ CurByte, OS, Fixups, -ImmSize);
+ return;
+ }
+
+ unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
+
+ // Determine whether a SIB byte is needed.
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!is64BitMode() || BaseReg != 0)) {
+
+ if (BaseReg == 0) { // [disp32] in X86-32 mode
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ return;
+ }
+
+    // If the base is not EBP/ESP and there is no displacement, use simple
+    // indirect register encoding; this handles addresses like [EAX]. The
+    // encoding for [EBP] with no displacement means [disp32], so we handle it
+    // by emitting a displacement of 0 below.
+ if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
+ EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (Disp.isImm() && isDisp8(Disp.getImm())) {
+ EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+ return;
+ }
+
+  // We need a SIB byte, so output the ModR/M byte first.
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (!Disp.isImm()) {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (Disp.getImm() == 0 &&
+ // Base reg can't be anything that ends up with '5' as the base
+ // reg, it is the magic [*] nomenclature that indicates no base.
+ BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ } else if (isDisp8(Disp.getImm())) {
+ // Emit the disp8 encoding.
+ EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ }
+
+ // Calculate what the SS field value should be...
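+  // Scale factors 1, 2, 4, and 8 map to SS values 0, 1, 2, and 3; the ~0
+  // entries are invalid scales.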
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ } else {
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8)
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ else if (ForceDisp32 || Disp.getImm() != 0)
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+}
+
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+ bool HasVEX_4V = false;
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+  // VEX_R: opcode extension equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ //
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::TF: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+ // Set the vector length to 256-bit if YMM0-YMM15 is used
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = 0;
+ bool IsDestMem = false;
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMDestMem:
+ IsDestMem = true;
+ // The important info for the VEX prefix is never beyond the address
+ // registers. Don't check beyond that.
+ NumOps = CurOp = X86::AddrNumOperands;
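+    // FALL THROUGH into the shared register scan below.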
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcReg:
+ if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
+ CurOp++;
+ }
+
+    // To check only the operands that come before the memory address
+    // operands, restart the search from the beginning.
+ if (IsDestMem)
+ CurOp = 0;
+
+    // If the last register should be encoded in the immediate field, do not
+    // use any bit of the VEX prefix for it; ignore it here.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
+ NumOps--;
+
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_B = 0x0;
+ if (!VEX_B && MO.isReg() &&
+ ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_X = 0x0;
+ }
+ break;
+ default: // MRMDestReg, MRM0r-MRM7r, RawFrm
+ if (!MI.getNumOperands())
+ break;
+
+ if (MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+ CurOp++;
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && !HasVEX_4V &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ }
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
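+  // The last byte packs pp in bits [1:0], L in bit 2, and vvvv in bits [6:3];
+  // bit 7 holds R (2-byte form) or W (3-byte form).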
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ EmitByte(0xC5, CurByte, OS);
+ EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ return;
+ }
+
+ // 3 byte VEX prefix
+ EmitByte(0xC4, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
+
+/// DetermineREXPrefix - Determine if the MCInst has to be encoded with an
+/// X86-64 REX prefix, which specifies 1) 64-bit instructions, 2) non-default
+/// operand size, and 3) use of X86-64 extended registers.
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
+ const MCInstrDesc &Desc) {
+ unsigned REX = 0;
+ if (TSFlags & X86II::REX_W)
+ REX |= 1 << 3; // set REX.W
+
+ if (MI.getNumOperands() == 0) return REX;
+
+ unsigned NumOps = MI.getNumOperands();
+ // FIXME: MCInst should explicitize the two-addrness.
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue;
+ // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
+ // that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ break;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMSrcReg:
+ if (MI.getOperand(0).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 0; // set REX.B
+ }
+ break;
+ case X86II::MRMSrcMem: {
+ if (MI.getOperand(0).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && MI.getOperand(e).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ default:
+ if (MI.getOperand(0).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 0; // set REX.B
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 2; // set REX.R
+ }
+ break;
+ }
+ return REX;
+}
+
+/// EmitSegmentOverridePrefix - Emit the segment override opcode prefix as needed.
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
+ unsigned &CurByte, int MemOperand,
+ const MCInst &MI,
+ raw_ostream &OS) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: assert(0 && "Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: assert(0 && "Unknown segment register!");
+ case 0: break;
+ case X86::CS: EmitByte(0x2E, CurByte, OS); break;
+ case X86::SS: EmitByte(0x36, CurByte, OS); break;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); break;
+ case X86::ES: EmitByte(0x26, CurByte, OS); break;
+ case X86::FS: EmitByte(0x64, CurByte, OS); break;
+ case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ EmitByte(0x64, CurByte, OS);
+ break;
+ case X86II::GS:
+ EmitByte(0x65, CurByte, OS);
+ break;
+ }
+}
+
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+///
+/// MemOperand is the operand # of the start of a memory operand if present.
+/// If not present, it is -1.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+
+ // Emit the lock opcode prefix as needed.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // Emit the repeat opcode prefix as needed.
+ if ((TSFlags & X86II::Op0Mask) == X86II::REP)
+ EmitByte(0xF3, CurByte, OS);
+
+ // Emit the address size opcode prefix as needed.
+ if ((TSFlags & X86II::AdSize) ||
+ (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ EmitByte(0x67, CurByte, OS);
+
+ // Emit the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ EmitByte(0x66, CurByte, OS);
+
+ bool Need0FPrefix = false;
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ case X86II::REP: break; // already handled.
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::TF: // F2 0F 38
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XS: // F3 0F
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
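+  // The x87 escape opcodes (D8-DF) below are emitted as-is; no 0F follows.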
+ case X86II::D8: EmitByte(0xD8, CurByte, OS); break;
+ case X86II::D9: EmitByte(0xD9, CurByte, OS); break;
+ case X86II::DA: EmitByte(0xDA, CurByte, OS); break;
+ case X86II::DB: EmitByte(0xDB, CurByte, OS); break;
+ case X86II::DC: EmitByte(0xDC, CurByte, OS); break;
+ case X86II::DD: EmitByte(0xDD, CurByte, OS); break;
+ case X86II::DE: EmitByte(0xDE, CurByte, OS); break;
+ case X86II::DF: EmitByte(0xDF, CurByte, OS); break;
+ }
+
+ // Handle REX prefix.
+ // FIXME: Can this come before F2 etc to simplify emission?
+ if (is64BitMode()) {
+ if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
+ EmitByte(0x40 | REX, CurByte, OS);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ EmitByte(0x0F, CurByte, OS);
+
+ // FIXME: Pull this up into previous switch if REX can be moved earlier.
+ switch (TSFlags & X86II::Op0Mask) {
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
+ EmitByte(0x38, CurByte, OS);
+ break;
+ case X86II::TA: // 0F 3A
+ EmitByte(0x3A, CurByte, OS);
+ break;
+ case X86II::A6: // 0F A6
+ EmitByte(0xA6, CurByte, OS);
+ break;
+ case X86II::A7: // 0F A7
+ EmitByte(0xA7, CurByte, OS);
+ break;
+ }
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
+ // If this is a two-address instruction, skip one of the register operands.
+ // FIXME: This should be handled during MCInst lowering.
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = false;
+
+  // Does it use the VEX.VVVV field?
+ bool HasVEX_4V = false;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
+ HasVEXPrefix = true;
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ else
+ EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
+ unsigned SrcRegNum = 0;
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
+ default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
+ assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
+ case X86II::Pseudo:
+ assert(0 && "Pseudo instruction shouldn't be emitted");
+ case X86II::RawFrm:
+ EmitByte(BaseOpcode, CurByte, OS);
+ break;
+
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ break;
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
+
+ case X86II::AddRegFrm:
+ EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ break;
+
+ case X86II::MRMDestReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp),
+ GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
+ CurOp += 2;
+ break;
+
+ case X86II::MRMDestMem:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitMemModRMByte(MI, CurOp,
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += AddrOperands + 1;
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp++),
+ (TSFlags & X86II::FormMask)-X86II::MRM0r,
+ CurByte, OS);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRM_C1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC1, CurByte, OS);
+ break;
+ case X86II::MRM_C2:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC2, CurByte, OS);
+ break;
+ case X86II::MRM_C3:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC3, CurByte, OS);
+ break;
+ case X86II::MRM_C4:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC4, CurByte, OS);
+ break;
+ case X86II::MRM_C8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC8, CurByte, OS);
+ break;
+ case X86II::MRM_C9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC9, CurByte, OS);
+ break;
+ case X86II::MRM_E8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xE8, CurByte, OS);
+ break;
+ case X86II::MRM_F0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF0, CurByte, OS);
+ break;
+ case X86II::MRM_F8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF8, CurByte, OS);
+ break;
+ case X86II::MRM_F9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF9, CurByte, OS);
+ break;
+ case X86II::MRM_D0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD0, CurByte, OS);
+ break;
+ case X86II::MRM_D1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD1, CurByte, OS);
+ break;
+ }
+
+ // If there is a remaining operand, it must be a trailing immediate. Emit it
+ // according to the right size for the instruction.
+ if (CurOp != NumOps) {
+    // The last source register of a 4-operand AVX instruction is encoded
+    // in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ const MCOperand &MO = MI.getOperand(CurOp++);
+ bool IsExtReg =
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+ unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
+ RegNum |= GetX86RegNum(MO) << 4;
+ EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
+ Fixups);
+ } else {
+ unsigned FixupKind;
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::ADD64ri32 ||
+ MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
+ CurByte, OS, Fixups);
+ }
+ }
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
+
+#ifndef NDEBUG
+ // FIXME: Verify.
+ if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
+ errs() << "Cannot encode all operands of: ";
+ MI.dump();
+ errs() << '\n';
+ abort();
+ }
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
new file mode 100644
index 000000000000..e38533555534
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -0,0 +1,692 @@
+//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86AsmPrinter.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
+/// operand to an MCSymbol.
+MCSymbol *X86MCInstLower::
+GetSymbolFromOperand(const MachineOperand &MO) const {
+ assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
+
+ SmallString<128> Name;
+
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol());
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ isImplicitlyPrivate = true;
+
+ Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+  // If the target flags on the operand change the name of the symbol, do that
+  // before we return the symbol.
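+  // For example, a dllimport reference gets an "__imp_" prefix and a Darwin
+  // lazy-pointer reference gets a "$non_lazy_ptr" suffix.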
+ switch (MO.getTargetFlags()) {
+ default: break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = 0;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ // These affect the name of the symbol, not any suffix.
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ break;
+
+ case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
+ Ctx),
+ Ctx);
+ break;
+ case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+ case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
+ case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
+ case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+ case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
+ case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
+ case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
+ case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
+ case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
+ Ctx);
+ if (MO.isJTI() && MAI.hasSetDirective()) {
+ // If .set directive is supported, use it to reduce the number of
+ // relocations the assembler will generate for differences between
+ // local labels. This is only safe when the symbols are in the same
+ // section so we are restricting it to jumptable references.
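+      // For example (illustrative label names), rather than relocating the
+      // difference LJTI0_0-"L0$pb" directly, this emits
+      //   .set Ltmp0, LJTI0_0-"L0$pb"
+      // and references Ltmp0 instead.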
+ MCSymbol *Label = Ctx.CreateTempSymbol();
+ AsmPrinter.OutStreamer.EmitAssignment(Label, Expr);
+ Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ }
+ break;
+ }
+
+ if (Expr == 0)
+ Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+
+
+static void lower_subreg32(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg32.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (Reg != 0)
+ MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32));
+}
+
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
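+  // For example, a base register recorded as %eax is rewritten to %rax so the
+  // printed/encoded LEA uses a legal 64-bit addressing mode.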
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
+/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8.
+static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ lower_subreg32(&OutMI, 0);
+}
+/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R
+static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ OutMI.addOperand(OutMI.getOperand(0));
+ OutMI.addOperand(OutMI.getOperand(0));
+}
+
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions
+/// with a short fixed-register form.
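+///
+/// For example, "addl $42, %eax" (ADD32ri) can be re-encoded as ADD32i32,
+/// which uses the one-byte opcode 0x05 and needs no ModRM byte.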
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
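+/// For example, a load of %eax from an absolute 32-bit address can use the
+/// short "moffs" form (opcode 0xA1) instead of the longer ModRM encoding.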
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+ Inst.getOperand(AddrOp).isImm())&&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (Inst.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+
+ if (Absolute &&
+ (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1))
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO,
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+
+ // Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
+ switch (OutMI.getOpcode()) {
+ case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&OutMI, 1);
+ // FALL THROUGH.
+ case X86::LEA64r:
+ case X86::LEA16r:
+ case X86::LEA32r:
+    // LEA carries a segment-register operand, but it must be empty.
+ assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+ "Unexpected # of LEA operands");
+ assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+ "LEA has segment specified!");
+ break;
+ case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
+ case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
+ case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
+ case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
+ case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
+ case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
+ case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
+ case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break;
+ case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break;
+ case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break;
+ case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
+ case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
+ case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+
+ case X86::MOV16r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+ case X86::MOV64r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+ // truncate off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: assert(0 && "Invalid opcode");
+ case X86::TAILJMPr: Opcode = X86::JMP32r; break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
+ }
+
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
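+  // (The "_DB" pseudos are ADDs whose operands are known to have no set bits
+  // in common; in that case x + y == x | y, so an OR encodes the same result.)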
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
+ // The assembler backend wants to see branches in their small form and relax
+ // them to their large form. The JIT can only handle the large form because
+ // it does not do relaxation. For now, translate the large form to the
+ // small one here.
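+  // For example, JMP_4 (E9, rel32) becomes JMP_1 (EB, rel8); the assembler
+  // relaxes it back to the 4-byte form only if the target is out of rel8
+  // range.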
+ case X86::JMP_4: OutMI.setOpcode(X86::JMP_1); break;
+ case X86::JO_4: OutMI.setOpcode(X86::JO_1); break;
+ case X86::JNO_4: OutMI.setOpcode(X86::JNO_1); break;
+ case X86::JB_4: OutMI.setOpcode(X86::JB_1); break;
+ case X86::JAE_4: OutMI.setOpcode(X86::JAE_1); break;
+ case X86::JE_4: OutMI.setOpcode(X86::JE_1); break;
+ case X86::JNE_4: OutMI.setOpcode(X86::JNE_1); break;
+ case X86::JBE_4: OutMI.setOpcode(X86::JBE_1); break;
+ case X86::JA_4: OutMI.setOpcode(X86::JA_1); break;
+ case X86::JS_4: OutMI.setOpcode(X86::JS_1); break;
+ case X86::JNS_4: OutMI.setOpcode(X86::JNS_1); break;
+ case X86::JP_4: OutMI.setOpcode(X86::JP_1); break;
+ case X86::JNP_4: OutMI.setOpcode(X86::JNP_1); break;
+ case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;
+ case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
+ case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
+ case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+ }
+}
+
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+ MCContext &context = OutStreamer.getContext();
+
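+  // In 64-bit mode this produces the canonical general-dynamic TLS sequence,
+  // roughly:
+  //   data16 leaq sym@TLSGD(%rip), %rdi
+  //   data16 data16 rex64 call __tls_get_addr@PLT
+  // The extra prefixes pad the sequence to the fixed size the linker expects
+  // when it relaxes the TLS access.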
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef =
+ MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::REX64_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+
+ MCInst call;
+ if (is64Bits)
+ call.setOpcode(X86::CALL64pcrel32);
+ else
+ call.setOpcode(X86::CALLpcrel32);
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ call.addOperand(MCOperand::CreateExpr(tlsRef));
+ OutStreamer.EmitInstruction(call);
+}
+
+void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ std::string TmpStr;
+ raw_string_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ // Lower these as normal, but add some comments.
+ OutStreamer.AddComment("TAILCALL");
+ break;
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
+ case X86::MOVPC32r: {
+ MCInst TmpInst;
+ // This is a pseudo op for a two instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ TmpInst.setOpcode(X86::CALLpcrel32);
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
+ OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+
+ // popl $reg
+ TmpInst.setOpcode(X86::POP32r);
+ TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr, OutContext);
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.h b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
new file mode 100644
index 000000000000..021007239128
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
@@ -0,0 +1,52 @@
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_MCINSTLOWER_H
+#define X86_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+ class TargetMachine;
+ class X86AsmPrinter;
+
+/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
new file mode 100644
index 000000000000..37110382379e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
@@ -0,0 +1,554 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Object/MachOFormat.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+ void RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ if (Writer->is64Bit())
+ RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ else
+ RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+static bool isFixupKindRIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ // FIXME: Remove these!!!
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case FK_Data_4: return 2;
+ case FK_Data_8: return 3;
+ }
+}
+
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // See <reloc.h>.
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ int64_t Value = 0;
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ Value = Target.getConstant();
+
+ if (IsPCRel) {
+    // Compensate for the relocation offset: Darwin x86_64 relocations only
+    // have the addend, and appear to have attempted to define it to be the
+    // actual expression addend without the PCrel bias. However, instructions
+    // with data following the relocation are not accounted for (see the
+    // comment below regarding SIGNED{1,2,4}), so it isn't exactly that either.
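+    // For example, a rel32 call or jump fixup has Log2Size == 2, so the
+    // compensation added here is 4 bytes.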
+ Value += 1LL << Log2Size;
+ }
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ Type = macho::RIT_X86_64_Unsigned;
+ Index = 0;
+
+ // FIXME: I believe this is broken, I don't think the linker can understand
+ // it. I think it would require a local relocation, but I'm not sure if that
+ // would work either. The official way to get an absolute PCrel relocation
+ // is to use an absolute symbol (which we don't support yet).
+ if (IsPCRel) {
+ IsExtern = 1;
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else if (Target.getSymB()) { // A - B + constant
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+ // Neither symbol can be modified.
+ if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported relocation of modified symbol");
+
+ // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+ // implement most of these correctly.
+ if (IsPCRel)
+ report_fatal_error("unsupported pc-relative relocation of difference");
+
+ // The support for the situation where one or both of the symbols would
+ // require a local relocation is handled just like if the symbols were
+ // external. This is certainly used in the case of debug sections where the
+ // section has only temporary symbols and thus the symbols don't have base
+ // symbols. This is encoded using the section ordinal and non-extern
+ // relocation entries.
+
+    // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+    // single SIGNED relocation); reject it for now, except for the case where
+    // neither symbol has a base, i.e. A_Base and B_Base are both NULL.
+ if (A_Base == B_Base && A_Base)
+ report_fatal_error("unsupported relocation with identical base");
+
+ Value += Writer->getSymbolAddress(&A_SD, Layout) -
+ (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+ Value -= Writer->getSymbolAddress(&B_SD, Layout) -
+ (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+
+ if (A_Base) {
+ Index = A_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Unsigned;
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+
+ if (B_Base) {
+ Index = B_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Subtractor;
+ } else {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
+ // x86_64 almost always uses external relocations, except when there is no
+ // symbol to use as a base address (a local symbol with no preceding
+ // non-local symbol).
+ if (Base) {
+ Index = Base->getIndex();
+ IsExtern = 1;
+
+ // Add the local offset, if needed.
+ if (Base != &SD)
+ Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+ // The index is the section ordinal (1-based).
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ Value += Writer->getSymbolAddress(&SD, Layout);
+
+ if (IsPCRel)
+ Value -= FixupAddress + (1 << Log2Size);
+ } else if (Symbol->isVariable()) {
+ const MCExpr *Value = Symbol->getVariableValue();
+ int64_t Res;
+ bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ Writer->getSectionAddressMap());
+ if (isAbs) {
+ FixedValue = Res;
+ return;
+ } else {
+ report_fatal_error("unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ }
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ }
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+ if (IsPCRel) {
+ if (IsRIPRel) {
+ if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+ // rewrite the movq to an leaq at link time if the symbol ends up in
+ // the same linkage unit.
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+ Type = macho::RIT_X86_64_GOTLoad;
+ else
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ Type = macho::RIT_X86_64_TLV;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ report_fatal_error("unsupported symbol modifier in relocation");
+ } else {
+ Type = macho::RIT_X86_64_Signed;
+
+ // The Darwin x86_64 relocation format has a problem where it cannot
+ // encode an address (L<foo> + <constant>) which is outside the atom
+ // containing L<foo>. Generally, this shouldn't occur but it does
+ // happen when we have a RIPrel instruction with data following the
+ // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+ // adjustment Darwin x86_64 uses, the offset is still negative and the
+ // linker has no way to recognize this.
+ //
+ // To work around this, Darwin uses several special relocation types
+ // to indicate the offsets. However, the specification or
+ // implementation of these seems to also be incomplete; they should
+ // adjust the addend as well based on the actual encoded instruction
+ // (the additional bias), but instead appear to just look at the final
+ // offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
+ case 1: Type = macho::RIT_X86_64_Signed1; break;
+ case 2: Type = macho::RIT_X86_64_Signed2; break;
+ case 4: Type = macho::RIT_X86_64_Signed4; break;
+ }
+ }
+ } else {
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in branch "
+ "relocation");
+
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else {
+ if (Modifier == MCSymbolRefExpr::VK_GOT) {
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+ // case all we do is set the PCrel bit in the relocation entry; this is
+ // used with exception handling, for example. The source is required to
+ // include any necessary offset directly.
+ Type = macho::RIT_X86_64_GOT;
+ IsPCRel = 1;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ } else if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in relocation");
+ else
+ Type = macho::RIT_X86_64_Unsigned;
+ }
+ }
+
+ // x86_64 always writes custom values into the fixups.
+ FixedValue = Value;
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ //
+ // Note that there is no longer any semantic difference between these two
+    // relocation types from the linker's point of view; this is done solely
+    // for pedantic compatibility with 'as'.
+ Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+ (unsigned)macho::RIT_Generic_LocalDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+ !is64Bit() &&
+ "Should only be called with a 32-bit TLVP relocation!");
+
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = 0;
+
+ // Get the symbol data.
+ MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ unsigned Index = SD_A->getIndex();
+
+ // We're only going to have a second symbol in pic mode and it'll be a
+ // subtraction from the picbase. For 32-bit pic the addend is the difference
+ // between the picbase and the next address. For 32-bit static the addend is
+ // zero.
+ if (Target.getSymB()) {
+ // If this is a subtraction then we're pcrel.
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ IsPCRel = 1;
+ FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
+ Target.getConstant());
+ FixedValue += 1ULL << Log2Size;
+ } else {
+ FixedValue = 0;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (macho::RIT_Generic_TLV << 28)); // Type
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // If this is a 32-bit TLVP reloc it's handled a bit differently.
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB())
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // If this is an internal relocation with an offset, it also needs a scattered
+ // relocation entry.
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ Type = macho::RIT_Vanilla;
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ Type = macho::RIT_Vanilla;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 000000000000..06043ecd3f30
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,135 @@
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private X86 target-specific information for each MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+  /// ForceFramePointer - True if the function is required to use a frame
+  /// pointer for reasons other than containing dynamic allocation or having
+  /// FP elimination turned off. For example, the Cygwin main function contains
+  /// stack-pointer re-alignment code, which requires a frame pointer.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+  /// BytesToPopOnReturn - Number of bytes the function pops on return (in
+  /// addition to the space used by the return address).
+  /// Used on Windows for stdcall & fastcall name decoration.
+ unsigned BytesToPopOnReturn;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+  /// TailCallReturnAddrDelta - The number of bytes by which the return address
+  /// stack slot is moved as a result of tail call optimization.
+ int TailCallReturnAddrDelta;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+  /// GlobalBaseReg - Keeps track of the virtual register initialized for use
+  /// as the global base register. This is used in some PIC relocation models.
+ unsigned GlobalBaseReg;
+
+  /// ReserveFP - Whether the function should reserve the frame pointer during
+  /// register allocation, even if no frame pointer ends up being used.
+ bool ReserveFP;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// RegSaveFrameIndex - X86-64 vararg func register save area.
+ int RegSaveFrameIndex;
+ /// VarArgsGPOffset - X86-64 vararg func int reg offset.
+ unsigned VarArgsGPOffset;
+ /// VarArgsFPOffset - X86-64 vararg func fp reg offset.
+ unsigned VarArgsFPOffset;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+                             GlobalBaseReg(0),
+                             ReserveFP(false),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0) {}
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0),
+ ReserveFP(false),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
+ void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ bool getReserveFP() const { return ReserveFP; }
+ void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+
+ int getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; }
+
+ unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; }
+ void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; }
+
+ unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
+ void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 000000000000..f2faf59367a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,973 @@
+//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
+using namespace llvm;
+
+cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(), TM(tm), TII(tii) {
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ IsWin64 = Subtarget->isTargetWin64();
+
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
+
+static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) {
+ if (!Subtarget->is64Bit()) {
+ if (Subtarget->isTargetDarwin()) {
+ if (isEH)
+ return DWARFFlavour::X86_32_DarwinEH;
+ else
+ return DWARFFlavour::X86_32_Generic;
+ } else if (Subtarget->isTargetCygMing()) {
+      // Unsupported for now; just a quick fallback.
+ return DWARFFlavour::X86_32_Generic;
+ } else {
+ return DWARFFlavour::X86_32_Generic;
+ }
+ }
+ return DWARFFlavour::X86_64;
+}
+
+/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
+/// specific numbering, used in debug info and exception tables.
+int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
+
+ return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
+}
+
+/// getLLVMRegNum - This function maps DWARF register numbers to LLVM registers.
+int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
+
+ return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
+}
+
+/// getCompactUnwindRegNum - This function maps the register to the number for
+/// compact unwind encoding. Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+ switch (getLLVMRegNum(RegNum, isEH)) {
+ case X86::EBX: case X86::RBX: return 1;
+ case X86::ECX: case X86::R12: return 2;
+ case X86::EDX: case X86::R13: return 3;
+ case X86::EDI: case X86::R14: return 4;
+ case X86::ESI: case X86::R15: return 5;
+ case X86::EBP: case X86::RBP: return 6;
+ }
+
+ return -1;
+}
+
+int
+X86RegisterInfo::getSEHRegNum(unsigned i) const {
+ int reg = getX86RegNum(i);
+ switch (i) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ reg += 8;
+ }
+ return reg;
+}
+
+/// getX86RegNum - This function maps LLVM register identifiers to their X86
+/// specific numbering, which is used in various places encoding instructions.
+unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
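+ // Only the low 3 bits of the hardware encoding are returned; the x86-64
+ // extended registers share these values with their legacy counterparts, and
+ // the extra REX bit is emitted separately.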
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
+ return 7;
+
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
+
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
+ default:
+ assert(isVirtualRegister(RegNo) && "Unknown physical register!");
+ llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
+ return 0;
+ }
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case X86::sub_8bit:
+ if (B == &X86::GR8RegClass) {
+ if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::GR8_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_NOREXRegClass;
+ else if (A == &X86::GR16_ABCDRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case X86::sub_8bit_hi:
+ if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
+ switch (A->getSize()) {
+ case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
+ case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
+ case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
+ default: return 0;
+ }
+ break;
+ case X86::sub_16bit:
+ if (B == &X86::GR16RegClass) {
+ if (A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR16_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ } else if (B == &X86::GR16_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ }
+ break;
+ case X86::sub_32bit:
+ if (B == &X86::GR32RegClass) {
+ if (A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR32_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+ return &X86::GR64_NOSPRegClass;
+ if (A->getSize() == 8)
+ return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
+ } else if (B == &X86::GR32_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ }
+ break;
+ case X86::sub_ss:
+ if (B == &X86::FR32RegClass)
+ return A;
+ break;
+ case X86::sub_sd:
+ if (B == &X86::FR64RegClass)
+ return A;
+ break;
+ case X86::sub_xmm:
+ if (B == &X86::VR128RegClass)
+ return A;
+ break;
+ }
+ return 0;
+}
+
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
+ case 0: // Normal GPRs.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64RegClass;
+ return &X86::GR32RegClass;
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_NOSPRegClass;
+ return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
+ }
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &X86::CCRRegClass) {
+ if (Is64Bit)
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+ }
+ return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 12 - FPDiff;
+ case X86::VR128RegClassID:
+ return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
+const unsigned *
+X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+ bool ghcCall = false;
+
+ if (MF) {
+ callsEHReturn = MF->getMMI().callsEHReturn();
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
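+ // Functions using the GHC calling convention preserve no registers across
+ // calls, so the GHC list below contains only the null terminator.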
+ static const unsigned GhcCalleeSavedRegs[] = {
+ 0
+ };
+
+ static const unsigned CalleeSavedRegs32Bit[] = {
+ X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs32EHRet[] = {
+ X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64EHRet[] = {
+ X86::RAX, X86::RDX, X86::RBX, X86::R12,
+ X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegsWin64[] = {
+ X86::RBX, X86::RBP, X86::RDI, X86::RSI,
+ X86::R12, X86::R13, X86::R14, X86::R15,
+ X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
+ X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
+ X86::XMM14, X86::XMM15, 0
+ };
+
+ if (ghcCall) {
+ return GhcCalleeSavedRegs;
+ } else if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegsWin64;
+ else
+ return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
+ } else {
+ return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ }
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Set the stack-pointer register and its aliases as reserved.
+ Reserved.set(X86::RSP);
+ Reserved.set(X86::ESP);
+ Reserved.set(X86::SP);
+ Reserved.set(X86::SPL);
+
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ Reserved.set(X86::EIP);
+ Reserved.set(X86::IP);
+
+ // Set the frame-pointer register and its aliases as reserved if needed.
+ if (TFI->hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ Reserved.set(X86::EBP);
+ Reserved.set(X86::BP);
+ Reserved.set(X86::BPL);
+ }
+
+ // Mark the segment registers as reserved.
+ Reserved.set(X86::CS);
+ Reserved.set(X86::SS);
+ Reserved.set(X86::DS);
+ Reserved.set(X86::ES);
+ Reserved.set(X86::FS);
+ Reserved.set(X86::GS);
+
+ // Reserve the registers that only exist in 64-bit mode.
+ if (!Is64Bit) {
+ // These 8-bit registers are part of the x86-64 extension even though their
+ // super-registers are the old 32-bit registers.
+ Reserved.set(X86::SIL);
+ Reserved.set(X86::DIL);
+ Reserved.set(X86::BPL);
+ Reserved.set(X86::SPL);
+
+ for (unsigned n = 0; n != 8; ++n) {
+ // R8, R9, ...
+ const unsigned GPR64[] = {
+ X86::R8, X86::R9, X86::R10, X86::R11,
+ X86::R12, X86::R13, X86::R14, X86::R15
+ };
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
+ Reserved.set(Reg);
+
+ // XMM8, XMM9, ...
+ assert(X86::XMM15 == X86::XMM8+7);
+ for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+ }
+ }
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return (RealignStack &&
+ !MFI->hasVarSizedObjects());
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas.
+ // FIXME: It's more complicated than this...
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported");
+
+ // If we've requested that we force-align the stack, do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
+ FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
+ return true;
+ }
+ return false;
+}
+
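+// Pick the SUB (or ADD, below) opcode with the smallest immediate encoding
+// that can hold Imm: the ri8 forms take a sign-extended 8-bit immediate,
+// otherwise the 32-bit immediate forms are used.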
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool reserveCallFrame = TFI->hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reserveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackup instruction into 'add ESP, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
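+ // For example, with a 16-byte stack alignment an Amount of 20 rounds up to 32.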
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+ // sure we restore the stack pointer immediately after the call; there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->getDesc().isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
+void
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned BasePtr;
+
+ unsigned Opc = MI.getOpcode();
+ bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
+ if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else if (AfterFPPop)
+ BasePtr = StackPtr;
+ else
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+
+ // This must be part of a four operand memory reference. Replace the
+ // FrameIndex with the base register chosen above.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Now add the frame object offset to the offset from EBP.
+ int FIOffset;
+ if (AfterFPPop) {
+ // Tail call jmp happens after FP is popped.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
+ } else
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+
+ if (MI.getOperand(i+3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
+ MI.getOperand(i+3).setOffset(Offset);
+ }
+}
+
+unsigned X86RegisterInfo::getRARegister() const {
+ return Is64Bit ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
+}
+
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+
+namespace {
+ struct MSAH : public MachineFunctionPass {
+ static char ID;
+ MSAH() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned StackAlignment = TFI->getStackAlignment();
+
+ // Be over-conservative: scan over all vreg defs and find whether vector
+ // registers are used. If yes, there is a possibility that a vector register
+ // will be spilled and thus require dynamic stack realignment.
+ for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
+ FuncInfo->setReserveFP(true);
+ return true;
+ }
+ }
+ // Nothing to do
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Check";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MSAH::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 000000000000..a12eb1297f7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,157 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+ /// Is64Bit - Is the target 64-bit?
+ ///
+ bool Is64Bit;
+
+ /// IsWin64 - Is the target one of the Win64 flavours?
+ ///
+ bool IsWin64;
+
+ /// SlotSize - Stack slot size in bytes.
+ ///
+ unsigned SlotSize;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ ///
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ ///
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// getX86RegNum - Returns the native X86 register number for the given LLVM
+ /// register identifier.
+ static unsigned getX86RegNum(unsigned RegNo);
+
+ /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
+ /// (created by TableGen) for target dependencies.
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const;
+
+ /// getCompactUnwindRegNum - This function maps the register to the number for
+ /// compact unwind encoding. Return -1 if the register isn't valid.
+ int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
+ /// Code Generation virtual methods...
+ ///
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+ /// values.
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns NULL if it is possible to copy
+ /// between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+ /// indicating if a register is a special register that has particular uses and
+ /// should be considered unavailable at all times, e.g. SP, RA. This is used by
+ /// the register scavenger to determine which registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getStackRegister() const { return StackPtr; }
+ // FIXME: Move to FrameInfo
+ unsigned getSlotSize() const { return SlotSize; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 000000000000..203722a66162
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,467 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+ // Subregister indices.
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+
+ def sub_ss : SubRegIndex;
+ def sub_sd : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+
+
+ // In the register alias definitions below, we define which registers alias
+ // which others. We only specify which registers the small registers alias,
+ // because the register file generator is smart enough to figure out that
+ // AL aliases AX if we tell it that AX aliases AL (for example).
+
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64, the
+ // second is for EH on X86-32/Darwin, and the third is the 'generic' one
+ // (X86-32/Linux and debug information on X86-32/Darwin).
+
+ // 8-bit registers
+ // Low registers
+ def AL : Register<"al">;
+ def DL : Register<"dl">;
+ def CL : Register<"cl">;
+ def BL : Register<"bl">;
+
+ // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
+ def SIL : Register<"sil">;
+ def DIL : Register<"dil">;
+ def BPL : Register<"bpl">;
+ def SPL : Register<"spl">;
+ def R8B : Register<"r8b">;
+ def R9B : Register<"r9b">;
+ def R10B : Register<"r10b">;
+ def R11B : Register<"r11b">;
+ def R12B : Register<"r12b">;
+ def R13B : Register<"r13b">;
+ def R14B : Register<"r14b">;
+ def R15B : Register<"r15b">;
+ }
+
+ // High registers. On x86-64, these cannot be used in any instruction
+ // with a REX prefix.
+ def AH : Register<"ah">;
+ def DH : Register<"dh">;
+ def CH : Register<"ch">;
+ def BH : Register<"bh">;
+
+ // 16-bit registers
+ let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>;
+ }
+ let SubRegIndices = [sub_8bit] in {
+ def SI : RegisterWithSubRegs<"si", [SIL]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>;
+ }
+ def IP : Register<"ip">;
+
+ // X86-64 only, requires REX.
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>;
+ }
+ // 32-bit registers
+ let SubRegIndices = [sub_16bit] in {
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>;
+
+ // X86-64 only, requires REX
+ let CostPerUse = 1 in {
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>;
+ }}
+
+ // 64-bit registers, X86-64 only
+ let SubRegIndices = [sub_32bit] in {
+ def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
+ def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
+ def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
+ def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>;
+ def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>;
+ def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>;
+ def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
+ def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+ // These also require REX.
+ let CostPerUse = 1 in {
+ def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
+ def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
+ def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
+ def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>;
+ def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>;
+ def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>;
+ def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
+ def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
+ def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+ }}
+
+ // MMX Registers. These are actually aliased to ST0 .. ST7
+ def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
+ def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>;
+ def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>;
+ def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>;
+ def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>;
+ def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
+ def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
+ def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
+
+ // Pseudo Floating Point registers
+ def FP0 : Register<"fp0">;
+ def FP1 : Register<"fp1">;
+ def FP2 : Register<"fp2">;
+ def FP3 : Register<"fp3">;
+ def FP4 : Register<"fp4">;
+ def FP5 : Register<"fp5">;
+ def FP6 : Register<"fp6">;
+
+ // XMM Registers, used by the various SSE instruction set extensions.
+ // The sub_ss and sub_sd subregs are the same registers with another regclass.
+ let CompositeIndices = [(sub_ss), (sub_sd)] in {
+ def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
+ def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
+ def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
+ def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>;
+ def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>;
+ def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>;
+ def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>;
+ def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
+
+ // X86-64 only
+ let CostPerUse = 1 in {
+ def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
+ def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
+ def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
+ def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>;
+ def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>;
+ def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
+ def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
+ def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ }}
+
+ // YMM Registers, used by AVX instructions
+ let SubRegIndices = [sub_xmm] in {
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
+ }
+
+ class STRegister<string Name, list<Register> A> : Register<Name> {
+ let Aliases = A;
+ }
+
+ // Floating point stack registers. These don't map one-to-one to the FP
+ // pseudo registers, but we still mark them as aliasing FP registers. That
+ // way both kinds can be live without exceeding the stack depth. ST registers
+ // are only live around inline assembly.
+ def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
+ def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
+ def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
+ def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
+ def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
+ def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
+ def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
+ def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
+
+ // Status flags register
+ def EFLAGS : Register<"flags">;
+
+ // Segment registers
+ def CS : Register<"cs">;
+ def DS : Register<"ds">;
+ def SS : Register<"ss">;
+ def ES : Register<"es">;
+ def FS : Register<"fs">;
+ def GS : Register<"gs">;
+
+ // Debug registers
+ def DR0 : Register<"dr0">;
+ def DR1 : Register<"dr1">;
+ def DR2 : Register<"dr2">;
+ def DR3 : Register<"dr3">;
+ def DR4 : Register<"dr4">;
+ def DR5 : Register<"dr5">;
+ def DR6 : Register<"dr6">;
+ def DR7 : Register<"dr7">;
+
+ // Control registers
+ def CR0 : Register<"cr0">;
+ def CR1 : Register<"cr1">;
+ def CR2 : Register<"cr2">;
+ def CR3 : Register<"cr3">;
+ def CR4 : Register<"cr4">;
+ def CR5 : Register<"cr5">;
+ def CR6 : Register<"cr6">;
+ def CR7 : Register<"cr7">;
+ def CR8 : Register<"cr8">;
+ def CR9 : Register<"cr9">;
+ def CR10 : Register<"cr10">;
+ def CR11 : Register<"cr11">;
+ def CR12 : Register<"cr12">;
+ def CR13 : Register<"cr13">;
+ def CR14 : Register<"cr14">;
+ def CR15 : Register<"cr15">;
+
+ // Pseudo index registers
+ def EIZ : Register<"eiz">;
+ def RIZ : Register<"riz">;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
+// R8B, ... R15B.
+// Allocate R12 and R13 last, as these require an extra byte when
+// encoded in x86_64 instructions.
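+// (As a base register, R12 encodes like RSP and needs a SIB byte, while R13
+// encodes like RBP and needs an explicit displacement byte.)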
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
+}
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
+// RIP isn't really a register and it can't be used anywhere except in an
+// address, but it doesn't cause trouble.
+def GR64 : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32 sub_32bit)];
+}
+
+// Segment registers for use by MOV instructions (and others) that have a
+// segment register as one operand. Always contain a 16-bit segment
+// descriptor.
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
+
+// Debug registers.
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
+
+// Control registers.
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
+
+// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
+// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
+// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
+// and GR64_ABCD are classes for registers that support 8-bit h-register
+// operations.
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
+}
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
+}
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit),
+ (GR32_ABCD sub_32bit)];
+}
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+ R8, R9, R11, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_TC sub_32bit)];
+}
+
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+ R8, R9, R11)>;
+
+// GR8_NOREX - GR8 registers which do not require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+ let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX - GR16 registers which do not require a REX prefix.
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
+}
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
+}
+
+// GR32_NOSP - GR32 registers except ESP.
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR64_NOSP - GR64 registers except RSP (and RIP).
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_NOSP sub_32bit)];
+}
+
+// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
+// ESP.
+def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
+ (and GR32_NOREX, GR32_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
+}
+
+// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
+ (and GR64_NOREX, GR64_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX_NOSP sub_32bit)];
+}
+
+// A class to support the 'A' assembler constraint: EAX then EDX.
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
+}
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
+
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
+ let isAllocatable = 0;
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add FR32)> {
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+}
+
+def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
+ (sequence "YMM%u", 0, 15)> {
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Relocations.h b/contrib/llvm/lib/Target/X86/X86Relocations.h
new file mode 100644
index 000000000000..990962dc4173
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,52 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit. The relocations are handled by the JIT
+ /// or object-code emitters; this is transparent to the x86 code emitter,
+ /// but the JIT and object-code emitters treat them differently.
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..02754f9ae503
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -0,0 +1,259 @@
+//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-selectiondag-info"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If the destination is in a segment-relative address space, use the
+  // default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+  // If not DWORD aligned or the size is more than the threshold, call the
+  // library. The libc version is likely to be faster for these cases. It can
+  // use the address value and run-time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+    if (const char *bzeroEntry = (V && V->isNullValue()) ?
+                                     Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
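+    // Compute the number of element-sized stores and the number of leftover
+    // tail bytes, which are written with a smaller memset further below.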
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
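+  // The rep;stos idiom expects the element count in RCX/ECX and the
+  // destination pointer in RDI/EDI.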
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
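+  // Note: TwoRepStos is never set to true above, so only the BytesLeft path
+  // below is taken to write any remaining tail bytes.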
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ }
+
+ // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+  // If not DWORD aligned, it is more efficient to call the library. However,
+  // if calling the library is not allowed (AlwaysInline), then soldier on, as
+  // the code generated here is better than the long load-store sequence we
+  // would otherwise get.
+ if (!AlwaysInline && (Align & 3) != 0)
+ return SDValue();
+
+  // If either the source or destination is in a segment-relative address
+  // space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+ MVT AVT;
+ if (Align & 1)
+ AVT = MVT::i8;
+ else if (Align & 2)
+ AVT = MVT::i16;
+ else if (Align & 4)
+ // DWORD aligned
+ AVT = MVT::i32;
+ else
+ // QWORD aligned
+ AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
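+  // The rep;movs idiom expects the element count in RCX/ECX, the destination
+  // pointer in RDI/EDI and the source pointer in RSI/ESI.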
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
new file mode 100644
index 000000000000..d1d66fe76e94
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -0,0 +1,56 @@
+//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SELECTIONDAGINFO_H
+#define X86SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
+class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
+public:
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
+ ~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 000000000000..5e6c659e5393
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,323 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86InstrInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
+
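+// Pull in the tablegen-generated parts of X86GenSubtargetInfo (the target
+// description tables and the subtarget constructor).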
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
+}
+
+/// ClassifyGlobalReference - Classify a global variable reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+  // DLLImport only exists on Windows; it is implemented as a load from a
+  // DLLIMPORT stub.
+ if (GV->hasDLLImportLinkage())
+ return X86II::MO_DLLIMPORT;
+
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+  // load from a stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
+ if (isTargetDarwin()) {
+      // If symbol visibility is hidden, the extra load is not needed if the
+      // target is x86-64 or the symbol is definitely defined in the current
+      // translation unit.
+ if (GV->hasDefaultVisibility() &&
+ (isDecl || GV->isWeakForLinker()))
+ return X86II::MO_GOTPCREL;
+ } else if (!isTargetWin64()) {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
+      // An extra load is needed for all externally visible symbols.
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
+ }
+
+ return X86II::MO_NO_FLAG;
+ }
+
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+    // An extra load is needed for all externally visible symbols.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
+ }
+
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
+ }
+
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
+
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_NO_FLAG;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY;
+
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
+
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
+}
+
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists
+/// on the current subtarget and it is considered preferable to memset with
+/// zero passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
+ return "__bzero";
+
+ return 0;
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+ if (In64BitMode)
+ return false;
+ return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+ // For x86 out-of-order targets, back-schedule address computations so
+ // that loads and stores aren't blocked.
+ // This value was chosen arbitrarily.
+ return 200;
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
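+  // CPUID leaf 0 fills text.c with the 12-byte CPU vendor ID string, which is
+  // compared against "GenuineIntel" / "AuthenticAMD" below.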
+ if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ return;
+
+ X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
+ if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+ if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX);
+ if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1);
+ if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2);
+ if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3);
+ if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);
+ if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);
+ if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);
+ // FIXME: AVX codegen support is not ready.
+ //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX);
+
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+ HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL);
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3);
+ HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT);
+ HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES);
+
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ X86_MC::DetectFamilyModel(EAX, Family, Model);
+ if (IsAMD || (Family == 6 && Model >= 13)) {
+ IsBTMemSlow = true;
+ ToggleFeature(X86::FeatureSlowBTMem);
+ }
+ // If it's Nehalem, unaligned memory access is fast.
+ if (Family == 15 && Model == 26) {
+ IsUAMemFast = true;
+ ToggleFeature(X86::FeatureFastUAMem);
+ }
+
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if (IsAMD && ((ECX >> 6) & 0x1)) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if (IsAMD && ((ECX >> 16) & 0x1)) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
+ }
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , PICStyle(PICStyles::None)
+ , X86SSELevel(NoMMXSSE)
+ , X863DNowLevel(NoThreeDNow)
+ , HasCMov(false)
+ , HasX86_64(false)
+ , HasPOPCNT(false)
+ , HasSSE4A(false)
+ , HasAVX(false)
+ , HasAES(false)
+ , HasCLMUL(false)
+ , HasFMA3(false)
+ , HasFMA4(false)
+ , IsBTMemSlow(false)
+ , IsUAMemFast(false)
+ , HasVectorUAMem(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MaxInlineSizeThreshold(128)
+ , TargetTriple(TT)
+ , In64BitMode(is64Bit) {
+ // Determine default and user specified characteristics
+ if (!FS.empty() || !CPU.empty()) {
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ // Make sure 64-bit features are available in 64-bit mode. (But make sure
+ // SSE2 can be turned off explicitly.)
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
+ }
+
+ // If feature string is not empty, parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+ } else {
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+
+ // Make sure 64-bit features are available in 64-bit mode.
+ if (In64BitMode) {
+ HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+ HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+
+ if (!HasAVX && X86SSELevel < SSE2) {
+ X86SSELevel = SSE2;
+ ToggleFeature(X86::FeatureSSE1);
+ ToggleFeature(X86::FeatureSSE2);
+ }
+ }
+ }
+
+  // It's important to keep the MCSubtargetInfo feature bits in sync with the
+  // target data structure, which is shared with the MC code emitter, etc.
+ if (In64BitMode)
+ ToggleFeature(X86::Mode64Bit);
+
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
+
+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n");
+ assert((!In64BitMode || HasX86_64) &&
+ "64-bit code requested on a subtarget that doesn't support it!");
+
+ // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // 32 and 64 bit) and for all 64-bit targets.
+ if (StackAlignOverride)
+ stackAlignment = StackAlignOverride;
+ else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+ isTargetSolaris() || In64BitMode)
+ stackAlignment = 16;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 000000000000..6d22027b7aa8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,259 @@
+//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CallingConv.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+/// PICStyles - The X86 backend supports a number of different styles of PIC.
+///
+namespace PICStyles {
+enum Style {
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
+};
+}
+
+class X86Subtarget : public X86GenSubtargetInfo {
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ /// PICStyle - Which PIC style to use
+ ///
+ PICStyles::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
+ /// none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ ///
+ X863DNowEnum X863DNowLevel;
+
+ /// HasCMov - True if this processor has conditional move instructions
+ /// (generally pentium pro+).
+ bool HasCMov;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ ///
+ bool HasX86_64;
+
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
+ /// HasSSE4A - True if the processor supports SSE4A instructions.
+ bool HasSSE4A;
+
+ /// HasAVX - Target has AVX instructions
+ bool HasAVX;
+
+ /// HasAES - Target has AES instructions
+ bool HasAES;
+
+ /// HasCLMUL - Target has carry-less multiplication
+ bool HasCLMUL;
+
+ /// HasFMA3 - Target has 3-operand fused multiply-add
+ bool HasFMA3;
+
+ /// HasFMA4 - Target has 4-operand fused multiply-add
+ bool HasFMA4;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
+ /// IsUAMemFast - True if unaligned memory access is fast.
+ bool IsUAMemFast;
+
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+ /// operands. This may require setting a feature bit in the processor.
+ bool HasVectorUAMem;
+
+  /// stackAlignment - The minimum alignment known to hold for the stack frame
+  /// on entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ ///
+ unsigned MaxInlineSizeThreshold;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+private:
+ /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+ bool In64BitMode;
+
+public:
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit);
+
+  /// getStackAlignment - Returns the minimum alignment known to hold for the
+  /// stack frame on entry to the function and which must be maintained by
+  /// every function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ bool is64Bit() const { return In64BitMode; }
+
+ PICStyles::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+ bool hasCMov() const { return HasCMov; }
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasSSE4A() const { return HasSSE4A; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
+ bool hasAVX() const { return HasAVX; }
+ bool hasXMM() const { return hasSSE1() || hasAVX(); }
+ bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
+ bool hasAES() const { return HasAES; }
+ bool hasCLMUL() const { return HasCLMUL; }
+ bool hasFMA3() const { return HasFMA3; }
+ bool hasFMA4() const { return HasFMA4; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool hasVectorUAMem() const { return HasVectorUAMem; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
+
+ // ELF is a reasonably sane default and the only other X86 targets we
+ // support are Darwin and Windows. Just use "not those".
+ bool isTargetELF() const {
+ return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
+ }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+ bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
+ bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
+ bool isTargetCygMing() const {
+ return isTargetMingw() || isTargetCygwin();
+ }
+
+ /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
+ bool isTargetCOFF() const {
+ return isTargetMingw() || isTargetCygwin() || isTargetWindows();
+ }
+
+ bool isTargetWin64() const {
+ return In64BitMode && (isTargetMingw() || isTargetWindows());
+ }
+
+ bool isTargetEnvMacho() const {
+ return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
+ }
+
+ bool isTargetWin32() const {
+ return !In64BitMode && (isTargetMingw() || isTargetWindows());
+ }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+
+ bool isPICStyleStubPIC() const {
+ return PICStyle == PICStyles::StubPIC;
+ }
+
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+ PICStyle == PICStyles::StubPIC; }
+
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM)const;
+
+ /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyBlockAddressReference() const;
+
+ /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+ /// to immediate address.
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable to memset with
+  /// zero passed as the second argument. Otherwise it returns null.
+ const char *getBZeroEntry() const;
+
+ /// getSpecialAddressLatency - For targets where it is beneficial to
+ /// backschedule instructions that compute addresses, return a value
+ /// indicating the number of scheduling cycles of backscheduling that
+ /// should be attempted.
+ unsigned getSpecialAddressLatency() const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 000000000000..9cab0e089098
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,245 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows())
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeX86Target() {
+ // Register the target.
+ RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
+ RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
+ createX86MCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
+ createX86MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+ createMCStreamer);
+}
+
+
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, CPU, FS, false),
+ DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, CPU, FS, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
+}
+
+/// X86TargetMachine ctor - Create an X86 target.
+///
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ FrameLowering(*this, Subtarget),
+ ELFWriterInfo(is64Bit, true) {
+ DefRelocModel = getRelocationModel();
+
+ // If no relocation model was picked, default as appropriate for the target.
+ if (getRelocationModel() == Reloc::Default) {
+    // Darwin defaults to PIC in 64-bit mode and dynamic-no-pic in 32-bit mode.
+    // Win64 requires rip-relative addressing, so we force it to PIC. Otherwise
+    // we use the static relocation model by default.
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
+ } else if (Subtarget.isTargetWin64())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+
+ assert(getRelocationModel() != Reloc::Default &&
+ "Relocation mode not picked");
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+  // compile in -static mode; on x86-64 we use PIC.
+ if (getRelocationModel() == Reloc::DynamicNoPIC) {
+ if (is64Bit)
+ setRelocationModel(Reloc::PIC_);
+ else if (!Subtarget.isTargetDarwin())
+ setRelocationModel(Reloc::Static);
+ }
+
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (getRelocationModel() == Reloc::Static &&
+ Subtarget.isTargetDarwin() &&
+ is64Bit)
+ setRelocationModel(Reloc::PIC_);
+
+ // Determine the PICStyle based on the target selected.
+ if (getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ } else if (Subtarget.isTargetCygMing()) {
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetDarwin()) {
+ if (getRelocationModel() == Reloc::PIC_)
+ Subtarget.setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(getRelocationModel() == Reloc::DynamicNoPIC);
+ Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (Subtarget.isTargetELF()) {
+ Subtarget.setPICStyle(PICStyles::GOT);
+ }
+
+ // Finally, if we have "none" as our PIC style, force to static mode.
+ if (Subtarget.getPICStyle() == PICStyles::None)
+ setRelocationModel(Reloc::Static);
+
+  // Default to the hard-float ABI.
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Hard;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createX86ISelDag(*this, OptLevel));
+
+ // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+ if (!Subtarget.is64Bit())
+ PM.add(createGlobalBaseRegPass());
+
+ return false;
+}
+
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86MaxStackAlignmentHeuristicPass());
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
+bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ // On Darwin, do not override 64-bit setting made in X86TargetMachine().
+ if (DefRelocModel == Reloc::Default &&
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
+ setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyles::None);
+ }
+
+
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
+
+void X86TargetMachine::setCodeModelForStatic() {
+
+ if (getCodeModel() != CodeModel::Default) return;
+
+ // For static codegen, if we're not already set, use Small codegen.
+ setCodeModel(CodeModel::Small);
+}
+
+
+void X86TargetMachine::setCodeModelForJIT() {
+
+ if (getCodeModel() != CodeModel::Default) return;
+
+ // 64-bit JIT places everything in the same buffer except external functions.
+ if (Subtarget.is64Bit())
+ setCodeModel(CodeModel::Large);
+ else
+ setCodeModel(CodeModel::Small);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 000000000000..885334a365fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,135 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "X86.h"
+#include "X86ELFWriterInfo.h"
+#include "X86InstrInfo.h"
+#include "X86ISelLowering.h"
+#include "X86FrameLowering.h"
+#include "X86JITInfo.h"
+#include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ X86ELFWriterInfo ELFWriterInfo;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+private:
+ // We have specific defaults for X86.
+ virtual void setCodeModelForJIT();
+ virtual void setCodeModelForStatic();
+
+public:
+ X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
+ virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ llvm_unreachable("getSelectionDAGInfo not implemented");
+ }
+ virtual const X86RegisterInfo *getRegisterInfo() const {
+ return &getInstrInfo()->getRegisterInfo();
+ }
+ virtual const X86ELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+
+ // Set up the pass pipeline.
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_32TargetMachine(const Target &T, const std::string &M,
+ const std::string &CPU, const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 000000000000..1231798297ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,121 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+const MCExpr *X8664_MachoTargetObjectFile::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+ }
+
+ return TargetLoweringObjectFileMachO::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small || Model == CodeModel::Medium)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | (Model == CodeModel::Small ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
+ if (CFI)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+
+ return DW_EH_PE_udata4;
+}
+
+unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 000000000000..e21b5bffd059
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,60 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+ class X86TargetMachine;
+
+ /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// x86-64.
+ class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+ virtual const MCExpr *
+ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
+ };
+
+ class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ const X86TargetMachine &TM;
+ public:
+ X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
+ :TM(tm) { }
+ virtual unsigned getPersonalityEncoding() const;
+ virtual unsigned getLSDAEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
+ virtual unsigned getTTypeEncoding() const;
+ };
+
+ class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ const X86TargetMachine &TM;
+ public:
+ X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
+ :TM(tm) { }
+ virtual unsigned getPersonalityEncoding() const;
+ virtual unsigned getLSDAEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
+ virtual unsigned getTTypeEncoding() const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..c3b3dc9e647d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMXCoreDesc
+ XCoreMCTargetDesc.cpp
+ XCoreMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..de61543bfe9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
new file mode 100644
index 000000000000..42ab1b31d57a
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -0,0 +1,29 @@
+//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCAsmInfo.h"
+using namespace llvm;
+
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
+ SupportsDebugInformation = true;
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+
+ // Debug
+ HasLEB128 = true;
+}
+
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
new file mode 100644
index 000000000000..840392263881
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class XCoreMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
new file mode 100644
index 000000000000..939d97c9d87c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCTargetDesc.h"
+#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "XCoreGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createXCoreMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitXCoreMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeXCoreMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+}
+
+static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeXCoreMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeXCoreMCAsmInfo() {
+ RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
new file mode 100644
index 000000000000..3cfc3764a62c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCTARGETDESC_H
+#define XCOREMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheXCoreTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "XCoreGenRegisterInfo.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "XCoreGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
new file mode 100644
index 000000000000..7aa8965c4ac6
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheXCoreTarget;
+
+extern "C" void LLVMInitializeXCoreTargetInfo() {
+ RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
new file mode 100644
index 000000000000..b8fb0cac319b
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -0,0 +1,31 @@
+//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// XCore back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_XCORE_H
+#define TARGET_XCORE_H
+
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class XCoreTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCore.td b/contrib/llvm/lib/Target/XCore/XCore.td
new file mode 100644
index 000000000000..38401895e634
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.td
@@ -0,0 +1,46 @@
+//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Descriptions
+//===----------------------------------------------------------------------===//
+
+include "XCoreRegisterInfo.td"
+include "XCoreInstrInfo.td"
+include "XCoreCallingConv.td"
+
+def XCoreInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// XCore processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"xs1b-generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def XCore : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = XCoreInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 000000000000..1a43714d63b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,280 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+ cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class XCoreAsmPrinter : public AsmPrinter {
+ const XCoreSubtarget &Subtarget;
+ public:
+ explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive = ".jmptable");
+ void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ printInlineJT(MI, opNum, O, ".jmptable32");
+ }
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
+ virtual void EmitGlobalVariable(const GlobalVariable *GV);
+
+ void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitFunctionEntryLabel();
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyEnd();
+ };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType())) {
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ // FIXME: MCStreamerize.
+ OutStreamer.EmitRawText(StringRef(".globound"));
+ OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()));
+ OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements()));
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
+ ".globound");
+ }
+ }
+}
+
+void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ // Check to see if this is a special global used by LLVM; if so, emit it.
+ if (!GV->hasInitializer() ||
+ EmitSpecialLLVMGlobal(GV))
+ return;
+
+ const TargetData *TD = TM.getTargetData();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
+
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," +
+ GVSym->getName());
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ report_fatal_error("AppendingLinkage is not supported by this target!");
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(GVSym, GV);
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::DLLImportLinkage:
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
+ case GlobalValue::DLLExportLinkage:
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align > 2 ? Align : 2, GV);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
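+ // Thread-local globals are emulated: the initializer is replicated once per
+ // thread (MaxThreads copies) and each thread addresses its own copy.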
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
+ Twine(Size));
+ }
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i)
+ EmitGlobalConstant(C);
+ }
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+ // are padded to 32 bits.
+ if (Size < 4)
+ OutStreamer.EmitZeros(4 - Size, 0);
+
+ // Mark the end of the global
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void XCoreAsmPrinter::EmitFunctionBodyEnd() {
+ // Emit function end directives
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) +
+ ".function");
+}
+
+void XCoreAsmPrinter::EmitFunctionEntryLabel() {
+ // Mark the start of the function
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) +
+ ".function," + CurrentFnSym->getName());
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
+
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive) {
+ unsigned JTI = MI->getOperand(opNum).getIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ O << "\t" << directive << " ";
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (i > 0)
+ O << ",";
+ O << *MBB->getSymbol();
+ }
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+ default:
+ llvm_unreachable("not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Check for mov mnemonic
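+ // ADD_2rus with a zero immediate is equivalent to a register-to-register
+ // move, so print it as mov for readability.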
+ if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ else
+ printInstruction(MI, O);
+ OutStreamer.EmitRawText(O.str());
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
+ RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
new file mode 100644
index 000000000000..b20d71f49cfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
@@ -0,0 +1,36 @@
+//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the XCore architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XCore Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_XCore : CallingConv<[
+ // i32 values are returned in registers R0, R1, R2, R3.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// XCore Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_XCore : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
new file mode 100644
index 000000000000..057822074e54
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -0,0 +1,387 @@
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// helper functions. FIXME: Eliminate.
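+// These predicates check whether a value fits the short and long immediate
+// ranges used by the XCore instruction encodings.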
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
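+ // LDWSP / STWSP take their offset in words, so the byte offset is scaled
+ // by 4 below.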
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("loadFromStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+
+static void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("storeToStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameLowering:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0),
+ STI(sti) {
+ // Do nothing
+}
+
+bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
+ return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = &MF.getMMI();
+ const XCoreRegisterInfo *RegInfo =
+ static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ bool FP = hasFP(MF);
+ bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest);
+
+ if (Nested) {
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
+ }
+ bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
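+ // ENTSP both extends the stack and saves LR when the LR spill slot is at
+ // offset 0; otherwise EXTSP only extends the stack and LR is stored below.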
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+ // Fold the epilogue into the return instruction.
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, TRI);
+ if (emitFrameMoves) {
+ MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
+ XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const{
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(),
+ RC, TRI);
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+void
+XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
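+ // The prologue / epilogue save and restore LR explicitly (via entsp / retsp
+ // or an explicit spill), so keep it out of the generic callee-saved spilling.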
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
+ }
+ XFI->setUsesLR(FrameIdx);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the epilogue / prologue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void XCoreFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
new file mode 100644
index 000000000000..7da19f0deb1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -0,0 +1,59 @@
+//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreSubtarget;
+
+ class XCoreFrameLowering: public TargetFrameLowering {
+ const XCoreSubtarget &STI;
+ public:
+ XCoreFrameLowering(const XCoreSubtarget &STI);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
new file mode 100644
index 000000000000..a8dd8479d45f
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -0,0 +1,295 @@
+//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class XCoreDAGToDAGISel : public SelectionDAGISel {
+ const XCoreTargetLowering &Lowering;
+ const XCoreSubtarget &Subtarget;
+
+ public:
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ SDNode *Select(SDNode *N);
+ SDNode *SelectBRIND(SDNode *N);
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
+ // Complex Pattern Selectors.
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ virtual const char *getPassName() const {
+ return "XCore DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "XCoreGenDAGISel.inc"
+ };
+} // end anonymous namespace
+
+/// createXCoreISelDag - This pass converts a legalized DAG into an
+/// XCore-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
+ return new XCoreDAGToDAGISel(TM);
+}
+
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from an object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from an object in the constant pool
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
+ // Transformation function: get the size of a mask
+ // Look for the first non-zero bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
+ return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+ MVT::i32, MskSize);
+ }
+ else if (!isUInt<16>(Val)) {
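+ // Constants that don't fit in 16 bits are materialized with a load from
+ // the constant pool.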
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+ return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ }
+ break;
+ }
+ case XCoreISD::LADD: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::LSUB: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::MACCU: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::MACCS: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::LMUL: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ case ISD::BRIND:
+ if (SDNode *ResNode = SelectBRIND(N))
+ return ResNode;
+ break;
+ // Other cases are autogenerated.
+ }
+ return SelectCode(N);
+}
+
+/// Given a chain, return a new chain where any appearance of Old is replaced
+/// by New. There must be at most one node between Old and Chain, and that
+/// node must be a TokenFactor. Returns an empty SDValue if these conditions
+/// don't hold.
+static SDValue
+replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
+{
+ if (Chain == Old)
+ return New;
+ if (Chain->getOpcode() != ISD::TokenFactor)
+ return SDValue();
+ SmallVector<SDValue, 8> Ops;
+ bool found = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) {
+ if (Chain->getOperand(i) == Old) {
+ Ops.push_back(New);
+ found = true;
+ } else {
+ Ops.push_back(Chain->getOperand(i));
+ }
+ }
+ if (!found)
+ return SDValue();
+ return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other,
+ &Ops[0], Ops.size());
+}
+
+SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // (brind (int_xcore_checkevent (addr)))
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return 0;
+ unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::xcore_checkevent)
+ return 0;
+ SDValue nextAddr = Addr->getOperand(2);
+ SDValue CheckEventChainOut(Addr.getNode(), 1);
+ if (!CheckEventChainOut.use_empty()) {
+ // If the chain out of the checkevent intrinsic is an operand of the
+ // indirect branch or used in a TokenFactor which is the operand of the
+ // indirect branch then build a new chain which uses the chain coming into
+ // the checkevent intrinsic instead.
+ SDValue CheckEventChainIn = Addr->getOperand(0);
+ SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
+ CheckEventChainIn);
+ if (!NewChain.getNode())
+ return 0;
+ Chain = NewChain;
+ }
+ // Enable events on the thread using setsr 1 and then disable them
+ // immediately after with clrsr 1. If any resources owned by the thread are
+ // ready, an event will be taken. If no resource is ready, we branch to the
+ // address which was the operand to the checkevent intrinsic.
+ SDValue constOne = getI32Imm(1);
+ SDValue Glue =
+ SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
+ constOne, Chain), 0);
+ Glue =
+ SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue,
+ constOne, Glue), 0);
+ if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper &&
+ nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) {
+ return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other,
+ nextAddr->getOperand(0), Glue);
+ }
+ return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 000000000000..6d040e052659
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,1608 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-lower"
+
+#include "XCoreISelLowering.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "XCoreTargetObjectFile.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+const char *XCoreTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case XCoreISD::BL : return "XCoreISD::BL";
+ case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
+ case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
+ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+ case XCoreISD::STWSP : return "XCoreISD::STWSP";
+ case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ case XCoreISD::LADD : return "XCoreISD::LADD";
+ case XCoreISD::LSUB : return "XCoreISD::LSUB";
+ case XCoreISD::LMUL : return "XCoreISD::LMUL";
+ case XCoreISD::MACCU : return "XCoreISD::MACCU";
+ case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
+ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
+ default : return NULL;
+ }
+}
+
+XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
+ : TargetLowering(XTM, new XCoreTargetObjectFile()),
+ TM(XTM),
+ Subtarget(*XTM.getSubtargetImpl()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(XCore::SP);
+
+ setSchedulingPreference(Sched::RegPressure);
+
+ // Use i32 for setcc operation results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // XCore does not have the NodeTypes below.
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+
+ // Stop the combiner recombining select and set_cc
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // 64bit
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // Bit Manipulation
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Jump tables.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Conversion of i64 -> double produces constantpool nodes
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Loads
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Custom expand misaligned loads / stores.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ // Varargs
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ // Dynamic stack
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
+ maxStoresPerMemmove = maxStoresPerMemmoveOptSize
+ = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ADD);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue XCoreTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode())
+ {
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ // FIXME: Remove these when LegalizeDAGTypes lands.
+ case ISD::ADD:
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of a node with an illegal result
+/// type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ return;
+ case ISD::ADD:
+ case ISD::SUB:
+ Results.push_back(ExpandADDSUB(N, DAG));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const
+{
+ // FIXME there is no actual debug info here
+ DebugLoc dl = GA.getDebugLoc();
+ if (isa<Function>(GV)) {
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ }
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine constness
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ bool isConst = GVar && GVar->isConstant();
+ if (isConst) {
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ }
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
+ return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(const Type *Ty) {
+ const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ // transform to label + getid() * size
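+ // The asm printer emits MaxThreads copies of the object, so
+ // base + getid() * size selects this thread's copy.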
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine size
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ if (! GVar) {
+ llvm_unreachable("Thread local object not a GlobalVariable?");
+ return SDValue();
+ }
+ const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+#ifndef NDEBUG
+ errs() << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+#endif
+ llvm_unreachable(0);
+ }
+ SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+ DAG.getConstant(Size, MVT::i32));
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
+
+SDValue XCoreTargetLowering::
+LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
+}
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really debug info here
+ DebugLoc dl = CP->getDebugLoc();
+ EVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ } else {
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ }
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+}
+
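+// Jump tables are emitted inline in the instruction stream (see printInlineJT
+// in the asm printer).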
+unsigned XCoreTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue XCoreTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size();
+ if (NumEntries <= 32) {
+ return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index);
+ }
+ assert((NumEntries >> 31) == 0);
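+ // Larger tables use the 32-bit entry form (BR_JT32), so the index is
+ // doubled to step over the wider entries.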
+ SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
+ ScaledIndex);
+}
+
+static bool
+IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
+ int64_t &Offset)
+{
+ if (Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+ ConstantSDNode *CN = 0;
+ if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ return false;
+ }
+ int64_t off = CN->getSExtValue();
+ const SDValue &Base = Addr.getOperand(0);
+ const SDValue *Root = &Base;
+ if (Base.getOpcode() == ISD::ADD &&
+ Base.getOperand(1).getOpcode() == ISD::SHL) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
+ .getOperand(1));
+ if (CN && (CN->getSExtValue() >= 2)) {
+ Root = &Base.getOperand(0);
+ }
+ }
+ if (isa<FrameIndexSDNode>(*Root)) {
+ // All frame indices are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
+ Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
+ // All dp / cp relative addresses are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Unexpected extension type");
+ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+ return SDValue();
+
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned load alone.
+ if (LD->getAlignment() >= ABIAlignment)
+ return SDValue();
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Base;
+ int64_t Offset;
+ if (!LD->isVolatile() &&
+ IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
+ if (Offset % 4 == 0) {
+ // We've managed to infer better alignment information than the load
+ // already has. Use an aligned load.
+ //
+ return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+ // Lower to
+ // ldw low, base[offset >> 2]
+ // ldw high, base[(offset >> 2) + 1]
+ // shr low_shifted, low, (offset & 0x3) * 8
+ // shl high_shifted, high, 32 - (offset & 0x3) * 8
+ // or result, low_shifted, high_shifted
+ SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
+ SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
+ SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
+ SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
+
+ SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
+
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(), false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(), false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
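+ // A load with 2-byte alignment is split into two halfword loads that are
+ // combined with a shift and an or.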
+ if (LD->getAlignment() == 2) {
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, LD->getPointerInfo(), MVT::i16,
+ LD->isVolatile(), LD->isNonTemporal(), 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ HighAddr,
+ LD->getPointerInfo().getWithOffset(2),
+ MVT::i16, LD->isVolatile(),
+ LD->isNonTemporal(), 2);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
+ DAG.getConstant(16, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+ // Lower to a call to __misaligned_load(BasePtr).
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, IntPtrTy, false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
+ Args, DAG, DL);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue XCoreTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const
+{
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+ assert(!ST->isTruncatingStore() && "Unexpected store type");
+ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
+ if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ return SDValue();
+ }
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned store alone.
+ if (ST->getAlignment() >= ABIAlignment) {
+ return SDValue();
+ }
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (ST->getAlignment() == 2) {
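+    // A 2-byte aligned i32 store can be done with two aligned truncating
+    // halfword stores: the low half at BasePtr and the high half
+    // (Value >> 16) at BasePtr + 2.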
+ SDValue Low = Value;
+ SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
+ DAG.getConstant(16, MVT::i32));
+ SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
+ ST->getPointerInfo(), MVT::i16,
+ ST->isVolatile(), ST->isNonTemporal(),
+ 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
+ ST->getPointerInfo().getWithOffset(2),
+ MVT::i16, ST->isVolatile(),
+ ST->isNonTemporal(), 2);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
+ }
+
+ // Lower to a call to __misaligned_store(BasePtr, Value).
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
+ Args, DAG, dl);
+
+ return CallResult.second;
+}
+
+SDValue XCoreTargetLowering::
+LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
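+  // maccs accumulates into the zero addends here, so the pair of results is
+  // just the 64-bit signed product: the first result is the high word, the
+  // second the low word.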
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Zero, Zero,
+ LHS, RHS);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue XCoreTargetLowering::
+LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), LHS, RHS,
+ Zero, Zero);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// isADDADDMUL - Return whether Op is in a form that is equivalent to
+/// add(add(mul(x,y),a),b). If requireIntermediatesHaveOneUse is true then
+/// each intermediate result in the calculation must also have a single use.
+/// If the Op is in the correct form the constituent parts are written to Mul0,
+/// Mul1, Addend0 and Addend1.
+static bool
+isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0,
+ SDValue &Addend1, bool requireIntermediatesHaveOneUse)
+{
+ if (Op.getOpcode() != ISD::ADD)
+ return false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue AddOp;
+ SDValue OtherOp;
+ if (N0.getOpcode() == ISD::ADD) {
+ AddOp = N0;
+ OtherOp = N1;
+ } else if (N1.getOpcode() == ISD::ADD) {
+ AddOp = N1;
+ OtherOp = N0;
+ } else {
+ return false;
+ }
+ if (requireIntermediatesHaveOneUse && !AddOp.hasOneUse())
+ return false;
+ if (OtherOp.getOpcode() == ISD::MUL) {
+ // add(add(a,b),mul(x,y))
+ if (requireIntermediatesHaveOneUse && !OtherOp.hasOneUse())
+ return false;
+ Mul0 = OtherOp.getOperand(0);
+ Mul1 = OtherOp.getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = AddOp.getOperand(1);
+ return true;
+ }
+ if (AddOp.getOperand(0).getOpcode() == ISD::MUL) {
+ // add(add(mul(x,y),a),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(0).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(0).getOperand(0);
+ Mul1 = AddOp.getOperand(0).getOperand(1);
+ Addend0 = AddOp.getOperand(1);
+ Addend1 = OtherOp;
+ return true;
+ }
+ if (AddOp.getOperand(1).getOpcode() == ISD::MUL) {
+ // add(add(a,mul(x,y)),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(1).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(1).getOperand(0);
+ Mul1 = AddOp.getOperand(1).getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = OtherOp;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
+{
+ SDValue Mul;
+ SDValue Other;
+ if (N->getOperand(0).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(0);
+ Other = N->getOperand(1);
+ } else if (N->getOperand(1).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(1);
+ Other = N->getOperand(0);
+ } else {
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, RL, AddendL, AddendH;
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(0, MVT::i32));
+ RL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(0, MVT::i32));
+ AddendL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(0, MVT::i32));
+ AddendH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(1, MVT::i32));
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+ unsigned LHSSB = DAG.ComputeNumSignBits(Mul.getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(Mul.getOperand(1));
+ if (DAG.MaskedValueIsZero(Mul.getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(Mul.getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ if (LHSSB > 32 && RHSSB > 32) {
+ // The inputs are both sign-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ SDValue LH, RH;
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(1, MVT::i32));
+ RH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(1, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ RH = DAG.getNode(ISD::MUL, dl, MVT::i32, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, MVT::i32, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, LH);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
+{
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unknown operand to lower!");
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue Result = TryExpandADDWithMul(N, DAG);
+ if (Result.getNode() != 0)
+ return Result;
+ }
+
+ DebugLoc dl = N->getDebugLoc();
+
+ // Extract components
+ SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+ // Expand
+ unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+ XCoreISD::LSUB;
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Lo(Carry.getNode(), 1);
+
+ SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Hi(Ignored.getNode(), 1);
+ // Merge the pieces
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG) const
+{
+ llvm_unreachable("unimplemented");
+  // FIXME: Arguments passed by reference need an extra dereference.
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(1), MachinePointerInfo(V),
+ false, false, 0);
+ // Increment the pointer, VAList, to the next vararg
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerVASTART(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ // vastart stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument
+ MachineFunction &MF = DAG.getMachineFunction();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ MachinePointerInfo(), false, false, 0);
+}
+
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+SDValue XCoreTargetLowering::
+LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // .align 4
+ // LDAPF_u10 r11, nest
+ // LDW_2rus r11, r11[0]
+ // STWSP_ru6 r11, sp[0]
+ // LDAPF_u10 r11, fptr
+ // LDW_2rus r11, r11[0]
+ // BAU_1r r11
+ // nest:
+ // .word nest
+ // fptr:
+ // .word fptr
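+  //
+  // The first three words hold the instruction sequence above; the 'nest'
+  // value is stored at offset 12 and the function pointer at offset 16,
+  // matching the five stores emitted below.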
+ SDValue OutChains[5];
+
+ SDValue Addr = Trmp;
+
+ DebugLoc dl = Op.getDebugLoc();
+ OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(4, MVT::i32));
+ OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 4), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(8, MVT::i32));
+ OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 8), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(12, MVT::i32));
+ OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(16, MVT::i32));
+ OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 16), false, false,
+ 0);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "XCoreGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore call implementation
+SDValue
+XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // XCore target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // For now, only CallingConv::C implemented
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual
+/// regs to (physical regs)/(stack frame); CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isTailCall, sret.
+SDValue
+XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // The ABI dictates there should be one stack slot available to the callee
+ // on function entry (for saving lr).
+ CCInfo.AllocateStack(4, 4);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ int Offset = VA.getLocMemOffset();
+
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ Chain, Arg,
+ DAG.getConstant(Offset/4, MVT::i32)));
+ }
+ }
+
+  // Transform all the store nodes into a single TokenFactor node, since
+  // the stores are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore formal arguments implementation
+SDValue
+XCoreTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: sret
+SDValue
+XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
+
+ unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
+
+ unsigned LRSaveSize = StackSlotSize;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i32:
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ }
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > StackSlotSize) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset(),
+ true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ if (isVarArg) {
+ /* Argument registers */
+ static const unsigned ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+ };
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
+ array_lengthof(ArgRegs));
+ if (FirstVAReg < array_lengthof(ArgRegs)) {
+ SmallVector<SDValue, 4> MemOps;
+ int offset = 0;
+ // Save remaining registers, storing higher register numbers at a higher
+ // address
+ for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
+ // Create a stack slot
+ int FI = MFI->CreateFixedObject(4, offset, true);
+ if (i == FirstVAReg) {
+ XFI->setVarArgsFrameIndex(FI);
+ }
+ offset -= StackSlotSize;
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ // Move argument from phys reg -> virt reg
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(ArgRegs[i], VReg);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ // Move argument from virt reg -> stack
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else {
+ // This will point to the next argument passed via stack.
+ XFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true));
+ }
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool XCoreTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_XCore);
+}
+
+SDValue
+XCoreTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+  // CCValAssign - represents the assignment of
+  // the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+    // Guarantee that all emitted copies are stuck together,
+    // so the scheduler cannot reorder them.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on XCore is always a "retsp 0"
+ if (Flag.getNode())
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == XCore::SELECT_CC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case XCoreISD::LADD: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N1, N0, N2);
+
+ // fold (ladd 0, 0, x) -> 0, x & 1
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
+ DAG.getConstant(1, VT));
+ SDValue Ops [] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
+ SDValue Ops [] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LSUB: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Borrow = N2;
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, VT), N2);
+ SDValue Ops [] = { Borrow, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+
+ // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Borrow = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
+ SDValue Ops [] = { Borrow, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LMUL: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+    // Canonicalize the multiplicative constant to the RHS. If both
+    // multiplicative operands are constant, canonicalize the smaller to the RHS.
+ if ((N0C && !N1C) ||
+ (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
+ return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3);
+
+ // lmul(x, 0, a, b)
+ if (N1C && N1C->isNullValue()) {
+ // If the high result is unused fold to add(a, b)
+ if (N->hasNUsesOfValue(0, 0)) {
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
+ SDValue Ops [] = { Lo, Lo };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // Otherwise fold to ladd(a, b, 0)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ }
+ }
+ break;
+ case ISD::ADD: {
+    // Fold 32-bit expressions such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b). The high result of lmul will be ignored.
+ // This is only profitable if the intermediate results are unused
+ // elsewhere.
+ SDValue Mul0, Mul1, Addend0, Addend1;
+ if (N->getValueType(0) == MVT::i32 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
+ SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0,
+ Mul1, Addend0, Addend1);
+ SDValue Result(Ignored.getNode(), 1);
+ return Result;
+ }
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+    // Fold 64-bit expressions such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b) if all operands are zero-extended. We do this
+ // before type legalization as it is messy to match the operands after
+ // that.
+ if (N->getValueType(0) == MVT::i64 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, false) &&
+ DAG.MaskedValueIsZero(Mul0, HighMask) &&
+ DAG.MaskedValueIsZero(Mul1, HighMask) &&
+ DAG.MaskedValueIsZero(Addend0, HighMask) &&
+ DAG.MaskedValueIsZero(Addend1, HighMask)) {
+ SDValue Mul0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul0, DAG.getConstant(0, MVT::i32));
+ SDValue Mul1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul1, DAG.getConstant(0, MVT::i32));
+ SDValue Addend0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend0, DAG.getConstant(0, MVT::i32));
+ SDValue Addend1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend1, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0L, Mul1L,
+ Addend0L, Addend1L);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ }
+ break;
+ case ISD::STORE: {
+ // Replace unaligned store of unaligned load with memmove.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!DCI.isBeforeLegalize() ||
+ allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ ST->isVolatile() || ST->isIndexed()) {
+ break;
+ }
+ SDValue Chain = ST->getChain();
+
+ unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
+ if (StoreBits % 8) {
+ break;
+ }
+ unsigned ABIAlignment = getTargetData()->getABITypeAlignment(
+ ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
+ unsigned Alignment = ST->getAlignment();
+ if (Alignment >= ABIAlignment) {
+ break;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
+ if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
+ LD->getAlignment() == Alignment &&
+ !LD->isVolatile() && !LD->isIndexed() &&
+ Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
+ LD->getBasePtr(),
+ DAG.getConstant(StoreBits/8, MVT::i32),
+ Alignment, false, ST->getPointerInfo(),
+ LD->getPointerInfo());
+ }
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case XCoreISD::LADD:
+ case XCoreISD::LSUB:
+ if (Op.getResNo() == 0) {
+ // Top bits of carry / borrow are clear.
+ KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ KnownZero &= Mask;
+ }
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing mode description hooks
+//===----------------------------------------------------------------------===//
+
+static inline bool isImmUs(int64_t val)
+{
+ return (val >= 0 && val <= 11);
+}
+
+static inline bool isImmUs2(int64_t val)
+{
+ return (val%2 == 0 && isImmUs(val/2));
+}
+
+static inline bool isImmUs4(int64_t val)
+{
+ return (val%4 == 0 && isImmUs(val/4));
+}
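+
+// For example, isImmUs4(44) holds (44 == 4 * 11, the largest scaled offset),
+// while isImmUs4(46) and isImmUs4(48) do not.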
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (AM.BaseGV) {
+ return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
+ AM.BaseOffs%4 == 0;
+ }
+
+ switch (Size) {
+ case 1:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs(AM.BaseOffs);
+ }
+ // reg + reg
+ return AM.Scale == 1 && AM.BaseOffs == 0;
+ case 2:
+ case 3:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs2(AM.BaseOffs);
+ }
+ // reg + reg<<1
+ return AM.Scale == 2 && AM.BaseOffs == 0;
+ default:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs4(AM.BaseOffs);
+ }
+ // reg + reg<<2
+ return AM.Scale == 4 && AM.BaseOffs == 0;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+XCoreTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ return std::make_pair(0U, XCore::GRRegsRegisterClass);
+ }
+ }
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 000000000000..9c803bef6dd2
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,201 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "XCore.h"
+
+namespace llvm {
+
+  // Forward declarations
+ class XCoreSubtarget;
+ class XCoreTargetMachine;
+
+ namespace XCoreISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Branch and link (call)
+ BL,
+
+ // pc relative address
+ PCRelativeWrapper,
+
+ // dp relative address
+ DPRelativeWrapper,
+
+ // cp relative address
+ CPRelativeWrapper,
+
+ // Store word to stack
+ STWSP,
+
+ // Corresponds to retsp instruction
+ RETSP,
+
+ // Corresponds to LADD instruction
+ LADD,
+
+ // Corresponds to LSUB instruction
+ LSUB,
+
+ // Corresponds to LMUL instruction
+ LMUL,
+
+ // Corresponds to MACCU instruction
+ MACCU,
+
+ // Corresponds to MACCS instruction
+ MACCS,
+
+ // Jumptable branch.
+ BR_JT,
+
+ // Jumptable branch using long branches for each entry.
+ BR_JT32
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class XCoreTargetLowering : public TargetLowering
+ {
+ public:
+
+ explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+    /// getTargetNodeName - This method returns the name of a target-specific
+    /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+
+ private:
+ const XCoreTargetMachine &TM;
+ const XCoreSubtarget &Subtarget;
+
+ // Lower Operand helpers
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+ SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const;
+
+ // Lower Operand specifics
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
+ // Inline asm support
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Expand specifics
+ SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const;
+ };
+}
+
+#endif // XCOREISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
new file mode 100644
index 000000000000..8002c993270c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
@@ -0,0 +1,120 @@
+//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "XCore";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 000000000000..f90481f3fbc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,398 @@
+//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+namespace XCore {
+
+ // XCore Condition Codes
+ enum CondCode {
+ COND_TRUE,
+ COND_FALSE,
+ COND_INVALID
+ };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo()
+ : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned
+XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot being stored
+/// to. If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned
+XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::STWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static inline bool IsBRU(unsigned BrOpc) {
+ return BrOpc == XCore::BRFU_u6
+ || BrOpc == XCore::BRFU_lu6
+ || BrOpc == XCore::BRBU_u6
+ || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+ return BrOpc == XCore::BRFT_ru6
+ || BrOpc == XCore::BRFT_lru6
+ || BrOpc == XCore::BRBT_ru6
+ || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+ return BrOpc == XCore::BRFF_ru6
+ || BrOpc == XCore::BRFF_lru6
+ || BrOpc == XCore::BRBF_ru6
+ || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+ return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+static inline bool IsBR_JT(unsigned BrOpc) {
+ return BrOpc == XCore::BR_JT
+ || BrOpc == XCore::BR_JT32;
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the corresponding branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ if (IsBRT(BrOpc)) {
+ return XCore::COND_TRUE;
+ } else if (IsBRF(BrOpc)) {
+ return XCore::COND_FALSE;
+ } else {
+ return XCore::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::BRFT_lru6;
+ case XCore::COND_FALSE : return XCore::BRFF_lru6;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_TRUE into COND_FALSE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::COND_FALSE;
+ case XCore::COND_FALSE : return XCore::COND_TRUE;
+ }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to
+///    a successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with a conditional branch and an unconditional
+///    branch, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (IsBRU(LastInst->getOpcode())) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == XCore::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ // If the block ends with conditional branch followed by unconditional,
+ // handle it.
+ if (BranchCode != XCore::COND_INVALID
+ && IsBRU(LastInst->getOpcode())) {
+
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(SecondLastInst->getOperand(0));
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (IsBRU(SecondLastInst->getOpcode()) &&
+ IsBRU(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
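// Illustrative sketch, not part of the imported file: how a generic CodeGen
// client (for example the branch folder) typically consumes the AnalyzeBranch
// contract documented above. The helper name invertBranchTo and the argument
// NewDest are invented for this example; the TargetInstrInfo calls follow the
// LLVM 2.9/3.0-era signatures declared for this backend.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Redirect a fall-through conditional branch (case 3 above) so that it jumps
// to what used to be the fall-through successor instead.
static bool invertBranchTo(MachineBasicBlock &MBB, MachineBasicBlock *NewDest,
                           const TargetInstrInfo &TII) {
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;                      // Target could not analyze this block.
  if (Cond.empty() || FBB)
    return false;                      // Not the "condbr + fall through" form.
  if (TII.ReverseBranchCondition(Cond))
    return false;                      // Condition cannot be reversed.
  TII.RemoveBranch(MBB);
  TII.InsertBranch(MBB, NewDest, 0, Cond, DebugLoc());
  return true;
}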
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Unexpected number of components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ assert(Cond.size() == 2 && "Unexpected number of components!");
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
+ return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!IsCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+ bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+ if (GRDest && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (GRDest && SrcReg == XCore::SP) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+ return;
+ }
+
+ if (DestReg == XCore::SP && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
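// Illustrative sketch, not part of the imported file: the opcode selection
// made by copyPhysReg above, pulled out as a standalone helper. The function
// name copyOpcodeFor is invented here; the opcode and register enums are the
// ones generated for this backend into XCoreGenInstrInfo.inc and
// XCoreGenRegisterInfo.inc.
static unsigned copyOpcodeFor(unsigned DestReg, unsigned SrcReg) {
  bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
  bool GRSrc  = XCore::GRRegsRegClass.contains(SrcReg);
  if (GRDest && GRSrc)               return XCore::ADD_2rus;   // add dst, src, 0
  if (GRDest && SrcReg == XCore::SP) return XCore::LDAWSP_ru6; // ldaw dst, sp[0]
  if (DestReg == XCore::SP && GRSrc) return XCore::SETSP_1r;   // set sp, src
  return 0; // no single-instruction copy; copyPhysReg above asserts instead
}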
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::STWFI))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+/// ReverseBranchCondition - Invert the branch condition held in Cond
+/// (COND_TRUE <-> COND_FALSE), returning false to indicate success.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert((Cond.size() == 2) &&
+ "Invalid XCore branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 000000000000..840b1e163652
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,88 @@
+//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "XCoreRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+
+class XCoreInstrInfo : public XCoreGenInstrInfo {
+ const XCoreRegisterInfo RI;
+public:
+ XCoreInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+
+ virtual bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 000000000000..55c7527f4e83
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,1210 @@
+//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Uses of CP, DP are not currently reflected in the patterns, since
+// having a physical register as an operand prevents loop hoisting and
+// since the value of these registers never changes during the life of the
+// function.
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass.
+//===----------------------------------------------------------------------===//
+
+include "XCoreInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// XCore specific DAG Nodes.
+//
+
+// Call
+def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def SDT_XCoreAddress : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
+def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
+ [SDNPHasChain]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+ // Transformation function: imm/4
+ assert(N->getZExtValue() % 4 == 0);
+ return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+ // Transformation function: get the size of a mask
+ assert(isMask_32(N->getZExtValue()));
+ // look for the first non-zero bit
+ return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
+
+def neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(-value);
+}]>;
+
+def bpwsub_xform : SDNodeXForm<imm, [{
+ // Transformation function: 32-imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(32-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm/4
+ uint32_t value = N->getZExtValue();
+ assert(-value % 4 == 0);
+ return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return value%4 == 0 && value/4 <= 11;
+}]>;
+
+def immUsNeg : PatLeaf<(imm), [{
+ return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
+
+def immBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 1 && value <= 8)
+ || value == 16
+ || value == 24
+ || value == 32;
+}]>;
+
+def immBpwSubBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 24 && value <= 31)
+ || value == 16
+ || value == 8
+ || value == 0;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 2))>;
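// Illustrative sketch, not part of the imported file: the immediate ranges and
// address arithmetic encoded by the PatLeaf/PatFrag definitions above, restated
// as plain C++ so the scaled-offset forms are easy to check. The function names
// below are invented for this example.
#include <stdint.h>

static bool isImmUs(uint32_t v)   { return v <= 11; }                   // short-form immediate
static bool isImmUs4(uint32_t v)  { return v % 4 == 0 && v / 4 <= 11; } // 0, 4, ..., 44
static bool isImmBitp(uint32_t v) {                                     // valid bit positions
  return (v >= 1 && v <= 8) || v == 16 || v == 24 || v == 32;
}

// Scaled address fragments: lda16 uses half-word offsets, ldaw uses word offsets.
static uint32_t lda16f(uint32_t addr, uint32_t off) { return addr + (off << 1); }
static uint32_t lda16b(uint32_t addr, uint32_t off) { return addr - (off << 1); }
static uint32_t ldawf (uint32_t addr, uint32_t off) { return addr + (off << 2); }
static uint32_t ldawb (uint32_t addr, uint32_t off) { return addr - (off << 2); }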
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
+ []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
+ []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+// Jump tables.
+def InlineJT : Operand<i32> {
+ let PrintMethod = "printInlineJT";
+}
+
+def InlineJT32 : Operand<i32> {
+ let PrintMethod = "printInlineJT32";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+multiclass F3R_2RUS_np<string OpcStr> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+}
+
+multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<string OpcStr, SDNode OpNode> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<string OpcStr> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RBITP multiclass - Define a normal FL3R/FL2RUS pattern in one shot,
+/// with a bit-position immediate in the short form.
+multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+}
+
+multiclass FRU6_LRU6_cp<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+}
+
+// U6
+multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU16:$b)]>;
+}
+multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU16:$b)]>;
+}
+
+multiclass FU6_LU6_np<string OpcStr> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// U10
+multiclass FU10_LU10_np<string OpcStr> {
+ def _u10: _FU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu10: _FLU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// Two operand short
+
+class F2R_np<string OpcStr> : _F2R<
+ (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"),
+ []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDWFI $dst, $addr",
+ [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDAWFI $dst, $addr",
+ [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+ "${:comment} STWFI $src, $addr",
+ [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1 in {
+ def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
+ (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
+ "${:comment} SELECT_CC PSEUDO!",
+ [(set GRRegs:$dst,
+ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
+}
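// Illustrative sketch, not part of the imported file: the value SELECT_CC is
// meant to produce. Per the comment above it is expanded into a branch
// sequence by the custom inserter after instruction selection; this helper
// (the name select_cc is invented here) only restates the intended semantics.
static unsigned select_cc(unsigned cond, unsigned trueVal, unsigned falseVal) {
  return cond != 0 ? trueVal : falseVal; // brf on cond selects falseVal, otherwise trueVal
}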
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+defm ADD : F3R_2RUS<"add", add>;
+defm SUB : F3R_2RUS<"sub", sub>;
+let neverHasSideEffects = 1 in {
+defm EQ : F3R_2RUS_np<"eq">;
+def LSS_3r : F3R_np<"lss">;
+def LSU_3r : F3R_np<"lsu">;
+}
+def AND_3r : F3R<"and", and>;
+def OR_3r : F3R<"or", or>;
+
+let mayLoad=1 in {
+def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]",
+ []>;
+
+def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]",
+ []>;
+}
+
+let mayStore=1 in {
+def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+
+def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+}
+
+defm SHL : F3R_2RBITP<"shl", shl>;
+defm SHR : F3R_2RBITP<"shr", srl>;
+// TODO tsetr
+
+// Three operand long
+def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]",
+ []>;
+
+def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ []>;
+
+def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<"mul", mul>;
+// Instructions which may trap are marked as side effecting.
+let hasSideEffects = 1 in {
+def DIVS_l3r : FL3R<"divs", sdiv>;
+def DIVU_l3r : FL3R<"divu", udiv>;
+def REMS_l3r : FL3R<"rems", srem>;
+def REMU_l3r : FL3R<"remu", urem>;
+}
+def XOR_l3r : FL3R<"xor", xor>;
+defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+
+let Constraints = "$src1 = $dst" in
+def CRC_l3r : _FL3R<(outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
+
+// TODO inpw, outpw
+let mayStore=1 in {
+def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]",
+ []>;
+
+def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]",
+ []>;
+}
+
+// Four operand long
+let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
+def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccu $dst1, $dst2, $src3, $src4",
+ []>;
+
+def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccs $dst1, $dst2, $src3, $src4",
+ []>;
+}
+
+let Constraints = "$src1 = $dst1" in
+def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc8 $dst1, $dst2, $src2, $src3",
+ []>;
+
+// Five operand long
+
+def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldiv $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+// Six operand long
+
+def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+// Register - U6
+
+//let Uses = [DP] in ...
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ []>;
+
+let isReMaterializable = 1 in
+def LDAWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ [(set GRRegs:$dst, ADDRdpii:$a)]>;
+
+let mayLoad=1 in
+def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ []>;
+
+def LDWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+
+let mayStore=1 in
+def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ []>;
+
+def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ [(store GRRegs:$val, ADDRdpii:$addr)]>;
+
+//let Uses = [CP] in ..
+let mayLoad = 1, isReMaterializable = 1 in
+defm LDWCP : FRU6_LRU6_cp<"ldw">;
+
+let Uses = [SP] in {
+let mayStore=1 in {
+def STWSP_ru6 : _FRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
+
+def STWSP_lru6 : _FLRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+}
+
+let mayLoad=1 in {
+def LDWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+
+def LDWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+}
+
+let neverHasSideEffects = 1 in {
+def LDAWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_ru6_RRegs : _FRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6_RRegs : _FLRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+}
+}
+
+let isReMaterializable = 1 in {
+def LDC_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU16:$b)]>;
+}
+
+def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+
+def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+
+// Operand register - U6
+let isBranch = 1, isTerminator = 1 in {
+defm BRFT: FRU6_LRU6_branch<"bt">;
+defm BRBT: FRU6_LRU6_branch<"bt">;
+defm BRFF: FRU6_LRU6_branch<"bf">;
+defm BRBF: FRU6_LRU6_branch<"bf">;
+}
+
+// U6
+let Defs = [SP], Uses = [SP] in {
+let neverHasSideEffects = 1 in
+defm EXTSP : FU6_LU6_np<"extsp">;
+let mayStore = 1 in
+defm ENTSP : FU6_LU6_np<"entsp">;
+
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
+defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+}
+}
+
+// TODO extdp, kentsp, krestsp, blat
+// getsr, kalli
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+def BRBU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRBU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+}
+
+//let Uses = [CP] in ...
+let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAWCP_lu6: _FLRU6<
+ (outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+
+// setsr may cause a branch if it is used to enable events. clrsr may
+// branch if it is executed while events are enabled.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
+defm SETSR_branch : FU6_LU6_np<"setsr">;
+defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+}
+
+// U10
+// TODO ldwcpl, blacp
+
+let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
+def LDAP_u10 : _FU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10 : _FLU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10_ba : _FLU10<(outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BL_u10 : _FU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU10:$target)]>;
+
+def BL_lu10 : _FLU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU20:$target)]>;
+}
+
+// Two operand short
+// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg)
+def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b",
+ [(set GRRegs:$dst, (not GRRegs:$b))]>;
+
+def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b",
+ [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+
+let Constraints = "$src1 = $dst" in {
+let neverHasSideEffects = 1 in
+def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ []>;
+
+def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+}
+
+let isReMaterializable = 1, neverHasSideEffects = 1 in
+def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size",
+ []>;
+
+def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
+
+def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+
+def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+
+def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+
+def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+
+def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+
+let Constraints = "$src = $dst" in
+def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+
+def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+
+def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+
+def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "in $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+
+let Constraints = "$src = $dst" in
+def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+
+def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+
+def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+
+def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+
+def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getst $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
+
+def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:sp, $src",
+ [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
+
+def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:pc, $src",
+ [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
+
+def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:cp, $src",
+ [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
+
+def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:dp, $src",
+ [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+
+// Two operand long
+// TODO endin, peek,
+// getd, testlcl
+def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+
+def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+
+def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+
+def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+
+def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+
+def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:lr, $src",
+ [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
+
+def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+// One operand short
+// TODO edu, eeu, waitet, waitef, tstart, clrtp
+// setdp, setcp, setev, kcall
+// dgetreg
+def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
+ "msync res[$i]",
+ [(int_xcore_msync GRRegs:$i)]>;
+def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
+ "mjoin res[$i]",
+ [(int_xcore_mjoin GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
+ "bau $addr",
+ [(brind GRRegs:$addr)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+
+let Defs=[SP], neverHasSideEffects=1 in
+def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
+ "set sp, $src",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallt $src",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallf $src",
+ []>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
+ "bla $addr",
+ [(XCoreBranchLink GRRegs:$addr)]>;
+}
+
+def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
+ "syncr res[$r]",
+ [(int_xcore_syncr GRRegs:$r)]>;
+
+def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
+ "freer res[$r]",
+ [(int_xcore_freer GRRegs:$r)]>;
+
+let Uses=[R11] in
+def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setv res[$r], r11",
+ [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
+ "eeu res[$r]",
+ [(int_xcore_eeu GRRegs:$r)]>;
+
+// Zero operand short
+// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// dentsp, drestsp
+
+def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+
+let Defs = [R11] in
+def GETID_0R : _F0R<(outs), (ins),
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+def SSYNC_0r : _F0R<(outs), (ins),
+ "ssync",
+ [(int_xcore_ssync)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ hasSideEffects = 1 in
+def WAITEU_0R : _F0R<(outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+
+/// sext_inreg
+def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
+def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>;
+def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>;
+
+/// loads
+def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (LDW_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(load (add GRRegs:$addr, immUs4:$offset)),
+ (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>;
+
+/// anyext
+def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+/// stores
+def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(store GRRegs:$val, GRRegs:$addr),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+
+/// cttz
+def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
+
+/// trap
+def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>;
+
+///
+/// branch patterns
+///
+
+// unconditional branch
+def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>;
+
+// direct match equal/notequal zero brcond
+def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 GRRegs:$lhs, bb:$dst)>;
+def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst),
+ (BRFF_lru6 GRRegs:$lhs, bb:$dst)>;
+
+def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>;
+
+// generic brcond pattern
+def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>;
+
+
+///
+/// Select patterns
+///
+
+// direct match equal/notequal zero select
+def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>;
+
+///
+/// setcc patterns, only matched when none of the above brcond
+/// patterns match
+///
+
+// setcc 2 register operands
+def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+
+def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>;
+def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>;
+
+def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>;
+def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+// setcc reg/imm operands
+def : Pat<(seteq GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus GRRegs:$lhs, immUs:$rhs)>;
+def : Pat<(setne GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>;
+
+// misc
+def : Pat<(add GRRegs:$addr, immUs4:$offset),
+ (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(sub GRRegs:$addr, immUs4:$offset),
+ (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(and GRRegs:$val, immMskBitp:$mask),
+ (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+ (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+ (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+ (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+ (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+ (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
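// Illustrative sketch, not part of the imported file: the arithmetic behind the
// three "multiply by a small constant" peepholes above. lda16 computes
// addr + 2*offset and ldaw computes addr +/- 4*offset, so feeding the same
// register to both operands yields 3*x, 5*x and -3*x. Function names are
// invented for this example.
#include <stdint.h>

static uint32_t mulBy3(uint32_t x)      { return x + (x << 1); } // lda16 d, x[x]
static uint32_t mulBy5(uint32_t x)      { return x + (x << 2); } // ldaw  d, x[x]
static uint32_t mulByMinus3(uint32_t x) { return x - (x << 2); } // ldaw  d, x[-x]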
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+ (ASHR_l2rus GRRegs:$src, 32)>;
+
+def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+// setge X, 0 is canonicalized to setgt X, -1
+def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst),
+ (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(setgt GRRegs:$lhs, -1),
+ (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>;
+
+def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm),
+ (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 000000000000..a575a0f69541
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private XCore target-specific information for each MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+private:
+ bool UsesLR;
+ int LRSpillSlot;
+ int FPSpillSlot;
+ int VarArgsFrameIndex;
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
+
+public:
+ XCoreFunctionInfo() :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ explicit XCoreFunctionInfo(MachineFunction &MF) :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ ~XCoreFunctionInfo() {}
+
+ void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+ void setUsesLR(bool val) { UsesLR = val; }
+ bool getUsesLR() const { return UsesLR; }
+
+ void setLRSpillSlot(int off) { LRSpillSlot = off; }
+ int getLRSpillSlot() const { return LRSpillSlot; }
+
+ void setFPSpillSlot(int off) { FPSpillSlot = off; }
+ int getFPSpillSlot() const { return FPSpillSlot; }
+
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {
+ return SpillLabels;
+ }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 000000000000..357a4a083582
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,340 @@
+//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreRegisterInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
+ : XCoreGenRegisterInfo(), TII(tii) {
+}
+
+// helper functions
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static const unsigned XCore_ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+};
+
+const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
+{
+ return XCore_ArgRegs;
+}
+
+unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
+{
+ return array_lengthof(XCore_ArgRegs);
+}
+
+bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
+ return MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+}
+
+const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ XCore::R4, XCore::R5, XCore::R6, XCore::R7,
+ XCore::R8, XCore::R9, XCore::R10, XCore::LR,
+ 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(XCore::CP);
+ Reserved.set(XCore::DP);
+ Reserved.set(XCore::SP);
+ Reserved.set(XCore::LR);
+ if (TFI->hasFP(MF)) {
+ Reserved.set(XCore::R10);
+ }
+ return Reserved;
+}
+
+bool
+XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // TODO can we estimate stack size?
+ return TFI->hasFP(MF);
+}
+
+bool
+XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return false;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
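// Illustrative sketch, not part of the imported file: the rounding and word
// scaling applied above when ADJCALLSTACKDOWN/ADJCALLSTACKUP are lowered. The
// helper name callFrameWords and the worked example are invented here.
#include <assert.h>
#include <stdint.h>

static uint64_t callFrameWords(uint64_t AmountBytes, unsigned Align) {
  AmountBytes = (AmountBytes + Align - 1) / Align * Align; // round up to the stack alignment
  assert(AmountBytes % 4 == 0);
  return AmountBytes / 4;            // extsp <amt> and ldaw sp, sp[<amt>] take word counts
}
// For example, with a 4-byte alignment a 10-byte outgoing-argument area rounds
// up to 12 bytes, i.e. 3 words, which fits the short u6 encodings
// (EXTSP_u6 / LDAWSP_ru6_RRegs) chosen above.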
+
+void
+XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &FrameOp = MI.getOperand(i);
+ int FrameIndex = FrameOp.getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo()->getStackSize();
+
+ #ifndef NDEBUG
+ DEBUG(errs() << "\nFunction : "
+ << MF.getFunction()->getName() << "\n");
+ DEBUG(errs() << "<--------->\n");
+ DEBUG(MI.print(errs()));
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");
+ DEBUG(errs() << "FrameOffset : " << Offset << "\n");
+ DEBUG(errs() << "StackSize : " << StackSize << "\n");
+ #endif
+
+ Offset += StackSize;
+
+ unsigned FrameReg = getFrameRegister(MF);
+
+ // Special handling of DBG_VALUE instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // fold constant into offset.
+ Offset += MI.getOperand(i + 1).getImm();
+ MI.getOperand(i + 1).ChangeToImmediate(0);
+
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ Offset/=4;
+
+ bool FP = TFI->hasFP(MF);
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+ assert(XCore::GRRegsRegisterClass->contains(Reg) &&
+ "Unexpected register operand");
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (FP) {
+ bool isUs = isImmUs(Offset);
+
+ if (!isUs) {
+ if (!RS)
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+ unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+ SPAdj);
+ loadConstant(MBB, II, ScratchReg, Offset, dl);
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ } else {
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ } else {
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+
+ switch (MI.getOpcode()) {
+ int NewOpcode;
+ case XCore::LDWFI:
+ NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode))
+ .addReg(Reg, getKillRegState(isKill))
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ // Erase old instruction.
+ MBB.erase(II);
+}
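// Illustrative sketch, not part of the imported file: how the word offset used
// above is derived in the SP-relative (no frame pointer) case. The helper name
// frameWordOffset is invented here; the arithmetic mirrors the code above.
#include <assert.h>

static int frameWordOffset(int ObjectOffset, int StackSize, int FoldedImm) {
  int Offset = ObjectOffset + StackSize + FoldedImm; // frame object + stack size + folded immediate
  assert(Offset % 4 == 0 && "Misaligned stack offset");
  return Offset / 4; // ldw/stw/ldaw sp[...] encode word offsets from sp
}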
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const {
+ // TODO use mkmsk if possible.
+ if (!isImmU16(Value)) {
+ // TODO use constant pool.
+ report_fatal_error("loadConstant value too big " + Twine(Value));
+ }
+ int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int XCoreRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return XCoreGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
+
+unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
+}
+
+unsigned XCoreRegisterInfo::getRARegister() const {
+ return XCore::LR;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 000000000000..801d9eba2171
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,85 @@
+//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "XCoreGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+ void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+ void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+ XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getArgRegsSize().
+ */
+ static const unsigned *getArgRegs(const MachineFunction *MF = 0);
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs(const MachineFunction *MF = 0);
+
+ //! Return whether to emit frame moves
+ static bool needsFrameMoves(const MachineFunction &MF);
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 000000000000..c3542304a4ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,56 @@
+//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+ let Num = num;
+}
+
+// CPU registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
+def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
+def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+ // Return values and arguments
+ (add R0, R1, R2, R3,
+ // Not preserved across procedure calls
+ R11,
+ // Callee save
+ R4, R5, R6, R7, R8, R9, R10)>;
+
+// Reserved
+def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..44aeb6057ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-selectiondag-info"
+#include "XCoreTargetMachine.h"
+using namespace llvm;
+
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
new file mode 100644
index 000000000000..0386968638bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCore subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESELECTIONDAGINFO_H
+#define XCORESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine;
+
+class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
+ ~XCoreSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 000000000000..ad069bf138a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,28 @@
+//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "XCoreGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+XCoreSubtarget::XCoreSubtarget(const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : XCoreGenSubtargetInfo(TT, CPU, FS)
+{
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 000000000000..7b29fa236710
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,42 @@
+//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "XCoreGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class XCoreSubtarget : public XCoreGenSubtargetInfo {
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 000000000000..342966ae5c86
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,44 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ "i16:16:32-i32:32:32-i64:32:32-n32"),
+ InstrInfo(),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this) {
+}
+
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createXCoreISelDag(*this));
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() {
+ RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 000000000000..6235ac3a6a1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,62 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreSubtarget.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+ XCoreSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameLowering FrameLowering;
+ XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
+public:
+ XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const XCoreFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const XCoreTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
new file mode 100644
index 000000000000..7f4e1c1b4fd7
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetObjectFile.h"
+#include "XCoreSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+
+void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ DataSection =
+ Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getDataRel());
+ BSSSection =
+ Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getBSS());
+
+ MergeableConst4Section =
+ Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst4());
+ MergeableConst8Section =
+ Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst8());
+ MergeableConst16Section =
+ Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst16());
+
+ // TLS globals are lowered in the backend to arrays indexed by the current
+ // thread id. After lowering they require no special handling by the linker
+ // and can be placed in the standard data / bss sections.
+ TLSDataSection = DataSection;
+ TLSBSSSection = BSSSection;
+
+ ReadOnlySection =
+ Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel());
+
+ // Dynamic linking is not supported. Data with relocations is placed in the
+ // same section as data without relocations.
+ DataRelSection = DataRelLocalSection = DataSection;
+ DataRelROSection = DataRelROLocalSection = ReadOnlySection;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
new file mode 100644
index 000000000000..7424c78be305
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -0,0 +1,25 @@
+//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ // TODO: Classify globals as xcore wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 000000000000..fa007cfc6513
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,906 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments. In
+// practice, this means looking for internal functions that have pointer
+// arguments. If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value. This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded. Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable! This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are only
+// stored to (returning the value instead), but this is not currently done. This case
+// would be best handled when and if LLVM begins supporting multiple return
+// values from functions.
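+//
+// As an illustrative sketch (editor's note, not part of this commit), the
+// transformation on a made-up internal function looks roughly like this in
+// C-like terms:
+//
+//   // Before: 'p' is only ever loaded.
+//   static int get(const int *p) { return *p; }
+//   ... get(&x) ...
+//
+//   // After: the load is performed in the caller and the value is passed
+//   // directly, which often lets later passes eliminate the alloca for 'x'.
+//   static int get(int p_val) { return p_val; }
+//   ... get(x) ...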
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "argpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+
+namespace {
+ /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+ ///
+ struct ArgPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned maxElements = 3)
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A vector used to hold the indices of a single GEP instruction
+ typedef std::vector<uint64_t> IndicesVector;
+
+ private:
+ CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ CallGraphNode *DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
+ };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+ return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = false, LocalChange;
+
+ do { // Iterate until we stop promoting from this SCC.
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if (CallGraphNode *CGN = PromoteArguments(*I)) {
+ LocalChange = true;
+ SCC.ReplaceNode(*I, CGN);
+ }
+ }
+ Changed |= LocalChange; // Remember that we changed something.
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ // Make sure that it is local to this module.
+ if (!F || !F->hasLocalLinkage()) return 0;
+
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (I->getType()->isPointerTy())
+ PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+ if (PointerArgs.empty()) return 0;
+
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ // Must be a direct call.
+ if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
+
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument*, 8> ArgsToPromote;
+ SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+ for (unsigned i = 0; i != PointerArgs.size(); ++i) {
+ bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+ Argument *PtrArg = PointerArgs[i].first;
+ const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe.
+ if (isByVal) {
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more"
+ << " than " << maxElements << " arguments to the function.\n");
+ continue;
+ }
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
+ }
+ }
+
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
+ }
+ }
+
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (STy->getElementType(i) == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
+ }
+ }
+
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, isByVal))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return 0;
+
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callers pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+ Function *Callee = Arg->getParent();
+
+ unsigned ArgNo = std::distance(Callee->arg_begin(),
+ Function::arg_iterator(Arg));
+
+ // Look at all call sites of the function. At this point we know we only
+ // have direct call sites.
+ for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ assert(CS && "Should only have direct calls!");
+
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if Prefix is a prefix of Longer. That means Longer has a size
+/// that is greater than or equal to the size of Prefix, and each of the
+/// elements in Prefix is the same as the corresponding element in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+ const ArgPromotion::IndicesVector &Longer) {
+ if (Prefix.size() > Longer.size())
+ return false;
+ for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
+ if (Prefix[i] != Longer[i])
+ return false;
+ return true;
+}
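+// For example (editor's illustration, not in the original source):
+//   IsPrefix({1,2}, {1,2,3}) == true   // proper prefix
+//   IsPrefix({1,2}, {1,2})   == true   // equal vectors count as a prefix
+//   IsPrefix({1,3}, {1,2,3}) == false  // element mismatch
+//   IsPrefix({1,2,3}, {1,2}) == false  // Prefix is longer than Longer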
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+ std::set<ArgPromotion::IndicesVector> &Set) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
+/// already. Furthermore, any indices that ToMark is itself a prefix of are
+/// removed from Safe (since they are implicitly safe because of ToMark now).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+ std::set<ArgPromotion::IndicesVector> &Safe) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Safe.upper_bound(ToMark);
+ // Guard against the case where Safe is empty
+ if (Low != Safe.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This
+ // means it points to a prefix of Indices (possibly Indices itself), if
+ // such prefix exists.
+ if (Low != Safe.end()) {
+ if (IsPrefix(*Low, ToMark))
+ // If there is already a prefix of these indices (or exactly these
+ // indices) marked as safe, don't bother adding them again.
+ return;
+
+ // Increment Low, so we can use it as an "insert before" hint
+ ++Low;
+ }
+ // Insert
+ Low = Safe.insert(Low, ToMark);
+ ++Low;
+ // If ToMark is a prefix of longer index list(s), remove those
+ std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ while (Low != End && IsPrefix(ToMark, *Low)) {
+ std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ ++Low;
+ Safe.erase(Remove);
+ }
+}
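+// Editor's illustration (not in the original source) of how Safe evolves for
+// some made-up index lists:
+//   Safe = { {1,1}, {2,3,4} }
+//   MarkIndicesSafe({1}, Safe)     -> Safe = { {1}, {2,3,4} }  // {1,1} is now
+//                                                              // covered by {1}
+//   MarkIndicesSafe({2,3}, Safe)   -> Safe = { {1}, {2,3} }    // {2,3,4} removed
+//   MarkIndicesSafe({2,3,5}, Safe) -> Safe unchanged           // prefix {2,3}
+//                                                              // already safe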
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+ typedef std::set<IndicesVector> GEPIndicesSet;
+
+ // Quick exit for unused arguments
+ if (Arg->use_empty())
+ return true;
+
+ // We can only promote this argument if all of the uses are loads, or are GEP
+ // instructions (with constant indices) that are subsequently loaded.
+ //
+ // Promoting the argument causes it to be loaded in the caller
+ // unconditionally. This is only safe if we can prove that either the load
+ // would have happened in the callee anyway (ie, there is a load in the entry
+ // block) or the pointer passed in at every call site is guaranteed to be
+ // valid.
+ // In the former case, invalid loads can happen, but would have happened
+ // anyway, in the latter case, invalid loads won't happen. This prevents us
+ // from introducing an invalid load that wouldn't have happened in the
+ // original code.
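+ //
+ // As an editor's illustration (not from the original source), consider a
+ // made-up callee in C-like form:
+ //
+ //   static int f(int *p, bool c) { return c ? *p : 0; }
+ //
+ // The load of *p is conditional, so hoisting it into every caller is only
+ // safe when each call site is known to pass a dereferenceable pointer. If
+ // the entry block instead loaded *p unconditionally, promotion could not
+ // introduce a fault that the original code would not already have hit.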
+ //
+ // This set will contain all sets of indices that are loaded in the entry
+ // block, and thus are safe to unconditionally load in the caller.
+ GEPIndicesSet SafeToUnconditionallyLoad;
+
+ // This set contains all the sets of indices that we are planning to promote.
+ // This makes it possible to limit the number of arguments added.
+ GEPIndicesSet ToPromote;
+
+ // If the pointer is always valid, any load with first index 0 is valid.
+ if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ // First, iterate the entry block and mark loads of (geps of) arguments as
+ // safe.
+ BasicBlock *EntryBlock = Arg->getParent()->begin();
+ // Declare this here so we can reuse it
+ IndicesVector Indices;
+ for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
+ I != E; ++I)
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *V = LI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ V = GEP->getPointerOperand();
+ if (V == Arg) {
+ // This load actually loads (part of) Arg? Check the indices then.
+ Indices.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+ Indices.push_back(CI->getSExtValue());
+ else
+ // We found a non-constant GEP index for this argument? Bail out
+ // right away, can't promote this argument at all.
+ return false;
+
+ // Indices checked out, mark them as safe
+ MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ Indices.clear();
+ }
+ } else if (V == Arg) {
+ // Direct loads are equivalent to a GEP with a single 0 index.
+ MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ }
+ }
+
+ // Now, iterate all uses of the argument to see if there are any uses that are
+ // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ SmallVector<LoadInst*, 16> Loads;
+ IndicesVector Operands;
+ for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ Operands.clear();
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ // Direct loads are equivalent to a GEP with a zero index and then a load.
+ Operands.push_back(0);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEP->use_empty()) {
+ // Dead GEP's cause trouble later. Just remove them if we run into
+ // them.
+ getAnalysis<AliasAnalysis>().deleteValue(GEP);
+ GEP->eraseFromParent();
+ // TODO: This runs the above loop over and over again for dead GEPs
+ // Couldn't we just increment the UI iterator earlier and erase the
+ // use?
+ return isSafeToPromoteArgument(Arg, isByVal);
+ }
+
+ // Ensure that all of the indices are constants.
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+ Operands.push_back(C->getSExtValue());
+ else
+ return false; // Not a constant operand GEP!
+
+ // Ensure that the only users of the GEP are load instructions.
+ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+ UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ } else {
+ // Other uses than load?
+ return false;
+ }
+ } else {
+ return false; // Not a load or a GEP.
+ }
+
+ // Now, see if it is safe to promote this load / loads of this GEP. Loading
+ // is safe if Operands, or a prefix of Operands, is marked as safe.
+ if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ return false;
+
+ // See if we are already promoting a load with these indices. If not, check
+ // to make sure that we aren't promoting too many elements. If so, nothing
+ // to do.
+ if (ToPromote.find(Operands) == ToPromote.end()) {
+ if (maxElements > 0 && ToPromote.size() == maxElements) {
+ DEBUG(dbgs() << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n");
+ // We limit aggregate promotion to only promoting up to a fixed number
+ // of elements of the aggregate.
+ return false;
+ }
+ ToPromote.insert(Operands);
+ }
+ }
+
+ if (Loads.empty()) return true; // No users, this is a dead argument.
+
+ // Okay, now we know that the argument is only used by load instructions and
+ // it is safe to unconditionally perform all of them. Use alias analysis to
+ // check to see if the pointer is guaranteed to not be modified from entry of
+ // the function to each of the load instructions.
+
+ // Because there could be several/many load instructions, remember which
+ // blocks we know to be transparent to the load.
+ SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ // Check to see if the load is invalidated from the start of the block to
+ // the load itself.
+ LoadInst *Load = Loads[i];
+ BasicBlock *BB = Load->getParent();
+
+ AliasAnalysis::Location Loc = AA.getLocation(Load);
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
+ return false; // Pointer is invalidated!
+
+ // Now check every path from the entry block to the load for transparency.
+ // To do this, we perform a depth first search on the inverse CFG from the
+ // loading block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
+ I = idf_ext_begin(P, TranspBlocks),
+ E = idf_ext_end(P, TranspBlocks); I != E; ++I)
+ if (AA.canBasicBlockModify(**I, Loc))
+ return false;
+ }
+ }
+
+ // If the path from the entry of the function to each load is free of
+ // instructions that potentially invalidate the load, we can make the
+ // transformation!
+ return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ typedef std::set<IndicesVector> ScalarizeTable;
+
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ std::map<IndicesVector, LoadInst*> OriginalLoads;
+
+ // Attributes - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+ // attributes are lost
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // First, determine the new argument list
+ unsigned ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(I)) {
+ // Simple byval argument? Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Params.push_back(STy->getElementType(i));
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
+
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+ IndicesVector Indices;
+ Indices.reserve(User->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(Indices);
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(User))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(User->use_back());
+ OriginalLoads[Indices] = OrigLoad;
+ }
+
+ // Add a parameter to the function for each element passed in.
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
+ SI->begin(),
+ SI->end()));
+ assert(Params.back());
+ }
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ const Type *RetTy = FTy->getReturnType();
+
+ // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
+ // have zero fixed arguments.
+ bool ExtraArgHack = false;
+ if (Params.empty() && FTy->isVarArg()) {
+ ExtraArgHack = true;
+ Params.push_back(Type::getInt32Ty(F->getContext()));
+ }
+
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
+
+
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ AttributesVec.clear();
+
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Get the alias analysis information that we need to update to reflect our
+ // changes.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value*, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = CallPAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ Args.push_back(*AI); // Unmodified argument
+
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+
+ } else if (ByValArgsToTransform.count(I)) {
+ // Emit a GEP and load for each element of the struct.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+ (*AI)->getName()+"."+utostr(i),
+ Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value*> Ops;
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ Value *V = *AI;
+ LoadInst *OrigLoad = OriginalLoads[*SI];
+ if (!SI->empty()) {
+ Ops.reserve(SI->size());
+ const Type *ElTy = V->getType();
+ for (IndicesVector::const_iterator II = SI->begin(),
+ IE = SI->end(); II != IE; ++II) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ const Type *IdxTy = (ElTy->isStructTy() ?
+ Type::getInt32Ty(F->getContext()) :
+ Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, *II));
+ // Keep track of the type we're currently indexing.
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
+ V->getName()+".idx", Call);
+ Ops.clear();
+ AA.copyValue(OrigLoad->getOperand(0), V);
+ }
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the TBAA info too.
+ newLoad->setMetadata(LLVMContext::MD_tbaa,
+ OrigLoad->getMetadata(LLVMContext::MD_tbaa));
+ Args.push_back(newLoad);
+ AA.copyValue(OrigLoad, Args.back());
+ }
+ }
+
+ if (ExtraArgHack)
+ Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
+
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = CallPAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ AttributesVec.clear();
+
+ // Update the alias analysis implementation to know that we are replacing
+ // the old call with a new one.
+ AA.replaceWithNewValue(Call, New);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, and transferring over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I) {
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ AA.replaceWithNewValue(I, I2);
+ ++I2;
+ continue;
+ }
+
+ if (ByValArgsToTransform.count(I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = NF->begin()->begin();
+
+ // Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx =
+ GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ TheAlloca->getName()+"."+Twine(i),
+ InsertPt);
+ I2->setName(I->getName()+"."+Twine(i));
+ new StoreInst(I2++, Idx, InsertPt);
+ }
+
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(I);
+ AA.replaceWithNewValue(I, TheAlloca);
+ continue;
+ }
+
+ if (I->use_empty()) {
+ AA.deleteValue(I);
+ continue;
+ }
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
+ assert(ArgIndices.begin()->empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName()+".val");
+ LI->replaceAllUsesWith(I2);
+ AA.replaceWithNewValue(LI, I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ *It != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
+ }
+
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
+ }
+ NewName += ".val";
+ TheArg->setName(NewName);
+
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->use_back());
+ L->replaceAllUsesWith(TheArg);
+ AA.replaceWithNewValue(L, TheArg);
+ L->eraseFromParent();
+ }
+ AA.deleteValue(GEP);
+ GEP->eraseFromParent();
+ }
+ }
+
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
+ ++I2;
+ }
+
+ // Notify the alias analysis implementation that we inserted a new argument.
+ if (ExtraArgHack)
+ AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
+ NF->arg_begin());
+
+
+ // Tell the alias analysis that the old function is about to disappear.
+ AA.replaceWithNewValue(F, NF);
+
+
+ NF_CGN->stealCalledFunctionsFrom(CG[F]);
+
+ // Now that the old function is dead, delete it. If there is a dangling
+ // reference to the CallgraphNode, just leave the dead function around for
+ // someone else to nuke.
+ CallGraphNode *CGN = CG[F];
+ if (CGN->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(CGN);
+ else
+ F->setLinkage(Function::ExternalLinkage);
+
+ return NF_CGN;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
new file mode 100644
index 000000000000..a21efced73b9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -0,0 +1,190 @@
+//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface to a pass that merges duplicate global
+// constants together into a single constant that is shared. This is useful
+// because some passes (e.g. TraceValues) insert a lot of string constants into
+// the program, regardless of whether or not an existing string is available.
+//
+// Algorithm: ConstantMerge is designed to build up a map of available constants
+// and eliminate duplicates when it is initialized.
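+//
+// As an illustrative sketch (editor's note, not part of this commit), two
+// internal constants with identical initializers such as
+//
+//   @a = internal unnamed_addr constant i32 42
+//   @b = internal unnamed_addr constant i32 42
+//
+// are collapsed into a single canonical global: every use of the duplicate is
+// rewritten to refer to the surviving copy and the duplicate is erased.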
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constmerge"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMerged, "Number of global constants merged");
+
+namespace {
+ struct ConstantMerge : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ // run - For this pass, process all of the globals in the module,
+ // eliminating duplicate constants.
+ //
+ bool runOnModule(Module &M);
+ };
+}
+
+char ConstantMerge::ID = 0;
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false)
+
+ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+// True if A is better than B.
+static bool IsBetterCannonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
+ if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+ return true;
+
+ if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+ return false;
+
+ return A.hasUnnamedAddr();
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+ // Map unique constant/section pairs to globals. We don't want to merge
+ // globals in different sections.
+ DenseMap<Constant*, GlobalVariable*> CMap;
+
+ // Replacements - This vector contains a list of replacements to perform.
+ SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+ bool MadeChange = false;
+
+ // Iterate constant merging while we are still making progress. Merging two
+ // constants together may allow us to merge other constants together if the
+ // second level constants have initializers which point to the globals that
+ // were just merged.
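+ // For instance (editor's illustration, not in the original source), if @p
+ // and @q are identical except that their initializers point at @a and @b
+ // respectively, they only become identical (and thus mergeable) once @b has
+ // been folded into @a; that is why this loop repeats until a pass makes no
+ // replacements.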
+ while (1) {
+
+ // First: Find the canonical constants others will be merged with.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // If this GV is dead, remove it.
+ GV->removeDeadConstantUsers();
+ if (GV->use_empty() && GV->hasLocalLinkage()) {
+ GV->eraseFromParent();
+ continue;
+ }
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *&Slot = CMap[Init];
+
+      // If this is the first constant we find, or if the old one is local,
+      // replace it with the current one. If the current one is externally
+      // visible it cannot be replaced, but it can be the canonical constant
+      // we merge with.
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
+ Slot = GV;
+ }
+ }
+
+ // Second: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+      // We can only replace constants with local linkage.
+ if (!GV->hasLocalLinkage())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *Slot = CMap[Init];
+
+ if (!Slot || Slot == GV)
+ continue;
+
+ if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ continue;
+
+ if (!GV->hasUnnamedAddr())
+ Slot->setUnnamedAddr(false);
+
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+
+ if (Replacements.empty())
+ return MadeChange;
+ CMap.clear();
+
+    // Now that we have figured out which replacements must be made, do them
+    // all now. This avoids invalidating the pointers in CMap, which are no
+    // longer needed.
+ for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Eliminate any uses of the dead global.
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+ // Delete the global value from the module.
+ assert(Replacements[i].first->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
+ Replacements[i].first->eraseFromParent();
+ }
+
+ NumMerged += Replacements.size();
+ Replacements.clear();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
new file mode 100644
index 000000000000..15177650f4e5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -0,0 +1,1003 @@
+//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass deletes dead arguments from internal functions. Dead argument
+// elimination removes arguments which are directly dead, as well as arguments
+// only passed into function calls as dead arguments of other functions. This
+// pass also deletes dead return values in a similar way.
+//
+// This pass is often useful as a cleanup pass to run after aggressive
+// interprocedural passes, which add possibly-dead arguments or return values.
+//
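+// For illustration only (hypothetical IR, not from the original sources): for
+// an internal function
+//
+//   define internal i32 @f(i32 %used, i32 %unused) { ... }
+//
+// in which %unused has no uses, the pass rebuilds @f without the dead
+// parameter,
+//
+//   define internal i32 @f(i32 %used) { ... }
+//
+// and rewrites every call site to stop passing the corresponding argument.
+//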
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadargelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
+STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
+STATISTIC(NumArgumentsReplacedWithUndef,
+ "Number of unread args replaced with undef");
+namespace {
+ /// DAE - The dead argument elimination pass.
+ ///
+ class DAE : public ModulePass {
+ public:
+
+ /// Struct that represents (part of) either a return value or a function
+ /// argument. Used so that arguments and return values can be used
+ /// interchangeably.
+ struct RetOrArg {
+ RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ IsArg(IsArg) {}
+ const Function *F;
+ unsigned Idx;
+ bool IsArg;
+
+ /// Make RetOrArg comparable, so we can put it into a map.
+ bool operator<(const RetOrArg &O) const {
+ if (F != O.F)
+ return F < O.F;
+ else if (Idx != O.Idx)
+ return Idx < O.Idx;
+ else
+ return IsArg < O.IsArg;
+ }
+
+ /// Make RetOrArg comparable, so we can easily iterate the multimap.
+ bool operator==(const RetOrArg &O) const {
+ return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
+ }
+
+ std::string getDescription() const {
+ return std::string((IsArg ? "Argument #" : "Return value #"))
+ + utostr(Idx) + " of function " + F->getNameStr();
+ }
+ };
+
+ /// Liveness enum - During our initial pass over the program, we determine
+ /// that things are either alive or maybe alive. We don't mark anything
+ /// explicitly dead (even if we know they are), since anything not alive
+ /// with no registered uses (in Uses) will never be marked alive and will
+ /// thus become dead in the end.
+ enum Liveness { Live, MaybeLive };
+
+ /// Convenience wrapper
+ RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, false);
+ }
+ /// Convenience wrapper
+ RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, true);
+ }
+
+ typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+ /// This maps a return value or argument to any MaybeLive return values or
+ /// arguments it uses. This allows the MaybeLive values to be marked live
+ /// when any of its users is marked live.
+ /// For example (indices are left out for clarity):
+ /// - Uses[ret F] = ret G
+ /// This means that F calls G, and F returns the value returned by G.
+ /// - Uses[arg F] = ret G
+ /// This means that some function calls G and passes its result as an
+ /// argument to F.
+ /// - Uses[ret F] = arg F
+ /// This means that F returns one of its own arguments.
+ /// - Uses[arg F] = arg G
+ /// This means that G calls F and passes one of its own (G's) arguments
+ /// directly to F.
+ UseMap Uses;
+
+ typedef std::set<RetOrArg> LiveSet;
+ typedef std::set<const Function*> LiveFuncSet;
+
+ /// This set contains all values that have been determined to be live.
+ LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+ LiveFuncSet LiveFunctions;
+
+ typedef SmallVector<RetOrArg, 5> UseVector;
+
+ protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+ virtual bool ShouldHackArguments() const { return false; }
+
+ private:
+ Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses,
+ unsigned RetValNum = 0);
+ Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+ void SurveyFunction(const Function &F);
+ void MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses);
+ void MarkLive(const RetOrArg &RA);
+ void MarkLive(const Function &F);
+ void PropagateLiveness(const RetOrArg &RA);
+ bool RemoveDeadStuffFromFunction(Function *F);
+ bool DeleteDeadVarargs(Function &Fn);
+ bool RemoveDeadArgumentsFromCallers(Function &Fn);
+ };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
+
+namespace {
+ /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+ /// deletes arguments to functions which are external. This is only for use
+ /// by bugpoint.
+ struct DAH : public DAE {
+ static char ID;
+ DAH() : DAE(ID) {}
+
+ virtual bool ShouldHackArguments() const { return true; }
+ };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false)
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+ assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+ // Ensure that the function is only directly called.
+ if (Fn.hasAddressTaken())
+ return false;
+
+ // Okay, we know we can transform this function if safe. Scan its body
+ // looking for calls to llvm.vastart.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+ }
+ }
+
+ // If we get here, there are no calls to llvm.vastart in the function body,
+ // remove the "..." and adjust all the calls.
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but doesn't have isVarArg set.
+ const FunctionType *FTy = Fn.getFunctionType();
+
+ std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+ Params, false);
+ unsigned NumArgs = Params.size();
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ NF->copyAttributesFrom(&Fn);
+ Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ NF->takeName(&Fn);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!Fn.use_empty()) {
+ CallSite CS(Fn.use_back());
+ Instruction *Call = CS.getInstruction();
+
+ // Pass all the same arguments.
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+ // Drop any attributes that were on the vararg arguments.
+ AttrListPtr PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlot(i));
+ if (Attributes FnAttrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
+ }
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(PAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(PAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty())
+ Call->replaceAllUsesWith(New);
+
+ New->takeName(Call);
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments and transferring the names over as well.
+ //
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // Move the name and users over to the new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ }
+
+ // Finally, nuke the old function.
+ Fn.eraseFromParent();
+ return true;
+}
+
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and if so, replaces the corresponding arguments
+/// at each call site with undef.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+ if (Fn.isDeclaration() || Fn.mayBeOverridden())
+ return false;
+
+ // Functions with local linkage should already have been handled.
+ if (Fn.hasLocalLinkage())
+ return false;
+
+ if (Fn.use_empty())
+ return false;
+
+ llvm::SmallVector<unsigned, 8> UnusedArgs;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I) {
+ Argument *Arg = I;
+
+ if (Arg->use_empty() && !Arg->hasByValAttr())
+ UnusedArgs.push_back(Arg->getArgNo());
+ }
+
+ if (UnusedArgs.empty())
+ return false;
+
+ bool Changed = false;
+
+ for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end();
+ I != E; ++I) {
+ CallSite CS(*I);
+ if (!CS || !CS.isCallee(I))
+ continue;
+
+ // Now go through all unused args and replace them with "undef".
+ for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+ unsigned ArgNo = UnusedArgs[I];
+
+ Value *Arg = CS.getArgument(ArgNo);
+ CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ ++NumArgumentsReplacedWithUndef;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Convenience function that returns the number of return values. It returns 0
+/// for void functions and 1 for functions not returning a struct. It returns
+/// the number of struct elements for functions returning a struct.
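+/// For example (illustrative), a function returning {i32, i8} yields 2, while
+/// a void function yields 0.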
+static unsigned NumRetVals(const Function *F) {
+ if (F->getReturnType()->isVoidTy())
+ return 0;
+ else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ return STy->getNumElements();
+ else
+ return 1;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+ // We're live if our use or its Function is already marked as live.
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ return Live;
+
+ // We're maybe live otherwise, but remember that we must become live if
+ // Use becomes live.
+ MaybeLiveUses.push_back(Use);
+ return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. It is only adjusted during recursion; callers should
+/// always leave it at 0.
+DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
+ UseVector &MaybeLiveUses, unsigned RetValNum) {
+ const User *V = *U;
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // original Use.
+ RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ && IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (Value::const_use_iterator I = IV->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+ }
+
+ if (ImmutableCallSite CS = V) {
+ const Function *F = CS.getCalledFunction();
+ if (F) {
+ // Used in a direct call.
+
+      // Find the argument number. We know for sure that this use is an
+      // argument, since if it were the called function this would be an
+      // indirect call, and we know we cannot be looking at a value of label
+      // type (for the invoke instruction).
+ unsigned ArgNo = CS.getArgumentNo(U);
+
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
+
+ assert(CS.getArgument(ArgNo)
+ == CS->getOperand(U.getOperandNo())
+ && "Argument is not where we expected it");
+
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = CreateArg(F, ArgNo);
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ }
+ // Used in any other way? Value must be live.
+ return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value and returns the
+/// Liveness deduced from those uses.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+  // Assume it's dead (which will only hold if there are no uses at all).
+ Liveness Result = MaybeLive;
+ // Check each use.
+ for (Value::const_use_iterator I = V->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking out whether or not it uses any of its incoming arguments or whether
+// any callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+ unsigned RetCount = NumRetVals(&F);
+ // Assume all return values are dead
+ typedef SmallVector<Liveness, 5> RetVals;
+ RetVals RetValLiveness(RetCount, MaybeLive);
+
+ typedef SmallVector<UseVector, 5> RetUses;
+ // These vectors map each return value to the uses that make it MaybeLive, so
+ // we can add those to the Uses map if the return value really turns out to be
+ // MaybeLive. Initialized to a list of RetCount empty lists.
+ RetUses MaybeLiveRetUses(RetCount);
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+ != F.getFunctionType()->getReturnType()) {
+ // We don't support old style multiple return values.
+ MarkLive(F);
+ return;
+ }
+
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ MarkLive(F);
+ return;
+ }
+
+ DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ // Keep track of the number of live retvals, so we can skip checks once all
+ // of them turn out to be live.
+ unsigned NumLiveRetVals = 0;
+ const Type *STy = dyn_cast<StructType>(F.getReturnType());
+ // Loop all uses of the function.
+ for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
+ I != E; ++I) {
+ // If the function is PASSED IN as an argument, its address has been
+ // taken.
+ ImmutableCallSite CS(*I);
+ if (!CS || !CS.isCallee(I)) {
+ MarkLive(F);
+ return;
+ }
+
+ // If this use is anything other than a call site, the function is alive.
+ const Instruction *TheCall = CS.getInstruction();
+ if (!TheCall) { // Not a direct call site?
+ MarkLive(F);
+ return;
+ }
+
+ // If we end up here, we are looking at a direct call to our function.
+
+ // Now, check how our return value(s) is/are used in this caller. Don't
+ // bother checking return values if all of them are live already.
+ if (NumLiveRetVals != RetCount) {
+ if (STy) {
+ // Check all uses of the return value.
+ for (Value::const_use_iterator I = TheCall->use_begin(),
+ E = TheCall->use_end(); I != E; ++I) {
+ const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ if (Ext && Ext->hasIndices()) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
+ }
+ } else {
+ // Used by something else than extractvalue. Mark all return
+ // values as live.
+ for (unsigned i = 0; i != RetCount; ++i )
+ RetValLiveness[i] = Live;
+ NumLiveRetVals = RetCount;
+ break;
+ }
+ }
+ } else {
+ // Single return value
+ RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
+ if (RetValLiveness[0] == Live)
+ NumLiveRetVals = RetCount;
+ }
+ }
+ }
+
+ // Now we've inspected all callers, record the liveness of our return values.
+ for (unsigned i = 0; i != RetCount; ++i)
+ MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+ DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+ // Now, check all of our arguments.
+ unsigned i = 0;
+ UseVector MaybeLiveArgUses;
+ for (Function::const_arg_iterator AI = F.arg_begin(),
+ E = F.arg_end(); AI != E; ++AI, ++i) {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ // Mark the result.
+ MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ // Clear the vector again for the next iteration.
+ MaybeLiveArgUses.clear();
+ }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
+/// such that RA will be marked live if any use in MaybeLiveUses gets marked
+/// live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
+ switch (L) {
+ case Live: MarkLive(RA); break;
+ case MaybeLive:
+ {
+ // Note any uses of this value, so this return value can be
+ // marked live whenever one of the uses becomes live.
+ for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+ UE = MaybeLiveUses.end(); UI != UE; ++UI)
+ Uses.insert(std::make_pair(*UI, RA));
+ break;
+ }
+ }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally,
+/// mark any values that are used as this function's parameters or by its return
+/// values (according to Uses) live as well.
+void DAE::MarkLive(const Function &F) {
+ DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+ if (LiveFunctions.count(RA.F))
+ return; // Function was already marked Live.
+
+ if (!LiveValues.insert(RA).second)
+ return; // We were already marked Live.
+
+ DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate its liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+ // We don't use upper_bound (or equal_range) here, because our recursive call
+ // to ourselves is likely to cause the upper_bound (which is the first value
+ // not belonging to RA) to become erased and the iterator invalidated.
+ UseMap::iterator Begin = Uses.lower_bound(RA);
+ UseMap::iterator E = Uses.end();
+ UseMap::iterator I;
+ for (I = Begin; I != E && I->first == RA; ++I)
+ MarkLive(I->second);
+
+ // Erase RA from the Uses map (from the lower bound to wherever we ended up
+ // after the loop).
+ Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of its call
+// sites so that they no longer have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+ // Don't modify fully live functions
+ if (LiveFunctions.count(F))
+ return false;
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has fewer arguments and a different return type.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ // Set up to build a new list of parameter attributes.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // The existing function return attributes.
+ Attributes RAttrs = PAL.getRetAttributes();
+ Attributes FnAttrs = PAL.getFnAttributes();
+
+ // Find out the new return value.
+
+ Type *RetTy = FTy->getReturnType();
+ const Type *NRetTy = NULL;
+ unsigned RetCount = NumRetVals(F);
+
+ // -1 means unused, other numbers are the new index
+ SmallVector<int, 5> NewRetIdxs(RetCount, -1);
+ std::vector<Type*> RetTypes;
+ if (RetTy->isVoidTy()) {
+ NRetTy = RetTy;
+ } else {
+ const StructType *STy = dyn_cast<StructType>(RetTy);
+ if (STy)
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(STy->getElementType(i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
+ } else {
+ ++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ else
+ // We used to return a single value.
+ if (LiveValues.erase(CreateRet(F, 0))) {
+ RetTypes.push_back(RetTy);
+ NewRetIdxs[0] = 0;
+ } else {
+ DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
+ << "\n");
+ ++NumRetValsEliminated;
+ }
+ if (RetTypes.size() > 1)
+ // More than one return type? Return a struct with them. Also, if we used
+ // to return a struct and didn't change the number of return values,
+ // return a struct again. This prevents changing {something} into
+ // something and {} into void.
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ else if (RetTypes.size() == 1)
+ // One return type? Just a simple value then, but only if we didn't use to
+ // return a struct with that simple value before.
+ NRetTy = RetTypes.front();
+ else if (RetTypes.size() == 0)
+ // No return types? Make it void, but only if we didn't use to return {}.
+ NRetTy = Type::getVoidTy(F->getContext());
+ }
+
+ assert(NRetTy && "No new return type found?");
+
+ // Remove any incompatible attributes, but only if we removed all return
+ // values. Otherwise, ensure that we don't have any conflicting attributes
+ // here. Currently, this should not be possible, but special handling might be
+ // required when new return value attributes are added.
+ if (NRetTy->isVoidTy())
+ RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ else
+ assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ && "Return attributes no longer compatible?");
+
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+      // Get the original parameter attributes (skipping the first one, that is
+      // for the return value).
+ if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+ } else {
+ ++NumArgumentsEliminated;
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
+ }
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ // Create the new function type based on the recomputed parameters.
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+ // No change?
+ if (NFTy == FTy)
+ return false;
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(NewPAL);
+ // Insert the new function before the old function, so we won't be processing
+ // it again.
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ Instruction *Call = CS.getInstruction();
+
+ AttributesVec.clear();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // The call return attributes.
+ Attributes RAttrs = CallPAL.getRetAttributes();
+ Attributes FnAttrs = CallPAL.getFnAttributes();
+ // Adjust in case the function was changed to return void.
+ RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+    // Declare these outside of the loops, so we can reuse them for the second
+    // loop, which loops over the varargs.
+ CallSite::arg_iterator I = CS.arg_begin();
+ unsigned i = 0;
+ // Loop over those operands, corresponding to the normal arguments to the
+ // original function, and add those that are still alive.
+ for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+ if (ArgAlive[i]) {
+ Args.push_back(*I);
+ // Get original parameter attributes, but skip return attributes.
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Push any varargs arguments on the list. Don't forget their attributes.
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ Args.push_back(*I);
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewCallPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty()) {
+ if (New->getType() == Call->getType()) {
+ // Return type not changed? Just replace users then.
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ } else if (New->getType()->isVoidTy()) {
+ // Our return value has uses, but they will get removed later on.
+ // Replace by null for now.
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ } else {
+ assert(RetTy->isStructTy() &&
+ "Return type changed, but not into a void. The old return type"
+ " must have been a struct!");
+ Instruction *InsertPt = Call;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock::iterator IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP)) ++IP;
+ InsertPt = IP;
+ }
+
+ // We used to return a struct. Instead of doing smart stuff with all the
+ // uses of this struct, we will just rebuild it using
+ // extract/insertvalue chaining and let instcombine clean that up.
+ //
+ // Start out building up our return value from undef
+ Value *RetVal = UndefValue::get(RetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ Value *V;
+ if (RetTypes.size() > 1)
+ // We are still returning a struct, so extract the value from our
+ // return value
+ V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
+ InsertPt);
+ else
+ // We are now returning a single element, so just insert that
+ V = New;
+ // Insert the value at the old position
+ RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ }
+ // Now, replace all uses of the old call instruction with the return
+ // struct we built
+ Call->replaceAllUsesWith(RetVal);
+ New->takeName(Call);
+ }
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments and transferring the names over as well.
+ i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++i)
+ if (ArgAlive[i]) {
+ // If this is a live argument, move the name and users over to the new
+ // version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ ++I2;
+ } else {
+ // If this argument is dead, replace any uses of it with null constants
+ // (these are guaranteed to become unused later on).
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ }
+
+ // If we change the return value of the function we must rewrite any return
+ // instructions. Check this now.
+ if (F->getReturnType() != NF->getReturnType())
+ for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Value *RetVal;
+
+ if (NFTy->getReturnType()->isVoidTy()) {
+ RetVal = 0;
+ } else {
+ assert (RetTy->isStructTy());
+ // The original return value was a struct, insert
+ // extractvalue/insertvalue chains to extract only the values we need
+ // to return and insert them into our new result.
+        // This does generate messy code, but we'll leave it to instcombine to
+        // clean that up.
+ Value *OldRet = RI->getOperand(0);
+ // Start out building up our return value from undef
+ RetVal = UndefValue::get(NRetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+ "oldret", RI);
+ if (RetTypes.size() > 1) {
+ // We're still returning a struct, so reinsert the value into
+ // our new return value at the new index
+
+ RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+ "newret", RI);
+ } else {
+ // We are now only returning a simple value, so just return the
+ // extracted value.
+ RetVal = EV;
+ }
+ }
+ }
+ // Replace the return instruction with one returning the new return
+ // value (possibly 0 if we became void).
+ ReturnInst::Create(F->getContext(), RetVal, RI);
+ BB->getInstList().erase(RI);
+ }
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+
+ return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // First pass: Do a simple check to see if any functions can have their "..."
+ // removed. We can do this if they never call va_start. This loop cannot be
+ // fused with the next loop, because deleting a function invalidates
+ // information computed while surveying other functions.
+ DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function &F = *I++;
+ if (F.getFunctionType()->isVarArg())
+ Changed |= DeleteDeadVarargs(F);
+ }
+
+  // Second phase: loop through the module, determining which arguments are live.
+ // We assume all arguments are dead unless proven otherwise (allowing us to
+ // determine that dead arguments passed into recursive functions are dead).
+ //
+ DEBUG(dbgs() << "DAE - Determining liveness\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ SurveyFunction(*I);
+
+ // Now, remove all dead arguments and return values from each function in
+ // turn.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.
+ // replaced by a new one).
+ Function *F = I++;
+ Changed |= RemoveDeadStuffFromFunction(F);
+ }
+
+ // Finally, look for any unused parameters in functions with non-local
+ // linkage and replace the passed in parameters with undef.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function& F = *I;
+
+ Changed |= RemoveDeadArgumentsFromCallers(F);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 000000000000..d9911bfb4597
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,92 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values
+//
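+// With deleteStuff set, the named values themselves lose their bodies and
+// initializers; otherwise everything except the named values does. In both
+// cases the surviving globals are given external linkage, as explained in
+// the comment inside runOnModule below.
+//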
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Constants.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// @brief A pass to extract specific functions and their dependencies.
+ class GVExtractorPass : public ModulePass {
+ SetVector<GlobalValue *> Named;
+ bool deleteStuff;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the specified
+    /// global values. Otherwise, it deletes as much of the module as possible,
+    /// except for the values specified.
+ ///
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+ bool runOnModule(Module &M) {
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
+ I->setInitializer(0);
+ } else {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ if (I->getName() == "llvm.global_ctors")
+ continue;
+ }
+
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ }
+
+ // Visit the Functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
+ I->deleteBody();
+ } else {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ }
+
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ }
+
+ return true;
+ }
+ };
+
+ char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 000000000000..95decec0f874
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,380 @@
+//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. In addition,
+// it marks function arguments (of pointer type) 'nocapture' if a call
+// to the function does not create any copies of the pointer value that
+// outlive the call. This more or less means that the pointer is only
+// dereferenced, and not returned from the function or stored in a global.
+// This pass is implemented as a bottom-up traversal of the call-graph.
+//
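+// For illustration only (hypothetical IR, not from the original sources):
+//
+//   define internal i32 @get(i32* %p) {
+//     %v = load i32* %p
+//     ret i32 %v
+//   }
+//
+// only reads memory through %p, so it can be marked 'readonly', and %p never
+// escapes the call, so it can also be marked 'nocapture'.
+//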
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "functionattrs"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/InstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+
+namespace {
+ struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+ bool AddReadAttrs(const CallGraphSCC &SCC);
+
+ // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+ bool AddNoCaptureAttrs(const CallGraphSCC &SCC);
+
+ // IsFunctionMallocLike - Does this function allocate new memory?
+ bool IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &) const;
+
+ // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+ bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ AliasAnalysis *AA;
+ };
+}
+
+char FunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+
+/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - may write memory. Just give up.
+ return false;
+
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
+ if (MRB == AliasAnalysis::DoesNotAccessMemory)
+ // Already perfect!
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden()) {
+ if (!AliasAnalysis::onlyReadsMemory(MRB))
+ // May write memory. Just give up.
+ return false;
+
+ ReadsMemory = true;
+ continue;
+ }
+
+ // Scan the function body for instructions that may read or write memory.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
+ // If the call doesn't access arbitrary memory, we may be able to
+ // figure out something.
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // If the call does access argument pointees, check each argument.
+ if (AliasAnalysis::doesAccessArgPointees(MRB))
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (Arg->getType()->isPointerTy()) {
+ AliasAnalysis::Location Loc(Arg,
+ AliasAnalysis::UnknownSize,
+ I->getMetadata(LLVMContext::MD_tbaa));
+ if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
+ if (MRB & AliasAnalysis::Mod)
+ // Writes non-local memory. Give up.
+ return false;
+ if (MRB & AliasAnalysis::Ref)
+ // Ok, it reads non-local memory.
+ ReadsMemory = true;
+ }
+ }
+ }
+ continue;
+ }
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & AliasAnalysis::Mod)
+ return false;
+ // If it reads, note it.
+ if (MRB & AliasAnalysis::Ref)
+ ReadsMemory = true;
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory.
+ if (!LI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(LI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory.
+ if (!SI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(SI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ AliasAnalysis::Location Loc = AA->getLocation(VI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return false;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+ }
+
+ // Success! Functions in this SCC do not access memory, or only read memory.
+ // Give them the appropriate attribute.
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ if (F->onlyReadsMemory() && ReadsMemory)
+ // No change.
+ continue;
+
+ MadeChange = true;
+
+ // Clear out any existing attributes.
+ F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone);
+
+ // Add in the new attribute.
+ F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone);
+
+ if (ReadsMemory)
+ ++NumReadOnly;
+ else
+ ++NumReadNone;
+ }
+
+ return MadeChange;
+}
+
+/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
+ bool Changed = false;
+
+ // Check each function in turn, determining which pointer arguments are not
+ // captured.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+      // External node - skip it.
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ continue;
+
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
+ !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// IsFunctionMallocLike - A function is malloc-like if it returns either null
+/// or a pointer that doesn't alias any other pointer visible to the caller.
+bool FunctionAttrs::IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &SCCNodes) const {
+ UniqueVector<Value *> FlowsToReturn;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved.
+
+ if (Constant *C = dyn_cast<Constant>(RetVal)) {
+ if (!C->isNullValue() && !isa<UndefValue>(C))
+ return false;
+
+ continue;
+ }
+
+ if (isa<Argument>(RetVal))
+ return false;
+
+ if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() &&
+ SCCNodes.count(CS.getCalledFunction()))
+ break;
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
+ }
+
+ if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check each function in turn, determining which functions return noalias
+ // pointers.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+      // External node - we cannot prove anything about it, so give up.
+ return false;
+
+ // Already noalias.
+ if (F->doesNotAlias(0))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate noalias return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
+
+ if (!IsFunctionMallocLike(F, SCCNodes))
+ return false;
+ }
+
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ continue;
+
+ F->setDoesNotAlias(0);
+ ++NumNoAlias;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool Changed = AddReadAttrs(SCC);
+ Changed |= AddNoCaptureAttrs(SCC);
+ Changed |= AddNoAliasAttrs(SCC);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 000000000000..2b427aa6a4e6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,211 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
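+// For illustration only (hypothetical IR, not from the original sources): an
+// internal function that is never referenced, such as
+//
+//   define internal void @helper() { ret void }
+//
+// is never marked alive by the liveness walk below and is therefore removed,
+// together with anything that only it referenced.
+//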
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globaldce"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct GlobalDCE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+    // run - Do the GlobalDCE pass on the specified module.
+ //
+ bool runOnModule(Module &M);
+
+ private:
+ SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+
+ /// GlobalIsNeeded - mark the specific global value as needed, and
+ /// recursively mark anything that it uses as also needed.
+ void GlobalIsNeeded(GlobalValue *GV);
+ void MarkUsedGlobalsAsNeeded(Constant *C);
+
+ bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ };
+}
+
+char GlobalDCE::ID = 0;
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false)
+
+ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
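+
+// A minimal usage sketch (ours, for orientation; the wrapper name and the use
+// of the legacy PassManager are assumptions about how a client of this tree
+// would typically drive the pass):
+//
+//   #include "llvm/PassManager.h"
+//   #include "llvm/Transforms/IPO.h"
+//
+//   // Remove dead internal globals from M; returns true if anything changed.
+//   bool stripDeadGlobals(llvm::Module &M) {
+//     llvm::PassManager PM;
+//     PM.add(llvm::createGlobalDCEPass());
+//     return PM.run(M);
+//   }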
+
+bool GlobalDCE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Loop over the module, adding globals which are obviously necessary.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Functions with external linkage are needed if they have a body
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible & appending globals are needed, if they have an
+ // initializer.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible aliases are needed.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ // Now that all globals which are needed are in the AliveGlobals set, we loop
+ // through the program, deleting those which are not alive.
+ //
+
+ // The first pass is to drop initializers of global variables which are dead.
+ std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadGlobalVars.push_back(I); // Keep track of dead globals
+ I->setInitializer(0);
+ }
+
+ // The second pass drops the bodies of functions which are dead...
+ std::vector<Function*> DeadFunctions;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadFunctions.push_back(I); // Keep track of dead globals
+ if (!I->isDeclaration())
+ I->deleteBody();
+ }
+
+ // The third pass drops targets of aliases which are dead...
+ std::vector<GlobalAlias*> DeadAliases;
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
+ ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadAliases.push_back(I);
+ I->setAliasee(0);
+ }
+
+ if (!DeadFunctions.empty()) {
+ // Now that all inter-references have been dropped, delete the actual objects
+ // themselves.
+ for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadFunctions[i]);
+ M.getFunctionList().erase(DeadFunctions[i]);
+ }
+ NumFunctions += DeadFunctions.size();
+ Changed = true;
+ }
+
+ if (!DeadGlobalVars.empty()) {
+ for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
+ M.getGlobalList().erase(DeadGlobalVars[i]);
+ }
+ NumVariables += DeadGlobalVars.size();
+ Changed = true;
+ }
+
+ // Now delete any dead aliases.
+ if (!DeadAliases.empty()) {
+ for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadAliases[i]);
+ M.getAliasList().erase(DeadAliases[i]);
+ }
+ NumAliases += DeadAliases.size();
+ Changed = true;
+ }
+
+ // Make sure that all memory is released
+ AliveGlobals.clear();
+
+ return Changed;
+}
+
+/// GlobalIsNeeded - mark the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+ // If the global is already in the set, no need to reprocess it.
+ if (!AliveGlobals.insert(G))
+ return;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+ // If this is a global variable, we must make sure to add any global values
+ // referenced by the initializer to the alive set.
+ if (GV->hasInitializer())
+ MarkUsedGlobalsAsNeeded(GV->getInitializer());
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+ // The target of a global alias is needed.
+ MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else {
+ // Otherwise this must be a function object. We have to scan the body of
+ // the function looking for constants and global values which are used as
+ // operands. Any operands of these types must be processed to ensure that
+ // any globals used will be marked as needed.
+ Function *F = cast<Function>(G);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ GlobalIsNeeded(GV);
+ else if (Constant *C = dyn_cast<Constant>(*U))
+ MarkUsedGlobalsAsNeeded(C);
+ }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GlobalIsNeeded(GV);
+
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+ if (Constant *OpC = dyn_cast<Constant>(*I))
+ MarkUsedGlobalsAsNeeded(OpC);
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+ if (GV.use_empty()) return false;
+ GV.removeDeadConstantUsers();
+ return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 000000000000..4ac721dd0600
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,2856 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. Where it is obviously safe, it marks globals as constant, deletes
+// variables that are only stored to, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalopt"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
+STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
+STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
+STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
+STATISTIC(NumDeleted , "Number of globals deleted");
+STATISTIC(NumFnDeleted , "Number of functions deleted");
+STATISTIC(NumGlobUses , "Number of global uses devirtualized");
+STATISTIC(NumLocalized , "Number of globals localized");
+STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
+STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
+STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
+STATISTIC(NumNestRemoved , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+
+namespace {
+ struct GlobalStatus;
+ struct GlobalOpt : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+ private:
+ GlobalVariable *FindGlobalCtors(Module &M);
+ bool OptimizeFunctions(Module &M);
+ bool OptimizeGlobalVars(Module &M);
+ bool OptimizeGlobalAliases(Module &M);
+ bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
+ bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS);
+ bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+ };
+}
+
+char GlobalOpt::ID = 0;
+INITIALIZE_PASS(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
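+
+// For orientation only (hand-written example, not taken from a test in this
+// patch): given IR such as
+//
+//   @only_read    = internal global i32 42   ; loaded, never stored to
+//   @only_written = internal global i32 0    ; stored to, never loaded
+//
+// and assuming neither global has its address taken, this pass can rewrite
+// @only_read into 'internal constant i32 42' and delete @only_written
+// together with the stores into it.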
+
+namespace {
+
+/// GlobalStatus - As we analyze each global, keep track of some information
+/// about it. If we find out that the address of the global is taken, none of
+/// this info will be accurate.
+struct GlobalStatus {
+ /// isCompared - True if the global's address is used in a comparison.
+ bool isCompared;
+
+ /// isLoaded - True if the global is ever loaded. If the global isn't ever
+ /// loaded it can be deleted.
+ bool isLoaded;
+
+ /// StoredType - Keep track of what stores to the global look like.
+ ///
+ enum StoredType {
+ /// NotStored - There is no store to this global. It can thus be marked
+ /// constant.
+ NotStored,
+
+ /// isInitializerStored - This global is stored to, but the only thing
+ /// stored is the constant it was initialized with. This is only tracked
+ /// for scalar globals.
+ isInitializerStored,
+
+ /// isStoredOnce - This global is stored to, but only its initializer and
+ /// one other value is ever stored to it. If this global isStoredOnce, we
+ /// track the value stored to it in StoredOnceValue below. This is only
+ /// tracked for scalar globals.
+ isStoredOnce,
+
+ /// isStored - This global is stored to by multiple values or something else
+ /// that we cannot track.
+ isStored
+ } StoredType;
+
+ /// StoredOnceValue - If only one value (besides the initializer constant) is
+ /// ever stored to this global, keep track of what value it is.
+ Value *StoredOnceValue;
+
+ /// AccessingFunction/HasMultipleAccessingFunctions - These start out
+ /// null/false. When the first accessing function is noticed, it is recorded.
+ /// When a second different accessing function is noticed,
+ /// HasMultipleAccessingFunctions is set to true.
+ const Function *AccessingFunction;
+ bool HasMultipleAccessingFunctions;
+
+ /// HasNonInstructionUser - Set to true if this global has a user that is not
+ /// an instruction (e.g. a constant expr or GV initializer).
+ bool HasNonInstructionUser;
+
+ /// HasPHIUser - Set to true if this global has a user that is a PHI node.
+ bool HasPHIUser;
+
+ GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ HasPHIUser(false) {}
+};
+
+}
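+
+// Worked example (ours): for an
+//   @x = internal global i32 0
+// that is loaded in several functions but stored to exactly once with a value
+// V other than its initializer, AnalyzeGlobal below ends up with
+// isLoaded = true, StoredType = isStoredOnce, StoredOnceValue = V, and
+// HasMultipleAccessingFunctions = true, while isCompared and HasPHIUser stay
+// false (assuming no comparisons or PHI uses of @x).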
+
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+// by constants itself. Note that constants cannot be cyclic, so this test is
+// pretty easy to implement recursively.
+//
+static bool SafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!SafeToDestroyConstant(CU)) return false;
+ } else
+ return false;
+ return true;
+}
+
+
+/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
+/// structure. If the global has its address taken, return true to indicate we
+/// can't do anything with it.
+///
+static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode*, 16> &PHIUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType())) return true;
+
+ if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.isLoaded = true;
+ if (LI->isVolatile()) return true; // Don't hack on volatile loads.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V) return true;
+
+ if (SI->isVolatile()) return true; // Don't hack on volatile stores.
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::isStored) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
+ SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
+ GS.StoredType = GlobalStatus::isStoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ }
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<SelectInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PHIUsers.insert(PN)) // Not already visited.
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ GS.HasPHIUser = true;
+ } else if (isa<CmpInst>(I)) {
+ GS.isCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile()) return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::isStored;
+ if (MTI->getArgOperand(1) == V)
+ GS.isLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile()) return true;
+ GS.StoredType = GlobalStatus::isStored;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!SafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+ if (!CI) return 0;
+ unsigned IdxV = CI->getZExtValue();
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
+ if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
+ if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
+ if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
+ } else if (isa<ConstantAggregateZero>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return Constant::getNullValue(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return Constant::getNullValue(STy->getElementType());
+ }
+ } else if (isa<UndefValue>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return UndefValue::get(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return UndefValue::get(STy->getElementType());
+ }
+ }
+ return 0;
+}
+
+
+/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
+/// users of the global, cleaning up the obvious ones. This is largely just a
+/// quick scan over the use list to clean up the easy and obvious cruft. This
+/// returns true if it made a change.
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
+ User *U = *UI++;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (Init) {
+ // Replace the load with the initializer.
+ LI->replaceAllUsesWith(Init);
+ LI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Store must be unreachable or storing Init into the global.
+ SI->eraseFromParent();
+ Changed = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Constant *SubInit = 0;
+ if (Init)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ CE->getType()->isPointerTy()) {
+ // Pointer cast, delete any stores and memsets to the global.
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
+ }
+
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // Do not transform "gepinst (gep constexpr (GV))" here, because forming
+ // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
+ // and will invalidate our notion of what Init is.
+ Constant *SubInit = 0;
+ if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ ConstantExpr *CE =
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ }
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+
+ if (GEP->use_empty()) {
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
+ if (MI->getRawDest() == V) {
+ MI->eraseFromParent();
+ Changed = true;
+ }
+
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ // If we have a chain of dead constantexprs or other things dangling from
+ // us, and if they are all dead, nuke them without remorse.
+ if (SafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ CleanupConstantGlobalUsers(V, Init);
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isSafeSROAElementUse - Return true if the specified instruction is a safe
+/// user of a derived expression from a global that we want to SROA.
+static bool isSafeSROAElementUse(Value *V) {
+ // We might have a dead and dangling constant hanging off of here.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return SafeToDestroyConstant(C);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Loads are ok.
+ if (isa<LoadInst>(I)) return true;
+
+ // Stores *to* the pointer are ok.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getOperand(0) != V;
+
+ // Otherwise, it must be a GEP.
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
+ if (GEPI == 0) return false;
+
+ if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
+ !cast<Constant>(GEPI->getOperand(1))->isNullValue())
+ return false;
+
+ for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
+ I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+
+/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
+/// Look at it and its uses and decide whether it is safe to SROA this global.
+///
+static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
+ return false;
+
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue() ||
+ !isa<ConstantInt>(U->getOperand(2)))
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // If this is a use of an array allocation, do a bit more checking for sanity.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ uint64_t NumElements = AT->getNumElements();
+ ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
+
+ // Check to make sure that index falls within the array. If not,
+ // something funny is going on, so we won't do the optimization.
+ //
+ if (Idx->getZExtValue() >= NumElements)
+ return false;
+
+ // We cannot scalar repl this level of the array unless any array
+ // sub-indices are in-range constants. In particular, consider:
+ // A[0][i]. We cannot know that the user isn't doing invalid things like
+ // allowing i to index an out-of-range subscript that accesses A[1].
+ //
+ // Scalar replacing *just* the outer index of the array is probably not
+ // going to be a win anyway, so just give up.
+ for (++GEPI; // Skip array index.
+ GEPI != E;
+ ++GEPI) {
+ uint64_t NumElements;
+ if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ NumElements = SubArrayTy->getNumElements();
+ else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ NumElements = SubVectorTy->getNumElements();
+ else {
+ assert((*GEPI)->isStructTy() &&
+ "Indexed GEP type is not array, vector, or struct!");
+ continue;
+ }
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ return false;
+ }
+ }
+
+ for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
+/// is safe for us to perform this transformation.
+///
+static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ if (!IsUserOfGlobalSafeForSRA(*UI, GV))
+ return false;
+ }
+ return true;
+}
+
+
+/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
+/// variable. This opens the door for other optimizations by exposing the
+/// behavior of the program in a more fine-grained way. We have determined that
+/// this transformation is safe already. We return the first global variable we
+/// insert so that the caller can reprocess it.
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+ // Make sure this global only has simple uses that we can SRA.
+ if (!GlobalUsersSafeToSRA(GV))
+ return 0;
+
+ assert(GV->hasLocalLinkage() && !GV->isConstant());
+ Constant *Init = GV->getInitializer();
+ const Type *Ty = Init->getType();
+
+ std::vector<GlobalVariable*> NewGlobals;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = TD.getABITypeAlignment(GV->getType());
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *TD.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
+ assert(In && "Couldn't get element of initializer?");
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
+ }
+ } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ unsigned NumElements = 0;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(STy)->getNumElements();
+
+ if (NumElements > 16 && GV->hasNUsesOrMore(16))
+ return 0; // It's not worth it.
+ NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
+ }
+ }
+
+ if (NewGlobals.empty())
+ return 0;
+
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
+
+ // Loop over all of the uses of the global, replacing the constantexpr geps,
+ // with smaller constantexpr geps or direct references.
+ while (!GV->use_empty()) {
+ User *GEP = GV->use_back();
+ assert(((isa<ConstantExpr>(GEP) &&
+ cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
+ isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
+
+ // Ignore operand 1 (the first index), which has to be zero or else the
+ // program is quite broken (undefined). Get operand 2, which is the
+ // structure or array index.
+ unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+
+ Value *NewPtr = NewGlobals[Val];
+
+ // Form a shorter GEP if needed.
+ if (GEP->getNumOperands() > 3) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
+ Idxs.push_back(CE->getOperand(i));
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
+ &Idxs[0], Idxs.size());
+ } else {
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
+ SmallVector<Value*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
+ Idxs.push_back(GEPI->getOperand(i));
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+ GEPI->getName()+"."+Twine(Val),GEPI);
+ }
+ }
+ GEP->replaceAllUsesWith(NewPtr);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
+ GEPI->eraseFromParent();
+ else
+ cast<ConstantExpr>(GEP)->destroyConstant();
+ }
+
+ // Delete the old global, now that it is dead.
+ Globals.erase(GV);
+ ++NumSRA;
+
+ // Loop over the new globals array deleting any globals that are obviously
+ // dead. This can arise due to scalarization of a structure or an array that
+ // has elements that are dead.
+ unsigned FirstGlobal = 0;
+ for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
+ if (NewGlobals[i]->use_empty()) {
+ Globals.erase(NewGlobals[i]);
+ if (FirstGlobal == i) ++FirstGlobal;
+ }
+
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+}
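+
+// Illustration (ours): running this on
+//   @s = internal global { i32, i32 } zeroinitializer
+// splits it into two scalar globals @s.0 and @s.1, and rewrites users of the
+// form 'getelementptr @s, 0, 1, ...' to address @s.1 directly (forming a
+// shorter GEP when trailing indices remain).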
+
+/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
+/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// phi nodes we've seen to avoid reprocessing them.
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+
+ if (isa<LoadInst>(U)) {
+ // Will trap.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Storing the value.
+ }
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+ if (CI->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ if (II->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If we've already seen this phi node, ignore it, it has already been
+ // checked.
+ if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ return false;
+ } else if (isa<ICmpInst>(U) &&
+ isa<ConstantPointerNull>(UI->getOperand(1))) {
+ // Ignore icmp X, null
+ } else {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
+/// from GV will trap if the loaded value is null. Note that this also permits
+/// comparisons of the loaded value against null, as a special case.
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (isa<StoreInst>(U)) {
+ // Ignore stores to the global.
+ } else {
+ // We don't know or understand this user, bail out.
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
+ Instruction *I = cast<Instruction>(*UI++);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setOperand(0, NewV);
+ Changed = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) == V) {
+ SI->setOperand(1, NewV);
+ Changed = true;
+ }
+ } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ CallSite CS(I);
+ if (CS.getCalledValue() == V) {
+ // Calling through the pointer! Turn into a direct call, but be careful
+ // that the pointer is not also being passed as an argument.
+ CS.setCalledFunction(NewV);
+ Changed = true;
+ bool PassedAsArg = false;
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == V) {
+ PassedAsArg = true;
+ CS.setArgument(i, NewV);
+ }
+
+ if (PassedAsArg) {
+ // Being passed as an argument also. Be careful to not invalidate UI!
+ UI = V->use_begin();
+ }
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+ ConstantExpr::getCast(CI->getOpcode(),
+ NewV, CI->getType()));
+ if (CI->use_empty()) {
+ Changed = true;
+ CI->eraseFromParent();
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ // Should handle GEP here.
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.reserve(GEPI->getNumOperands()-1);
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
+ Idxs.push_back(C);
+ else
+ break;
+ if (Idxs.size() == GEPI->getNumOperands()-1)
+ Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
+ ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
+ Idxs.size()));
+ if (GEPI->use_empty()) {
+ Changed = true;
+ GEPI->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
+/// value stored into it. If there are uses of the loaded value that would trap
+/// if the loaded value is dynamically null, then we know that they cannot be
+/// reachable with a null value, so we can optimize away the load.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+ bool Changed = false;
+
+ // Keep track of whether we are able to remove all the uses of the global
+ // other than the store that defines it.
+ bool AllNonStoreUsesGone = true;
+
+ // Replace all uses of loads with uses of uses of the stored value.
+ for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+ User *GlobalUser = *GUI++;
+ if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ // If we were able to delete all uses of the loads
+ if (LI->use_empty()) {
+ LI->eraseFromParent();
+ Changed = true;
+ } else {
+ AllNonStoreUsesGone = false;
+ }
+ } else if (isa<StoreInst>(GlobalUser)) {
+ // Ignore the store that stores "LV" to the global.
+ assert(GlobalUser->getOperand(1) == GV &&
+ "Must be storing *to* the global");
+ } else {
+ AllNonStoreUsesGone = false;
+
+ // If we get here we could have other crazy uses that are transitively
+ // loaded.
+ assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+ isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
+ "Only expect load and stores!");
+ }
+ }
+
+ if (Changed) {
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ ++NumGlobUses;
+ }
+
+ // If we nuked all of the loads, then none of the stores are needed either,
+ // nor is the global.
+ if (AllNonStoreUsesGone) {
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ CleanupConstantGlobalUsers(GV, 0);
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
+/// instructions that are foldable.
+static void ConstantPropUsersOf(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+ if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
+ I->replaceAllUsesWith(NewC);
+
+ // Advance UI to the next non-I use to avoid invalidating it!
+ // Instructions could multiply use V.
+ while (UI != E && *UI == I)
+ ++UI;
+ I->eraseFromParent();
+ }
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and rewrite any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+ CallInst *CI,
+ const Type *AllocTy,
+ ConstantInt *NElements,
+ TargetData* TD) {
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+
+ const Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is of an array.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+ // Create the new global variable. The contents of the malloc'd memory are
+ // undefined, so initialize with an undef value.
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalType, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType),
+ GV->getName()+".body",
+ GV,
+ GV->isThreadLocal());
+
+ // If there are bitcast users of the malloc (which is typical, usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = 0;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->use_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (TheBC == 0)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->isThreadLocal());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
+ SI->eraseFromParent();
+ continue;
+ }
+
+ LoadInst *LI = cast<LoadInst>(GV->use_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser())) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<StoreInst>(InitBool->use_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+ // Now the GV is dead, nuke it and the malloc..
+ GV->eraseFromParent();
+ CI->eraseFromParent();
+
+ // To further other optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue);
+
+ return NewGV;
+}
+
+/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
+/// to make sure that there are no complex uses of V. We permit simple things
+/// like dereferencing the pointer, but not storing through the address, unless
+/// it is to the specified global.
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+ const GlobalVariable *GV,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ const Instruction *Inst = cast<Instruction>(*UI);
+
+ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+ continue; // Fine, ignore.
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
+ return false; // Storing the pointer itself... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
+
+ // Must index into the array and into the struct.
+ if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
+ // cycles.
+ if (PHIs.insert(PN))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere. Transform all uses of the allocation into loads from the
+/// global and uses of the resultant pointer. Further, delete the store into
+/// GV. This assumes that these values pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+ GlobalVariable *GV) {
+ while (!Alloc->use_empty()) {
+ Instruction *U = cast<Instruction>(*Alloc->use_begin());
+ Instruction *InsertPt = U;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If this is the store of the allocation into the global, remove it.
+ if (SI->getOperand(1) == GV) {
+ SI->eraseFromParent();
+ continue;
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Insert the load in the corresponding predecessor, not right before the
+ // PHI.
+ InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+ } else if (isa<BitCastInst>(U)) {
+ // Must be bitcast between the malloc and store to initialize the global.
+ ReplaceUsesOfMallocWithGlobal(U, GV);
+ U->eraseFromParent();
+ continue;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ // If this is a "GEP bitcast" and the user is a store to the global, then
+ // just process it as a bitcast.
+ if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+ if (SI->getOperand(1) == GV) {
+ // Must be bitcast GEP between the malloc and store to initialize
+ // the global.
+ ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+ GEPI->eraseFromParent();
+ continue;
+ }
+ }
+
+ // Insert a load from the global, and use it instead of the malloc.
+ Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ U->replaceUsesOfWith(Alloc, NL);
+ }
+}
+
+/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
+/// of a load) are simple enough to perform heap SRA on. This permits GEP's
+/// that index through the array and struct field, icmps of null, and PHIs.
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ // We permit two users of the load: setcc comparing against the null
+ // pointer, and a getelementptr of a specific form.
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+
+ // Comparison against null is ok.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ continue;
+ }
+
+ // getelementptr is also ok, but only a simple form.
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Must index into the array and into the struct.
+ if (GEPI->getNumOperands() < 3)
+ return false;
+
+ // Otherwise the GEP is ok.
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (!LoadUsingPHIsPerLoad.insert(PN))
+ // This means some phi nodes are dependent on each other.
+ // Avoid infinite looping!
+ return false;
+ if (!LoadUsingPHIs.insert(PN))
+ // If we have already analyzed this PHI, then it is safe.
+ continue;
+
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+ LoadUsingPHIs, LoadUsingPHIsPerLoad))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise we don't know what this is, not ok.
+ return false;
+ }
+
+ return true;
+}
+
+
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// GV are simple enough to perform HeapSRA, return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+ Instruction *StoredVal) {
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI)
+ if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+ LoadUsingPHIsPerLoad))
+ return false;
+ LoadUsingPHIsPerLoad.clear();
+ }
+
+ // If we reach here, we know that all uses of the loads and transitive uses
+ // (through PHI nodes) are simple enough to transform. However, we don't know
+ // that all inputs to the PHI nodes are in the same equivalence sets.
+ // Check to verify that all operands of the PHIs are either PHIs that can be
+ // transformed, loads from GV, or the stored value (StoredVal) itself.
+ for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
+ , E = LoadUsingPHIs.end(); I != E; ++I) {
+ const PHINode *PN = *I;
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == StoredVal) continue;
+
+ if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
+ return false;
+ }
+
+ // Load from GV is ok.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // UNDEF? NULL?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
+ return true;
+}
+
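+// Orientation note (ours; the name @g is illustrative): after heap SRA, a
+// global @g that held the result of
+//   malloc(n * sizeof({ i32, i32* }))
+// is replaced by per-field globals @g.f0 and @g.f1, each pointing at its own
+// malloc'd array of that field's type; the helpers below lazily materialize
+// the per-field loads and PHIs that the rewritten users need.
+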
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f"+Twine(FieldNo), LI);
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+ const StructType *ST =
+ cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+
+ PHINode *NewPN =
+ PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getNumIncomingValues(),
+ PN->getName()+".f"+Twine(FieldNo), PN);
+ Result = NewPN;
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ } else {
+ llvm_unreachable("Unknown usable value");
+ Result = 0;
+ }
+
+ return FieldVals[FieldNo] = Result;
+}
+
+/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
+/// the load, rewrite the derived value to use the HeapSRoA'd load.
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ // If this is a comparison against null, handle it.
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
+ assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+ // If we have a setcc of the loaded pointer, we can use a setcc of any
+ // field.
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName());
+ SCI->replaceAllUsesWith(New);
+ SCI->eraseFromParent();
+ return;
+ }
+
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
+ assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
+ && "Unexpected GEPI!");
+
+ // Load the pointer for this field.
+ unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ // Create the new GEP idx vector.
+ SmallVector<Value*, 8> GEPIdx;
+ GEPIdx.push_back(GEPI->getOperand(1));
+ GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
+
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr,
+ GEPIdx.begin(), GEPIdx.end(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NGEPI);
+ GEPI->eraseFromParent();
+ return;
+ }
+
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ bool Inserted;
+ DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+ tie(InsertPos, Inserted) =
+ InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+ if (!Inserted) return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
+/// is a value loaded from the global. Eliminate all uses of Ptr, making them
+/// use FieldGlobals instead. All uses of loaded values satisfy
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
+}
+
+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value* NElems, TargetData *TD) {
+ DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ const Type* MAT = getMallocAllocatedType(CI);
+ const StructType *STy = cast<StructType>(MAT);
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as CI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<Value*> FieldMallocs;
+
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ const Type *FieldTy = STy->getElementType(FieldNo);
+ const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+
+ GlobalVariable *NGV =
+ new GlobalVariable(*GV->getParent(),
+ PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy),
+ GV->getName() + ".f" + Twine(FieldNo), GV,
+ GV->isThreadLocal());
+ FieldGlobals.push_back(NGV);
+
+ unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ if (const StructType *ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems, 0,
+ CI->getName() + ".f" + Twine(FieldNo));
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, CI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
+ ConstantZero, "isneg");
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()),
+ "tmp");
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
+ OrigBB->getParent());
+ Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
+ Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ CallInst::CreateFree(GVVal, BI);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // CI is no longer needed, remove it.
+ CI->eraseFromParent();
+
+ /// InsertedScalarizedValues - As we process loads, if we can't immediately
+ /// update all uses of the load, keep track of what scalarized values are
+ /// inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored into it that is a
+/// malloc or a cast of a malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+ CallInst *CI,
+ const Type *AllocTy,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ if (!TD)
+ return false;
+
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!AllocTy->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+ // We can't optimize this if the malloc itself is used in a complex way,
+ // for example, being stored into multiple globals. We allow the malloc to
+ // be stored into the specified global, loaded, setcc'd, and GEP'd: these
+ // are all uses we could rewrite to go through the global instead.
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+ return false;
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ // We cannot optimize the malloc if we cannot determine malloc array size.
+ Value *NElems = getMallocArraySize(CI, TD, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ return true;
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+ // If the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+ // If this is a fixed size array, transform the malloc into a malloc of
+ // structs: malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ 0, CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ return true;
+ }
+
+ return false;
+}
+
+// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (GV->getInitializer()->getType()->isPointerTy() &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ return true;
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
+ const Type* MallocType = getMallocAllocatedType(CI);
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ GVI, TD))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
+/// two values ever stored into GV are its initializer and OtherVal. See if we
+/// can shrink the global into a boolean and select between the two values
+/// whenever it is used. This exposes the values to other scalar optimizations.
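+///
+/// As an illustrative sketch (the values are made up): if @G is initialized
+/// to 0 and the only other value ever stored is 10, @G becomes an i1 global
+/// @G.b; stores become stores of true/false, and each load becomes
+///   %b = load i1* @G.b
+///   %v = select i1 %b, i32 10, i32 0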
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+ const Type *GVElType = GV->getType()->getElementType();
+
+ // If GVElType is already i1, it is already shrunk. If the type of the GV is
+ // an FP value, pointer or vector, don't do this optimization because a select
+ // between them is very expensive and unlikely to lead to later
+ // simplification. In these cases, we typically end up with "cond ? v1 : v2"
+ // where v1 and v2 both require constant pool loads, a big loss.
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPointTy() ||
+ GVElType->isPointerTy() || GVElType->isVectorTy())
+ return false;
+
+ // Walk the use list of the global, checking that all the uses are loads or
+ // stores. If there is anything else, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+
+ // Create the new global, initializing it to false.
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".b",
+ GV->isThreadLocal());
+ GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+ Constant *InitVal = GV->getInitializer();
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
+ "No reason to shrink to bool!");
+
+ // If initialized to zero and storing one into the global, we can use a cast
+ // instead of a select to synthesize the desired value.
+ bool IsOneZero = false;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+ while (!GV->use_empty()) {
+ Instruction *UI = cast<Instruction>(GV->use_back());
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Change the store into a boolean store.
+ bool StoringOther = SI->getOperand(0) == OtherVal;
+ // Only do this if we weren't storing a loaded value.
+ Value *StoreVal;
+ if (StoringOther || SI->getOperand(0) == InitVal)
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
+ else {
+ // Otherwise, we are storing a previously loaded copy. To do this,
+ // change the copy from copying the original value to just copying the
+ // bool.
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+ // If we've already replaced the input, StoredVal will be a cast or
+ // select instruction. If not, it will be a load of the original
+ // global.
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ assert(LI->getOperand(0) == GV && "Not a copy!");
+ // Insert a new load, to preserve the saved value.
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+ } else {
+ assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+ "This is not a form that we understand!");
+ StoreVal = StoredVal->getOperand(0);
+ assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+ }
+ }
+ new StoreInst(StoreVal, NewGV, SI);
+ } else {
+ // Change the load into a load of bool then a select.
+ LoadInst *LI = cast<LoadInst>(UI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+ Value *NSI;
+ if (IsOneZero)
+ NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+ else
+ NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+ NSI->takeName(LI);
+ LI->replaceAllUsesWith(NSI);
+ }
+ UI->eraseFromParent();
+ }
+
+ GV->eraseFromParent();
+ return true;
+}
+
+
+/// ProcessGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ if (!GV->hasLocalLinkage())
+ return false;
+
+ // Do more involved optimizations if the global is internal.
+ GV->removeDeadConstantUsers();
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
+ GV->eraseFromParent();
+ ++NumDeleted;
+ return true;
+ }
+
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+
+ if (AnalyzeGlobal(GV, GS, PHIUsers))
+ return false;
+
+ if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ GV->setUnnamedAddr(true);
+ NumUnnamed++;
+ }
+
+ if (GV->isConstant() || !GV->hasInitializer())
+ return false;
+
+ return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+}
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS) {
+ // If this is a first class global and has only one accessing function, and
+ // that function is main (which we know is not recursive), we can replace
+ // the global with a local alloca in that function.
+ //
+ // NOTE: It doesn't make sense to promote non-single-value types since we
+ // are just replacing static memory with stack memory.
+ //
+ // If the global is in a different address space, don't bring it to the
+ // stack.
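+ //
+ // As an illustrative sketch (the names are made up): @G = internal global
+ // i32 5, accessed only from main, becomes an 'alloca i32' in main's entry
+ // block together with a store of 5 that re-creates the initializer.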
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ const Type* ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ Changed = true;
+ }
+ return Changed;
+
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ GV->setConstant(true);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
+ }
+
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+ getAnalysisIfAvailable<TargetData>()))
+ return true;
+
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
+/// function, changing them to FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setCallingConv(CallingConv::Fast);
+ }
+}
+
+static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
+ continue;
+
+ // There can be only one.
+ return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
+ }
+
+ return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+ F->setAttributes(StripNest(F->getAttributes()));
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setAttributes(StripNest(User.getAttributes()));
+ }
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+ bool Changed = false;
+ // Optimize functions.
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+ Function *F = FI++;
+ // Functions without names cannot be referenced outside this module.
+ if (!F->hasName() && !F->isDeclaration())
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->removeDeadConstantUsers();
+ if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) {
+ F->eraseFromParent();
+ Changed = true;
+ ++NumFnDeleted;
+ } else if (F->hasLocalLinkage()) {
+ if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has C calling conventions, is not a varargs
+ // function, and is only called directly, promote it to use the Fast
+ // calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+ bool Changed = false;
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+ // Global variables without names cannot be referenced outside this module.
+ if (!GV->hasName() && !GV->isDeclaration())
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ // Simplify the initializer.
+ if (GV->hasInitializer())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ GV->setInitializer(New);
+ }
+
+ Changed |= ProcessGlobal(GV, GVI);
+ }
+ return Changed;
+}
+
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
+/// initializers have an init priority of 65535.
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (GV == 0) return 0;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer()) return 0;
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(*i))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
+ return 0;
+ }
+
+ return GV;
+}
+
+/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
+/// return a list of the functions and null terminator as a vector.
+static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function*>();
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function*> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// InstallGlobalCtors - Given the llvm.global_ctors list and an updated list
+/// of constructors, install the new list, returning the new global to use.
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function*> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ Constant *CSVals[2];
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+ CSVals[1] = 0;
+
+ const StructType *StructTy =
+ cast <StructType>(
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
+
+ // Create the new init list.
+ std::vector<Constant*> CAList;
+ for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
+ if (Ctors[i]) {
+ CSVals[1] = Ctors[i];
+ } else {
+ const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
+ const PointerType *PFTy = PointerType::getUnqual(FTy);
+ CSVals[1] = Constant::getNullValue(PFTy);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 0x7fffffff);
+ }
+ CAList.push_back(ConstantStruct::get(StructTy, CSVals));
+ }
+
+ // Create the array initializer.
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ CAList.size()), CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return GCL;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+ GCL->getLinkage(), CA, "",
+ GCL->isThreadLocal());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+
+ if (Ctors.size())
+ return NGV;
+ else
+ return 0;
+}
+
+
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ComputedValues[V];
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants);
+
+
+/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
+/// handled by the code generator. We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // Simple integer, undef, constant aggregate zero, global addresses, etc are
+ // all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
+ isa<GlobalValue>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Constant *Op = cast<Constant>(C->getOperand(i));
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+ return false;
+ }
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // These casts are always fine if the casted value is.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C)) return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+}
+
+
+/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
+/// enough for us to understand. In particular, if it is a cast to anything
+/// other than from one pointer type to another pointer type, we punt.
+/// We basically just support direct accesses to globals and GEP's of
+/// globals. This should be kept up to date with CommitValueTo.
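+///
+/// As an illustrative sketch (the global name is made up): a plain global
+/// address such as @G, or an in-bounds constantexpr GEP like
+///   getelementptr inbounds ([4 x i32]* @G, i32 0, i32 2)
+/// is simple enough; an address formed with e.g. inttoptr is not.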
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return GV->hasUniqueInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasUniqueInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
+ }
+ }
+
+ return false;
+}
+
+/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
+/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
+/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
+ ConstantExpr *Addr, unsigned OpNo) {
+ // Base case of the recursion.
+ if (OpNo == Addr->getNumOperands()) {
+ assert(Val->getType() == Init->getType() && "Type mismatch!");
+ return Val;
+ }
+
+ std::vector<Constant*> Elts;
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+
+ // Break up the constant into its elements.
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
+ } else if (isa<UndefValue>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(UndefValue::get(STy->getElementType(i)));
+ } else {
+ llvm_unreachable("This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ }
+
+ // Replace the element that we are supposed to.
+ ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
+ unsigned Idx = CU->getZExtValue();
+ assert(Idx < STy->getNumElements() && "Struct index out of range!");
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+
+ // Return the modified struct.
+ return ConstantStruct::get(STy, Elts);
+ }
+
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ const SequentialType *InitTy = cast<SequentialType>(Init->getType());
+
+ uint64_t NumElts;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+ // Break up the array into elements.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+ for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+ } else {
+ assert(isa<UndefValue>(Init) && "This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+ }
+
+ assert(CI->getZExtValue() < NumElts);
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+
+ if (Init->getType()->isArrayTy())
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ return ConstantVector::get(Elts);
+}
+
+/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
+static void CommitValueTo(Constant *Val, Constant *Addr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ assert(GV->hasInitializer());
+ GV->setInitializer(Val);
+ return;
+ }
+
+ ConstantExpr *CE = cast<ConstantExpr>(Addr);
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
+}
+
+/// ComputeLoadResult - Return the value that would be computed by a load from
+/// P after the stores reflected by 'Memory' have been performed. If we can't
+/// decide, return null.
+static Constant *ComputeLoadResult(Constant *P,
+ const DenseMap<Constant*, Constant*> &Memory) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
+ if (I != Memory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return 0;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return 0; // don't know how to evaluate.
+}
+
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the constant
+/// values to use for the function's formal arguments.
+static bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs,
+ std::vector<Function*> &CallStack,
+ DenseMap<Constant*, Constant*> &MutatedMemory,
+ std::vector<GlobalVariable*> &AllocaTmps,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ /// Values - As we compute SSA register values, we store their contents here.
+ DenseMap<Value*, Constant*> Values;
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ Values[AI] = ActualArgs[ArgNo];
+
+ /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ /// we can only evaluate any one basic block at most once. This set keeps
+ /// track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurInst - The current instruction we're evaluating.
+ BasicBlock::iterator CurInst = F->begin()->begin();
+
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = 0;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (SI->isVolatile()) return false; // no volatile accesses.
+ Constant *Ptr = getVal(Values, SI->getOperand(1));
+ if (!isSimpleEnoughPointerToCommit(Ptr))
+ // If this is too complex for us to commit, reject it.
+ return false;
+
+ Constant *Val = getVal(Values, SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::BitCast) {
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from Val's type to NewTy must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+ }
+
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(Values, BO->getOperand(0)),
+ getVal(Values, BO->getOperand(1)));
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(Values, CI->getOperand(0)),
+ getVal(Values, CI->getOperand(1)));
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(Values, CI->getOperand(0)),
+ CI->getType());
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ getVal(Values, SI->getOperand(1)),
+ getVal(Values, SI->getOperand(2)));
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(Values, GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(Values, *i));
+ InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
+ ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+ if (LI->isVolatile()) return false; // no volatile accesses.
+ InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
+ MutatedMemory);
+ if (InstResult == 0) return false; // Could not evaluate load.
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
+ const Type *Ty = AI->getType()->getElementType();
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(Ty),
+ AI->getName()));
+ InstResult = AllocaTmps.back();
+ } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CI)) {
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CI->getCalledValue())) return false;
+
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) {
+ if (MSI->isVolatile()) return false;
+ Constant *Ptr = getVal(Values, MSI->getDest());
+ Constant *Val = getVal(Values, MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr),
+ MutatedMemory);
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ ++CurInst;
+ continue;
+ }
+ return false;
+ }
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(Values,
+ CI->getCalledValue()));
+ if (!Callee) return false; // Cannot resolve.
+
+ SmallVector<Constant*, 8> Formals;
+ CallSite CS(CI);
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i)
+ Formals.push_back(getVal(Values, *i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
+ Formals.size())) {
+ InstResult = C;
+ } else {
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg())
+ return false;
+
+ Constant *RetVal;
+ // Execute the call, if successful, use the return value.
+ if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
+ MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ return false;
+ InstResult = RetVal;
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ BasicBlock *NewBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NewBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NewBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
+ if (RI->getNumOperands())
+ RetVal = getVal(Values, RI->getOperand(0));
+
+ CallStack.pop_back(); // return from fn.
+ return true; // We succeeded at evaluating this ctor!
+ } else {
+ // invoke, unwind, unreachable.
+ return false; // Cannot handle this terminator.
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NewBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ BasicBlock *OldBB = CurInst->getParent();
+ CurInst = NewBB->begin();
+ PHINode *PN;
+ for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
+
+ // Do NOT increment CurInst. We know that the terminator had no value.
+ continue;
+ } else {
+ // Did not know how to evaluate this!
+ return false;
+ }
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, TD);
+
+ Values[CurInst] = InstResult;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function,
+/// if we can. Return true if evaluation succeeded, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+ /// this state is committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ std::vector<GlobalVariable*> AllocaTmps;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ std::vector<Function*> CallStack;
+
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
+ // Call the function.
+ Constant *RetValDummy;
+ bool EvalSuccess = EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>(), CallStack,
+ MutatedMemory, AllocaTmps,
+ SimpleConstants, TD);
+
+ if (EvalSuccess) {
+ // We succeeded at evaluation: commit the result.
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << MutatedMemory.size()
+ << " stores.\n");
+ for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
+ E = MutatedMemory.end(); I != E; ++I)
+ CommitValueTo(I->second, I->first);
+ }
+
+ // At this point, we are done interpreting. If we created any 'alloca'
+ // temporaries, release them now.
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+
+ return EvalSuccess;
+}
+
+
+
+/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
+/// Return true if anything changed.
+bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
+ std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
+ bool MadeChange = false;
+ if (Ctors.empty()) return false;
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+ // Found a null terminator in the middle of the list, prune off the rest of
+ // the list.
+ if (F == 0) {
+ if (i != Ctors.size()-1) {
+ Ctors.resize(i+1);
+ MadeChange = true;
+ }
+ break;
+ }
+
+ // We cannot simplify external ctor functions.
+ if (F->empty()) continue;
+
+ // If we can evaluate the ctor at compile time, do.
+ if (EvaluateStaticConstructor(F, TD)) {
+ Ctors.erase(Ctors.begin()+i);
+ MadeChange = true;
+ --i;
+ ++NumCtorsEvaluated;
+ continue;
+ }
+ }
+
+ if (!MadeChange) return false;
+
+ GCL = InstallGlobalCtors(GCL, Ctors);
+ return true;
+}
+
+bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+ bool Changed = false;
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator J = I++;
+ // Aliases without names cannot be referenced outside this module.
+ if (!J->hasName() && !J->isDeclaration())
+ J->setLinkage(GlobalValue::InternalLinkage);
+ // If the aliasee may change at link time, nothing can be done - bail out.
+ if (J->mayBeOverridden())
+ continue;
+
+ Constant *Aliasee = J->getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ Target->removeDeadConstantUsers();
+ bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
+
+ // Make all users of the alias use the aliasee instead.
+ if (!J->use_empty()) {
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ }
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!J->hasLocalLinkage()) {
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ if (!Target->hasLocalLinkage())
+ continue;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (!hasOneUse)
+ continue;
+
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(J);
+ Target->setLinkage(J->getLinkage());
+ Target->GlobalValue::copyAttributesFrom(J);
+ }
+
+ // Delete the alias.
+ M.getAliasList().erase(J);
+ ++NumAliasesRemoved;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+static Function *FindCXAAtExit(Module &M) {
+ Function *Fn = M.getFunction("__cxa_atexit");
+
+ if (!Fn)
+ return 0;
+
+ const FunctionType *FTy = Fn->getFunctionType();
+
+ // Checking that the function has the right return type, the right number of
+ // parameters and that they all have pointer types should be enough.
+ if (!FTy->getReturnType()->isIntegerTy() ||
+ FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return 0;
+
+ return Fn;
+}
+
+/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
+/// destructor and can therefore be eliminated.
+/// Note that we assume that other optimization passes have already simplified
+/// the code so we only look for a function with a single basic block, where
+/// the only allowed instructions are 'ret' or a 'call' to an empty C++ dtor.
+static bool cxxDtorIsEmpty(const Function &Fn,
+ SmallPtrSet<const Function *, 8> &CalledFunctions) {
+ // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
+ // nounwind, but that doesn't seem worth doing.
+ if (Fn.isDeclaration())
+ return false;
+
+ if (++Fn.begin() != Fn.end())
+ return false;
+
+ const BasicBlock &EntryBlock = Fn.getEntryBlock();
+ for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
+ I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore debug intrinsics.
+ if (isa<DbgInfoIntrinsic>(CI))
+ continue;
+
+ const Function *CalledFn = CI->getCalledFunction();
+
+ if (!CalledFn)
+ return false;
+
+ SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
+
+ // Don't treat recursive functions as empty.
+ if (!NewCalledFunctions.insert(CalledFn))
+ return false;
+
+ if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
+ return false;
+ } else if (isa<ReturnInst>(*I))
+ return true;
+ else
+ return false;
+ }
+
+ return false;
+}
+
+bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+ /// Itanium C++ ABI p3.3.5:
+ ///
+ /// After constructing a global (or local static) object, that will require
+ /// destruction on exit, a termination function is registered as follows:
+ ///
+ /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
+ ///
+ /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
+ /// call f(p) when DSO d is unloaded, before all such termination calls
+ /// registered before this one. It returns zero if registration is
+ /// successful, nonzero on failure.
+
+ // This pass will look for calls to __cxa_atexit where the function is trivial
+ // and remove them.
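+ //
+ // As an illustrative sketch (the C++ names are made up): for
+ //   struct S { ~S() {} };
+ //   S TheS;
+ // the front end registers S's destructor via __cxa_atexit; since the
+ // destructor is empty, that registration call can be deleted.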
+ bool Changed = false;
+
+ for (Function::use_iterator I = CXAAtExitFn->use_begin(),
+ E = CXAAtExitFn->use_end(); I != E;) {
+ // We're only interested in calls. Theoretically, we could handle invoke
+ // instructions as well, but neither llvm-gcc nor clang generate invokes
+ // to __cxa_atexit.
+ CallInst *CI = dyn_cast<CallInst>(*I++);
+ if (!CI)
+ continue;
+
+ Function *DtorFn =
+ dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
+ if (!DtorFn)
+ continue;
+
+ SmallPtrSet<const Function *, 8> CalledFunctions;
+ if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+ continue;
+
+ // Just remove the call.
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ CI->eraseFromParent();
+
+ ++NumCXXDtorsRemoved;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool GlobalOpt::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Try to find the llvm.global_ctors list.
+ GlobalVariable *GlobalCtors = FindGlobalCtors(M);
+
+ Function *CXAAtExitFn = FindCXAAtExit(M);
+
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Delete functions that are trivially dead, ccc -> fastcc
+ LocalChange |= OptimizeFunctions(M);
+
+ // Optimize global_ctors list.
+ if (GlobalCtors)
+ LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+
+ // Optimize non-address-taken globals.
+ LocalChange |= OptimizeGlobalVars(M);
+
+ // Resolve aliases, when possible.
+ LocalChange |= OptimizeGlobalAliases(M);
+
+ // Try to remove trivial global destructors.
+ if (CXAAtExitFn)
+ LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+
+ Changed |= LocalChange;
+ }
+
+ // TODO: Move all global ctors functions to the end of the module for code
+ // layout.
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
new file mode 100644
index 000000000000..25c01346642b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -0,0 +1,279 @@
+//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an _extremely_ simple interprocedural constant
+// propagation pass. It could certainly be improved in many different ways,
+// like using a worklist. This pass makes arguments dead, but does not remove
+// them. The existing dead argument elimination pass should be run after this
+// to clean up the mess.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ipconstprop"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumArgumentsProped, "Number of args turned into constants");
+STATISTIC(NumReturnValProped, "Number of return values turned into constants");
+
+namespace {
+ /// IPCP - The interprocedural constant propagation pass
+ ///
+ struct IPCP : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+ private:
+ bool PropagateConstantsIntoArguments(Function &F);
+ bool PropagateConstantReturn(Function &F);
+ };
+}
+
+char IPCP::ID = 0;
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false)
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Delete any klingons.
+ I->removeDeadConstantUsers();
+ if (I->hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(*I);
+ Changed |= PropagateConstantReturn(*I);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+/// PropagateConstantsIntoArguments - Look at all uses of the specified
+/// function. If all uses are direct call sites, and all pass a particular
+/// constant in for an argument, propagate that constant in as the argument.
+///
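+/// As an illustrative sketch (the function and value names are made up): if
+/// every call site of an internal function @f looks like
+///   call void @f(i32 7, i32 %x)
+/// then all uses of @f's first argument are replaced with the constant 7,
+/// leaving the now-dead argument for deadargelim to remove.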
+bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+ if (F.arg_empty() || F.use_empty()) return false; // No args or uses? Early exit.
+
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is set to true once the argument is found to be
+ // non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ ArgumentConstants.resize(F.arg_size());
+
+ unsigned NumNonconstant = 0;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ // Ignore blockaddress uses.
+ if (isa<BlockAddress>(U)) continue;
+
+ // If the user is not a call or invoke instruction, or if the function is
+ // not used as the callee, do not transform.
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return false;
+
+ CallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return false;
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == 0) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
+ }
+ }
+ }
+
+ // If we got to this point, there is a constant argument!
+ assert(NumNonconstant != ArgumentConstants.size());
+ bool MadeChange = false;
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty() ||
+ (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (V == 0) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
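+//
+// Illustrative sketch (hypothetical IR): if every 'ret' in @g returns the
+// constant 42, then at a call site such as
+//
+//   %r = call i32 @g()
+//
+// all uses of %r are rewritten to use 42 directly; the call and the returns
+// themselves are left in place for later cleanup.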
+bool IPCP::PropagateConstantReturn(Function &F) {
+ if (F.getReturnType()->isVoidTy())
+ return false; // No return value.
+
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.mayBeOverridden())
+ return false;
+
+ // Check to see if this function returns a constant.
+ SmallVector<Value *,4> RetVals;
+ const StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ if (STy)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
+ else
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+
+ unsigned NumNonConstant = 0;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy)
+ V = RI->getOperand(0);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs, we can change them into anything
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
+ }
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
+ }
+ }
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = 0;
+        // All values non-constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
+ }
+ }
+
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ CallSite CS(*UI);
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || !CS.isCallee(UI))
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (STy == 0) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+ I != E;) {
+ Instruction *Ins = cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ if (MadeChange) ++NumReturnValProped;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 000000000000..31ce95f53d33
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,112 @@
+//===-- IPO.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeLowerSetJmpPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSetJmpPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+ unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+}
+
+
+void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
+ // FIXME: Remove in LLVM 3.0.
+}
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripSymbolsPass());
+}
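+
+// Example usage of these C bindings (a minimal sketch; assumes a module
+// reference 'Mod' obtained elsewhere via the llvm-c API):
+//
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddFunctionInliningPass(PM);
+//   LLVMAddGlobalDCEPass(PM);
+//   LLVMRunPassManager(PM, Mod);
+//   LLVMDisposePassManager(PM);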
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
new file mode 100644
index 000000000000..ce795b72438d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -0,0 +1,85 @@
+//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+  // AlwaysInliner only inlines functions that are marked as "always inline".
+ class AlwaysInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(ID, -2000000000) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ void growCachedCostInfo(Function* Caller, Function* Callee) {
+ CA.growCachedCostInfo(Caller, Callee);
+ }
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, &NeverInline);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
+ };
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+// doInitialization - Initializes the set of functions that have not
+// been annotated with the "always inline" attribute.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
+ NeverInline.insert(I);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 000000000000..0c5b3be8f983
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,118 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into their callers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+ class SimpleInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ SimpleInliner() : Inliner(ID) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ void growCachedCostInfo(Function* Caller, Function* Callee) {
+ CA.growCachedCostInfo(Caller, Callee);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
+ };
+}
+
+char SimpleInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+ return new SimpleInliner(Threshold);
+}
+
+// doInitialization - Initializes the set of functions that have been
+// annotated with the noinline attribute.
+bool SimpleInliner::doInitialization(CallGraph &CG) {
+
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
+ NeverInline.insert(I);
+
+ // Get llvm.noinline
+ GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
+
+ if (GV == 0)
+ return false;
+
+ // Don't crash on invalid code
+ if (!GV->hasDefinitiveInitializer())
+ return false;
+
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+
+ if (InitList == 0)
+ return false;
+
+ // Iterate over each element and add to the NeverInline set
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+
+ // Get Source
+ const Constant *Elt = InitList->getOperand(i);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
+ if (CE->getOpcode() == Instruction::BitCast)
+ Elt = CE->getOperand(0);
+
+ // Insert into set of functions to never inline
+ if (const Function *F = dyn_cast<Function>(Elt))
+ NeverInline.insert(F);
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 000000000000..57f3e772b569
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,571 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls and updating the call graph. The decisions of which calls
+// are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
+
+// Threshold to use when optsize is specified (and no -inline-threshold is given).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+
+Inliner::Inliner(char &ID, int Threshold)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+ InlineLimit : Threshold) {}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
+ CallGraphSCCPass::getAnalysisUsage(Info);
+}
+
+
+typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ if (!InlineFunction(CS, IFI))
+ return false;
+
+ // If the inlined function had a higher stack protection level than the
+ // calling function, then bump up the caller's stack protection level.
+ if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+ !Caller->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtect);
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+ // the C call site we don't want to do any alloca merging between X and Y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = 0;
+ break;
+ }
+
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
+ }
+
+ return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+ int thres = InlineThreshold;
+
+  // Listen to the optsize attribute when -inline-threshold is not given.
+ Function *Caller = CS.getCaller();
+ if (Caller && !Caller->isDeclaration() &&
+ Caller->hasFnAttr(Attribute::OptimizeForSize) &&
+ InlineLimit.getNumOccurrences() == 0)
+ thres = OptSizeThreshold;
+
+ // Listen to inlinehint when it would increase the threshold.
+ Function *Callee = CS.getCalledFunction();
+ if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
+ Callee->hasFnAttr(Attribute::InlineHint))
+ thres = HintThreshold;
+
+ return thres;
+}
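+
+// For example, with the defaults above (-inline-threshold=225,
+// -inlinehint-threshold=325, OptSizeThreshold=75): a call from a caller
+// marked optsize is measured against 75, a call to a callee marked
+// inlinehint against 325, and everything else against 225.  When both
+// attributes apply, the hint threshold wins, since it is only used when it
+// raises the threshold.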
+
+/// shouldInline - Return true if the inliner should attempt to inline
+/// at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+ InlineCost IC = getInlineCost(CS);
+
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return true;
+ }
+
+ if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ int Cost = IC.getValue();
+ Function *Caller = CS.getCaller();
+ int CurrentThreshold = getInlineThreshold(CS);
+ float FudgeFactor = getInlineFudgeFactor(CS);
+ int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
+ if (Cost >= AdjThreshold) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
+ << ", thres=" << AdjThreshold
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ // Try to detect the case where the current inlining candidate caller
+ // (call it B) is a static function and is an inlining candidate elsewhere,
+ // and the current candidate callee (call it C) is large enough that
+ // inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B
+ // into its callers.
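+  //
+  // Worked example (hypothetical numbers): suppose inlining C into B costs
+  // 300.  If doing so would push B past the threshold at outer call sites
+  // whose combined cost is only 200, then TotalSecondaryCost (200) < Cost
+  // (300) and we decline to inline C here, preferring to keep B inlinable.
+  // If the blocked outer sites instead total 400, inlining C is the better
+  // trade and we proceed.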
+ if (Caller->hasLocalLinkage()) {
+ int TotalSecondaryCost = 0;
+ bool outerCallsFound = false;
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = true;
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
+ I != E; ++I) {
+ CallSite CS2(*I);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+
+ InlineCost IC2 = getInlineCost(CS2);
+ if (IC2.isNever())
+ callerWillBeRemoved = false;
+ if (IC2.isAlways() || IC2.isNever())
+ continue;
+
+ outerCallsFound = true;
+ int Cost2 = IC2.getValue();
+ int CurrentThreshold2 = getInlineThreshold(CS2);
+ float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+ if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+ callerWillBeRemoved = false;
+
+ // See if we have this case. We subtract off the penalty
+ // for the call instruction, which we would be deleting.
+ if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+ Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
+ (int)(CurrentThreshold2 * FudgeFactor2)) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += Cost2;
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (outerCallsFound && inliningPreventsSomeOuterInline &&
+ TotalSecondaryCost < Cost) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << Cost <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << " Inlining: cost=" << Cost
+ << ", thres=" << AdjThreshold
+ << ", Call: " << *CS.getInstruction() << '\n');
+ return true;
+}
+
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallPtrSet<Function*, 8> SCCFunctions;
+ DEBUG(dbgs() << "Inliner visiting SCC:");
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F) SCCFunctions.insert(F);
+ DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
+ }
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
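+  //
+  // For instance (hypothetical sequence): inlining callee B at a top-level
+  // call site (history index -1) appends (B, -1) at index 0, and any call
+  // sites exposed by that inline carry history index 0; inlining C through
+  // one of those appends (C, 0) at index 1.  Walking 1 -> 0 -> -1 in
+  // InlineHistoryIncludes recovers the chain of functions already inlined
+  // through a given call site.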
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F) continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (!CS || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+ continue;
+
+ CallSites.push_back(std::make_pair(CS, -1));
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // If there are no calls in this function, exit early.
+ if (CallSites.empty())
+ return false;
+
+ // Now that we have all of the call sites, move the ones to functions in the
+ // current SCC to the end of the list.
+ unsigned FirstCallInSCC = CallSites.size();
+ for (unsigned i = 0; i < FirstCallInSCC; ++i)
+ if (Function *F = CallSites[i].first.getCalledFunction())
+ if (SCCFunctions.count(F))
+ std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+ InlineFunctionInfo InlineInfo(&CG, TD);
+
+ // Now that we have all of the call sites, loop over them and inline them if
+ // it looks profitable to do so.
+ bool Changed = false;
+ bool LocalChange;
+ do {
+ LocalChange = false;
+    // Keep iterating the outer do/while loop because inlining functions can
+    // cause indirect calls to become direct calls, exposing new candidates.
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi].first;
+
+ Function *Caller = CS.getCaller();
+ Function *Callee = CS.getCalledFunction();
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction())) {
+ DEBUG(dbgs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ // Update the cached cost info with the missing call
+ growCachedCostInfo(Caller, NULL);
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
+
+        // If this call site was obtained by inlining another function, verify
+        // that the inline history for the call site does not already include
+        // the callee itself.  If it does, we'd be recursively inlining the
+        // same function, which would expose the same call sites again and
+        // cause us to inline infinitely.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
+
+ // Attempt to inline the function.
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID))
+ continue;
+ ++NumInlined;
+
+ // If inlining this function gave us any new call sites, throw them
+ // onto our worklist to process. They are useful inline candidates.
+ if (!InlineInfo.InlinedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
+ i != e; ++i) {
+ Value *Ptr = InlineInfo.InlinedCalls[i];
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
+ }
+
+ // Update the cached cost info with the inlined call.
+ growCachedCostInfo(Caller, Callee);
+ }
+
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // TODO: Can remove if in SCC now.
+ !SCCFunctions.count(Callee) &&
+
+ // The function may be apparently dead, but if there are indirect
+ // callgraph references to the node, we cannot delete it yet, this
+ // could invalidate the CGSCC iterator.
+ CG[Callee]->getNumReferences() == 0) {
+ DEBUG(dbgs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ resetCachedCostInfo(Callee);
+
+        // Remove the callee's node from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.isSingular()) {
+ CallSites[CSi] = CallSites.back();
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ Changed = true;
+ LocalChange = true;
+ }
+ } while (LocalChange);
+
+ return Changed;
+}
+
+// doFinalization - Remove now-dead linkonce functions at the end of
+// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG);
+}
+
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// the DNR (Do Not Remove) list.
+bool Inliner::removeDeadFunctions(CallGraph &CG,
+ SmallPtrSet<const Function *, 16> *DNR) {
+ SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+
+ // Scan for all of the functions, looking for ones that should now be removed
+ // from the program. Insert the dead ones in the FunctionsToRemove set.
+ for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
+ CallGraphNode *CGN = I->second;
+ if (CGN->getFunction() == 0)
+ continue;
+
+ Function *F = CGN->getFunction();
+
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (DNR && DNR->count(F))
+ continue;
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !F->hasAvailableExternallyLinkage())
+ continue;
+ if (!F->use_empty())
+ continue;
+
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+    // node.  These edges might have been made irrelevant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Mark the node to be removed from the call graph and deleted below.
+ FunctionsToRemove.insert(CGN);
+ }
+
+ // Now that we know which functions to delete, do so. We didn't want to do
+ // this inline, because that would invalidate our CallGraph::iterator
+ // objects. :(
+ //
+  // Note that it is fine to iterate over a non-stable set here, since the
+  // order in which the functions are deleted does not matter.
+ bool Changed = false;
+ for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end(); I != E; ++I) {
+ resetCachedCostInfo((*I)->getFunction());
+ delete CG.removeFunctionFromModule(*I);
+ ++NumDeleted;
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 000000000000..7cb1d18f933d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,191 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for a
+// main function. If a main function is found, all other functions and all
+// global variables with initializers are marked as internal.
+//
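+// For example (hypothetical module): given definitions of @main, @helper, and
+// a global @table, this pass keeps @main external and gives @helper and
+// @table internal linkage, unless their names are preserved via
+// -internalize-public-api-list or -internalize-public-api-file.
+//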
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "internalize"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+
+namespace {
+ class InternalizePass : public ModulePass {
+ std::set<std::string> ExternalNames;
+ /// If no api symbols were specified and a main function is defined,
+ /// assume the main function is the only API
+ bool AllButMain;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit InternalizePass(bool AllButMain = true);
+ explicit InternalizePass(const std::vector <const char *>& exportList);
+ void LoadFile(const char *Filename);
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraph>();
+ }
+ };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false)
+
+InternalizePass::InternalizePass(bool AllButMain)
+ : ModulePass(ID), AllButMain(AllButMain){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ if (!APIFile.empty()) // If a filename is specified, use it.
+ LoadFile(APIFile.c_str());
+ if (!APIList.empty()) // If a list is specified, use it as well.
+ ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
+ : ModulePass(ID), AllButMain(false){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ for(std::vector<const char *>::const_iterator itr = exportList.begin();
+ itr != exportList.end(); itr++) {
+ ExternalNames.insert(*itr);
+ }
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+ CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+
+ if (ExternalNames.empty()) {
+ // Return if we're not in 'all but main' mode and have no external api
+ if (!AllButMain)
+ return false;
+ // If no list or file of symbols was specified, check to see if there is a
+ // "main" symbol defined in the module. If so, use it, otherwise do not
+ // internalize the module, it must be a library or something.
+ //
+ Function *MainFunc = M.getFunction("main");
+ if (MainFunc == 0 || MainFunc->isDeclaration())
+ return false; // No main found, must be a library...
+
+ // Preserve main, internalize all else.
+ ExternalNames.insert(MainFunc->getName());
+ }
+
+ bool Changed = false;
+
+ // Mark all functions not in the api as internal.
+ // FIXME: maybe use private linkage?
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && // Function must be defined here
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !I->hasLocalLinkage() && // Can't already have internal linkage
+ !ExternalNames.count(I->getName())) {// Not marked to keep external?
+ I->setLinkage(GlobalValue::InternalLinkage);
+ // Remove a callgraph edge from the external node to this function.
+ if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
+
+ // Never internalize the llvm.used symbol. It is used to implement
+ // attribute((used)).
+ // FIXME: Shouldn't this just filter on llvm.metadata section??
+ ExternalNames.insert("llvm.used");
+ ExternalNames.insert("llvm.compiler.used");
+
+ // Never internalize anchors used by the machine module info, else the info
+ // won't find them. (see MachineModuleInfo.)
+ ExternalNames.insert("llvm.global_ctors");
+ ExternalNames.insert("llvm.global_dtors");
+ ExternalNames.insert("llvm.noinline");
+ ExternalNames.insert("llvm.global.annotations");
+
+ // Mark all global variables with initializers that are not in the api as
+ // internal as well.
+ // FIXME: maybe use private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasLocalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
+
+ // Mark all aliases that are not in the api as internal as well.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasInternalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createInternalizePass(bool AllButMain) {
+ return new InternalizePass(AllButMain);
+}
+
+ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
+ return new InternalizePass(el);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 000000000000..848944dc9381
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,248 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract each
+// top-level loop into its own new function. If the loop is the ONLY loop in a
+// given function, it is not touched. This is a pass most useful for debugging
+// via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-extract"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumExtracted, "Number of loops extracted");
+
+namespace {
+ struct LoopExtractor : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned NumLoops;
+
+ explicit LoopExtractor(unsigned numLoops = ~0)
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ }
+ };
+}
+
+char LoopExtractor::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+
+namespace {
+ /// SingleLoopExtractor - For bugpoint.
+ struct SingleLoopExtractor : public LoopExtractor {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractor(1) {}
+ };
+} // End anonymous namespace
+
+char SingleLoopExtractor::ID = 0;
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false)
+
+// createLoopExtractorPass - This pass extracts all natural loops from the
+// program into a function if it can.
+//
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only visit top-level loops.
+ if (L->getParentLoop())
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
+
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops. Otherwise there is exactly one top-level loop; in this case if
+ // this function is more than a minimal wrapper around the loop, extract
+ // the loop.
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != L->getHeader())
+ ShouldExtractLoop = true;
+ else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ if (ExtractLoop(DT, L) != 0) {
+ Changed = true;
+ // After extraction, the loop is replaced by a function call, so
+ // we shouldn't try to run any more loop passes on it.
+ LPM.deleteLoopFromQueue(L);
+ }
+ ++NumExtracted;
+ }
+
+ return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+ return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+
+namespace {
+ /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+ /// from the module into their own functions except for those specified by the
+ /// BlocksToNotExtract list.
+ class BlockExtractorPass : public ModulePass {
+ void LoadFile(const char *Filename);
+
+ std::vector<BasicBlock*> BlocksToNotExtract;
+ std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ BlockExtractorPass() : ModulePass(ID) {
+ if (!BlockFile.empty())
+ LoadFile(BlockFile.c_str());
+ }
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false)
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass()
+{
+ return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+ // Load the BlockFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
+ return;
+ }
+ while (In) {
+ std::string FunctionName, BlockName;
+ In >> FunctionName;
+ In >> BlockName;
+ if (!BlockName.empty())
+ BlocksToNotExtractByName.push_back(
+ std::make_pair(FunctionName, BlockName));
+ }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+ std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+ for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+ BasicBlock *BB = BlocksToNotExtract[i];
+ Function *F = BB->getParent();
+
+ // Map the corresponding function in this module.
+ Function *MF = M.getFunction(F->getName());
+ assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+ // Figure out which index the basic block is in its function.
+ Function::iterator BBI = MF->begin();
+ std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+ TranslatedBlocksToNotExtract.insert(BBI);
+ }
+
+ while (!BlocksToNotExtractByName.empty()) {
+ // There's no way to find BBs by name without looking at every BB inside
+ // every Function. Fortunately, this is always empty except when used by
+ // bugpoint in which case correctness is more important than performance.
+
+ std::string &FuncName = BlocksToNotExtractByName.back().first;
+ std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ Function &F = *FI;
+ if (F.getName() != FuncName) continue;
+
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock &BB = *BI;
+ if (BB.getName() != BlockName) continue;
+
+ TranslatedBlocksToNotExtract.insert(BI);
+ }
+ }
+
+ BlocksToNotExtractByName.pop_back();
+ }
+
+ // Now that we know which blocks to not extract, figure out which ones we WANT
+ // to extract.
+ std::vector<BasicBlock*> BlocksToExtract;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!TranslatedBlocksToNotExtract.count(BB))
+ BlocksToExtract.push_back(BB);
+
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
+ ExtractBasicBlock(BlocksToExtract[i]);
+
+ return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
new file mode 100644
index 000000000000..659476b139e4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -0,0 +1,547 @@
+//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of setjmp and longjmp to use the
+// LLVM invoke and unwind instructions as necessary.
+//
+// Lowering of longjmp is fairly trivial. We replace the call with a
+// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()".
+// This unwinds the stack for us, calling all of the destructors for
+// objects allocated on the stack.
+//
+// At a setjmp call, the basic block is split and the setjmp removed.
+// The calls in a function that contains a setjmp are converted to
+// invokes whose exception edge checks whether the in-flight exception
+// is a longjmp and, if so, whether it is handled in this function. If
+// it is, the handler gets the value returned by the longjmp and control
+// continues where the basic block was split. Invoke instructions are
+// handled in a similar fashion, with the original exception block being
+// executed only if the exception is not a longjmp handled by this
+// function.
+//
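+// Illustrative sketch (hypothetical IR): a call that is reachable from a
+// setjmp in the same function, e.g.
+//
+//   %r = call i32 @foo()
+//
+// is rewritten into an invoke of @foo whose unwind destination uses the
+// __llvm_sjljeh_* runtime helpers to ask whether the in-flight exception is
+// a longjmp handled here, and if so dispatches to the block that was split
+// off at the corresponding setjmp call.
+//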
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FIXME: This pass doesn't deal with PHI statements just yet. That is,
+// we expect this to occur before SSAification is done. This would seem
+// to make sense, but in general, it might be a good idea to make this
+// pass invokable via the "opt" command at will.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersetjmp"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(LongJmpsTransformed, "Number of longjmps transformed");
+STATISTIC(SetJmpsTransformed , "Number of setjmps transformed");
+STATISTIC(CallsTransformed , "Number of calls invokified");
+STATISTIC(InvokesTransformed , "Number of invokes modified");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // LowerSetJmp pass implementation.
+ class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> {
+ // LLVM library functions...
+ Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
+ Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
+ Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map
+ Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp
+ Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception
+ Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception
+ Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value
+
+ typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair;
+
+ // Keep track of those basic blocks reachable via a depth-first search of
+ // the CFG from a setjmp call. We only need to transform those "call" and
+ // "invoke" instructions that are reachable from the setjmp call site.
+ std::set<BasicBlock*> DFSBlocks;
+
+ // The setjmp map is going to hold information about which setjmps
+ // were called (each setjmp gets its own number) and with which
+ // buffer it was called.
+ std::map<Function*, AllocaInst*> SJMap;
+
+ // The rethrow basic block map holds the basic block to branch to if
+ // the exception isn't handled in the current function and needs to
+ // be rethrown.
+ std::map<const Function*, BasicBlock*> RethrowBBMap;
+
+ // The preliminary basic block map holds a basic block that grabs the
+ // exception and determines if it's handled by the current function.
+ std::map<const Function*, BasicBlock*> PrelimBBMap;
+
+ // The switch/value map holds a switch inst/call inst pair. The
+ // switch inst controls which handler (if any) gets called and the
+ // value is the value returned to that handler by the call to
+ // __llvm_sjljeh_get_longjmp_value.
+ std::map<const Function*, SwitchValuePair> SwitchValMap;
+
+ // A map of which setjmps we've seen so far in a function.
+ std::map<const Function*, unsigned> SetJmpIDMap;
+
+ AllocaInst* GetSetJmpMap(Function* Func);
+ BasicBlock* GetRethrowBB(Function* Func);
+ SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
+
+ void TransformLongJmpCall(CallInst* Inst);
+ void TransformSetJmpCall(CallInst* Inst);
+
+ bool IsTransformableFunction(StringRef Name);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSetJmp() : ModulePass(ID) {
+ initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+ }
+
+ void visitCallInst(CallInst& CI);
+ void visitInvokeInst(InvokeInst& II);
+ void visitReturnInst(ReturnInst& RI);
+ void visitUnwindInst(UnwindInst& UI);
+
+ bool runOnModule(Module& M);
+ bool doInitialization(Module& M);
+ };
+} // end anonymous namespace
+
+char LowerSetJmp::ID = 0;
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
+
+// runOnModule - Run the transformation on the program. We grab the function
+// prototypes for longjmp and setjmp. If they are used in the program,
+// then we can go directly to the places they're at and transform them.
+bool LowerSetJmp::runOnModule(Module& M) {
+ bool Changed = false;
+
+ // These are what the functions are called.
+ Function* SetJmp = M.getFunction("llvm.setjmp");
+ Function* LongJmp = M.getFunction("llvm.longjmp");
+
+ // This program doesn't have longjmp and setjmp calls.
+ if ((!LongJmp || LongJmp->use_empty()) &&
+ (!SetJmp || SetJmp->use_empty())) return false;
+
+ // Initialize some values and functions we'll need to transform the
+ // setjmp/longjmp functions.
+ doInitialization(M);
+
+ if (SetJmp) {
+ for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
+ B != E; ++B) {
+ BasicBlock* BB = cast<Instruction>(*B)->getParent();
+ for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
+ E = df_ext_end(BB, DFSBlocks); I != E; ++I)
+ /* empty */;
+ }
+
+ while (!SetJmp->use_empty()) {
+ assert(isa<CallInst>(SetJmp->use_back()) &&
+ "User of setjmp intrinsic not a call?");
+ TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
+ Changed = true;
+ }
+ }
+
+ if (LongJmp)
+ while (!LongJmp->use_empty()) {
+ assert(isa<CallInst>(LongJmp->use_back()) &&
+ "User of longjmp intrinsic not a call?");
+ TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
+ Changed = true;
+ }
+
+ // Now go through the affected functions and convert calls and invokes
+ // to new invokes...
+ for (std::map<Function*, AllocaInst*>::iterator
+ B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
+ Function* F = B->first;
+ for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
+ visit(*IB++);
+ if (IB != BB->end() && IB->getParent() != BB)
+ break; // The next instruction got moved to a different block!
+ }
+ }
+
+ DFSBlocks.clear();
+ SJMap.clear();
+ RethrowBBMap.clear();
+ PrelimBBMap.clear();
+ SwitchValMap.clear();
+ SetJmpIDMap.clear();
+
+ return Changed;
+}
+
+// doInitialization - For the lower setjmp/longjmp pass, this ensures that a
+// module contains declarations for the intrinsic functions we are going
+// to call to convert longjmp and setjmp calls.
+//
+// This function always returns true, since it always adds the declarations.
+bool LowerSetJmp::doInitialization(Module& M)
+{
+ const Type *SBPTy = Type::getInt8PtrTy(M.getContext());
+ const Type *SBPPTy = PointerType::getUnqual(SBPTy);
+
+ // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
+ // a description of the following library functions.
+
+ // void __llvm_sjljeh_init_setjmpmap(void**)
+ InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
+ // void __llvm_sjljeh_destroy_setjmpmap(void**)
+ DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
+
+ // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
+ AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+
+ // void __llvm_sjljeh_throw_longjmp(int*, int)
+ ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
+ Type::getVoidTy(M.getContext()), SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+
+ // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
+ TryCatchLJ =
+ M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
+ Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
+
+ // bool __llvm_sjljeh_is_longjmp_exception()
+ IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
+ Type::getInt1Ty(M.getContext()),
+ (Type *)0);
+
+ // int __llvm_sjljeh_get_longjmp_value()
+ GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+ return true;
+}
+
+// IsTransformableFunction - Return true if calls to the named function may be
+// transformed. Currently we leave the "llvm.{setjmp,longjmp}" intrinsics and
+// the setjmp/longjmp error handling runtime functions (those beginning with
+// "__llvm_sjljeh_") alone, since the latter don't throw exceptions.
+bool LowerSetJmp::IsTransformableFunction(StringRef Name) {
+ return !Name.startswith("__llvm_sjljeh_");
+}
+
+// TransformLongJmpCall - Transform a longjmp call into a call to the
+// internal __llvm_sjljeh_throw_longjmp function. It then takes care of
+// throwing the exception for us.
+void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
+{
+ const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext());
+
+ // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
+ // same parameters as "longjmp", except that the buffer is cast to a
+ // char*. It returns "void", so it doesn't need to replace any of
+ // Inst's uses and doesn't get a name.
+ CastInst* CI =
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
+ Value *Args[] = { CI, Inst->getArgOperand(1) };
+ CallInst::Create(ThrowLongJmp, Args, "", Inst);
+
+ SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
+
+ // If the function has a setjmp call in it (they are transformed first)
+ // we should branch to the basic block that determines if this longjmp
+ // is applicable here. Otherwise, issue an unwind.
+ if (SVP.first)
+ BranchInst::Create(SVP.first->getParent(), Inst);
+ else
+ new UnwindInst(Inst->getContext(), Inst);
+
+ // Remove all insts after the branch/unwind inst. Go from back to front so
+ // that users are usually erased before their definitions, avoiding
+ // replaceAllUsesWith where possible.
+ BasicBlock *BB = Inst->getParent();
+ Instruction *Removed;
+ do {
+ Removed = &BB->back();
+ // If the removed instructions have any users, replace them now.
+ if (!Removed->use_empty())
+ Removed->replaceAllUsesWith(UndefValue::get(Removed->getType()));
+ Removed->eraseFromParent();
+ } while (Removed != Inst);
+
+ ++LongJmpsTransformed;
+}
+
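+// [Editor's illustration -- not part of the original source.] Roughly: a
+// lowered longjmp becomes
+//     %LJBuf = bitcast <jmp_buf operand> to i8*
+//     call void @__llvm_sjljeh_throw_longjmp(i8* %LJBuf, i32 <value operand>)
+// followed by a branch to the longjmp-dispatch block if the enclosing
+// function contains a setjmp, or by an 'unwind' otherwise; all instructions
+// after it in the block are deleted.
+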
+// GetSetJmpMap - Retrieve (create and initialize, if necessary) the
+// setjmp map. This map is going to hold information about which setjmps
+// were called (each setjmp gets its own number) and with which buffer it
+// was called. There can be only one!
+AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
+{
+ if (SJMap[Func]) return SJMap[Func];
+
+ // Insert the setjmp map initialization before the first instruction in
+ // the function.
+ Instruction* Inst = Func->getEntryBlock().begin();
+ assert(Inst && "Couldn't find even ONE instruction in entry block!");
+
+ // Fill in the alloca and call to initialize the SJ map.
+ const Type *SBPTy =
+ Type::getInt8PtrTy(Func->getContext());
+ AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
+ CallInst::Create(InitSJMap, Map, "", Inst);
+ return SJMap[Func] = Map;
+}
+
+// GetRethrowBB - Only one rethrow basic block is needed per function.
+// If this is a longjmp exception but not handled in this block, this BB
+// performs the rethrow.
+BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
+{
+ if (RethrowBBMap[Func]) return RethrowBBMap[Func];
+
+ // The basic block we're going to jump to if we need to rethrow the
+ // exception.
+ BasicBlock* Rethrow =
+ BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
+
+ // Fill in the "Rethrow" BB with a call to rethrow the exception. This
+ // is the last instruction in the BB since at this point the runtime
+ // should exit this function and go to the next function.
+ new UnwindInst(Func->getContext(), Rethrow);
+ return RethrowBBMap[Func] = Rethrow;
+}
+
+// GetSJSwitch - Return the switch statement that controls which handler
+// (if any) gets called and the value returned to that handler.
+LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
+ BasicBlock* Rethrow)
+{
+ if (SwitchValMap[Func].first) return SwitchValMap[Func];
+
+ BasicBlock* LongJmpPre =
+ BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
+
+ // Keep track of the preliminary basic block for some of the other
+ // transformations.
+ PrelimBBMap[Func] = LongJmpPre;
+
+ // Grab the exception.
+ CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
+
+ // The "decision basic block" gets the number associated with the
+ // setjmp call returning to switch on and the value returned by
+ // longjmp.
+ BasicBlock* DecisionBB =
+ BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
+
+ BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
+
+ // Fill in the "decision" basic block.
+ CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
+ CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
+ DecisionBB);
+
+ SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
+ return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
+}
+
+// TransformSetJmpCall - The setjmp call is a bit trickier to transform.
+// We're going to convert all setjmp calls to nops. Then all "call" and
+// "invoke" instructions in the function are converted to "invoke" where
+// the "except" branch is used when returning from a longjmp call.
+void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
+{
+ BasicBlock* ABlock = Inst->getParent();
+ Function* Func = ABlock->getParent();
+
+ // Add this setjmp to the setjmp map.
+ const Type* SBPTy =
+ Type::getInt8PtrTy(Inst->getContext());
+ CastInst* BufPtr =
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
+ Value *Args[] = {
+ GetSetJmpMap(Func), BufPtr,
+ ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
+ };
+ CallInst::Create(AddSJToMap, Args, "", Inst);
+
+ // We are guaranteed that there are no values live across basic blocks
+ // (because we are "not in SSA form" yet), but there can still be values live
+ // in basic blocks. Because of this, splitting the setjmp block can cause
+ // values above the setjmp to not dominate uses which are after the setjmp
+ // call. For all of these occasions, we must spill the value to the stack.
+ //
+ std::set<Instruction*> InstrsAfterCall;
+
+ // The call is probably very close to the end of the basic block, for the
+ // common usage pattern of: 'if (setjmp(...))', so keep track of the
+ // instructions after the call.
+ for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end();
+ I != E; ++I)
+ InstrsAfterCall.insert(I);
+
+ for (BasicBlock::iterator II = ABlock->begin();
+ II != BasicBlock::iterator(Inst); ++II)
+ // Loop over all of the uses of instruction. If any of them are after the
+ // call, "spill" the value to the stack.
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != ABlock ||
+ InstrsAfterCall.count(cast<Instruction>(U))) {
+ DemoteRegToStack(*II);
+ break;
+ }
+ }
+ InstrsAfterCall.clear();
+
+ // Change the setjmp call into a branch statement. We'll remove the
+ // setjmp call in a little bit. No worries.
+ BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst);
+ assert(SetJmpContBlock && "Couldn't split setjmp BB!!");
+
+ SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont");
+
+ // Add the SetJmpContBlock to the set of blocks reachable from a setjmp.
+ DFSBlocks.insert(SetJmpContBlock);
+
+ // This PHI node will be in the new block created from the
+ // splitBasicBlock call.
+ PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2,
+ "SetJmpReturn", Inst);
+
+ // Coming from a call to setjmp, the return is 0.
+ PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
+ ABlock);
+
+ // Add the case for this setjmp's number...
+ SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
+ SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
+ SetJmpIDMap[Func] - 1),
+ SetJmpContBlock);
+
+ // Value coming from the handling of the exception.
+ PHI->addIncoming(SVP.second, SVP.second->getParent());
+
+ // Replace all uses of this instruction with the PHI node created by
+ // the eradication of setjmp.
+ Inst->replaceAllUsesWith(PHI);
+ Inst->eraseFromParent();
+
+ ++SetJmpsTransformed;
+}
+
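+// [Editor's illustration -- not part of the original source.] After the
+// transformation above, the split point looks roughly like:
+//
+//   ABlock:                              ; original block, setjmp removed
+//     call void @__llvm_sjljeh_add_setjmp_to_map(i8** %SJMap, i8* %buf, i32 N)
+//     br label %ABlockSetJmpCont
+//   ABlockSetJmpCont:
+//     %SetJmpReturn = phi i32 [ 0, %ABlock ], [ %LJVal, %LJDecisionBB ]
+//     ; ... former uses of the setjmp now use %SetJmpReturn ...
+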
+// visitCallInst - This converts all LLVM call instructions into invoke
+// instructions. The except part of the invoke goes to the "LongJmpBlkPre"
+// that grabs the exception and proceeds to determine if it's a longjmp
+// exception or not.
+void LowerSetJmp::visitCallInst(CallInst& CI)
+{
+ if (CI.getCalledFunction())
+ if (!IsTransformableFunction(CI.getCalledFunction()->getName()) ||
+ CI.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* OldBB = CI.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(OldBB)) return;
+
+ BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
+ assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
+ DFSBlocks.insert(NewBB);
+ NewBB->setName("Call2Invoke");
+
+ Function* Func = OldBB->getParent();
+
+ // Construct the new "invoke" instruction.
+ TerminatorInst* Term = OldBB->getTerminator();
+ CallSite CS(&CI);
+ std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
+ InvokeInst* II =
+ InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
+ Params, CI.getName(), Term);
+ II->setCallingConv(CI.getCallingConv());
+ II->setAttributes(CI.getAttributes());
+
+ // Replace the old call inst with the invoke inst and remove the call.
+ CI.replaceAllUsesWith(II);
+ CI.eraseFromParent();
+
+ // The old terminator is useless now that we have the invoke inst.
+ Term->eraseFromParent();
+ ++CallsTransformed;
+}
+
+// visitInvokeInst - Converting the "invoke" instruction is fairly
+// straightforward. The old exception destination is replaced by a query
+// asking whether this is a longjmp exception. If it is, control goes to the
+// longjmp exception blocks. Otherwise, control is passed to the old exception
+// block.
+void LowerSetJmp::visitInvokeInst(InvokeInst& II)
+{
+ if (II.getCalledFunction())
+ if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
+ II.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* BB = II.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(BB)) return;
+
+ BasicBlock* ExceptBB = II.getUnwindDest();
+
+ Function* Func = BB->getParent();
+ BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),
+ "InvokeExcept", Func);
+
+ // If this is a longjmp exception, then branch to the preliminary BB of
+ // the longjmp exception handling. Otherwise, go to the old exception.
+ CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
+ NewExceptBB);
+
+ BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
+
+ II.setUnwindDest(NewExceptBB);
+ ++InvokesTransformed;
+}
+
+// visitReturnInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
+ Function* Func = RI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
+}
+
+// visitUnwindInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
+ Function* Func = UI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
+}
+
+ModulePass *llvm::createLowerSetJmpPass() {
+ return new LowerSetJmp();
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 000000000000..7796d05b7bc6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,868 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// A hash is computed from the function, based on its type and number of
+// basic blocks.
+//
+// Once all hashes are computed, we perform an expensive equality comparison
+// on each function pair. This takes n^2/2 comparisons per bucket, so it's
+// important that the hash function be high quality. The equality comparison
+// iterates through each instruction in each basic block.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to look through bitcasts.
+//
+//===----------------------------------------------------------------------===//
+
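+// [Editor's illustration -- not part of the original source.] A hedged sketch
+// with hypothetical functions; since all pointer types are treated as
+// equivalent, these two compare equal and are folded:
+//
+//   int  *getA(int  **pp) { return *pp; }
+//   char *getB(char **pp) { return *pp; }
+//
+// Depending on linkage, getB is then deleted, turned into an alias of getA,
+// or rewritten as a thunk that tail-calls a bitcast of getA.
+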
+#define DEBUG_TYPE "mergefunc"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+/// Creates a hash-code for the function which is the same for any two
+/// functions that will compare equal, without looking at the instructions
+/// inside the function.
+static unsigned profileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
+}
+
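+// [Editor's illustration -- not part of the original source.] The hash folds
+// together {#basic blocks, calling conv, hasGC, isVarArg, return type ID,
+// param type IDs}. For a hypothetical 'static int id(int x) { return x; }'
+// that tuple is roughly {1, C, false, false, IntegerTyID, [IntegerTyID]}, so
+// any single-block i32(i32) function hashes into the same bucket and is only
+// distinguished by the full FunctionComparator check below.
+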
+namespace {
+
+/// ComparableFunction - A class that pairs a Function with a TargetData so
+/// that the two can be kept together as elements in the DenseSet.
+class ComparableFunction {
+public:
+ static const ComparableFunction EmptyKey;
+ static const ComparableFunction TombstoneKey;
+ static TargetData * const LookupOnly;
+
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+
+ Function *getFunc() const { return Func; }
+ unsigned getHash() const { return Hash; }
+ TargetData *getTD() const { return TD; }
+
+ // Drops AssertingVH reference to the function. Outside of debug mode, this
+ // does nothing.
+ void release() {
+ assert(Func &&
+ "Attempted to release function twice, or release empty/tombstone!");
+ Func = NULL;
+ }
+
+private:
+ explicit ComparableFunction(unsigned Hash)
+ : Func(NULL), Hash(Hash), TD(NULL) {}
+
+ AssertingVH<Function> Func;
+ unsigned Hash;
+ TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+ ComparableFunction(1);
+TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1);
+
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<ComparableFunction> {
+ static ComparableFunction getEmptyKey() {
+ return ComparableFunction::EmptyKey;
+ }
+ static ComparableFunction getTombstoneKey() {
+ return ComparableFunction::TombstoneKey;
+ }
+ static unsigned getHashValue(const ComparableFunction &CF) {
+ return CF.getHash();
+ }
+ static bool isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS);
+ };
+}
+
+namespace {
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const TargetData *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD) {}
+
+ /// Test whether the two functions have equivalent behaviour.
+ bool compare();
+
+private:
+ /// Test whether two basic blocks have equivalent behaviour.
+ bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ bool enumerate(const Value *V1, const Value *V2);
+
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
+
+ /// Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
+
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const TargetData *TD;
+
+ DenseMap<const Value *, const Value *> id_map;
+ DenseSet<const Value *> seen_values;
+};
+
+}
+
+// Any two pointers in the same address space are equivalent, and (given
+// TargetData) intptr_t and pointers are equivalent. Otherwise, standard type
+// equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+ const Type *Ty2) const {
+ if (Ty1 == Ty2)
+ return true;
+ if (Ty1->getTypeID() != Ty2->getTypeID()) {
+ if (TD) {
+ LLVMContext &Ctx = Ty1->getContext();
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ }
+ return false;
+ }
+
+ switch (Ty1->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ case Type::IntegerTyID:
+ case Type::VectorTyID:
+ // Ty1 == Ty2 would have returned true earlier.
+ return false;
+
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ return true;
+
+ case Type::PointerTyID: {
+ const PointerType *PTy1 = cast<PointerType>(Ty1);
+ const PointerType *PTy2 = cast<PointerType>(Ty2);
+ return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+ }
+
+ case Type::StructTyID: {
+ const StructType *STy1 = cast<StructType>(Ty1);
+ const StructType *STy2 = cast<StructType>(Ty2);
+ if (STy1->getNumElements() != STy2->getNumElements())
+ return false;
+
+ if (STy1->isPacked() != STy2->isPacked())
+ return false;
+
+ for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
+ if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::FunctionTyID: {
+ const FunctionType *FTy1 = cast<FunctionType>(Ty1);
+ const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+ if (FTy1->getNumParams() != FTy2->getNumParams() ||
+ FTy1->isVarArg() != FTy2->isVarArg())
+ return false;
+
+ if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
+ return false;
+
+ for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
+ if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy1->getNumElements() == ATy2->getNumElements() &&
+ isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ }
+ }
+}
+
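+// [Editor's illustration -- not part of the original source.] Under these
+// rules 'i8*' and 'i32*' (same address space) are equivalent, and when
+// TargetData is available a pointer is also equivalent to the target's
+// intptr_t (e.g. 'i64' on a typical 64-bit target); 'i32' vs 'i64', or
+// pointers in different address spaces, remain distinct.
+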
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to isEquivalentType.
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
+ // * because of the above, we don't test for the tail bit on calls later on
+ if (I1->getOpcode() != I2->getOpcode() ||
+ I1->getNumOperands() != I2->getNumOperands() ||
+ !isEquivalentType(I1->getType(), I2->getType()) ||
+ !I1->hasSameSubclassOptionalData(I2))
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
+ if (!isEquivalentType(I1->getOperand(i)->getType(),
+ I2->getOperand(i)->getType()))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+ return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+ return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+ return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(I1))
+ return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+ return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
+ if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+ if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
+ return false;
+ return true;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
+ if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+ if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
+ return false;
+ return true;
+ }
+
+ return true;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
+ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+ SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
+ uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
+ Indices1.data(), Indices1.size());
+ uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
+ Indices2.data(), Indices2.size());
+ return Offset1 == Offset2;
+ }
+
+ if (GEP1->getPointerOperand()->getType() !=
+ GEP2->getPointerOperand()->getType())
+ return false;
+
+ if (GEP1->getNumOperands() != GEP2->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
+ if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+ return false;
+ }
+
+ return true;
+}
+
+// Compare two values used by the two functions under pair-wise comparison. If
+// this is the first time the values are seen, they're added to the mapping so
+// that we will detect mismatches on next use.
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
+
+ if (const Constant *C1 = dyn_cast<Constant>(V1)) {
+ if (V1 == V2) return true;
+ const Constant *C2 = dyn_cast<Constant>(V2);
+ if (!C2) return false;
+ // TODO: constant expressions with GEP or references to F1 or F2.
+ if (C1->isNullValue() && C2->isNullValue() &&
+ isEquivalentType(C1->getType(), C2->getType()))
+ return true;
+ // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
+ // then they must have equal bit patterns.
+ return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
+ C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
+ }
+
+ if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
+ return V1 == V2;
+
+ // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
+ // check whether it's equal to V2. When there is no mapping then we need to
+ // ensure that V2 isn't already equivalent to something else. For this
+ // purpose, we track the V2 values in a set.
+
+ const Value *&map_elem = id_map[V1];
+ if (map_elem)
+ return map_elem == V2;
+ if (!seen_values.insert(V2).second)
+ return false;
+ map_elem = V2;
+ return true;
+}
+
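+// [Editor's illustration -- not part of the original source.] For example,
+// when comparing
+//   f1:  %a = add i32 %x, 1        f2:  %p = add i32 %y, 1
+//        %b = mul i32 %a, %a            %q = mul i32 %p, %p
+// enumerate() records %x<->%y and %a<->%p on first sight; the %b/%q operands
+// then only match because %a already maps to %p and to nothing else.
+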
+// Test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
+
+ do {
+ if (!enumerate(F1I, F2I))
+ return false;
+
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
+
+ if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ return false;
+
+ if (!isEquivalentGEP(GEP1, GEP2))
+ return false;
+ } else {
+ if (!isEquivalentOperation(F1I, F2I))
+ return false;
+
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
+
+ if (!enumerate(OpF1, OpF2))
+ return false;
+
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
+ return false;
+ }
+ }
+
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
+
+ return F1I == F1E && F2I == F2E;
+}
+
+// Test whether the two functions have equivalent behaviour.
+bool FunctionComparator::compare() {
+ // We need to recheck everything, but check the things that weren't included
+ // in the hash first.
+
+ if (F1->getAttributes() != F2->getAttributes())
+ return false;
+
+ if (F1->hasGC() != F2->hasGC())
+ return false;
+
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
+ return false;
+
+ if (F1->hasSection() != F2->hasSection())
+ return false;
+
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
+ return false;
+
+ if (F1->isVarArg() != F2->isVarArg())
+ return false;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (F1->getCallingConv() != F2->getCallingConv())
+ return false;
+
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
+ return false;
+
+ assert(F1->arg_size() == F2->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat!");
+ }
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
+
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
+
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
+
+ if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
+ return false;
+
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
+
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
+}
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions()
+ : ModulePass(ID), HasGlobalAliases(false) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+private:
+ typedef DenseSet<ComparableFunction> FnSetType;
+
+ /// A work queue of functions that may have been modified and should be
+ /// analyzed again.
+ std::vector<WeakVH> Deferred;
+
+ /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// equal to one that's already present.
+ bool insert(ComparableFunction &NewF);
+
+ /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// analysis.
+ void remove(Function *F);
+
+ /// Find the functions that use this Value and remove them from FnSet and
+ /// queue the functions.
+ void removeUsers(Value *V);
+
+ /// Replace all direct calls of Old with calls of New. Will bitcast New if
+ /// necessary to make types match.
+ void replaceDirectCallers(Function *Old, Function *New);
+
+ /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+ /// be converted into a thunk. In either case, it should never be visited
+ /// again.
+ void mergeTwoFunctions(Function *F, Function *G);
+
+ /// Replace G with a thunk or an alias to F. Deletes G.
+ void writeThunkOrAlias(Function *F, Function *G);
+
+ /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+ /// of G with bitcast(F). Deletes G.
+ void writeThunk(Function *F, Function *G);
+
+ /// Replace G with an alias to F. Deletes G.
+ void writeAlias(Function *F, Function *G);
+
+ /// The set of all distinct functions. Use the insert() and remove() methods
+ /// to modify it.
+ FnSetType FnSet;
+
+ /// TargetData for more accurate GEP comparisons. May be NULL.
+ TargetData *TD;
+
+ /// Whether or not the target supports global aliases.
+ bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ Deferred.push_back(WeakVH(I));
+ }
+ FnSet.resize(Deferred.size());
+
+ do {
+ std::vector<WeakVH> Worklist;
+ Deferred.swap(Worklist);
+
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+ DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+
+ // Insert only weak functions and merge them. By doing these second we
+ // create thunks to the strong function when possible. When two weak
+ // functions are identical, we create a new strong function with two weak
+ // thunks to it which are identical but not mergeable.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+ DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ } while (!Deferred.empty());
+
+ FnSet.clear();
+
+ return Changed;
+}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS) {
+ if (LHS.getFunc() == RHS.getFunc() &&
+ LHS.getHash() == RHS.getHash())
+ return true;
+ if (!LHS.getFunc() || !RHS.getFunc())
+ return false;
+
+ // One of these is a special "underlying pointer comparison only" object.
+ if (LHS.getTD() == ComparableFunction::LookupOnly ||
+ RHS.getTD() == ComparableFunction::LookupOnly)
+ return false;
+
+ assert(LHS.getTD() == RHS.getTD() &&
+ "Comparing functions for different targets");
+
+ return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+ RHS.getFunc()).compare();
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+ Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter)) {
+ remove(CS.getInstruction()->getParent()->getParent());
+ TheIter.getUse().set(BitcastNew);
+ }
+ }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+ G->hasWeakLinkage()) {
+ writeAlias(F, G);
+ return;
+ }
+ }
+
+ writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
+ if (!G->mayBeOverridden()) {
+ // Redirect direct callers of G to F.
+ replaceDirectCallers(G, F);
+ }
+
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
+ Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
+
+ SmallVector<Value *, 16> Args;
+ unsigned i = 0;
+ const FunctionType *FFTy = F->getFunctionType();
+ for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
+ AI != AE; ++AI) {
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ ++i;
+ }
+
+ CallInst *CI = Builder.CreateCall(F, Args);
+ CI->setTailCall();
+ CI->setCallingConv(F->getCallingConv());
+ if (NewG->getReturnType()->isVoidTy()) {
+ Builder.CreateRetVoid();
+ } else {
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ }
+
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
+}
+
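+// [Editor's illustration -- not part of the original source.] The emitted
+// thunk has roughly this shape (the types shown are hypothetical):
+//
+//   define i8* @G(i32* %p) {
+//     %0 = bitcast i32* %p to i8*
+//     %1 = tail call i32* @F(i8* %0)
+//     %2 = bitcast i32* %1 to i8*
+//     ret i8* %2
+//   }
+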
+// Replace G with an alias to F and delete G.
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ BitcastF, G->getParent());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
+
+ if (HasGlobalAliases) {
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
+
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+ writeAlias(F, G);
+ writeAlias(F, H);
+
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ } else {
+ // We can't merge them. Instead, pick one and update all direct callers
+ // to call it and hope that we improve the instruction cache hit rate.
+ replaceDirectCallers(G, F);
+ }
+
+ ++NumDoubleWeak;
+ } else {
+ writeThunkOrAlias(F, G);
+ }
+
+ ++NumFunctionsMerged;
+}
+
+// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// that was already inserted.
+bool MergeFunctions::insert(ComparableFunction &NewF) {
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (Result.second) {
+ DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ return false;
+ }
+
+ const ComparableFunction &OldF = *Result.first;
+
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() ||
+ NewF.getFunc()->mayBeOverridden());
+
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
+ << NewF.getFunc()->getName() << '\n');
+
+ Function *DeleteF = NewF.getFunc();
+ NewF.release();
+ mergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
+
+// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
+// so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+ // We need to make sure we remove F, not a function "equal" to F per the
+ // function equality comparator.
+ //
+ // The special "lookup only" ComparableFunction bypasses the expensive
+ // function comparison in favour of a pointer comparison on the underlying
+ // Function*'s.
+ ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+ if (FnSet.erase(CF)) {
+ DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ Deferred.push_back(F);
+ }
+}
+
+// For each instruction that uses the value, remove() the function that
+// contains the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+ std::vector<Value *> Worklist;
+ Worklist.push_back(V);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ Use &U = UI.getUse();
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ remove(I->getParent()->getParent());
+ } else if (isa<GlobalValue>(U.getUser())) {
+ // do nothing
+ } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+ CUI != CUE; ++CUI)
+ Worklist.push_back(*CUI);
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 000000000000..d9d1d106111e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,182 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining: typically, the early-exit 'if' test at
+// the entry of a function is inlined into its callers, while the rest of the
+// body is outlined into a separate function.
+//
+//===----------------------------------------------------------------------===//
+
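+// [Editor's illustration -- not part of the original source.] A hedged sketch
+// of the shape this pass looks for, using a hypothetical function:
+//
+//   int process(item *it) {
+//     if (it == 0)          // cheap early-exit test: inlined into callers
+//       return 0;
+//     /* large body */      // outlined into a new function via
+//     return do_work(it);   // ExtractCodeRegion and called from the test
+//   }
+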
+#define DEBUG_TYPE "partialinlining"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+using namespace llvm;
+
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
+namespace {
+ struct PartialInliner : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ static char ID; // Pass identification, replacement for typeid
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module& M);
+
+ private:
+ Function* unswitchFunction(Function* F);
+ };
+}
+
+char PartialInliner::ID = 0;
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false)
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+Function* PartialInliner::unswitchFunction(Function* F) {
+ // First, verify that this function is an unswitching candidate...
+ BasicBlock* entryBlock = F->begin();
+ BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ if (!BR || BR->isUnconditional())
+ return 0;
+
+ BasicBlock* returnBlock = 0;
+ BasicBlock* nonReturnBlock = 0;
+ unsigned returnCount = 0;
+ for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
+ SI != SE; ++SI)
+ if (isa<ReturnInst>((*SI)->getTerminator())) {
+ returnBlock = *SI;
+ returnCount++;
+ } else
+ nonReturnBlock = *SI;
+
+ if (returnCount != 1)
+ return 0;
+
+ // Clone the function, so that we can hack away on it.
+ ValueToValueMapTy VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
+ duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(duplicateFunction);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
+
+ // Go ahead and update all uses to the duplicate, so that we can just
+ // use the inliner functionality when we're done hacking.
+ F->replaceAllUsesWith(duplicateFunction);
+
+ // Special hackery is needed with PHI nodes that have inputs from more than
+ // one extracted block. For simplicity, just split the PHIs into a two-level
+ // sequence of PHIs, some of which will go in the extracted region, and some
+ // of which will go outside.
+ BasicBlock* preReturn = newReturnBlock;
+ newReturnBlock = newReturnBlock->splitBasicBlock(
+ newReturnBlock->getFirstNonPHI());
+ BasicBlock::iterator I = preReturn->begin();
+ BasicBlock::iterator Ins = newReturnBlock->begin();
+ while (I != preReturn->end()) {
+ PHINode* OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi) break;
+
+ PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+ OldPhi->replaceAllUsesWith(retPhi);
+ Ins = newReturnBlock->getFirstNonPHI();
+
+ retPhi->addIncoming(I, preReturn);
+ retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+ newEntryBlock);
+ OldPhi->removeIncomingValue(newEntryBlock);
+
+ ++I;
+ }
+ newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+
+ // Gather up the blocks that we're going to extract.
+ std::vector<BasicBlock*> toExtract;
+ toExtract.push_back(newNonReturnBlock);
+ for (Function::iterator FI = duplicateFunction->begin(),
+ FE = duplicateFunction->end(); FI != FE; ++FI)
+ if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+ &*FI != newNonReturnBlock)
+ toExtract.push_back(FI);
+
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.runOnFunction(*duplicateFunction);
+
+ // Extract the body of the if.
+ Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
+
+ InlineFunctionInfo IFI;
+
+ // Inline the top-level if test into all callers.
+ std::vector<User*> Users(duplicateFunction->use_begin(),
+ duplicateFunction->use_end());
+ for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+ UI != UE; ++UI)
+ if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ InlineFunction(CI, IFI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ InlineFunction(II, IFI);
+
+ // Ditch the duplicate, since we're done with it, and rewrite all remaining
+ // users (function pointers, etc.) back to the original function.
+ duplicateFunction->replaceAllUsesWith(F);
+ duplicateFunction->eraseFromParent();
+
+ ++NumPartialInlined;
+
+ return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+ std::vector<Function*> worklist;
+ worklist.reserve(M.size());
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+ if (!FI->use_empty() && !FI->isDeclaration())
+ worklist.push_back(&*FI);
+
+ bool changed = false;
+ while (!worklist.empty()) {
+ Function* currFunc = worklist.back();
+ worklist.pop_back();
+
+ if (currFunc->use_empty()) continue;
+
+ bool recursive = false;
+ for (Function::use_iterator UI = currFunc->use_begin(),
+ UE = currFunc->use_end(); UI != UE; ++UI)
+ if (Instruction* I = dyn_cast<Instruction>(*UI))
+ if (I->getParent()->getParent() == currFunc) {
+ recursive = true;
+ break;
+ }
+ if (recursive) continue;
+
+
+ if (Function* newFunc = unswitchFunction(currFunc)) {
+ worklist.push_back(newFunc);
+ changed = true;
+ }
+
+ }
+
+ return changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 000000000000..b7e63dc4484c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,256 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, turning invoke instructions into calls, iff the callee cannot
+// throw an exception, and marking functions 'nounwind' if they cannot throw.
+// It implements this as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
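+// [Editor's illustration -- not part of the original source.] Roughly: when a
+// callee is proven nounwind,
+//     invoke void @callee() to label %normal unwind label %lpad
+// is rewritten as
+//     call void @callee()
+//     br label %normal
+// which typically leaves %lpad dead, and an SCC whose members neither unwind
+// nor contain an 'unwind' terminator is marked 'nounwind'.
+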
+#define DEBUG_TYPE "prune-eh"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of invokes removed");
+STATISTIC(NumUnreach, "Number of noreturn calls optimized");
+
+namespace {
+ struct PruneEH : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ bool SimplifyFunction(Function *F);
+ void DeleteBasicBlock(BasicBlock *BB);
+ };
+}
+
+char PruneEH::ID = 0;
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+
+Pass *llvm::createPruneEHPass() { return new PruneEH(); }
+
+
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+ SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool MadeChange = false;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert(*I);
+
+ // First pass, scan all of the functions in the SCC, simplifying them
+ // according to what we know.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+
+ // Next, check to see if any callees might throw or if there are any external
+ // functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+ // overridden at linktime with something that throws, so assume that.
+ // If this SCC includes the unwind instruction, we KNOW it throws, so
+ // obviously the SCC might throw.
+ //
+ bool SCCMightUnwind = false, SCCMightReturn = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F == 0) {
+ SCCMightUnwind = true;
+ SCCMightReturn = true;
+ } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ SCCMightUnwind |= !F->doesNotThrow();
+ SCCMightReturn |= !F->doesNotReturn();
+ } else {
+ bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+ bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+
+ if (!CheckUnwind && !CheckReturn)
+ continue;
+
+ // Check to see if this function performs an unwind or calls an
+ // unwinding function.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) {
+ // Uses unwind!
+ SCCMightUnwind = true;
+ } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
+ SCCMightReturn = true;
+ }
+
+ // Invoke instructions don't allow unwinding to continue, so we are
+ // only interested in call instructions.
+ if (CheckUnwind && !SCCMightUnwind)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->doesNotThrow()) {
+ // This call cannot throw.
+ } else if (Function *Callee = CI->getCalledFunction()) {
+ CallGraphNode *CalleeNode = CG[Callee];
+ // If the callee is outside our current SCC then we may
+ // throw because it might.
+ if (!SCCNodes.count(CalleeNode)) {
+ SCCMightUnwind = true;
+ break;
+ }
+ } else {
+ // Indirect call, it might throw.
+ SCCMightUnwind = true;
+ break;
+ }
+ }
+ if (SCCMightUnwind && SCCMightReturn) break;
+ }
+ }
+ }
+
+  // If the SCC can't unwind or can't return, note this fact.
+ if (!SCCMightUnwind || !SCCMightReturn)
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Attributes NewAttributes = Attribute::None;
+
+ if (!SCCMightUnwind)
+ NewAttributes |= Attribute::NoUnwind;
+ if (!SCCMightReturn)
+ NewAttributes |= Attribute::NoReturn;
+
+ Function *F = (*I)->getFunction();
+ const AttrListPtr &PAL = F->getAttributes();
+ const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
+ if (PAL != NPAL) {
+ MadeChange = true;
+ F->setAttributes(NPAL);
+ }
+ }
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ // Convert any invoke instructions to non-throwing functions in this node
+ // into call instructions with a branch. This makes the exception blocks
+ // dead.
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+ }
+
+ return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls to
+// no-return functions.
+bool PruneEH::SimplifyFunction(Function *F) {
+ bool MadeChange = false;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction
+ // now uses the value produced by the call instruction. Note that we
+ // do this even for void functions and calls with no uses so that the
+ // callgraph edge is updated.
+ II->replaceAllUsesWith(Call);
+ BasicBlock *UnwindBlock = II->getUnwindDest();
+ UnwindBlock->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the
+ // invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ BB->getInstList().pop_back();
+
+ // If the unwind block is now dead, nuke it.
+ if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+ DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+
+ ++NumRemoved;
+ MadeChange = true;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ // This call calls a function that cannot return. Insert an
+ // unreachable instruction after it and simplify the code. Do this
+ // by splitting the BB, adding the unreachable, then deleting the
+ // new BB.
+ BasicBlock *New = BB->splitBasicBlock(I);
+
+ // Remove the uncond branch and add an unreachable.
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+
+ DeleteBasicBlock(New); // Delete the new BB.
+ MadeChange = true;
+ ++NumUnreach;
+ break;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+ assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ CallGraphNode *CGN = CG[BB->getParent()];
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+ --I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(I))
+ CGN->removeCallEdgeFor(CI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ CGN->removeCallEdgeFor(II);
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ Succs[i]->removePredecessor(BB);
+
+ BB->eraseFromParent();
+}
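
The bottom-up inference above can be pictured on a toy call graph. As a rough
standalone illustration (a sketch, not code from this import; the Func struct,
the function names, and the singleton SCCs are invented), a function ends up
nounwind only if it contains no throwing operation and every callee processed
before it was already proven nounwind:

  #include <iostream>
  #include <map>
  #include <string>
  #include <vector>

  struct Func {
    bool hasThrowingOp;                // contains a throwing operation itself
    std::vector<std::string> callees;  // direct callees, by name
    bool noUnwind;                     // inferred attribute
  };

  int main() {
    std::map<std::string, Func> cg;
    cg["leaf"]    = Func{false, {}, false};
    cg["wrapper"] = Func{false, {"leaf"}, false};
    cg["thrower"] = Func{true,  {}, false};
    cg["caller"]  = Func{false, {"thrower", "wrapper"}, false};

    // SCCs visited bottom-up; for simplicity every SCC here is one function.
    std::vector<std::vector<std::string>> sccs = {
        {"leaf"}, {"wrapper"}, {"thrower"}, {"caller"}};

    for (const auto &scc : sccs) {
      bool mightUnwind = false;
      for (const auto &name : scc) {
        const Func &f = cg[name];
        if (f.hasThrowingOp)
          mightUnwind = true;
        for (const auto &callee : f.callees)
          if (!cg[callee].noUnwind)    // callee was not proven nounwind
            mightUnwind = true;
      }
      if (!mightUnwind)
        for (const auto &name : scc)
          cg[name].noUnwind = true;    // analogous to adding Attribute::NoUnwind
    }

    for (const auto &kv : cg)
      std::cout << kv.first << ": nounwind=" << kv.second.noUnwind << "\n";
    // caller: 0, leaf: 1, thrower: 0, wrapper: 1
  }
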
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 000000000000..b5f09ecccaf2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,73 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+namespace {
+
+/// @brief Pass to remove unused function declarations.
+class StripDeadPrototypesPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnModule(Module &M);
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
+bool StripDeadPrototypesPass::runOnModule(Module &M) {
+ bool MadeChange = false;
+
+ // Erase dead function prototypes.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function *F = I++;
+ // Function must be a prototype and unused.
+ if (F->isDeclaration() && F->use_empty()) {
+ F->eraseFromParent();
+ ++NumDeadPrototypes;
+ MadeChange = true;
+ }
+ }
+
+ // Erase dead global var prototypes.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ) {
+ GlobalVariable *GV = I++;
+ // Global must be a prototype and unused.
+    if (GV->isDeclaration() && GV->use_empty()) {
+      GV->eraseFromParent();
+      MadeChange = true;
+    }
+ }
+
+ // Return an indication of whether we changed anything or not.
+ return MadeChange;
+}
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+ return new StripDeadPrototypesPass();
+}
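
The erase loops above depend on advancing the iterator past the current
element before erasing it, so the erase cannot invalidate the iterator still
in use. A minimal standalone sketch of the same pattern (the list contents and
the "unused" predicate are invented for illustration):

  #include <iostream>
  #include <list>
  #include <string>

  int main() {
    std::list<std::string> decls = {"printf", "unused_proto", "malloc",
                                    "another_unused_proto"};

    for (auto it = decls.begin(), end = decls.end(); it != end; ) {
      auto cur = it++;                 // grab the element, then advance
      if (cur->find("unused") != std::string::npos)
        decls.erase(cur);              // safe: 'it' already points past 'cur'
    }

    for (const auto &d : decls)
      std::cout << d << "\n";          // printf, malloc
  }
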
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
new file mode 100644
index 000000000000..0fbaff1509a7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -0,0 +1,413 @@
+//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class StripSymbols : public ModulePass {
+ bool OnlyDebugInfo;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripSymbols(bool ODI = false)
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripNonDebugSymbols : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripNonDebugSymbols()
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDebugDeclare : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDebugDeclare()
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char StripSymbols::ID = 0;
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false)
+
+ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
+ return new StripSymbols(OnlyDebugInfo);
+}
+
+char StripNonDebugSymbols::ID = 0;
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false)
+
+ModulePass *llvm::createStripNonDebugSymbolsPass() {
+ return new StripNonDebugSymbols();
+}
+
+char StripDebugDeclare::ID = 0;
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false)
+
+ModulePass *llvm::createStripDebugDeclarePass() {
+ return new StripDebugDeclare();
+}
+
+char StripDeadDebugInfo::ID = 0;
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false)
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
+/// OnlyUsedBy - Return true if V is only used by Usr.
+static bool OnlyUsedBy(Value *V, Value *Usr) {
+ for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ User *U = *I;
+ if (U != Usr)
+ return false;
+ }
+ return true;
+}
+
+static void RemoveDeadConstant(Constant *C) {
+ assert(C->use_empty() && "Constant is not dead!");
+ SmallPtrSet<Constant*, 4> Operands;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ if (OnlyUsedBy(C->getOperand(i), C))
+ Operands.insert(cast<Constant>(C->getOperand(i)));
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
+ GV->eraseFromParent();
+ }
+ else if (!isa<Function>(C))
+ if (isa<CompositeType>(C->getType()))
+ C->destroyConstant();
+
+ // If the constant referenced anything, see if we can delete it as well.
+ for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
+ OE = Operands.end(); OI != OE; ++OI)
+ RemoveDeadConstant(*OI);
+}
+
+// Strip the symbol table of its names.
+//
+static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
+ for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
+ Value *V = VI->getValue();
+ ++VI;
+ if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
+ if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
+ // Set name to "", removing from symbol table!
+ V->setName("");
+ }
+ }
+}
+
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+ std::vector<StructType*> StructTypes;
+ M.findUsedStructTypes(StructTypes);
+
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isAnonymous() || STy->getName().empty()) continue;
+
+ if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+ continue;
+
+ STy->setName("");
+ }
+}
+
+/// Find values that are marked as llvm.used.
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ UsedValues.insert(LLVMUsed);
+
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+/// StripSymbolNames - Strip symbol names.
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ }
+
+ // Remove all names from types.
+ StripTypeNames(M, PreserveDbgInfo);
+
+ return true;
+}
+
+// StripDebugInfo - Strip debug info in the module if it exists.
+// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
+// llvm.dbg.region.end calls, and any globals they point to if now dead.
+static bool StripDebugInfo(Module &M) {
+
+ bool Changed = false;
+
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+ while (!DbgVal->use_empty()) {
+ CallInst *CI = cast<CallInst>(DbgVal->use_back());
+ CI->eraseFromParent();
+ }
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
+ }
+
+ return Changed;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+ bool Changed = false;
+ Changed |= StripDebugInfo(M);
+ if (!OnlyDebugInfo)
+ Changed |= StripSymbolNames(M, false);
+ return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+ return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+ std::vector<Constant*> DeadConstants;
+
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ } else
+ RemoveDeadConstant(C);
+ }
+
+ return true;
+}
+
+/// getRealLinkageName - If the linkage name starts with the special LLVM
+/// prefix ('\1') that tells the asm printer not to emit the usual symbol
+/// prefix, return the linkage name with that prefix stripped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+  // Debugging information is encoded in LLVM IR using metadata. This is
+  // designed in such a way that debug info for a symbol is preserved even if
+  // the symbol itself is optimized away by the optimizer. This pass removes
+  // the debug info for such dead symbols.
+
+ // llvm.dbg.gv keeps track of debug info for global variables.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(*I);
+ }
+ else
+ Changed = true;
+ }
+ }
+
+ // llvm.dbg.sp keeps track of debug info for subprograms.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DISubprogram(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ bool FnIsLive = false;
+ if (Function *F = DISubprogram(*I).getFunction())
+ if (M.getFunction(F->getName()))
+ FnIsLive = true;
+ if (FnIsLive) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(*I);
+ } else {
+ // Remove llvm.dbg.lv.fnname named mdnode which may have been used
+ // to hold debug info for dead function's local variables.
+ StringRef FName = DISubprogram(*I).getLinkageName();
+ if (FName.empty())
+ FName = DISubprogram(*I).getName();
+ if (NamedMDNode *LVNMD =
+ M.getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName))))
+ LVNMD->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
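
StripSymbolNames above only clears the names of internal symbols that are not
pinned by llvm.used / llvm.compiler.used. As a rough standalone sketch of that
keep-set idea (the Symbol struct and the sample data are invented; nothing
here uses the LLVM API):

  #include <iostream>
  #include <set>
  #include <string>
  #include <vector>

  struct Symbol {
    std::string name;
    bool internal;   // analogous to hasLocalLinkage()
  };

  int main() {
    std::vector<Symbol> symbols = {
        {"main", false}, {"helper", true}, {"kept_by_llvm_used", true}};
    std::set<std::string> keep = {"kept_by_llvm_used"};  // like llvm.used

    for (auto &s : symbols)
      if (s.internal && keep.count(s.name) == 0)
        s.name.clear();                // analogous to V->setName("")

    for (const auto &s : symbols)
      std::cout << (s.name.empty() ? "<stripped>" : s.name) << "\n";
    // main, <stripped>, kept_by_llvm_used
  }
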
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
new file mode 100644
index 000000000000..8257d6b89ddc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -0,0 +1,367 @@
+//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_INSTCOMBINE_H
+#define INSTCOMBINE_INSTCOMBINE_H
+
+#include "InstCombineWorklist.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/TargetFolder.h"
+
+namespace llvm {
+ class CallSite;
+ class TargetData;
+ class DbgDeclareInst;
+ class MemIntrinsic;
+ class MemSetInst;
+
+/// SelectPatternFlavor - We can match a variety of different patterns for
+/// select operations.
+enum SelectPatternFlavor {
+ SPF_UNKNOWN = 0,
+ SPF_SMIN, SPF_UMIN,
+ SPF_SMAX, SPF_UMAX
+ //SPF_ABS - TODO.
+};
+
+/// getComplexity: Assign a complexity or rank value to LLVM Values...
+/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+static inline unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V)) return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+
+/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+/// just like the normal insertion helper, but also adds any new instructions
+/// to the instcombine worklist.
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+ : public IRBuilderDefaultInserter<true> {
+ InstCombineWorklist &Worklist;
+public:
+ InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+ }
+};
+
+/// InstCombiner - The -instcombine pass.
+class LLVM_LIBRARY_VISIBILITY InstCombiner
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ TargetData *TD;
+ bool MadeIRChange;
+public:
+ /// Worklist - All of the instructions that need to be simplified.
+ InstCombineWorklist Worklist;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+ }
+
+public:
+ virtual bool runOnFunction(Function &F);
+
+ bool DoOneIteration(Function &F, unsigned ItNum);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ TargetData *getTargetData() const { return TD; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Instruction *visitFMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C);
+ Instruction *visitOr (BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = 0);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitMalloc(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return 0; }
+
+private:
+ bool ShouldChangeType(const Type *From, const Type *To) const;
+ Value *dyn_castNegVal(Value *V) const;
+ Value *dyn_castFNegVal(Value *V) const;
+ const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+
+ /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated and is interesting to optimize out. If
+ /// the cast can be eliminated by some other simple transformation, we prefer
+ /// to do the simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty);
+
+ Instruction *visitCallSite(CallSite CS);
+ Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ Value *EmitGEPOffset(User *GEP);
+
+public:
+ // InsertNewInstBefore - insert an instruction New before instruction Old
+ // in the program. Add the new instruction to the worklist.
+ //
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && New->getParent() == 0 &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(&Old, New); // Insert inst
+ Worklist.Add(New);
+ return New;
+ }
+
+ // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
+ // debug loc.
+ //
+ Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
+ New->setDebugLoc(Old.getDebugLoc());
+ return InsertNewInstBefore(New, Old);
+ }
+
+ // ReplaceInstUsesWith - This method is to be used when an instruction is
+  // found to be dead, replaceable with another preexisting expression.  Here
+ // we add all uses of I to the worklist, replace all uses of I with the new
+ // value, then return I, so that the inst combiner will know that I was
+ // modified.
+ //
+ Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ DEBUG(errs() << "IC: Replacing " << I << "\n"
+ " with " << *V << '\n');
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ DEBUG(errs() << "IC: ERASE " << I << '\n');
+
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ Worklist.Add(Op);
+ }
+ Worklist.Remove(&I);
+ I.eraseFromParent();
+ MadeIRChange = true;
+ return 0; // Don't do anything with FI
+ }
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
+
+private:
+
+ /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+ /// operators which are associative or commutative.
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+ /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+ /// which some other binary operation distributes over either by factorizing
+ /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+ /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+ /// a win). Returns the simplified value, or null if it didn't simplify.
+ Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
+
+ /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
+ /// based on the demanded bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth=0);
+
+ /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+ /// SimplifyDemandedBits knows about. See if the instruction has any
+ /// properties that allow us to simplify its operands.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
+
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ Instruction *FoldOpIntoPhi(Instruction &I);
+
+ // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+ // operator and they all are only used by the PHI, PHI together their
+ // inputs, and do the operation once, to the result of the PHI.
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+ Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+};
+
+
+
+} // end namespace llvm.
+
+#endif
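
The header above is built around a worklist discipline: visit methods rewrite
one instruction, and anything whose operands changed is pushed back to be
revisited until nothing simplifies further. A minimal standalone sketch of
that driver pattern, with integers standing in for instructions and the toy
rewrite rule "n -> n/2 while n is even" (invented purely for illustration):

  #include <iostream>
  #include <vector>

  int main() {
    std::vector<int> worklist = {12, 7, 40};
    std::vector<int> done;

    while (!worklist.empty()) {
      int n = worklist.back();
      worklist.pop_back();
      if (n % 2 == 0)
        worklist.push_back(n / 2);   // rewrote it; revisit the result
      else
        done.push_back(n);           // no rule applies; it is in normal form
    }

    for (int n : done)
      std::cout << n << " ";         // 5 7 3  (40->20->10->5, 12->6->3)
    std::cout << "\n";
  }
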
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
new file mode 100644
index 000000000000..c36a9552e7a3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -0,0 +1,697 @@
+//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for add, fadd, sub, and fsub.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+
+// dyn_castFoldableMul - If this value is a multiply that can be folded into
+// other computations (because it has a constant operand), return the
+// non-constant operand of the multiply, and set CST to point to the multiplier.
+// Otherwise, return null.
+//
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
+ if (!V->hasOneUse() || !V->getType()->isIntegerTy())
+ return 0;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ if (I->getOpcode() == Instruction::Mul)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ return I->getOperand(0);
+ if (I->getOpcode() == Instruction::Shl)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ // The multiplier is really 1 << CST.
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ uint32_t CSTVal = CST->getLimitedValue(BitWidth);
+ CST = ConstantInt::get(V->getType()->getContext(),
+ APInt(BitWidth, 1).shl(CSTVal));
+ return I->getOperand(0);
+ }
+ return 0;
+}
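
The "multiplier is really 1 << CST" remark above is just the shift/multiply
identity. A standalone exhaustive check over 8-bit values (nothing
LLVM-specific, only the arithmetic fact being relied on):

  #include <cassert>
  #include <iostream>

  int main() {
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned c = 0; c < 8; ++c)
        assert(((x << c) & 0xFF) == ((x * (1u << c)) & 0xFF));
    std::cout << "x << c == x * (1 << c)  (mod 256) for all 8-bit x, c < 8\n";
  }
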
+
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // Add has the property that adding any two 2's complement numbers can only
+ // have one carry bit which can change a sign. As such, if LHS and RHS each
+ // have at least two sign bits, we know that the addition of the two values
+ // will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands only has one non-zero bit, and if the other operand
+ // has a known-zero bit in a more significant place than it (not including the
+ // sign bit) the ripple may go up to and fill the zero, but won't change the
+ // sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
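
The heuristic above can be checked exhaustively at a small bit width. In this
standalone sketch (the helper numSignBits is an invented stand-in for
ComputeNumSignBits), any two 8-bit operands that each have more than one sign
bit sum to a value that still fits in the signed 8-bit range:

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Redundant sign bits of an 8-bit value, counting the sign bit itself, so
  // the result is in [1, 8].
  static int numSignBits(int8_t v) {
    unsigned u = (uint8_t)v;
    unsigned sign = (u >> 7) & 1;
    int n = 1;
    for (int bit = 6; bit >= 0; --bit) {
      if (((u >> bit) & 1) != sign) break;
      ++n;
    }
    return n;
  }

  int main() {
    for (int a = -128; a <= 127; ++a)
      for (int b = -128; b <= 127; ++b)
        if (numSignBits((int8_t)a) > 1 && numSignBits((int8_t)b) > 1) {
          int sum = a + b;                     // computed in a wider type
          assert(sum >= -128 && sum <= 127);   // still fits in int8_t
        }
    std::cout << "no signed 8-bit overflow when both operands have >1 sign bit\n";
  }
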
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt &Val = CI->getValue();
+ if (Val.isSignBit())
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
+
+ Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
+ const APInt &RHSVal = CI->getValue();
+ unsigned ExtendAmt = 0;
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if (XorRHS->getValue() == -RHSVal) {
+ if (RHSVal.isPowerOf2())
+ ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
+ else if (XorRHS->getValue().isPowerOf2())
+ ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
+ }
+
+ if (ExtendAmt) {
+ APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
+ if (!MaskedValueIsZero(XorLHS, Mask))
+ ExtendAmt = 0;
+ }
+
+ if (ExtendAmt) {
+ Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
+ Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+ return BinaryOperator::CreateAShr(NewShl, ShAmt);
+ }
+ }
+ }
+
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // X + X --> X << 1
+ if (LHS == RHS) {
+ BinaryOperator *New =
+ BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+
+ ConstantInt *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ ConstantInt *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ }
+
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
+
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ Value *X = 0;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+ CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
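
The "A+B --> A|B iff A and B have no bits set in common" fold above rests on
the fact that a disjoint addition generates no carries. A standalone
exhaustive check of that fact over all pairs of 8-bit values (nothing
LLVM-specific is used):

  #include <cassert>
  #include <iostream>

  int main() {
    for (unsigned a = 0; a < 256; ++a)
      for (unsigned b = 0; b < 256; ++b)
        if ((a & b) == 0)                 // no bits in common
          assert((a + b) == (a | b));     // add and or agree, no carries
    std::cout << "a + b == a | b whenever a & b == 0 (8-bit)\n";
  }
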
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+    // X + -0.0 --> X  (-0.0 is the additive identity for fadd)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+ (I.getType())->getValueAPF()))
+ return ReplaceInstUsesWith(I, LHS);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS))
+ return BinaryOperator::CreateFSub(RHS, LHSV);
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS))
+ return BinaryOperator::CreateFSub(LHS, V);
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer). Return the result as a signed integer of intptr size.
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+ TargetData &TD = *getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Value *Result = Constant::getNullValue(IntPtrTy);
+
+ // If the GEP is inbounds, we know that none of the addressing operations will
+ // overflow in an unsigned sense.
+ bool isInBounds = cast<GEPOperator>(GEP)->isInBounds();
+
+ // Build a mask for high order bits.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+ if (Size)
+ Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
+ continue;
+ }
+
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ Constant *OC =
+ ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
+ continue;
+ }
+ // Convert to correct type.
+ if (Op->getType() != IntPtrTy)
+ Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+ if (Size != 1) {
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".idx", isInBounds /*NUW*/);
+ }
+
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
+ }
+ return Result;
+}
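
EmitGEPOffset materialises the usual address arithmetic: a struct-field index
contributes the field's byte offset, and an array index contributes index
times the element size. A standalone sketch with an invented struct S, checked
against real pointer arithmetic:

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>

  struct S {
    int32_t header;
    double  values[8];
  };

  int main() {
    S s = {};
    std::size_t idx = 3;

    // GEP-style computation: field offset plus scaled array index.
    std::size_t offset = offsetof(S, values) + idx * sizeof(double);

    // The same offset derived from actual pointer arithmetic.
    const char *base = reinterpret_cast<const char *>(&s);
    const char *elem = reinterpret_cast<const char *>(&s.values[idx]);
    assert(offset == static_cast<std::size_t>(elem - base));

    std::cout << "byte offset of s.values[3] = " << offset << "\n";
  }
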
+
+
+
+
+/// Optimize differences of pointers into the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ const Type *Ty) {
+ assert(TD && "Must have target data info for this");
+
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped = false;
+ GetElementPtrInst *GEP = 0;
+ ConstantExpr *CstGEP = 0;
+
+ // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+ // For now we require one side to be the base pointer "A" or a constant
+ // expression derived from it.
+ if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+ // (gep X, ...) - X
+ if (LHSGEP->getOperand(0) == RHS) {
+ GEP = LHSGEP;
+ Swapped = false;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
+ // (gep X, ...) - (ce_gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ LHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = LHSGEP;
+ Swapped = false;
+ }
+ }
+ }
+
+ if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+ // X - (gep X, ...)
+ if (RHSGEP->getOperand(0) == LHS) {
+ GEP = RHSGEP;
+ Swapped = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
+ // (ce_gep X, ...) - (gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ RHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = RHSGEP;
+ Swapped = true;
+ }
+ }
+ }
+
+ if (GEP == 0)
+ return 0;
+
+ // Emit the offset of the GEP and an intptr_t.
+ Value *Result = EmitGEPOffset(GEP);
+
+ // If we had a constant expression GEP on the other side offsetting the
+ // pointer, subtract it from the offset we have.
+ if (CstGEP) {
+ Value *CstOffset = EmitGEPOffset(CstGEP);
+ Result = Builder->CreateSub(Result, CstOffset);
+ }
+
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
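
The source-level pattern this targets is ordinary same-array pointer
subtraction, which the fold reduces to a constant element count. A tiny
standalone example of the input it is aiming at:

  #include <cstddef>
  #include <iostream>

  int main() {
    int A[16] = {};
    std::ptrdiff_t d = &A[10] - &A[0];   // the fold turns this into the constant 10
    std::cout << d << "\n";              // prints 10
  }
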
+
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)-(A*C) -> A*(B-C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return Res;
+ }
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ // Replace (-1 - A) with (~A).
+ if (match(Op0, m_AllOnes()))
+ return BinaryOperator::CreateNot(Op1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // C - ~X == X + (1+C)
+ Value *X = 0;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ Value *X; ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateLShr(X, CI);
+ }
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ // C - zext(bool) -> bool ? C - 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+
+ // C-(X+C2) --> (C-C2)-X
+ ConstantInt *C2;
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+ }
+
+
+ { Value *Y;
+ // X-(X+Y) == -Y X-(Y+X) == -Y
+ if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+ match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ return BinaryOperator::CreateNeg(Y);
+
+ // (X-Y)-X == -Y
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Y);
+ }
+
+ if (Op1->hasOneUse()) {
+ Value *X = 0, *Y = 0, *Z = 0;
+ Constant *C = 0;
+ ConstantInt *CI = 0;
+
+ // (X - (Y - Z)) --> (X + (Z - Y)).
+ if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+ return BinaryOperator::CreateAdd(Op0,
+ Builder->CreateSub(Z, Y, Op1->getName()));
+
+ // (X - (X & Y)) --> (X & ~Y)
+ //
+ if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+ match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ return BinaryOperator::CreateAnd(Op0,
+ Builder->CreateNot(Y, Y->getName() + ".not"));
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
+ match(Op0, m_Zero()))
+ return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+ // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+ if (Value *XNeg = dyn_castNegVal(X))
+ return BinaryOperator::CreateShl(XNeg, Y);
+
+ // X - X*C --> X * (1-C)
+ if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
+ return BinaryOperator::CreateMul(Op0, CP1);
+ }
+
+ // X - X<<C --> X * (1-(1<<C))
+ if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *One = ConstantInt::get(I.getType(), 1);
+ C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
+ return BinaryOperator::CreateMul(Op0, C);
+ }
+
+ // X - A*-B -> X + A*B
+ // X - -A*B -> X + A*B
+ Value *A, *B;
+ if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+ match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+
+ // X - A*CI -> X + A*-CI
+ // X - CI*A -> X + A*-CI
+ if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
+ match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+ Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ return BinaryOperator::CreateAdd(Op0, NewMul);
+ }
+ }
+
+ ConstantInt *C1;
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+ if (X == Op1) // X*C - X --> X * (C-1)
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
+
+ ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
+ }
+
+ // Optimize pointer differences in the same array into a size. Consider:
+ // &A[10] - &A[0]: we should compile this to "10".
+ if (TD) {
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+ }
+
+ return 0;
+}
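+
+// Worked example (illustrative) of the "X-(X+Y) == -Y" fold above: given
+//   %t = add i32 %x, %y
+//   %s = sub i32 %x, %t
+// %s is replaced by a negation of %y, i.e. "sub i32 0, %y", independent of
+// the value of %x.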
+
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFAdd(Op0, V);
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
new file mode 100644
index 000000000000..64ea36fb1e9d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -0,0 +1,2306 @@
+//===- InstCombineAndOrXor.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitAnd, visitOr, and visitXor functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to). This happens in cases where the ~ can be eliminated.
+static inline bool isFreeToInvert(Value *V) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if they have a single use.
+ if (CmpInst *CI = dyn_cast<CmpInst>(V))
+ return CI->hasOneUse();
+
+ return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+ // If this is not(not(x)) don't return that this is a not: we want the two
+ // not's to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!isFreeToInvert(Operand))
+ return Operand;
+ }
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(C->getType(), ~C->getValue());
+ return 0;
+}
+
+
+/// getICmpCode - Encode an icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// bit 0: A > B
+/// bit 1: A == B
+/// bit 2: A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+static unsigned getICmpCode(const ICmpInst *ICI) {
+ switch (ICI->getPredicate()) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ return 0;
+ }
+}
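+
+// Worked example (illustrative): (A u< B) has code 4 (100) and (A u> B) has
+// code 1 (001); or'ing the codes gives 5 (101), which getICmpValue below
+// turns back into (A != B), matching the table above.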
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
+/// predicate into a three bit mask. It also returns whether it is an ordered
+/// predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+ isOrdered = false;
+ switch (CC) {
+ case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
+ case FCmpInst::FCMP_UNO: return 0; // 000
+ case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
+ case FCmpInst::FCMP_UGT: return 1; // 001
+ case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
+ case FCmpInst::FCMP_UEQ: return 2; // 010
+ case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
+ case FCmpInst::FCMP_UGE: return 3; // 011
+ case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
+ case FCmpInst::FCMP_ULT: return 4; // 100
+ case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
+ case FCmpInst::FCMP_UNE: return 5; // 101
+ case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
+ case FCmpInst::FCMP_ULE: return 6; // 110
+ // True -> 7
+ default:
+ // Not expecting FCMP_FALSE or FCMP_TRUE.
+ llvm_unreachable("Unexpected FCmp predicate!");
+ return 0;
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or a brand
+/// new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (Code) {
+ default: assert(0 && "Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return Builder->CreateICmp(Pred, LHS, RHS);
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns an
+/// opcode and two operands into an FCmp instruction. isordered is passed in
+/// to determine which kind of predicate to use in the new fcmp instruction.
+static Value *getFCmpValue(bool isordered, unsigned code,
+ Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (code) {
+ default: assert(0 && "Illegal FCmp code!");
+ case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
+ case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
+ case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
+ case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break;
+ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
+ case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
+ case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
+ case 7:
+ if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+ Pred = FCmpInst::FCMP_ORD; break;
+ }
+ return Builder->CreateFCmp(Pred, LHS, RHS);
+}
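+
+// Worked example (illustrative): code 3 with isordered=true selects FCMP_OGE,
+// so the builder emits "fcmp oge %x, %y" for operands %x and %y; code 7 with
+// isordered=false is the always-true case and folds to the constant i1 true.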
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
+
+// OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
+// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+// guaranteed to be a binary operator.
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = 0;
+ if (!Op->isShift())
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Value *And = Builder->CreateAnd(X, AndRHS);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Op->hasOneUse()){
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+ if (TogetherCI && !TogetherCI->isZero()){
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
+ }
+
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding a one to a single bit bit-field should be turned into an XOR
+ // of the bit. First thing to check is to see if this AND is with a
+ // single bit constant.
+ const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+
+ // If there is only one bit set.
+ if (AndRHSV.isPowerOf2()) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can affect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+ AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask)
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
+
+ if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask)
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op);
+
+ if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return 0;
+}
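+
+// Worked example (illustrative) of the Xor case above: (X ^ 4) & 12, with the
+// xor having a single use, becomes (X & 12) ^ (4 & 12), i.e. (X & 12) ^ 4.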
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// whether to treat V, Lo, and Hi as signed or not. New instructions are
+/// emitted through the Builder.
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return ConstantInt::getFalse(V->getContext());
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpULT(Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return ConstantInt::getTrue(V->getContext());
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpUGT(Add, LowerBound);
+}
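+
+// Worked example (illustrative): with isSigned=false and Inside=true, a test
+// for 5 <= V < 10 is emitted as
+//   %off = add i32 %V, -5
+//   %cmp = icmp ult i32 %off, 5
+// i.e. the single unsigned comparison (V-5) <u (10-5) described above (the
+// add is named with a ".off" suffix).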
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // look for the first zero bit after the run of ones
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // look for the first non-zero bit
+ ME = V.getActiveBits();
+ return true;
+}
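+
+// Worked example (illustrative): for a 32-bit Val of 0x000000F0 the run of
+// ones occupies bits 4..7, and the arithmetic above yields MB == 5 and
+// ME == 8, i.e. the 1-based positions of the lowest and highest set bits.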
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub. If we can fold one of
+/// the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return 0;
+ case Instruction::And:
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+ // part, we don't need any explicit masks to take them out of A. If that
+ // is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask))
+ break;
+ }
+ }
+ return 0;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
+ break;
+ return 0;
+ }
+
+ if (isSub)
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
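+
+// Worked example (illustrative): for ((A & 7) + B) & 7 the mask 7 is of the
+// 0+1+ form and N & Mask == Mask, so the helper returns (A + B) and the
+// caller re-applies the outer "& 7", matching the first xform listed above.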
+
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven that (A & C) == C. This
+/// is trivial if C == A, or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares that the comparison is true only
+/// if (A & B) == A, i.e. all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares that the comparison is true only
+/// if (A & B) == 0, i.e. all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares that (A & B) == C and C might or might not
+/// contain any number of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The part "Not" means that in the above descriptions "==" should be replaced
+/// by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst != 0 && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+ result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ }
+ else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C)
+ {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ }
+ else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+ if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // or L11 & L12 == L21 & L22. The same goes for RHS.
+ // Now we must find those components L** and R** that are equal, so
+ // that we can extract the parameters A, B, C, D, and E for the canonical
+ // form above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ }
+ else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
+ L21 = L22 = 0;
+ }
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R2; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+ if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R1; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R1; ok = true;
+ }
+ else
+ return 0;
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ }
+ else if (L12 == A) {
+ B = L11; C = L2;
+ }
+ else if (L21 == A) {
+ B = L22; C = L1;
+ }
+ else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate NEWCC,
+ llvm::InstCombiner::BuilderTy* Builder) {
+ Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ if (mask == 0) return 0;
+
+ if (NEWCC == ICmpInst::ICMP_NE)
+ mask >>= 1; // treat "Not"-states as normal states
+
+ if (mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ // we can't use C as zero, because we might actually handle
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // with B and D, having a single bit set
+ Value* zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NEWCC, newAnd, zero);
+ }
+ else if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr);
+ }
+ else if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value* newAnd1 = Builder->CreateAnd(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ return Builder->CreateICmp(NEWCC, newAnd, A);
+ }
+ else if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+ // we can't simply use C and E, because we might actually handle
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // with B and D, having a single bit set
+
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (CCst == 0) return 0;
+ if (LHS->getPredicate() != NEWCC)
+ CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (ECst == 0) return 0;
+ if (RHS->getPredicate() != NEWCC)
+ ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+ ConstantInt* MCst = dyn_cast<ConstantInt>(
+ ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+ ConstantExpr::getXor(CCst, ECst)) );
+ // if there is a conflict we should actually return a false for the
+ // whole construct
+ if (!MCst->isZero())
+ return 0;
+ Value *newOr1 = Builder->CreateOr(B, D);
+ Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value *newAnd = Builder->CreateAnd(A, newOr1);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+ }
+ return 0;
+}
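+
+// Worked example (illustrative): (icmp eq (A & 1), 0) & (icmp eq (A & 2), 0)
+// classifies as Mask_AllZeroes on both sides, so the fold above produces
+//   %m = and i32 %A, 3
+//   %c = icmp eq i32 %m, 0
+// testing both low bits of %A with a single compare.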
+
+/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
+ if (LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
+ if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+ Value *NewAnd = Builder->CreateAnd(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+ }
+
+ // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
+ if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+ // where CMAX is the all ones value for the truncated type,
+ // iff the lower bits of C2 and CA are zero.
+ if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) &&
+ LHS->hasOneUse() && RHS->hasOneUse()) {
+ Value *V;
+ ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+
+ // (trunc x) == C1 & (and x, CA) == C2
+ if (match(Val2, m_Trunc(m_Value(V))) &&
+ match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = RHSCst;
+ BigCst = LHSCst;
+ }
+ // (and x, CA) == C2 & (trunc x) == C1
+ else if (match(Val, m_Trunc(m_Value(V))) &&
+ match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = LHSCst;
+ BigCst = RHSCst;
+ }
+
+ if (SmallCst && BigCst) {
+ unsigned BigBitSize = BigCst->getType()->getBitWidth();
+ unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+ // Check that the low bits are zero.
+ APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+ if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+ Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+ APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+ Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+ return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+ }
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // Make a constant range that's the intersection of the two icmp ranges.
+ // If the intersection is empty, we know that the result is false.
+ ConstantRange LHSRange =
+ ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange RHSRange =
+ ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+
+ if (LHSRange.intersectWith(RHSRange).isEmptySet())
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and and'ing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
+ // (from the icmp folding check above) that the two constants
+ // are not equal and that the larger constant is on the RHS.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return LHS;
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return Builder->CreateICmpULT(Val, LHSCst);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return Builder->CreateICmpSLT(Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_NE:
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return LHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return LHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return RHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
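+
+// Worked example (illustrative): (icmp ult %x, 8) & (icmp ult %y, 8) hits the
+// shared power-of-two constant case above and becomes
+//   %o = or i32 %x, %y
+//   %c = icmp ult i32 %o, 8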
+
+/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getFalse(LHS->getContext());
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ if (Op0CC == FCmpInst::FCMP_TRUE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_TRUE)
+ return LHS;
+
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op1Pred == 0) {
+ std::swap(LHS, RHS);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> ueq
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (Op0Ordered == Op1Ordered)
+ return RHS;
+
+ // uno && oeq -> uno && (ord && eq) -> false
+ // uno && ord -> false
+ if (!Op0Ordered)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+
+ return 0;
+}
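+
+// Worked example (illustrative): (fcmp ord %x, 0.0) & (fcmp ord %y, 0.0) takes
+// the constant-operand path above and folds to "fcmp ord %x, %y", a single
+// "neither operand is a NaN" test.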
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt &AndRHSMask = AndRHS->getValue();
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Xor:
+ case Instruction::Or: {
+ // If the mask is only needed on one incoming arm, push it up.
+ if (!Op0I->hasOneUse()) break;
+
+ APInt NotAndRHS(~AndRHSMask);
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
+ }
+
+ break;
+ }
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ if (MaskedValueIsZero(Op0LHS, Mask)) {
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ }
+
+ // If this is an integer truncation, and if the source is an 'and' with
+ // immediate, transform it. This frequently occurs for bitfield accesses.
+ {
+ Value *X = 0; ConstantInt *YC = 0;
+ if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+ // Change: and (trunc (and X, YC) to T), C2
+ // into : and (trunc X to T), trunc(YC) & C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+
+ // (~A & ~B) == (~(A | B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
+ {
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1) { // (A^B)&A -> A&(A^B)
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ } else if (B == Op1) { // (A^B)&B -> B&(B^A)
+ cast<BinaryOperator>(Op0)->swapOperands();
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ }
+ }
+
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == Op0) { // B&(A^B) -> B&(B^A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we would enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never enter the loop.
+ if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+ SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ // Only do this if the casts both really cause code to be generated.
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ return Changed ? &I : 0;
+}
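+
+// Worked example (illustrative) of the De Morgan fold in visitAnd: with %a and
+// %b non-constant values and both xors having a single use,
+//   %na = xor i32 %a, -1
+//   %nb = xor i32 %b, -1
+//   %r = and i32 %na, %nb
+// is rewritten so that %r becomes the complement of "or i32 %a, %b", i.e. a
+// single or followed by one not.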
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap. The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero. This expression is
+/// accepted, and the high byte of ByteValues is set to %X to indicate a
+/// correct match.
+///
+/// This function returns true if the match was unsuccessful and false if it
+/// succeeded.
+/// On entry to the function the "OverallLeftShift" is a signed integer value
+/// indicating the number of bytes that the subexpression is later shifted. For
+/// example, if the expression is later right shifted by 16 bits, the
+/// OverallLeftShift value would be -2 on entry. This is used to specify which
+/// byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
+/// this function to working on up to 32-byte (256 bit) values. ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+ SmallVector<Value*, 8> &ByteValues) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues) ||
+ CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical shift by a constant multiple of 8, recurse with
+ // OverallLeftShift and ByteMask adjusted.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned ShAmt =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined and of a byte value.
+ if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ return true;
+
+ unsigned ByteShift = ShAmt >> 3;
+ if (I->getOpcode() == Instruction::Shl) {
+ // X << 2 -> collect(X, +2)
+ OverallLeftShift += ByteShift;
+ ByteMask >>= ByteShift;
+ } else {
+ // X >>u 2 -> collect(X, -2)
+ OverallLeftShift -= ByteShift;
+ ByteMask <<= ByteShift;
+ ByteMask &= (~0U >> (32-ByteValues.size()));
+ }
+
+ if (OverallLeftShift >= (int)ByteValues.size()) return true;
+ if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical 'and' with a mask that clears bytes, clear the
+ // corresponding bytes in ByteMask.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+ unsigned NumBytes = ByteValues.size();
+ APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+ // If this byte is masked out by a later operation, we don't care what
+ // the and mask is.
+ if ((ByteMask & (1 << i)) == 0)
+ continue;
+
+ // If the AndMask is all zeros for this byte, clear the bit.
+ APInt MaskB = AndMask & Byte;
+ if (MaskB == 0) {
+ ByteMask &= ~(1U << i);
+ continue;
+ }
+
+ // If the AndMask is not all ones for this byte, it's not a bytezap.
+ if (MaskB != Byte)
+ return true;
+
+ // Otherwise, this byte is kept.
+ }
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap. Some observations: 1) if more than one byte
+ // is demanded from this input, then it could not be successfully assembled
+ // into a byteswap. At least one of the two bytes would not be aligned with
+ // its ultimate destination.
+ if (!isPowerOf2_32(ByteMask)) return true;
+ unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+
+ // 2) The input and ultimate destinations must line up: if byte 3 of an i32
+ // is demanded, it needs to go into byte 0 of the result. This means that the
+ // byte needs to be shifted until it lands in the right byte bucket. The
+ // shift amount depends on the position: if the byte is coming from the high
+ // part of the value (e.g. byte 3) then it must be shifted right. If from the
+ // low part, it must be shifted left.
+ unsigned DestByteNo = InputByteNo + OverallLeftShift;
+ if (InputByteNo < ByteValues.size()/2) {
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+ } else {
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+ }
+
+ // If the destination byte value is already defined, the values are or'd
+ // together, which isn't a bswap (unless it's an or of the same bits).
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ return true;
+ ByteValues[DestByteNo] = V;
+ return false;
+}
+
+/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
+/// If so, insert the new bswap intrinsic and return it.
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy || ITy->getBitWidth() % 16 ||
+ // ByteMask only allows up to 32-byte values.
+ ITy->getBitWidth() > 32*8)
+ return 0; // Can only bswap pairs of bytes. Can't do vectors.
+
+ /// ByteValues - For each byte of the result, we keep track of which value
+ /// defines each byte.
+ SmallVector<Value*, 8> ByteValues;
+ ByteValues.resize(ITy->getBitWidth()/8);
+
+ // Try to find all the pieces corresponding to the bswap.
+ uint32_t ByteMask = ~0U >> (32-ByteValues.size());
+ if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ return 0;
+
+ // Check to see if all of the bytes come from the same value.
+ Value *V = ByteValues[0];
+ if (V == 0) return 0; // Didn't find a byte? Must be zero.
+
+ // Check to make sure that all of the bytes come from the same value.
+ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+ if (ByteValues[i] != V)
+ return 0;
+ Module *M = I.getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ return CallInst::Create(F, V);
+}
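
To see the idiom MatchBSwap hunts for, here is a minimal standalone C++ sketch (an editorial illustration, not part of the patch) that checks the shift/or/mask form against a byte-by-byte reference swap:

    #include <cassert>
    #include <cstdint>

    // Reference byte swap, written byte by byte.
    static uint32_t bswap_ref(uint32_t x) {
      return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
             ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
    }

    // The shift/or/mask idiom the matcher recognizes: every byte of the
    // result is taken from exactly one byte of the same input value.
    static uint32_t bswap_idiom(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
             ((x << 8) & 0x00ff0000u) | (x << 24);
    }

    int main() {
      uint32_t tests[] = {0u, 1u, 0x12345678u, 0xdeadbeefu, 0xffffffffu};
      for (uint32_t x : tests)
        assert(bswap_ref(x) == bswap_idiom(x));
    }
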
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
+/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
+/// we can simplify this expression to "cond ? C : D or B".
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+ Value *C, Value *D) {
+ // If A is not a select of -1/0, this cannot match.
+ Value *Cond = 0;
+ if (!match(A, m_SExt(m_Value(Cond))) ||
+ !Cond->getType()->isIntegerTy(1))
+ return 0;
+
+ // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+ if (match(D, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+ if (match(D, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+
+ // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+ if (match(B, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ return 0;
+}
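
A small standalone C++ check of the identity behind MatchSelectFromAndOr (illustrative only; the uint32_t all-ones/all-zeros masks stand in for the sign-extended i1 condition):

    #include <cassert>
    #include <cstdint>

    // (A & C) | (B & D) with A == (cond ? -1 : 0) and D == ~A collapses to
    // a select: cond ? C : B.
    static uint32_t and_or_form(bool cond, uint32_t C, uint32_t B) {
      uint32_t A = cond ? ~0u : 0u;   // stands in for sext(i1 cond)
      uint32_t D = ~A;                // the complementary mask
      return (A & C) | (B & D);
    }

    int main() {
      uint32_t tests[] = {0u, 7u, 13u, 0x80000000u, 0xffffffffu};
      for (int cond = 0; cond <= 1; ++cond)
        for (uint32_t C : tests)
          for (uint32_t B : tests)
            assert(and_or_form(cond != 0, C, B) == (cond ? C : B));
    }
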
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
+ if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+ Value *NewAnd = Builder->CreateAnd(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+ }
+ }
+
+ // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+ // iff C2 + CA == C1.
+ if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddCst;
+ if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+ if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+ return Builder->CreateICmpULE(Val, LHSCst);
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the result
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
+ // icmp folding check above), that the two constants are not
+ // equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return RHS;
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return LHS;
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(false))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return RHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(true))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return RHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return LHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ return LHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
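
One of the folds above, (X == 13 | X == 14) -> (X-13) u< 2, rests on unsigned wraparound turning two adjacent points into a half-open range test. A rough standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Values below 13 wrap around to a huge unsigned number after the
      // subtraction, so only 13 and 14 land inside the [0,2) range.
      for (uint64_t i = 0; i <= 0x1ffff; ++i) {
        uint32_t X = (uint32_t)i;
        bool orig   = (X == 13) || (X == 14);
        bool folded = (uint32_t)(X - 13u) < 2u;
        assert(orig == folded);
      }
    }
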
+
+/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getTrue(LHS->getContext());
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return LHS;
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+ return 0;
+}
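
The UNO fold above boils down to "either operand is NaN". A standalone C++ sanity check of that equivalence, using std::isnan as a stand-in for fcmp uno:

    #include <cassert>
    #include <cmath>
    #include <limits>

    // "fcmp uno a, b" is true iff either operand is NaN.
    static bool uno(double a, double b) {
      return std::isnan(a) || std::isnan(b);
    }

    int main() {
      double nan = std::numeric_limits<double>::quiet_NaN();
      double tests[] = {0.0, 1.5, -3.0, nan};
      for (double x : tests)
        for (double y : tests)
          // (x uno 0.0) | (y uno 0.0)  ==  (x uno y)
          assert((uno(x, 0.0) || uno(y, 0.0)) == uno(x, y));
    }
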
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
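
A standalone check of the masked-or identity this helper implements, assuming the two constants are complements (C1 ^ C2 == -1), e.g. the (&1, &-2) pair handled in visitOr below:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((A | B) & C1) | (B & C2)  -->  (A & C1) | B   when C2 == ~C1.
      const uint32_t C1 = 1u, C2 = ~1u;
      uint32_t tests[] = {0u, 1u, 2u, 0x5au, 0x80000001u, 0xffffffffu};
      for (uint32_t A : tests)
        for (uint32_t B : tests)
          assert((((A | B) & C1) | (B & C2)) == ((A & C1) | B));
    }
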
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) == 0.
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(I.getContext(),
+ RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(I.getContext(),
+ C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+  // (A >> B) | (C << D) and (A << B) | (C >> D)  -> bswap if possible.
+ if (match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value())) ||
+ (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+ }
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // (A & C)|(B & D)
+ Value *C = 0, *D = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = 0, *V2 = 0;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ if (C1->getValue() == ~C2->getValue()) {
+ if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ }
+ }
+
+ if ((C1->getValue() & C2->getValue()) == 0) {
+ // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
+ // iff (C1&C2) == 0 and (N&~C1) == 0
+ if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
+ (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(A,
+ ConstantInt::get(A->getContext(),
+ C1->getValue()|C2->getValue()));
+ // Or commutes, try both ways.
+ if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
+ (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(B,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+
+ // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
+ // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
+ ConstantInt *C3 = 0, *C4 = 0;
+ if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
+ (C3->getValue() & ~C1->getValue()) == 0 &&
+ match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
+ (C4->getValue() & ~C2->getValue()) == 0) {
+ V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ return BinaryOperator::CreateAnd(V2,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+ }
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
+ // Don't do this for vector select idioms, the code generator doesn't handle
+ // them well yet.
+ if (!I.getType()->isVectorTy()) {
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+ }
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Or(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Or(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
+
+ // Canonicalize xor to the RHS.
+ if (match(Op0, m_Xor(m_Value(), m_Value())))
+ std::swap(Op0, Op1);
+
+ // A | ( A ^ B) -> A | B
+ // A | (~A ^ B) -> A | ~B
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (Op0 == A || Op0 == B)
+ return BinaryOperator::CreateOr(A, B);
+
+ if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(B, B->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(A, A->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+
+ // A | ~(A | B) -> A | ~B
+ // A | ~(A ^ B) -> A | ~B
+ if (match(Op1, m_Not(m_Value(A))))
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
+ if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
+ Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
+ B->getOpcode() == Instruction::Xor)) {
+ Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
+ B->getOperand(0);
+ Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ CastInst *Op1C = dyn_cast<CastInst>(Op1);
+ if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+ }
+
+ // or(sext(A), B) -> A ? -1 : B where A is an i1
+ // or(A, sext(B)) -> B ? -1 : A where B is an i1
+ if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
+ }
+
+ return Changed ? &I : 0;
+}
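
Two of the bit identities visitOr leans on, De Morgan and building xor from masked halves, can be verified exhaustively on bytes; a short illustrative check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Exhaustive 8-bit check of:
      //   (~A | ~B) == ~(A & B)              De Morgan
      //   (A & ~B) | (~A & B) == A ^ B       xor from masked halves
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = (uint8_t)a, B = (uint8_t)b;
          assert((uint8_t)(~A | ~B) == (uint8_t)~(A & B));
          assert((uint8_t)((A & ~B) | (~A & B)) == (uint8_t)(A ^ B));
        }
    }
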
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+        // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+        // ~(X | Y) --> (~X & ~Y) - De Morgan's Law
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
+ isFreeToInvert(Op0I->getOperand(1))) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
+
+ } else if (Op0I->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ }
+ }
+ }
+
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (RHS->isOne() && Op0->hasOneUse())
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
+ return CmpInst::Create(CI->getOpcode(),
+ CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1));
+
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(I.getContext()),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+          // ~(X+c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(I.getContext(),
+ RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return ReplaceInstUsesWith(I,
+ getICmpValue(isSigned, Code, Op0, Op1, Builder));
+ }
+ }
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
+ // Only do this if the casts both really cause code to be generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
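
As a sanity check on two of the xor folds above, (A&B)^(A|B) -> A^B and ~(X+c) -> (-c-1)-X, here is a short standalone C++ sketch; all arithmetic is modulo 2^32:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t tests[] = {0u, 1u, 13u, 0x12345678u, 0x80000000u, 0xffffffffu};
      for (uint32_t A : tests)
        for (uint32_t B : tests) {
          // (A & B) ^ (A | B) == A ^ B
          assert(((A & B) ^ (A | B)) == (A ^ B));
          // ~(X + c) == (-c - 1) - X, everything reduced modulo 2^32.
          uint32_t X = A, c = B;
          assert(~(X + c) == (0u - c - 1u) - X);
        }
    }
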
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 000000000000..537f2b318aa9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,1317 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+/// getPromotedType - Return the specified type promoted as it would be to pass
+/// though a va_arg area.
+static const Type *getPromotedType(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
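
The promotion mirrors C's default argument promotions: anything narrower than int travels through "..." as an int, which is what the i32 widening above reflects. An illustrative (editorial) example:

    #include <cassert>
    #include <cstdarg>

    // A char or short passed through "..." arrives as an int and must be
    // read back at the promoted type.
    static int first_vararg_as_int(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int v = va_arg(ap, int);
      va_end(ap);
      return v;
    }

    int main() {
      char c = 42;
      short s = -7;
      assert(first_vararg_as_int(1, c) == 42);
      assert(first_vararg_as_int(1, s) == -7);
    }
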
+
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MinAlign, false));
+ return MI;
+ }
+
+ // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+ // load/store.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
+ if (MemOpLength == 0) return 0;
+
+  // Source and destination pointer types are always "i8*" for the intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ unsigned Size = MemOpLength->getZExtValue();
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ unsigned SrcAddrSp =
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
+ unsigned DstAddrSp =
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
+
+ const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+ Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+  // from double* to i8*. We'd much rather use a double load+store than an
+  // i64 load+store here, because this improves the odds that the source or
+  // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ if (StrippedDest != MI->getArgOperand(0)) {
+ const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ ->getElementType();
+ if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ while (!SrcETy->isSingleValueType()) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (STy->getNumElements() == 1)
+ SrcETy = STy->getElementType(0);
+ else
+ break;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ if (ATy->getNumElements() == 1)
+ SrcETy = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ if (SrcETy->isSingleValueType()) {
+ NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+ NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+ }
+ }
+ }
+
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
+ L->setAlignment(SrcAlign);
+ StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
+ S->setAlignment(DstAlign);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
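
Semantically, the rewrite for a constant 8-byte copy is "one integer-sized load plus one store". A standalone C++ sketch of that equivalence (memcpy is used for the load and store so the example stays within defined behavior):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      unsigned char src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      unsigned char a[8], b[8];

      std::memcpy(a, src, 8);            // the original 8-byte memcpy

      uint64_t tmp;                      // the "single i64 load" ...
      std::memcpy(&tmp, src, sizeof tmp);
      std::memcpy(b, &tmp, sizeof tmp);  // ... and the "single i64 store"

      assert(std::memcmp(a, b, 8) == 0);
    }
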
+
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
+ return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+ Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+ Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
+
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
+ MI->isVolatile());
+ S->setAlignment(Alignment);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
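
The n == 8 case of the memset fold relies on the fill byte being replicated across the word by multiplying with 0x0101010101010101; an illustrative check that the single wide store writes the same bytes:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      for (unsigned c = 0; c < 256; ++c) {
        unsigned char a[8], b[8];
        std::memset(a, (int)c, 8);

        // Replicate the fill byte into every byte of a 64-bit word and store
        // it once; memcpy stands in for the aligned 64-bit store.
        uint64_t fill = (uint64_t)c * 0x0101010101010101ULL;
        std::memcpy(b, &fill, sizeof fill);

        assert(std::memcmp(a, b, 8) == 0);
      }
    }
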
+
+/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ if (isFreeCall(&CI))
+ return visitFree(CI);
+ if (isMalloc(&CI))
+ return visitMalloc(CI);
+
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient.
+ }
+ }
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return 0;
+
+ // If we have a memmove and the source operation is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getParent()->getParent()->getParent();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+ Changed = true;
+ }
+ }
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ // memmove(x,x,size) -> noop.
+ if (MTI->getSource() == MTI->getDest())
+ return EraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ // We need target data for just about everything so depend on it.
+ if (!TD) break;
+
+ const Type *ReturnTy = CI.getType();
+ uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
+
+ // Get to the real allocated thing and offset as fast as possible.
+ Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
+
+ uint64_t Offset = 0;
+ uint64_t Size = -1ULL;
+
+ // Try to look through constant GEPs.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
+ if (!GEP->hasAllConstantIndices()) break;
+
+ // Get the current byte offset into the thing. Use the original
+ // operand in case we're looking through a bitcast.
+ SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+ Ops.data(), Ops.size());
+
+ Op1 = GEP->getPointerOperand()->stripPointerCasts();
+
+ // Make sure we're not a constant offset from an external
+ // global.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
+ if (!GV->hasDefinitiveInitializer()) break;
+ }
+
+ // If we've stripped down to a single global variable that we
+ // can know the size of then just return that.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ Size = TD->getTypeAllocSize(C->getType());
+ } else {
+ // Can't determine size of the GV.
+ Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ // Get alloca size.
+ if (AI->getAllocatedType()->isSized()) {
+ Size = TD->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C) break;
+ Size *= C->getZExtValue();
+ }
+ }
+ } else if (CallInst *MI = extractMallocCall(Op1)) {
+ // Get allocation size.
+ const Type* MallocType = getMallocAllocatedType(MI);
+ if (MallocType && MallocType->isSized())
+ if (Value *NElems = getMallocArraySize(MI, TD, true))
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
+ }
+
+ // Do not return "I don't know" here. Later optimization passes could
+ // make it possible to evaluate objectsize to a constant.
+ if (Size == -1ULL)
+ break;
+
+ if (Size < Offset) {
+ // Out of bound reference? Negative index normalized to large
+ // index? Just return "I don't know".
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
+ }
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
+ }
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
+
+ // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
+ if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap) {
+ unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
+ TI->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(Operand->getType(), C);
+ Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
+ return new TruncInst(V, TI->getType());
+ }
+ }
+
+ break;
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return ReplaceInstUsesWith(CI, II->getArgOperand(0));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getArgOperand(0));
+ }
+ break;
+ case Intrinsic::cttz: {
+ // If all bits below the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned TrailingZeros = KnownOne.countTrailingZeros();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, TrailingZeros)));
+
+ }
+ break;
+ case Intrinsic::ctlz: {
+ // If all bits above the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned LeadingZeros = KnownOne.countLeadingZeros();
+ APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, LeadingZeros)));
+
+ }
+ break;
+ case Intrinsic::uadd_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt Mask = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
+ bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
+
+ if (LHSKnownNegative || LHSKnownPositive) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
+ bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Value *Add = Builder->CreateAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getTrue(II->getContext())
+ };
+ const StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+
+ if (LHSKnownPositive && RHSKnownPositive) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Value *Add = Builder->CreateNUWAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ const StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+ }
+ // FALL THROUGH uadd into sadd
+ case Intrinsic::sadd_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(0)) ||
+ isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // If multiplying the maximum values does not overflow then we can turn
+ // this into a plain NUW mul.
+ bool Overflow;
+ LHSMax.umul_ov(RHSMax, Overflow);
+ if (!Overflow) {
+ Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ Builder->getFalse()
+ };
+ Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
+ return InsertValueInst::Create(Struct, Mul, 0);
+ }
+ } // FALL THROUGH
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+ return new StoreInst(II->getArgOperand(1), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ DemandedElts, UndefElts)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+
+ case Intrinsic::x86_sse41_pmovsxbw:
+ case Intrinsic::x86_sse41_pmovsxwd:
+ case Intrinsic::x86_sse41_pmovsxdq:
+ case Intrinsic::x86_sse41_pmovzxbw:
+ case Intrinsic::x86_sse41_pmovzxwd:
+ case Intrinsic::x86_sse41_pmovzxdq: {
+    // pmov{s|z}x ignores the upper half of its input vector.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ unsigned LowHalfElts = VWidth / 2;
+ APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
+ APInt UndefElts(VWidth, 0);
+ if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ InputDemandedElts,
+ UndefElts)) {
+ II->setArgOperand(0, TmpV);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ Idx&15, false), "tmp");
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ i, false), "tmp");
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
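+// For illustration, with a hypothetical variadic declaration @my_printf:
+//   %q = bitcast i32* %p to i8*
+//   call void (i8*, ...)* @my_printf(i8* %fmt, i8* %q)
+// Here the bitcast is lossless and the argument is not ByVal, so the cast can
+// be dropped and i32* %p passed through the varargs area directly.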
+
+namespace {
+class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+ InstCombiner *IC;
+protected:
+ void replaceCall(Value *With) {
+ NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+public:
+ InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
+ Instruction *NewInstruction;
+};
+} // end anonymous namespace
+
+// Try to fold some different types of calls here.
+// Currently we're only working with the checking functions, memcpy_chk,
+// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
+// strcat_chk and strncat_chk.
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
+ if (CI->getCalledFunction() == 0) return 0;
+
+ InstCombineFortifiedLibCalls Simplifier(this);
+ Simplifier.fold(CI, TD);
+ return Simplifier.NewInstruction;
+}
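+// For illustration: a fortified call such as memcpy_chk(dst, src, len, -1)
+// can be folded to a plain memcpy(dst, src, len), since an all-ones object
+// size means the check can never fail; the same holds when the object size is
+// a constant known to be at least len.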
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ bool Changed = false;
+
+ // If the callee is a pointer to a function, attempt to move any casts to the
+ // arguments of the call/invoke.
+ Value *Callee = CS.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ return 0;
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ // Only do this for calls to a function with a body. A prototype may
+ // not actually end up matching the implementation's calling conv for a
+ // variety of reasons (e.g. it may be written in assembly).
+ !CalleeF->isDeclaration()) {
+ Instruction *OldCall = CS.getInstruction();
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+      // If OldCall does not return void then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!OldCall->getType()->isVoidTy())
+ ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall))
+ return EraseInstFromFunction(*OldCall);
+
+      // We cannot remove an invoke because that would change the CFG; just
+      // change the callee to a null pointer instead.
+ cast<InvokeInst>(OldCall)->setCalledFunction(
+ Constant::getNullValue(CalleeF->getType()));
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ // If CS does not return void then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!CS.getInstruction()->getType()->isVoidTy())
+ ReplaceInstUsesWith(*CS.getInstruction(),
+ UndefValue::get(CS.getInstruction()->getType()));
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ // Don't break the CFG, insert a dummy cond branch.
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+ ConstantInt::getTrue(Callee->getContext()), II);
+ }
+ return EraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+ if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+ if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+ return transformCallThroughTrampoline(CS);
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+  // Try to optimize the call if possible; we require TargetData for most of
+  // this. None of these calls are seen as possibly dead, so go ahead and
+  // delete the instruction now.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ Instruction *I = tryOptimizeCall(CI, TD);
+    // If we changed something, return the result. Otherwise, fall through
+    // to the remaining checks.
+ if (I) return EraseInstFromFunction(*I);
+ }
+
+ return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ Function *Callee =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee == 0)
+ return false;
+ Instruction *Caller = CS.getInstruction();
+ const AttrListPtr &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+  // be a direct call with arguments cast to the appropriate types.
+ //
+ const FunctionType *FT = Callee->getFunctionType();
+ const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
+
+ if (NewRetTy->isStructTy())
+ return false; // TODO: Handle multiple return values.
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((OldRetTy->isPointerTy() || !TD ||
+ OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ (NewRetTy->isPointerTy() || !TD ||
+ NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
+ return false; // Cannot transform this return value.
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+ if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ const Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+ if (Attrs & Attribute::typeIncompatible(ParamTy))
+ return false; // Attribute not compatible with transformed value.
+
+ // If the parameter is passed as a byval argument, then we have to have a
+ // sized type and the sized type has to have the same size as the old type.
+ if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
+ const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+ return false;
+
+ const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ if (TD->getTypeAllocSize(CurElTy) !=
+ TD->getTypeAllocSize(ParamPTy->getElementType()))
+ return false;
+ }
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
+ bool isConvertible = ActTy == ParamTy ||
+ (TD && ((ParamTy->isPointerTy() ||
+ ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ (ActTy->isPointerTy() ||
+ ActTy == TD->getIntPtrType(Caller->getContext()))));
+ if (Callee->isDeclaration() && !isConvertible) return false;
+ }
+
+ if (Callee->isDeclaration()) {
+ // Do not delete arguments unless we have a function body.
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
+ return false;
+
+ // If the callee is just a declaration, don't change the varargsness of the
+ // call. We don't want to introduce a varargs call where one doesn't
+ // already exist.
+ const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+ return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ break;
+ Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ if (PAttrs & Attribute::VarArgsIncompatible)
+ return false;
+ }
+
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary.
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeWithIndex, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+
+ // Add the new return attributes.
+ if (RAttrs)
+ attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+ false, ParamTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ if (!FT->isVarArg()) {
+ errs() << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
+ } else {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ const Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+ }
+ }
+
+ if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+ attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+ attrVec.end());
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args);
+ NC->takeName(II);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ CallInst *CI = cast<CallInst>(Caller);
+ NC = Builder->CreateCall(Callee, Args);
+ NC->takeName(CI);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(NC, false, OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+ NC->setDebugLoc(Caller->getDebugLoc());
+
+      // If this is an invoke instruction, we should insert it after the first
+      // non-phi instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call, just insert cast right after the call.
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+ if (!Caller->use_empty())
+ ReplaceInstUsesWith(*Caller, NV);
+
+ EraseInstFromFunction(*Caller);
+ return true;
+}
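+// For illustration (hypothetical @f, assuming 32-bit pointers): a call
+// through a constant-expression cast such as
+//   %r = call i32 bitcast (i32 (i32)* @f to i32 (i8*)*)(i8* %p)
+// becomes a direct call with the argument cast instead:
+//   %a = ptrtoint i8* %p to i32
+//   %r = call i32 @f(i32 %a)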
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+ Value *Callee = CS.getCalledValue();
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
+
+ IntrinsicInst *Tramp =
+ cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+ const AttrListPtr &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ Type *NestTy = 0;
+ Attributes NestAttr = Attribute::None;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+ SmallVector<AttributeWithIndex, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attributes Attr = Attrs.getRetAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getArgOperand(2);
+ if (NestVal->getType() != NestTy)
+ NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ NewAttrs.push_back
+ (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attributes Attr = Attrs.getFnAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FTy->isVarArg());
+ Constant *NewCallee =
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+ NewAttrs.end());
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+
+ return NewCaller;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF :
+ ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
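+// For illustration (hypothetical @f and chain value %c): if @f is declared as
+//   i32 @f(i8* nest %chain, i32 %x)
+// and the trampoline was initialized over @f with %c, a call made through the
+// trampoline such as
+//   call i32 %tramp(i32 7)
+// is rewritten into the direct call
+//   call i32 @f(i8* nest %c, i32 7)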
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
new file mode 100644
index 000000000000..82c734e0b829
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -0,0 +1,1771 @@
+//===- InstCombineCasts.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for cast operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression. If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ uint64_t &Offset) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Val->getType(), 0);
+ }
+
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ // Cannot look past anything that might overflow.
+ OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+ if (OBI && !OBI->hasNoUnsignedWrap()) {
+ Scale = 1;
+ Offset = 0;
+ return Val;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = UINT64_C(1) << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
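+// For illustration (assuming the arithmetic carries the nuw flag):
+//   %v1 = mul nuw i32 %x, 4        ; X = %x, Scale = 4, Offset = 0
+//   %s  = shl nuw i32 %x, 3
+//   %v2 = add nuw i32 %s, 16       ; X = %x, Scale = 8, Offset = 16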
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
+ // This requires TargetData to get the alloca alignment and size information.
+ if (!TD) return 0;
+
+ const PointerType *PTy = cast<PointerType>(CI.getType());
+
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+
+ // Get the type really allocated and the type casted to.
+ const Type *AllocElTy = AI.getAllocatedType();
+ const Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+ unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return 0;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds.
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
+
+ uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ uint64_t ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+ // If we can now satisfy the modulus, by using a non-1 scale, we really can
+ // do the xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = 0;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+ }
+
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
+ Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+ }
+
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
+ New->takeName(&AI);
+
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ if (!AI.hasOneUse()) {
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
+ ReplaceInstUsesWith(AI, NewCast);
+ }
+ return ReplaceInstUsesWith(CI, New);
+}
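+// For illustration (assuming i32 has 4-byte size and ABI alignment, and the
+// bitcast is the alloca's only use):
+//   %buf = alloca [8 x i8]
+//   %p   = bitcast [8 x i8]* %buf to i32*
+// can be promoted to
+//   %buf = alloca i32, i32 2
+// after which the bitcast folds away.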
+
+
+
+/// EvaluateInDifferentType - Given an expression that
+/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
+/// insert the code to evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(CE, TD);
+ return C;
+ }
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = 0;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ llvm_unreachable("Unreachable!");
+ break;
+ }
+
+ Res->takeName(I);
+ return InsertNewInstWith(Res, *I);
+}
+
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+ const CastInst *CI, ///< The first cast instruction
+ unsigned opcode, ///< The opcode of the second cast instruction
+ const Type *DstTy, ///< The target type for the second cast instruction
+ TargetData *TD ///< The target data for pointer size
+) {
+
+ const Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ const Type *MidTy = CI->getType(); // B from above
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy,
+ TD ? TD->getIntPtrType(CI->getContext()) : 0);
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr &&
+ (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (Res == Instruction::PtrToInt &&
+ (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
+
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ const Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
+ return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
+ return true;
+}
+
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+ // eliminate it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src)) {
+    // We don't do this if it would create a PHI node with an illegal type
+    // when the current type is legal.
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
+ ShouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
+
+ return 0;
+}
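+// For illustration of the cast-pair elimination above:
+//   %b = bitcast i32* %p to i8*
+//   %c = bitcast i8* %b to i16*
+// collapses into a single bitcast of %p to i16*.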
+
+/// CanEvaluateTruncated - Return true if we can evaluate the specified
+/// expression tree as type Ty instead of its larger type, and arrive with the
+/// same value. This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V. We should return true if trunc(V)
+/// can be computed by computing V in the smaller type. If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+ // We can always evaluate constants in another type.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ const Type *OrigTy = V->getType();
+
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+ MaskedValueIsZero(I->getOperand(1), Mask)) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+ }
+ }
+ break;
+ }
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (CI->getLimitedValue(BitWidth) < BitWidth)
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ }
+ break;
+ case Instruction::Trunc:
+ // trunc(trunc(x)) -> trunc(x)
+ return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
+ CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
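+// For illustration of the lshr case above: truncating
+//   %s = lshr i32 %x, 8
+// to i16 is only treated as safe when the top 16 bits of %x are known to be
+// zero; otherwise the narrow shift would lose bits that the wide lshr brings
+// into the low 16 bits of the result.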
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+ // Attempt to truncate the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateTruncated(Src, DestTy)) {
+
+    // If this cast is a truncate, evaluating in a different type always
+ // eliminates the cast, so it is always a win.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+ if (DestTy->getScalarSizeInBits() == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = Builder->CreateAnd(Src, One, "tmp");
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
+
+  // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+  // type is native.
+ if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
+ ShouldChangeType(Src->getType(), CI.getType()) &&
+ match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+ Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ return BinaryOperator::CreateAnd(NewTrunc,
+ ConstantExpr::getTrunc(Cst, CI.getType()));
+ }
+
+ return 0;
+}
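+// For illustration of the lshr-of-zext case handled above, with %a of type
+// i8:
+//   %z = zext i8 %a to i32
+//   %s = lshr i32 %z, 3
+//   %t = trunc i32 %s to i16
+// becomes
+//   %s2 = lshr i8 %a, 3
+//   %t  = zext i8 %s2 to i16
+// eliminating the round trip through i32.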
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+  // If we are just checking for an icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName()+".not");
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+ isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
+ In->getName()+".lobit");
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, "tmp");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+ // It is also profitable to transform icmp eq into not(xor(A, B)) because that
+ // may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
+ if (UnknownBit.countPopulation() == 1) {
+ if (!DoXform) return ICI;
+
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Result);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
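+// For illustration of the power-of-two case above: if %x is known to have at
+// most bit 3 set, then
+//   %c = icmp ne i32 %x, 0
+//   %z = zext i1 %c to i32
+// simplifies to
+//   %z = lshr i32 %x, 3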
+
+/// CanEvaluateZExtd - Determine if the specified value can be computed in the
+/// specified wider type and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+ BitsToClear = 0;
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If the input is a truncate from the destination type, we can trivially
+ // eliminate it, even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode(), Tmp;
+ switch (Opc) {
+ case Instruction::ZExt: // zext(zext(x)) -> zext(x).
+ case Instruction::SExt: // zext(sext(x)) -> sext(x).
+ case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
+
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear)))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+
+ case Instruction::LShr:
+    // We can promote lshr(x, cst) if we can promote x. This requires the
+    // ultimate 'and' to clear out the additional high bits this introduces,
+    // though.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
+ case Instruction::Select:
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ return false;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+  // If this zero extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+  // If one of the common conversions will work, do it.
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ unsigned BitsToClear;
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid zero extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with zeros, just replace this
+ // cast with the result.
+ if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept)))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(Res->getType(),
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // TODO: Subsume this into EvaluateInDifferentType.
+
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ return new ZExtInst(And, CI.getType());
+ }
+
+ if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc,
+ ConstantInt::get(Trunc->getType(),
+ AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
+ // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
+ Value *X;
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) &&
+ (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ Value *New = Builder->CreateZExt(X, CI.getType());
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+
+ return 0;
+}
+
+/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
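+/// For example (illustrative types):
+///   sext (icmp slt i32 %x, 0) to i32   -->  ashr i32 %x, 31
+///   sext (icmp sgt i32 %x, -1) to i32  -->  xor (ashr i32 %x, 31), -1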
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+ Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
+ // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+ Value *Sh = ConstantInt::get(Op0->getType(),
+ Op0->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+ // If we know that only one bit of the LHS of the icmp can be set and we
+ // have an equality comparison with zero or a power of 2, we can transform
+ // the icmp and sext into bitwise/integer operations.
+ if (ICI->hasOneUse() &&
+ ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+ unsigned BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) {
+ Value *In = ICI->getOperand(0);
+
+ // If the icmp tests for a known zero bit we can constant fold it.
+ if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+ Value *V = Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getAllOnesValue(CI.getType()) :
+ ConstantInt::getNullValue(CI.getType());
+ return ReplaceInstUsesWith(CI, V);
+ }
+
+ if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+ // sext ((x & 2^n) == 0) -> (x >> n) - 1
+ // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+ unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ // Perform a right shift to place the desired bit in the LSB.
+ if (ShiftAmt)
+ In = Builder->CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // At this point "In" is either 1 or 0. Subtract 1 to turn
+ // {1, 0} -> {0, -1}.
+ In = Builder->CreateAdd(In,
+ ConstantInt::getAllOnesValue(In->getType()),
+ "sext");
+ } else {
+ // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
+ // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
+ unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ // Perform a left shift to place the desired bit in the MSB.
+ if (ShiftAmt)
+ In = Builder->CreateShl(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // Distribute the bit over the whole bit width.
+ In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+ BitWidth - 1), "sext");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+ }
+ }
+ }
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative.
+ if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+ if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
+ Op0->getType() == CI.getType()) {
+ const Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant to a constant vector.
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+
+ return 0;
+}
+
+/// CanEvaluateSExtd - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the value of the common low bits. This is used by code that tries
+/// to promote integer operations to a wider type, which will allow us to
+/// eliminate the extension.
+///
+/// This function works on both vectors and scalars.
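+///
+/// For example (illustrative): a single-use "add i8 (trunc i32 %x to i8), 7"
+/// can be evaluated directly as an i32 add of %x and 7, so this returns true;
+/// an add of two plain i8 arguments returns false, since rebuilding it in i32
+/// would require inserting new casts.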
+///
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+ assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "Can't sign extend type to a smaller type");
+ // If this is a constant, it can be trivially promoted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is a truncate from the dest type, we can trivially eliminate it,
+ // even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SExt: // sext(sext(x)) -> sext(x)
+ case Instruction::ZExt: // sext(zext(x)) -> zext(x)
+ case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operators can all arbitrarily be extended if their inputs can.
+ return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+ CanEvaluateSExtd(I->getOperand(1), Ty);
+
+ //case Instruction::Shl: TODO
+ //case Instruction::LShr: TODO
+
+ case Instruction::Select:
+ return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+ CanEvaluateSExtd(I->getOperand(2), Ty);
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateSExtd(Src, DestTy)) {
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid sign extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ ShAmt);
+ }
+
+ // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // into shifts.
+ if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+ if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+ return BinaryOperator::CreateAShr(Res, ShAmt);
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformSExtICmp(ICI, CI);
+
+ // If the input is a shl/ashr pair of a same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && A->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ return BinaryOperator::CreateAShr(A, ShAmtV);
+ }
+
+ return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(CFP->getContext(), F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+ return V; // No constant folding of this.
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType()->isDoubleTy())
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
+ // as many builtins (sqrt, etc).
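+ // For example (illustrative):
+ //   %dx = fpext float %x to double
+ //   %dy = fpext float %y to double
+ //   %s  = fadd double %dx, %dy
+ //   %r  = fptrunc double %s to float
+ // becomes a single "fadd float %x, %y".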
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ const Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+ LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+ RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+ }
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() &&
+ Call->getCalledFunction()->getName() == "sqrt" &&
+ Call->getNumArgOperands() == 1 &&
+ Call->hasOneUse()) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
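+ // For example (illustrative): fptoui (uitofp i16 %x to float) to i16 folds
+ // to %x, since float's 24-bit mantissa holds every i16 exactly; the i32
+ // version is left alone because 32 is not less than 24.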
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for the uitofp case, because an 'X'
+ // value with the sign bit set would cause an undefined result for the fptosi.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is not the intptr_t type for this target, do a
+ // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
+ // cast to be exposed to other transforms.
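+ // For example (illustrative, assuming 32-bit pointers):
+ //   inttoptr i64 %x to i8*  -->  inttoptr (trunc i64 %x to i32) to i8*
+ //   inttoptr i16 %x to i8*  -->  inttoptr (zext i16 %x to i32) to i8*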
+ if (TD) {
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateTrunc(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateZExt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ return 0;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If we're casting the result of a getelementptr instruction with no offset,
+ // turn this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices()) {
+ // Changing the cast operand is usually not a good idea but it is safe
+ // here because the pointer operand is being replaced with another
+ // pointer operand so the opcode doesn't need to change.
+ Worklist.Add(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+
+ // If the GEP has a single use, and the base pointer is a bitcast, and the
+ // GEP computes a constant offset, see if we can convert these three
+ // instructions into fewer. This typically happens with unions and other
+ // non-type-safe code.
+ if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ GEP->hasAllConstantIndices()) {
+ // We are guaranteed to get a constant from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // Get the base pointer input of the bitcast, and the type it points to.
+ Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+ const Type *GEPIdxTy =
+ cast<PointerType>(OrigBase->getType())->getElementType();
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ // If we were able to index down into an element, create the GEP
+ // and bitcast the result. This eliminates one bitcast, potentially
+ // two.
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(OrigBase,
+ NewIndices.begin(), NewIndices.end()) :
+ Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+ NGEP->takeName(GEP);
+
+ if (isa<BitCastInst>(CI))
+ return new BitCastInst(NGEP, CI.getType());
+ assert(isa<PtrToIntInst>(CI));
+ return new PtrToIntInst(NGEP, CI.getType());
+ }
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is not the intptr_t type for this target,
+ // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
+ // to be exposed to other transforms.
+ if (TD) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new TruncInst(P, CI.getType());
+ }
+ if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new ZExtInst(P, CI.getType());
+ }
+ }
+
+ return commonPointerCastTransforms(CI);
+}
+
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type. Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
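+///
+/// For example (illustrative): widening <2 x i32> %v to <4 x i32> yields
+///   shufflevector <2 x i32> %v, <2 x i32> zeroinitializer,
+///                 <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+/// while shrinking <4 x i32> to <2 x i32> keeps elements 0 and 1 and uses
+/// undef as the second shuffle operand.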
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The element types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<Constant*, 16> ShuffleMask;
+ Value *V2;
+ const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // from a constant zero.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ // The excess elements reference the first element of the zero input.
+ ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+ ConstantInt::get(Int32Ty, SrcElts));
+ }
+
+ return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
+}
+
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win; try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the elements are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
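+///
+/// For example (illustrative):
+///   bitcast (trunc (bitcast <2 x float> %v to i64) to i32) to float
+/// becomes "extractelement <2 x float> %v, i32 0", and with an intervening
+/// "lshr ..., 32" before the trunc it becomes an extract of element 1.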
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType();
+ const Type *DestTy = CI.getType();
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+ if (DestTy == Src->getType())
+ return ReplaceInstUsesWith(CI, Src);
+
+ if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ const Type *DstElTy = DstPTy->getElementType();
+ const Type *SrcElTy = SrcPTy->getElementType();
+
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
+ // If we are casting an alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
+ Constant *ZeroUInt =
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end());
+ }
+ }
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
+
+ if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
+ }
+ }
+
+ if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ // Okay, we have (bitcast (shuffle ..)). Check to see if this is
+ // a bitcast to a vector with the same # elts.
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ BitCastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+
+ if (SrcTy->isPointerTy())
+ return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 000000000000..c78760b20692
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,2919 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+static ConstantInt *getOne(Constant *C) {
+ return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
+/// AddOne - Add one to a ConstantInt
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt
+static Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ult(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ return Result->getValue().slt(In1->getValue());
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ugt(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().slt(In1->getValue());
+
+ return Result->getValue().sgt(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit. If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
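+///
+/// For example (illustrative, i8 operands): "icmp slt i8 %x, 0" and
+/// "icmp ugt i8 %x, 127" both test only the sign bit with TrueIfSigned set,
+/// while "icmp sgt i8 %x, -1" tests it with TrueIfSigned cleared.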
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->isMaxValue(true);
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// set of known zero and one bits, compute the maximum and minimum values that
+/// could have the specified known zero and known one bits, returning them in
+/// min/max.
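+///
+/// Worked 4-bit example (illustrative): KnownZero = 0100 and KnownOne = 0001
+/// leave bits 1010 unknown. Min starts as 0001 and Max as 1011; because the
+/// sign bit is unknown, Min becomes 1001 (-7) and Max becomes 0011 (3).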
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+ const APInt& KnownOne,
+ APInt& Min, APInt& Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.setBit(Min.getBitWidth()-1);
+ Max.clearBit(Max.getBitWidth()-1);
+ }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
+/// cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+ CmpInst &ICI, ConstantInt *AndCst) {
+ // We need TD information to know the pointer size unless this is inbounds.
+ if (!GEP->isInBounds() && TD == 0) return 0;
+
+ ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+
+ // There are many forms of this optimization we can handle, for now, just do
+ // the simple index into a single-dimensional array.
+ //
+ // Require: GEP GV, 0, i {{, constant indices}}
+ if (GEP->getNumOperands() < 3 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return 0;
+
+ // Check that indices after the variable are constants and in-range for the
+ // type they index. Collect the indices. This is typically for arrays of
+ // structs.
+ SmallVector<unsigned, 4> LaterIndices;
+
+ const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+ for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (Idx == 0) return 0; // Variable index.
+
+ uint64_t IdxVal = Idx->getZExtValue();
+ if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+
+ if (const StructType *STy = dyn_cast<StructType>(EltTy))
+ EltTy = STy->getElementType(IdxVal);
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ if (IdxVal >= ATy->getNumElements()) return 0;
+ EltTy = ATy->getElementType();
+ } else {
+ return 0; // Unknown type.
+ }
+
+ LaterIndices.push_back(IdxVal);
+ }
+
+ enum { Overdefined = -3, Undefined = -2 };
+
+ // Variables for our state machines.
+
+ // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+ // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+ // and 87 is the second (and last) index. FirstTrueElement is -2 when
+ // undefined, otherwise set to the first true element. SecondTrueElement is
+ // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+ int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+ // form "i != 47 & i != 87". Same state transitions as for true elements.
+ int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+ /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+ /// define a state machine that triggers for ranges of values that the index
+ /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+ /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+ /// index in the range (inclusive). We use -2 for undefined here because we
+ /// use relative comparisons and don't want 0-1 to match -1.
+ int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+ // MagicBitvector - This is a magic bitvector where we set a bit if the
+ // comparison is true for element 'i'. If there are 64 elements or less in
+ // the array, this will fully represent all the comparison results.
+ uint64_t MagicBitvector = 0;
+
+
+ // Scan the array and see if one of our patterns matches.
+ Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+ for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+ Constant *Elt = Init->getOperand(i);
+
+ // If this is indexing an array of structures, get the structure element.
+ if (!LaterIndices.empty())
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
+
+ // If the element is masked, handle it.
+ if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+ // Find out if the comparison would be true or false for the i'th element.
+ Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+ CompareRHS, TD);
+ // If the result is undef for this element, ignore it.
+ if (isa<UndefValue>(C)) {
+ // Extend range state machines to cover this element in case there is an
+ // undef in the middle of the range.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ continue;
+ }
+
+ // If we can't compute the result for any of the elements, we have to give
+ // up evaluating the entire conditional.
+ if (!isa<ConstantInt>(C)) return 0;
+
+ // Otherwise, we know if the comparison is true or false for this element,
+ // update our state machines.
+ bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+ // State machine for single/double/range index comparison.
+ if (IsTrueForElt) {
+ // Update the TrueElement state machine.
+ if (FirstTrueElement == Undefined)
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
+ else {
+ // Update double-compare state machine.
+ if (SecondTrueElement == Undefined)
+ SecondTrueElement = i;
+ else
+ SecondTrueElement = Overdefined;
+
+ // Update range state machine.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ else
+ TrueRangeEnd = Overdefined;
+ }
+ } else {
+ // Update the FalseElement state machine.
+ if (FirstFalseElement == Undefined)
+ FirstFalseElement = FalseRangeEnd = i; // First false element.
+ else {
+ // Update double-compare state machine.
+ if (SecondFalseElement == Undefined)
+ SecondFalseElement = i;
+ else
+ SecondFalseElement = Overdefined;
+
+ // Update range state machine.
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ else
+ FalseRangeEnd = Overdefined;
+ }
+ }
+
+
+ // If this element is in range, update our magic bitvector.
+ if (i < 64 && IsTrueForElt)
+ MagicBitvector |= 1ULL << i;
+
+ // If all of our states become overdefined, bail out early. Since the
+ // predicate is expensive, only check it every 8 elements. This is only
+ // really useful for really huge arrays.
+ if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+ SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+ FalseRangeEnd == Overdefined)
+ return 0;
+ }
+
+ // Now that we've scanned the entire array, emit our new comparison(s). We
+ // order the state machines in complexity of the generated code.
+ Value *Idx = GEP->getOperand(2);
+
+ // If the index is larger than the pointer size of the target, truncate the
+ // index down like the GEP would do implicitly. We don't have to do this for
+ // an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds() &&
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+
+ // If the comparison is only true for one or two elements, emit direct
+ // comparisons.
+ if (SecondTrueElement != Overdefined) {
+ // None true -> false.
+ if (FirstTrueElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+
+ Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+ // True for one element -> 'i == 47'.
+ if (SecondTrueElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+ // True for two elements -> 'i == 47 | i == 72'.
+ Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+ Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ return BinaryOperator::CreateOr(C1, C2);
+ }
+
+ // If the comparison is only false for one or two elements, emit direct
+ // comparisons.
+ if (SecondFalseElement != Overdefined) {
+ // None false -> true.
+ if (FirstFalseElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+
+ Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+ // False for one element -> 'i != 47'.
+ if (SecondFalseElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+ // False for two elements -> 'i != 47 & i != 72'.
+ Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ return BinaryOperator::CreateAnd(C1, C2);
+ }
+
+ // If the comparison can be replaced with a range comparison for the elements
+ // where it is true, emit the range check.
+ if (TrueRangeEnd != Overdefined) {
+ assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+ // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+ if (FirstTrueElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ TrueRangeEnd-FirstTrueElement+1);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+ }
+
+ // False range check.
+ if (FalseRangeEnd != Overdefined) {
+ assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+ // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+ if (FirstFalseElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ FalseRangeEnd-FirstFalseElement);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+ }
+
+
+ // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // of this load, replace it with computation that does:
+ // ((magic_cst >> i) & 1) != 0
+ if (Init->getNumOperands() <= 32 ||
+ (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+ const Type *Ty;
+ if (Init->getNumOperands() <= 32)
+ Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = Type::getInt64Ty(Init->getContext());
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
+
+ return 0;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This later form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
+ TargetData &TD = *IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to intptr_t in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
+ }
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
+ true /*Signed*/);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
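+ // E.g. "icmp ult (gep inbounds i32* %p, i64 %i), %p" becomes
+ // "icmp slt %i, 0" (assuming i64 is the pointer-sized integer type).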
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+ // Otherwise, the base pointers are different and the indices are
+ // different, bail out.
+ return 0;
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+ !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ AllZeros = true;
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+ !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
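+ // E.g. "icmp ult (gep inbounds i32* %P, i64 %a), (gep inbounds i32* %P, i64 %b)"
+ // becomes "icmp slt %a, %b".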
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return ReplaceInstUsesWith(I, // No comparison is needed here.
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1 && GEPsInBounds) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if (TD &&
+ GEPsInBounds &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+ // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS);
+ Value *R = EmitGEPOffset(GEPRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+ return 0;
+}
+
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred,
+ Value *TheAdd) {
+ // If we have X+0, exit early (simplifying logic below) and let it get folded
+ // elsewhere. icmp X+0, X -> icmp X, X
+ if (CI->isZero()) {
+ bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ // (X+4) == X -> false.
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ // (X+4) != X -> true.
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+ // so the values can never be equal. Similarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
+ // (X+2) <u X --> X >u (MAXUINT-2) --> X >u 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ Value *R =
+ ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// FoldICmpDivCst - Fold "icmp pred ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return 0;
+ if (DivRHS->isZero())
+ return 0; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return 0; // The overflow computation also screws up here
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
+
+ // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation
+ // of the form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CmpRHS). By solving for X we can turn this into a range check
+ // instead of computing a divide.
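+ // E.g. for "X /u 5 == 3", Prod is 15 and the code below forms the unsigned
+ // range check 15 <= X < 20.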
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ // If the division is known to be exact, then there is no remainder from the
+ // divide, so the covered range size is 1; otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis, each
+ // overflow variable is set to 0 if its corresponding bound variable is
+ // valid, -1 if it overflowed off the bottom end, or +1 if it overflowed off
+ // the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = 0, *HiBound = 0;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return 0;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return 0;
+
+ // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+ // by a power of 2. Since we already have logic to simplify these,
+ // transform to div and then simplify the resultant comparison.
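+ // E.g. "icmp ult (lshr X, 2), 5" becomes "icmp ult (udiv X, 4), 5", which
+ // FoldICmpDivCst then turns into "icmp ult X, 20".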
+ if (Shr->getOpcode() == Instruction::AShr &&
+ (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+ return 0;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (TheDiv == 0)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+ }
+ return 0;
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
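+ // E.g. if the top 24 bits of an i32 %x are known to be zero,
+ // "icmp eq (trunc i32 %x to i8), 42" becomes "icmp eq i32 %x, 42".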
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS = RHS->getValue().zext(SrcBits);
+ NewRHS |= KnownOne;
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
+ if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+ // fold the xor.
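+ // E.g. for i8, "icmp slt (xor %x, -128), 0" becomes "icmp sgt %x, -1".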
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCST is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCST->isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ Worklist.Add(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+ SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+ AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ NotSignBit));
+ }
+ }
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (ICI.isEquality() ||
+ (!AndCST->isNegative() && RHSV.isNonNegative())) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
+ }
+ }
+
+ // If the LHS is an AND of a zext, and we have an equality compare, we can
+ // shrink the and/compare to the smaller type, eliminating the cast.
+ if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+ const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+ // Make sure we don't compare the upper bits, SimplifyDemandedBits
+ // should fold the icmp to true/false in that case.
+ if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getTrunc(AndCST, Ty));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getTrunc(RHS, Ty));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
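+ // E.g. "((X lshr 3) & 15) == 2" becomes "(X & 120) == 16" below.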
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = 0;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ const Type *AndTy = AndCST->getType(); // Type of the and.
+
+ // We can fold this as long as we can't shift unknown bits
+ // into the mask. This can only happen with signed shift
+ // rights, as they sign-extend.
+ if (ShAmt) {
+ bool CanFold = Shift->isLogicalShift();
+ if (!CanFold) {
+ // To test for the bad case of the signed shr, see if any
+ // of the bits shifted in could be tested after the mask.
+ uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+ int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+ uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ AndCST->getValue()) == 0)
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(Shift->getOpcode(),
+ NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getTrue(ICI.getContext()));
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCST;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ else
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+ LHSI->setOperand(1, NewAndCST);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &ICI;
+ }
+ }
+ }
+
+ // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The latter is
+ // preferable because it allows the C<<Y expression to be hoisted out
+ // of a loop if Y is invariant and X is not.
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+ } else {
+ // Insert a logical shift.
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+ }
+
+ // Compute X & (C << Y).
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+
+ // Try to optimize things like "A[i]&42 == 0" to index computations.
+ if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+ ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
+ return Res;
+ }
+ }
+ break;
+
+ case Instruction::Or: {
+ if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+ break;
+ Value *P, *Q;
+ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+ Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+ Constant::getNullValue(P->getType()));
+ Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+ Constant::getNullValue(Q->getType()));
+ Instruction *Op;
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+ else
+ Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+ return Op;
+ }
+ break;
+ }
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) break;
+
+ uint32_t TypeBits = RHSV.getBitWidth();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
+ ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst =
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask =
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ TypeBits-ShAmtVal));
+
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getLShr(RHS, ShAmt));
+ }
+ }
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+ APInt::getOneBitSet(TypeBits,
+ TypeBits-ShAmt->getZExtValue()-1));
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Handle equality comparisons of shift-by-constant.
+ BinaryOperator *BO = cast<BinaryOperator>(LHSI);
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
+ return Res;
+ }
+
+ // Handle exact shr's.
+ if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
+ if (RHSV.isMinValue())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
+ // it, otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Add:
+ // Fold: icmp pred (add X, C1), C2
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSigned()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ }
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+ if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(1) && V.isPowerOf2()) {
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getSub(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+ } else if (RHSV == 0) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
+ case Instruction::Sub:
+ // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+ if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+ ConstantExpr::getSub(BOp0C, RHS));
+ } else if (RHSV == 0) {
+ // Replace ((sub A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+ // Don't perform the following transforms if the AND has multiple uses
+ if (!BO->hasOneUse())
+ break;
+
+ // Replace ((and X, (1 << size(X)-1)) != 0) with (X s< 0)
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+ }
+ default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ return &ICI;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(A) -> A == 0, and likewise for !=
+ if (RHSV == RHS->getType()->getBitWidth()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+ return &ICI;
+ }
+ break;
+ case Intrinsic::ctpop:
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ if (RHS->isZero()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, RHS);
+ return &ICI;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+ const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ const Type *SrcTy = LHSCIOp->getType();
+ const Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits() ==
+ cast<IntegerType>(DestTy)->getBitWidth()) {
+ Value *RHSOp = 0;
+ if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return 0;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICI.isSigned();
+
+ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return 0;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return 0;
+
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
+
+ // If we aren't dealing with a constant on the RHS, exit early
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+ if (!CI)
+ return 0;
+
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
+ Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == CI) {
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
+ }
+
+ // The re-extended constant changed so the constant cannot be represented
+ // in the shorter type. Consequently, we cannot emit a simple comparison.
+ // All the cases that fold to true or false will have already been handled
+ // by SimplifyICmpInst, so only deal with the tricky case.
+
+ if (isSignedCmp || !isSignedExt)
+ return 0;
+
+ // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
+ // should have been folded away previously and not enter in here.
+
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
+
+ // Finally, return the value computed.
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
+ return ReplaceInstUsesWith(ICI, Result);
+
+ assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
+ return BinaryOperator::CreateNot(Result);
+}
+
+/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // The transformation we're trying to do here is to transform this into an
+ // llvm.sadd.with.overflow. To do this, we have to replace the original add
+ // with a narrower add, and discard the add-with-constant that is part of the
+ // range check (if we can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare must be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse()) return 0;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2()) return 0;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return 0;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+ UI != E; ++UI) {
+ if (*UI == AddWithCst) continue;
+
+ // Only accept truncates for now. We would really like a nice recursive
+ // predicate like SimplifyDemandedBits, but one that walks down the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(*UI);
+ if (TI == 0 ||
+ TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Module *M = I.getParent()->getParent()->getParent();
+
+ Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
+ NewType);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+ CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
+
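+/// ProcessUAddIdiom - The caller has matched an unsigned-add overflow check
+/// on the add OrigAddV. Replace the add with a call to
+/// llvm.uadd.with.overflow and return its overflow bit, which the caller
+/// uses in place of the original icmp.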
+static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
+ InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers, and don't do it for
+ // vectors.
+ if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+
+ // If the add is a constant expr, then we don't bother transforming it.
+ Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
+ if (OrigAdd == 0) return 0;
+
+ Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(OrigAdd);
+
+ Module *M = I.getParent()->getParent()->getParent();
+ Type *Ty = LHS->getType();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+ CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0);
+
+ IC.ReplaceInstUsesWith(*OrigAdd, Add);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+}
+
+// DemandedBitsLHSMask - When performing a comparison against a constant,
+// it is possible that not all the bits in the LHS are demanded. This helper
+// method computes the mask that IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+ unsigned BitWidth, bool isSignCheck) {
+ if (isSignCheck)
+ return APInt::getSignBit(BitWidth);
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!CI) return APInt::getAllOnesValue(BitWidth);
+ const APInt &RHS = CI->getValue();
+
+ switch (I.getPredicate()) {
+ // For a UGT comparison, we don't care about any bits that
+ // correspond to the trailing ones of the comparand. The value of these
+ // bits doesn't impact the outcome of the comparison, because any value
+ // greater than the RHS must differ in a bit higher than these due to carry.
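+ // E.g. for "icmp ugt %x, 7" (binary ...0111), the low three bits of %x are
+ // not demanded.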
+ case ICmpInst::ICMP_UGT: {
+ unsigned trailingOnes = RHS.countTrailingOnes();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+ return ~lowBitsSet;
+ }
+
+ // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+ // Any value less than the RHS must differ in a higher bit because of carries.
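+ // E.g. for "icmp ult %x, 8" (binary ...1000), the low three bits of %x are
+ // not demanded.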
+ case ICmpInst::ICMP_ULT: {
+ unsigned trailingZeros = RHS.countTrailingZeros();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+ return ~lowBitsSet;
+ }
+
+ default:
+ return APInt::getAllOnesValue(BitWidth);
+ }
+
+}
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Order the operands of the compare so that they are listed from most
+ // complex to least complex. This moves constants, the least complex
+ // operands, to the RHS.
+ if (getComplexity(Op0) < getComplexity(Op1)) {
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty->isIntegerTy(1)) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateNot(Xor);
+ }
+ case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
+
+ unsigned BitWidth = 0;
+ if (Ty->isIntOrIntVectorTy())
+ BitWidth = Ty->getScalarSizeInBits();
+ else if (TD) // Pointers require TD info to get their size.
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = 0, *B = 0;
+
+ // Match the following pattern, which is a common idiom when writing
+ // overflow-safe integer arithmetic functions. The source performs an
+ // addition in a wider type, and explicitly checks for overflow using
+ // comparisons against INT_MIN and INT_MAX. Simplify this by using the
+ // sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate
+ // magic constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+ return Res;
+ }
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
+ switch (I.getPredicate()) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_SLE:
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_UGE:
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ case ICmpInst::ICMP_SGE:
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+
+ // If this comparison is a normal comparison, it demands all bits; if it is
+ // a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ DemandedBitsLHSMask(I, BitWidth, isSignBit),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(),
+ ConstantInt::get(Op0->getType(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(Op1->getType(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) == 0" into "x != 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) == 0" into "x != 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) != 0" into "x == 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) != 0" into "x == 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
+ break;
+ }
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ Constant::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
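+ // For example, if the sign bit of both operands is known clear (or known
+ // set in both), 'icmp slt A, B' and 'icmp ult A, B' give the same result.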
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
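+ // A typical idiom is '%c = icmp slt i32 %a, %b' feeding
+ // '%m = select i1 %c, i32 %a, i32 %b', which computes smin(%a, %b);
+ // rewriting the compare here would obscure that pattern.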
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp).
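+ // For example, when the select has no other uses,
+ // 'icmp eq (select i1 %c, i32 4, i32 %x), 4' becomes
+ // 'select i1 %c, i1 true, i1 (icmp eq i32 %x, 4)'.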
+ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() && TD &&
+ TD->getIntPtrType(RHSC->getContext()) ==
+ LHSI->getOperand(0)->getType())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast,
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (BO0 || BO1) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Add)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Add)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ return new ICmpInst(Pred, Y, Z);
+ }
+
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = 0; B = 0; C = 0; D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Sub)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ BinaryOperator *SRem = NULL;
+ // icmp (srem X, Y), Y
+ if (BO0 && BO0->getOpcode() == Instruction::SRem &&
+ Op1 == BO0->getOperand(1))
+ SRem = BO0;
+ // icmp Y, (srem X, Y)
+ else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+ Op0 == BO1->getOperand(1))
+ SRem = BO1;
+ if (SRem) {
+ // We don't check hasOneUse to avoid increasing register pressure because
+ // the value we use is the same value this instruction was already using.
+ switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case ICmpInst::ICMP_NE:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+ Constant::getAllOnesValue(SRem->getType()));
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+ Constant::getNullValue(SRem->getType()));
+ }
+ }
+
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+ BO0->hasOneUse() && BO1->hasOneUse() &&
+ BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+
+ if (CI->isMaxValue(true)) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
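+ // For example, with i8 operands and Cst == 12 (two trailing zero bits),
+ // Mask == 0x3f: a*12 == b*12 exactly when (a & 0x3f) == (b & 0x3f).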
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (I.isSigned())
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!BO0->isExact() || !BO1->isExact())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ case Instruction::Shl: {
+ bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+ bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && I.isSigned())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ }
+ }
+
+ { Value *A, *B;
+ // ~x < ~y --> y < x
+ // ~x < cst --> ~cst < x
+ if (match(Op0, m_Not(m_Value(A)))) {
+ if (match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+ }
+
+ // (a+b) <u a --> llvm.uadd.with.overflow.
+ // (a+b) <u b --> llvm.uadd.with.overflow.
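+ // This catches the common C idiom 'if (a + b < a)' for detecting unsigned
+ // addition overflow.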
+ if (I.getPredicate() == ICmpInst::ICMP_ULT &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ (Op1 == A || Op1 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
+ return R;
+
+ // a >u (a+b) --> llvm.uadd.with.overflow.
+ // b >u (a+b) --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op1, m_Add(m_Value(A), m_Value(B))) &&
+ (Op0 == A || Op0 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
+ return R;
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = ConstantInt::get(I.getContext(),
+ C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ return new ICmpInst(I.getPredicate(), A, Xor);
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+ Value *X = 0, *Y = 0, *Z = 0;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y, "tmp");
+ Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+
+ // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+ // "icmp (and X, mask), cst"
+ uint64_t ShAmt = 0;
+ ConstantInt *Cst1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
+ m_ConstantInt(ShAmt))))) &&
+ match(Op1, m_ConstantInt(Cst1)) &&
+ // Only do this when A has multiple uses. This is most important to do
+ // when it exposes other optimizations.
+ !A->hasOneUse()) {
+ unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+
+ if (ShAmt < ASize) {
+ APInt MaskV =
+ APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+ MaskV <<= ShAmt;
+
+ APInt CmpV = Cst1->getValue().zext(ASize);
+ CmpV <<= ShAmt;
+
+ Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+ return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ }
+ }
+ }
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ }
+ return Changed ? &I : 0;
+}
+
+
+
+
+
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+ // Check that the input is converted from an integer type small enough that
+ // the conversion preserves all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle, e.g., (fptosi (x >>s 62) to float) when x is i64.
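+ // For example, 'float' has a 24-bit mantissa, so sitofp from i24 (or uitofp
+ // from i23) or narrower converts without losing any bits.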
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+ if (LHSUnsigned)
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts binary operators before unary
+ /// operators, which come before constants.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown predicate!");
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::FPExt: {
+ // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
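+ // For example, 'fcmp olt (fpext float %x to double), 1.0' becomes
+ // 'fcmp olt float %x, 1.0', since 1.0 round-trips to float exactly.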
+ FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
+ ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
+ if (!RHSF)
+ break;
+
+ // We can't convert a PPC double double.
+ if (RHSF->getType()->isPPC_FP128Ty())
+ break;
+
+ const fltSemantics *Sem;
+ // FIXME: This shouldn't be here.
+ if (LHSExt->getSrcTy()->isFloatTy())
+ Sem = &APFloat::IEEEsingle;
+ else if (LHSExt->getSrcTy()->isDoubleTy())
+ Sem = &APFloat::IEEEdouble;
+ else if (LHSExt->getSrcTy()->isFP128Ty())
+ Sem = &APFloat::IEEEquad;
+ else if (LHSExt->getSrcTy()->isX86_FP80Ty())
+ Sem = &APFloat::x87DoubleExtended;
+ else
+ break;
+
+ bool Lossy;
+ APFloat F = RHSF->getValueAPF();
+ F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals.
+ if (!Lossy &&
+ F.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan)
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ ConstantFP::get(RHSC->getContext(), F));
+ break;
+ }
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op2 = Builder->CreateFCmp(I.getPredicate(),
+ LHSI->getOperand(2), RHSC, I.getName());
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ case Instruction::FSub: {
+ // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
+ Value *Op;
+ if (match(LHSI, m_FNeg(m_Value(Op))))
+ return new FCmpInst(I.getSwappedPredicate(), Op,
+ ConstantExpr::getFNeg(RHSC));
+ break;
+ }
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y);
+
+ // fcmp (fpext x), (fpext y) -> fcmp x, y
+ if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
+ if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
+ if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ RHSExt->getOperand(0));
+
+ return Changed ? &I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 000000000000..f499290fe876
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,615 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
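+ // For example, with a 64-bit TargetData, 'alloca i8, i8 %n' has its count
+ // zero-extended to i64 here.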
+ if (TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = Builder->CreateIntCast(AI.getArraySize(),
+ IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+ }
+
+ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
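+ // For example, 'alloca i32, i32 4' becomes 'alloca [4 x i32]' plus an
+ // inbounds GEP to element zero, so existing users still see an i32*.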
+ if (AI.isArrayAllocation()) { // Check C != 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ const Type *NewTy =
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+ AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+ // Now that It points to the first non-allocation instruction in the block,
+ // insert our getelementptr instruction...
+ //
+ Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
+ Value *Idx[2];
+ Idx[0] = NullIdx;
+ Idx[1] = NullIdx;
+ Instruction *GEP =
+ GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+ New->getName()+".sub");
+ InsertNewInstBefore(GEP, *It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return ReplaceInstUsesWith(AI, GEP);
+ } else if (isa<UndefValue>(AI.getArraySize())) {
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+ }
+ }
+
+ if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+ // If alloca'ing a zero byte object, replace the alloca with a null pointer.
+ // Note that we only do this for alloca's, because malloc should allocate
+ // and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ }
+
+ return 0;
+}
+
+
+/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
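+/// For example, a load through 'bitcast <4 x i8>* %P to i32*' becomes a load
+/// of the <4 x i8> value followed by a bitcast of that value to i32.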
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+ const TargetData *TD) {
+ User *CI = cast<User>(LI.getOperand(0));
+ Value *CastOp = CI->getOperand(0);
+
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
+ if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ DestPTy->isVectorTy()) {
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+ if (ASrcTy->getNumElements() != 0) {
+ Value *Idxs[2];
+ Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
+ Idxs[1] = Idxs[0];
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ SrcTy = cast<PointerType>(CastOp->getType());
+ SrcPTy = SrcTy->getElementType();
+ }
+
+ if (IC.getTargetData() &&
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ SrcPTy->isVectorTy()) &&
+ // Do not allow turning this into a load of an integer, which is then
+ // casted to a pointer, this pessimizes pointer analysis a lot.
+ (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+ IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before the load, cast
+ // the result of the loaded value.
+ LoadInst *NewLoad =
+ IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
+ NewLoad->setAlignment(LI.getAlignment());
+ // Now cast the result of the load.
+ return new BitCastInst(NewLoad, LI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+ }
+
+ // load (cast X) --> cast (load X) iff safe.
+ if (isa<CastInst>(Op))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ // None of the following transforms are legal for volatile loads.
+ if (LI.isVolatile()) return 0;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+ // where there are several consecutive memory accesses to the same location,
+ // separated by a few arithmetic operations.
+ BasicBlock::iterator BBI = &LI;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+ return ReplaceInstUsesWith(LI, AvailableVal);
+
+ // load(gep null, ...) -> unreachable
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ // Instcombine load (constantexpr_cast global) -> cast (load global)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+ // helps alias analysis out a lot, allows many other simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ unsigned Align = LI.getAlignment();
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
+ LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
+ V1->setAlignment(Align);
+ V2->setAlignment(Align);
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
+/// when possible. This makes it generally easy to do alias analysis and/or
+/// SROA/mem2reg of the memory object.
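+/// For example, on a 32-bit target 'store i32 %V, (bitcast i8** %P to i32*)'
+/// becomes 'store (inttoptr i32 %V to i8*), i8** %P'.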
+static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
+ User *CI = cast<User>(SI.getOperand(1));
+ Value *CastOp = CI->getOperand(0);
+
+ const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ if (SrcTy == 0) return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
+ return 0;
+
+ /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
+ /// to its first element. This allows us to handle things like:
+ /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
+ /// on 32-bit hosts.
+ SmallVector<Value*, 4> NewGEPIndices;
+
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
+ // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
+ NewGEPIndices.push_back(Zero);
+
+ while (1) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (!STy->getNumElements()) /* Struct can be empty {} */
+ break;
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = STy->getElementType(0);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = ATy->getElementType();
+ } else {
+ break;
+ }
+ }
+
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+ }
+
+ if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
+ return 0;
+
+ // If the pointers point into different address spaces or if they point to
+ // values with different sizes, we can't do the transformation.
+ if (!IC.getTargetData() ||
+ SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace() ||
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+ IC.getTargetData()->getTypeSizeInBits(DestPTy))
+ return 0;
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before
+ // the store, cast the value to be stored.
+ Value *NewCast;
+ Value *SIOp0 = SI.getOperand(0);
+ Instruction::CastOps opcode = Instruction::BitCast;
+ const Type* CastSrcTy = SIOp0->getType();
+ const Type* CastDstTy = SrcPTy;
+ if (CastDstTy->isPointerTy()) {
+ if (CastSrcTy->isIntegerTy())
+ opcode = Instruction::IntToPtr;
+ } else if (CastDstTy->isIntegerTy()) {
+ if (SIOp0->getType()->isPointerTy())
+ opcode = Instruction::PtrToInt;
+ }
+
+ // SIOp0 is a pointer to aggregate and this is a store to the first field,
+ // emit a GEP to index into its first field.
+ if (!NewGEPIndices.empty())
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+ NewGEPIndices.end());
+
+ NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+ SIOp0->getName()+".c");
+ SI.setOperand(0, NewCast);
+ SI.setOperand(1, CastOp);
+ return &SI;
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+ // it's only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ if (!SI.isVolatile()) {
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+ }
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
+ TD);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+ }
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
+ BasicBlock::iterator BBI = &SI;
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen, and skip
+ // over pointer-to-pointer bitcasts, which are NOPs.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ EraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the value being stored
+ // was loaded from the very pointer we are storing to, then *this* store is
+ // dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+ !SI.isVolatile())
+ return EraseInstFromFunction(SI);
+
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+
+ if (SI.isVolatile()) return 0; // Don't hack volatile stores.
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ Worklist.Add(U); // Dropped a use.
+ }
+ return 0; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val))
+ return EraseInstFromFunction(SI);
+
+ // If the pointer destination is a cast, see if we can fold the cast into the
+ // source instead.
+ if (isa<CastInst>(Ptr))
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions), and if the block ends with an
+ // unconditional branch, try to move it to the successor block.
+ BBI = &SI;
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return 0; // xform done!
+
+ return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // If so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *P = *PI;
+ BasicBlock *OtherBB = 0;
+
+ if (P != StoreBB)
+ OtherBB = P;
+
+ if (++PI == pred_end(DestBB))
+ return false;
+
+ P = *PI;
+ if (P != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = P;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case: there must be an instruction before the branch.
+ StoreInst *OtherStore = 0;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ if (BBI==OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, isn't a store to the same location, or if the
+ // alignments differ, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstNonPHI();
+ StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
+ OtherStore->isVolatile(),
+ SI.getAlignment());
+ InsertNewInstBefore(NewSI, *BBI);
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
+
+ // Nuke the old stores.
+ EraseInstFromFunction(SI);
+ EraseInstFromFunction(*OtherStore);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
new file mode 100644
index 000000000000..630a6fee3990
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -0,0 +1,731 @@
+//===- InstCombineMulDivRem.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
+// srem, urem, frem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// simplifyValueKnownNonZero - The specific integer value is used in a context
+/// where it is known to be non-zero. If this allows us to simplify the
+/// computation, do so and return the new operand, otherwise return null.
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
+ // If V has multiple uses, then we would have to do more analysis to determine
+ // if this is safe. For example, the use could be in dynamically unreached
+ // code.
+ if (!V->hasOneUse()) return 0;
+
+ bool MadeChange = false;
+
+ // ((1 << A) >>u B) --> (1 << (A-B))
+ // Because V cannot be zero, we know that B is less than A.
+ Value *A = 0, *B = 0, *PowerOf2 = 0;
+ if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
+ m_Value(B))) &&
+ // The "1" can be any value known to be a power of 2.
+ isPowerOfTwo(PowerOf2, IC.getTargetData())) {
+ A = IC.Builder->CreateSub(A, B, "tmp");
+ return IC.Builder->CreateShl(PowerOf2, A);
+ }
+
+ // (PowerOfTwo >>u B) --> the shift must be exact, since shifting the only
+ // set bit out would make the result zero, and V is known non-zero.
+ // Similarly, the corresponding << cannot wrap (nuw).
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
+ if (I->isLogicalShift() &&
+ isPowerOfTwo(I->getOperand(0), IC.getTargetData())) {
+ // We know that this is an exact/nuw shift and that the input is a
+ // non-zero context as well.
+ if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
+ I->setOperand(0, V2);
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
+ I->setIsExact();
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
+ I->setHasNoUnsignedWrap();
+ MadeChange = true;
+ }
+ }
+
+ // TODO: Lots more we could do here:
+ // If V is a phi node, we can call this on each of its operands.
+ // "select cond, X, 0" can simplify to "X".
+
+ return MadeChange ? V : 0;
+}
+
+
+/// MultiplyOverflows - True if the product of C1 and C2 cannot be represented
+/// in an integer of this bit width.
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt = LHSExt.sext(W * 2);
+ RHSExt = RHSExt.sext(W * 2);
+ } else {
+ LHSExt = LHSExt.zext(W * 2);
+ RHSExt = RHSExt.zext(W * 2);
+ }
+
+ APInt MulExt = LHSExt * RHSExt;
+
+ if (!sign)
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+}
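
// Illustration only, not part of the patch: a minimal standalone sketch of the
// same widening idea for plain 32-bit operands instead of APInt. The helper
// name mulOverflows32 is hypothetical.
#include <cstdint>
#include <limits>

static bool mulOverflows32(int32_t A, int32_t B, bool IsSigned) {
  if (IsSigned) {
    // Sign-extend to 64 bits, multiply, and check the signed 32-bit range.
    int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
    return Wide < std::numeric_limits<int32_t>::min() ||
           Wide > std::numeric_limits<int32_t>::max();
  }
  // Zero-extend to 64 bits, multiply, and check the unsigned 32-bit range.
  uint64_t Wide = static_cast<uint64_t>(static_cast<uint32_t>(A)) *
                  static_cast<uint64_t>(static_cast<uint32_t>(B));
  return Wide > std::numeric_limits<uint32_t>::max();
}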
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ const APInt &Val = CI->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
+ }
+
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ { Value *X; ConstantInt *C1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
+ Value *Add = Builder->CreateMul(X, CI, "tmp");
+ return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
+ }
+ }
+
+ // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
+ // (Y + const) * (-(2**n)) -> (-const - Y) * (2**n), for positive nonzero n
+ // The "* (2**n)" thus becomes a potential shifting opportunity.
+ {
+ const APInt & Val = CI->getValue();
+ const APInt &PosVal = Val.abs();
+ if (Val.isNegative() && PosVal.isPowerOf2()) {
+ Value *X = 0, *Y = 0;
+ if (Op0->hasOneUse()) {
+ ConstantInt *C1;
+ Value *Sub = 0;
+ if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
+ Sub = Builder->CreateSub(X, Y, "suba");
+ else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
+ Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+ if (Sub)
+ return
+ BinaryOperator::CreateMul(Sub,
+ ConstantInt::get(Y->getType(), PosVal));
+ }
+ }
+ }
+ }
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1))
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1C = Op1;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
+ }
+ Value *Neg = dyn_castNegVal(Op1C);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ // If the division is exact, X % Y is zero, so we end up with X or -X.
+ if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return ReplaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
+ else
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1C)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ // i1 mul -> i1 and.
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op1, Y);
+ if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op0, Y);
+ }
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
+ if (!I.getType()->isVectorTy()) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = 0, *OtherOp = 0;
+ if (MaskedValueIsZero(Op0, Negative2))
+ BoolCast = Op0, OtherOp = Op1;
+ else if (MaskedValueIsZero(Op1, Negative2))
+ BoolCast = Op1, OtherOp = Op0;
+
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast, "tmp");
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+
+ return Changed ? &I : 0;
+}
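
// Illustration only, not part of the patch: two of the integer identities the
// visitor above relies on, checked with ordinary unsigned arithmetic. The
// helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void mulIdentityChecks(uint32_t X, unsigned C, bool Flag) {
  assert(C < 32);
  // X * 2^C is the same as X << C (both modulo 2^32).
  assert(X * (uint32_t(1) << C) == (X << C));
  // X * Y, where Y is 0 or 1, is the same as X & (0 - Y).
  uint32_t Y = Flag ? 1u : 0u;
  assert(X * Y == (X & (0u - Y)));
}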
+
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Simplify mul instructions with a constant RHS...
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
+ // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
+ // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
+ } else if (Op1C->getType()->isVectorTy()) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we can replace the operand of the div/rem with 'Y' without a
+ // problem. However, the select, or the condition of the select, may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and its condition each have only a single use, there is
+ // nothing else to update; exit early.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+ ConstantInt::getFalse(BBI->getContext());
+ Worklist.Add(BBI);
+ }
+ }
+
+ // If we passed the instruction, quit looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+
+ }
+ return true;
+}
+
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // (X / C1) / C2 -> X / (C1*C2)
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+ if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+ if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+ if (MultiplyOverflows(RHS, LHSRHS,
+ I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
+ }
+
+ if (!RHS->isZero()) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // See if we can fold away this div instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+ Value *X = 0, *Z = 0;
+ if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+ (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+ return BinaryOperator::Create(I.getOpcode(), X, Op1);
+ }
+
+ return 0;
+}
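
// Illustration only, not part of the patch: the two arithmetic facts used by
// the common divide transforms above, spelled out for unsigned 32-bit values.
// The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void idivIdentityChecks(uint32_t X, uint32_t C1, uint32_t C2) {
  assert(C1 != 0 && C2 != 0);
  // (X - (X % C1)) / C1 == X / C1, because X - (X % C1) is the largest
  // multiple of C1 that does not exceed X.
  assert((X - (X % C1)) / C1 == X / C1);
  // (X / C1) / C2 == X / (C1 * C2), provided the product does not wrap;
  // when it would wrap, the quotient is known to be zero, which is what the
  // MultiplyOverflows path above folds to.
  if (C2 <= UINT32_MAX / C1)
    assert((X / C1) / C2 == X / (C1 * C2));
}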
+
+/// dyn_castZExtVal - Checks if V is a zext or constant that can
+/// be truncated to Ty without losing bits.
+static Value *dyn_castZExtVal(Value *V, const Type *Ty) {
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
+ if (Z->getSrcTy() == Ty)
+ return Z->getOperand(0);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
+ if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
+ return ConstantExpr::getTrunc(C, Ty);
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ // X udiv 2^C -> X >> C
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+ BinaryOperator *LShr =
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+ }
+
+ // X udiv C, where C >= signbit
+ if (C->getValue().isNegative()) {
+ Value *IC = Builder->CreateICmpULT(Op0, C);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+ }
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ { const APInt *CI; Value *N;
+ if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
+ if (*CI != 1)
+ N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
+ "tmp");
+ if (I.isExact())
+ return BinaryOperator::CreateExactLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
+ }
+ }
+
+ // udiv X, (select Cond, C1, C2)
+ // --> select Cond, (lshr X, log2(C1)), (lshr X, log2(C2))
+ // where C1 and C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ // Construct the "on true" case of the select
+ Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+ I.isExact());
+
+ // Construct the "on false" case of the select
+ Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+ I.isExact());
+
+ // Construct the select instruction and return it.
+ return SelectInst::Create(Cond, TSI, FSI);
+ }
+ }
+
+ // (zext A) udiv (zext B) --> zext (A udiv B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
+ I.isExact()),
+ I.getType());
+
+ return 0;
+}
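
// Illustration only, not part of the patch: why an unsigned divide by a power
// of two is a logical shift, and why dividing by any constant with the sign
// bit set can only produce 0 or 1. The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void udivIdentityChecks(uint32_t X, unsigned C) {
  assert(C < 32);
  // X udiv 2^C == X >> C.
  assert(X / (uint32_t(1) << C) == (X >> C));
  // If the divisor D has its sign bit set (D >= 2^31), the quotient is at
  // most 1, so it is exactly (X <u D) ? 0 : 1.
  uint32_t D = 0x80000000u | C;
  assert(X / D == (X < D ? 0u : 1u));
}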
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // sdiv X, -1 == -X
+ if (RHS->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv X, C --> ashr exact X, log2(C)
+ if (I.isExact() && RHS->getValue().isNonNegative() &&
+ RHS->getValue().isPowerOf2()) {
+ Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+ RHS->getValue().exactLogBase2());
+ return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
+ }
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+ if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
+ return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+ ConstantExpr::getNeg(RHS));
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ const APFloat &Op1F = Op1C->getValueAPF();
+
+ // If the divisor has an exact multiplicative inverse we can turn the fdiv
+ // into a cheaper fmul.
+ APFloat Reciprocal(Op1F.getSemantics());
+ if (Op1F.getExactInverse(&Reciprocal)) {
+ ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Op0, RFP);
+ }
+ }
+
+ return 0;
+}
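
// Illustration only, not part of the patch: the reciprocal rewrite is only
// value-preserving when the inverse of the divisor is exactly representable,
// e.g. 1/4 but not 1/3. The helper name is hypothetical.
#include <cassert>

static void fdivReciprocalCheck(double X) {
  if (X != X)
    return;                 // skip NaN, which never compares equal to itself
  // 0.25 is the exact inverse of 4.0, so the fmul gives the same result.
  assert(X / 4.0 == X * 0.25);
  // 1.0/3.0 is rounded, so X / 3.0 and X * (1.0/3.0) may differ in the last
  // bit; divisors without an exact inverse are left alone above.
}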
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (isa<ConstantInt>(Op1)) {
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyURemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ // X urem C -> X & (C-1), where C is a power of two
+ { const APInt *C;
+ if (match(Op1, m_Power2(C)))
+ return BinaryOperator::CreateAnd(Op0,
+ ConstantInt::get(I.getType(), *C-1));
+ }
+
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+
+ // urem X, (select Cond, C1, C2) -->
+ // select Cond, (and X, C1-1), (and X, C2-1)
+ // where C1 and C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
+ return SelectInst::Create(Cond, TrueAnd, FalseAnd);
+ }
+ }
+
+ // (zext A) urem (zext B) --> zext (A urem B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ I.getType());
+
+ return 0;
+}
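
// Illustration only, not part of the patch: the mask identity behind the
// power-of-two urem folds above. The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void uremIdentityCheck(uint32_t X, unsigned K) {
  assert(K < 32);
  uint32_t P = uint32_t(1) << K;   // any power of two
  // X urem P == X & (P - 1).
  assert(X % P == (X & (P - 1)));
}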
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer rem common cases
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
+
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+ // X % -Y -> X % Y
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, RHSNeg);
+ return &I;
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+ unsigned VWidth = RHSV->getNumOperands();
+
+ bool hasNegative = false;
+ for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ if (RHS->isNegative())
+ hasNegative = true;
+
+ if (hasNegative) {
+ std::vector<Constant *> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ if (RHS->isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ else
+ Elts[i] = RHS;
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != RHSV) {
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return 0;
+}
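
// Illustration only, not part of the patch: srem takes its sign from the
// dividend, which is what justifies rewriting X % -Y as X % Y above. The
// helper name is hypothetical; the guards skip cases that are undefined in
// C++.
#include <cassert>
#include <climits>

static void sremIdentityCheck(int X, int Y) {
  if (Y == 0 || Y == INT_MIN)
    return;                              // division by zero, or -Y overflows
  if (X == INT_MIN && (Y == 1 || Y == -1))
    return;                              // INT_MIN % -1 overflows
  // Negating the divisor does not change the remainder.
  assert(X % Y == X % -Y);
}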
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
new file mode 100644
index 000000000000..37773403490c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -0,0 +1,900 @@
+//===- InstCombinePHI.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitPHINode function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the add's all have a single use, turn this into a phi
+/// and a single binop.
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ const Type *LHSType = LHSVal->getType();
+ const Type *RHSType = RHSVal->getType();
+
+ bool isNUW = false, isNSW = false, isExact = false;
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ // Verify type of the LHS matches so we don't fold cmp's of different
+ // types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return 0;
+
+ // If they are CmpInst instructions, check their predicates
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
+ return 0;
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = 0;
+ if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ }
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return 0;
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = 0, *NewRHS = 0;
+ if (LHSVal == 0) {
+ NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (RHSVal == 0) {
+ NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+ BinaryOperator *NewBinOp =
+ BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ if (isNUW) NewBinOp->setHasNoUnsignedWrap();
+ if (isNSW) NewBinOp->setHasNoSignedWrap();
+ if (isExact) NewBinOp->setIsExact();
+ NewBinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return NewBinOp;
+}
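
// Illustration only, not part of the patch: the same reshaping at the C level.
// Instead of merging the results of two adds that share an operand, merge the
// differing operand once and add after the join, which is what the fold above
// does to the IR. The helper names are hypothetical.
static unsigned phiBinOpBefore(bool Cond, unsigned A, unsigned B, unsigned C) {
  return Cond ? (A + B) : (A + C);   // two adds feeding the PHI
}

static unsigned phiBinOpAfter(bool Cond, unsigned A, unsigned B, unsigned C) {
  unsigned BC = Cond ? B : C;        // one PHI of the differing operand
  return A + BC;                     // a single add after the PHI
}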
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
+
+ bool AllInBounds = true;
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return 0;
+
+ AllInBounds &= GEP->isInBounds();
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+ // Don't merge two GEPs when two operands differ (introducing phi nodes)
+ // if one of the PHIs has a constant for the index. The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return 0;
+
+ if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ return 0;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return 0;
+
+ FixedOperands[op] = 0; // Needs a PHI.
+ NeededPhi = true;
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ GetElementPtrInst *NewGEP =
+ GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+ FixedOperands.end());
+ if (AllInBounds) NewGEP->setIsInBounds();
+ NewGEP->setDebugLoc(FirstInst->getDebugLoc());
+ return NewGEP;
+}
+
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca. Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ // Check for non-address taken alloca. If not address-taken already, it isn't
+ // profitable to do this xform.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (isa<LoadInst>(U)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+ unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return 0;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ LI->getPointerAddressSpace() != LoadAddrSpace ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return 0;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+ NewLI->setDebugLoc(FirstLI->getDebugLoc());
+ return NewLI;
+}
+
+
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = 0;
+ const Type *CastSrcTy = 0;
+ bool isNUW = false, isNSW = false, isExact = false;
+
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ return 0;
+ }
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (ConstantOp == 0)
+ return FoldPHIArgBinOpIntoPHI(PN);
+
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+ } else {
+ return 0; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return 0;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return 0; // Cast operation must match.
+ } else if (I->getOperand(1) != ConstantOp) {
+ return 0;
+ }
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
+ CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,
+ PN.getType());
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+ BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (isNUW) BinOp->setHasNoUnsignedWrap();
+ if (isNSW) BinOp->setHasNoSignedWrap();
+ if (isExact) BinOp->setIsExact();
+ BinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return BinOp;
+ }
+
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Op = PN->getIncomingValue(i);
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is an ordered list of the PHIs
+ // that we need to check the uses of (to ensure they are all extracts), and
+ // PHIsInspected is a set we use to avoid revisiting PHIs.
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be able to split the
+ // critical edge which is required to insert a truncate. Because of this, we
+ // have to bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // The incoming value is an invoke defined directly in the predecessor, so
+ // the truncate would have to be inserted on a critical edge. Since we
+ // can't split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must all be self uses; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
+ PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // have already rewritten, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
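
// Illustration only, not part of the patch: the slicing idea at the C level.
// Rather than merging one illegally wide value and extracting a piece from the
// merge, each extracted piece gets its own narrow merge. The helper names are
// hypothetical.
#include <cstdint>

static uint32_t slicedPhiBefore(bool Cond, uint64_t A, uint64_t B) {
  uint64_t Wide = Cond ? A : B;               // one wide PHI
  return static_cast<uint32_t>(Wide >> 32);   // trunc(lshr(Wide, 32))
}

static uint32_t slicedPhiAfter(bool Cond, uint64_t A, uint64_t B) {
  uint32_t AHi = static_cast<uint32_t>(A >> 32);  // extract in each predecessor
  uint32_t BHi = static_cast<uint32_t>(B >> 32);
  return Cond ? AHi : BHi;                        // a narrow PHI of the piece
}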
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ if (Value *V = SimplifyInstruction(&PN, TD))
+ return ReplaceInstUsesWith(PN, V);
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ // FIXME: The hasOneUse check will fail for PHIs that use the value in more
+ // than one of their incoming slots, even when nothing else uses it.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->use_back() == &PN) {
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+ // quick check to see if the PHI node only contains a single non-phi value, if
+ // so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumIncomingVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumIncomingVals) {
+ Value *NonPhiInVal = PN.getIncomingValue(InValNo);
+
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumIncomingVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return ReplaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (PN.getType()->isIntegerTy() && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 000000000000..5733c20828c6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,876 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
+/// returning the kind and providing the out parameter results if we
+/// successfully match.
+static SelectPatternFlavor
+MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (SI == 0) return SPF_UNKNOWN;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (ICI == 0) return SPF_UNKNOWN;
+
+ LHS = ICI->getOperand(0);
+ RHS = ICI->getOperand(1);
+
+ // (icmp X, Y) ? X : Y
+ if (SI->getTrueValue() == ICI->getOperand(0) &&
+ SI->getFalseValue() == ICI->getOperand(1)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (SI->getTrueValue() == ICI->getOperand(1) &&
+ SI->getFalseValue() == ICI->getOperand(0)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
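+
+// As an illustration of MatchSelectPattern (hypothetical IR, not part of the
+// original change):
+//   %cmp = icmp sgt i32 %a, %b
+//   %max = select i1 %cmp, i32 %a, i32 %b
+// matches as SPF_SMAX with LHS = %a and RHS = %b.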
+
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("This cannot happen!");
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
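+
+// As an illustration of the two helpers above (hypothetical IR, not part of
+// the original change):
+//   %C = and i32 %A, %B
+//   %D = select i1 %cond, i32 %C, i32 %A
+// can become
+//   %C = select i1 %cond, i32 %B, i32 -1   ; -1 is the identity for 'and'
+//   %D = and i32 %A, %C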
+
+/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// have the same opcode and only one use each. Try to simplify this.
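+/// For example (illustrative):
+///   select %c, (add %X, %Y), (add %X, %Z) --> add %X, (select %c, %Y, %Z)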
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ if (TI->getNumOperands() == 1) {
+ // If this is a non-volatile load or a cast from the same type,
+ // merge.
+ if (TI->isCast()) {
+ if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ return 0;
+ } else {
+ return 0; // unknown unary op.
+ }
+
+ // Fold this by inserting a select from the input values.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+ return 0;
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+ return false;
+ return C1I->isOne() || C1I->isAllOnesValue() ||
+ C2I->isOne() || C2I->isAllOnesValue();
+}
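+
+// For example (illustrative): isSelect01 accepts the constant pairs (0, 1),
+// (0, -1), (1, 0) and (-1, 0), but rejects pairs such as (0, 2) or (3, 5).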
+
+/// FoldSelectIntoOp - Try fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+ NewSel->takeName(TVI);
+ BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
+ BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
+ FalseVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(TVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap());
+ }
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+ NewSel->takeName(FVI);
+ BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
+ BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
+ TrueVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(FVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap());
+ }
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
+/// replaced with RepOp.
+static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const TargetData *TD) {
+ // Trivial replacement.
+ if (V == Op)
+ return RepOp;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return 0;
+
+ // If this is a binary operator, try to simplify it with the replaced op.
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
+ if (B->getOperand(0) == Op)
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ if (B->getOperand(1) == Op)
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ }
+
+ // Same for CmpInsts.
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ if (C->getOperand(0) == Op)
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ if (C->getOperand(1) == Op)
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ }
+
+ // TODO: We could hand off more cases to instsimplify here.
+
+ // If all operands are constant after substituting Op for RepOp then we can
+ // constant fold the instruction.
+ if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
+ // Build a list of all constant operands.
+ SmallVector<Constant*, 8> ConstOps;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (I->getOperand(i) == Op)
+ ConstOps.push_back(CRepOp);
+ else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
+ ConstOps.push_back(COp);
+ else
+ break;
+ }
+
+ // All operands were constants, fold it.
+ if (ConstOps.size() == I->getNumOperands())
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ ConstOps.data(), ConstOps.size(), TD);
+ }
+
+ return 0;
+}
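+
+// Illustrative use of SimplifyWithOpReplaced (hypothetical IR, not part of
+// the original change): given
+//   %a = and i32 %x, 7
+//   %c = icmp eq i32 %x, 0
+//   %s = select i1 %c, i32 0, i32 %a
+// replacing %x with 0 in the false arm simplifies 'and i32 0, 7' to 0, which
+// equals the true arm, so visitSelectInstWithICmp can replace %s with %a.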
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may move or edit ICI
+ // here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ // X < MIN ? T : F --> F
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
+ && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > MAX ? T : F --> F
+ else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
+ && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // These transformations only work for selects over integers.
+ const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ if (!SelectTy)
+ break;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+ else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+ ; // Nothing to do here. Values match without any sign/zero extension.
+
+        // Types do not match. Instead of calculating this with mixed types,
+        // promote all to the larger type. This enables scalar evolution to
+        // analyze this expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits()
+ < SelectTy->getBitWidth()) {
+ Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = sextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = sextRHS;
+ } else if (ICI->isUnsigned()) {
+ Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = zextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = zextRHS;
+ } else
+ break;
+ } else
+ break;
+ } else
+ break;
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(0, CmpLHS);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+
+ // Move ICI instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the ICI instruction uses it.
+ ICI->moveBefore(&SI);
+
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+ // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+ // FIXME: Type and constness constraints could be lifted, but we have to
+ // watch code size carefully. We should consider xor instead of
+ // sub/add when we decide to do that.
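+  // Illustrative instance (hypothetical): with i32 operands,
+  //   (X >s -1) ? 3 : 7  -->  ((X >>s 31) & 4) + 3
+  // since the arithmetic shift produces 0 for non-negative X and -1 for
+  // negative X.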
+ if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (TrueVal->getType() == Ty) {
+ if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+ ConstantInt *C1 = NULL, *C2 = NULL;
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ C1 = dyn_cast<ConstantInt>(TrueVal);
+ C2 = dyn_cast<ConstantInt>(FalseVal);
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ C1 = dyn_cast<ConstantInt>(FalseVal);
+ C2 = dyn_cast<ConstantInt>(TrueVal);
+ }
+ if (C1 && C2) {
+ // This shift results in either -1 or 0.
+ Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+ // Check if we can express the operation with a single or.
+ if (C2->isAllOnesValue())
+ return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+ Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+ return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ }
+ }
+ }
+ }
+
+ // If we have an equality comparison then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+
+ // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ if (isa<Constant>(CmpRHS)) {
+ if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
+ // Transform (X == C) ? X : Y -> (X == C) ? C : Y
+ SI.setOperand(1, CmpRHS);
+ Changed = true;
+ } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
+ // Transform (X != C) ? Y : X -> (X != C) ? Y : C
+ SI.setOperand(2, CmpRHS);
+ Changed = true;
+ }
+ }
+
+ return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+  // Otherwise we have a 'hard' case and we can't tell without doing more
+  // detailed dominator-based analysis; punt.
+ return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (C == A || C == B) {
+    // MAX(MAX(A, B), B) -> MAX(A, B)
+    // MIN(MIN(A, B), A) -> MIN(A, B)
+ if (SPF1 == SPF2)
+ return ReplaceInstUsesWith(Outer, Inner);
+
+    // MAX(MIN(A, B), A) -> A
+    // MIN(MAX(A, B), A) -> A
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return ReplaceInstUsesWith(Outer, C);
+ }
+
+ // TODO: MIN(MIN(A, 23), 97)
+ return 0;
+}
+
+
+/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
+/// both be) and we have an icmp instruction with zero, and we have an 'and'
+/// with the non-constant value and a power of two, we can turn the select
+/// into a shift on the result of the 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+ ConstantInt *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ if (!match(IC->getOperand(1), m_Zero()))
+ return 0;
+
+ ConstantInt *AndRHS;
+ Value *LHS = IC->getOperand(0);
+ if (LHS->getType() != SI.getType() ||
+ !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ return 0;
+
+  // If both select arms are non-zero, see if we have a select of the form
+ // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+ // for 'x ? 2^n : 0' and fix the thing up at the end.
+ ConstantInt *Offset = 0;
+ if (!TrueVal->isZero() && !FalseVal->isZero()) {
+ if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ Offset = FalseVal;
+ else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ Offset = TrueVal;
+ else
+ return 0;
+
+ // Adjust TrueVal and FalseVal to the offset.
+ TrueVal = ConstantInt::get(Builder->getContext(),
+ TrueVal->getValue() - Offset->getValue());
+ FalseVal = ConstantInt::get(Builder->getContext(),
+ FalseVal->getValue() - Offset->getValue());
+ }
+
+ // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+ if (!AndRHS->getValue().isPowerOf2() ||
+ (!TrueVal->getValue().isPowerOf2() &&
+ !FalseVal->getValue().isPowerOf2()))
+ return 0;
+
+ // Determine which shift is needed to transform result of the 'and' into the
+ // desired result.
+ ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC->getValue().logBase2();
+ unsigned AndZeros = AndRHS->getValue().logBase2();
+
+ Value *V = LHS;
+ if (ValZeros > AndZeros)
+ V = Builder->CreateShl(V, ValZeros - AndZeros);
+ else if (ValZeros < AndZeros)
+ V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+ // Okay, now we know that everything is set up, we just don't know whether we
+  // have an icmp_ne or icmp_eq and whether the true or false val is the zero.
+ bool ShouldNotVal = !TrueVal->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ValC);
+
+ // Apply an offset if needed.
+ if (Offset)
+ V = Builder->CreateAdd(V, Offset);
+ return V;
+}
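+
+// Illustrative example of foldSelectICmpAnd (hypothetical IR, not part of the
+// original change):
+//   %t = and i32 %x, 8
+//   %c = icmp eq i32 %t, 0
+//   %s = select i1 %c, i32 0, i32 2
+// folds to '%s = lshr i32 %t, 2', which yields 0 when bit 3 of %x is clear
+// and 2 when it is set.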
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD))
+ return ReplaceInstUsesWith(SI, V);
+
+ if (SI.getType()->isIntegerTy(1)) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+ if (C->getZExtValue()) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ }
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+      if (C->isZero()) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+
+ // select a, b, a -> a&b
+ // select a, a, b -> a|b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ else if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+
+ // Selecting between two integer constants?
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1)
+ return new ZExtInst(CondVal, SI.getType());
+
+ // select C, -1, 0 -> sext C to int
+ if (FalseValC->isZero() && TrueValC->isAllOnesValue())
+ return new SExtInst(CondVal, SI.getType());
+
+ // select C, 0, 1 -> zext !C to int
+ if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new ZExtInst(NotCond, SI.getType());
+ }
+
+ // select C, 0, -1 -> sext !C to int
+ if (TrueValC->isZero() && FalseValC->isAllOnesValue()) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new SExtInst(NotCond, SI.getType());
+ }
+
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
+ if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
+ if (TI->hasOneUse() && FI->hasOneUse()) {
+ Instruction *AddOp = 0, *SubOp = 0;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ if (TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
+ // even legal for FP.
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
+ AddOp = FI; SubOp = TI;
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
+ AddOp = TI; SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = 0;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (SI.getType()->isFPOrFPVectorTy()) {
+ NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
+ } else {
+ NegVal = Builder->CreateNeg(SubOp->getOperand(1));
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Value *NewSel =
+ Builder->CreateSelect(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
+
+ if (SI.getType()->isFPOrFPVectorTy())
+ return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+ else
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ }
+
+ // See if we can fold the select into one of our operands.
+ if (SI.getType()->isIntegerTy()) {
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ return FoldI;
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ // ABS(ABS(X)) -> ABS(X)
+ }
+
+ // See if we can fold the select into a phi node if the condition is a select.
+ if (isa<PHINode>(SI.getCondition()))
+    // The true/false values have to be live in the PHI's predecessor blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
+ if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+ if (TrueSI->getCondition() == CondVal) {
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ }
+ if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+ if (FalseSI->getCondition() == CondVal) {
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ }
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
new file mode 100644
index 000000000000..811f94976f68
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -0,0 +1,753 @@
+//===- InstCombineShifts.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitShl, visitLShr, and visitAShr functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // See if we can fold away this shift.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+
+  // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2,
+  // because shifts by negative values (which could occur if A were negative)
+  // are undefined.
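+  // For example (illustrative): 'shl i32 %X, (srem i32 %A, 8)' becomes
+  // 'shl i32 %X, (and i32 %A, 7)' when the srem has no other users.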
+ Value *A; const APInt *B;
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+ // demand the sign bit (and many others) here??
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+ Op1->getName());
+ I.setOperand(1, Rem);
+ return &I;
+ }
+
+ return 0;
+}
+
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI = 0;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+    // Bitwise operators can always be evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = TypeWidth - CI->getZExtValue();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this value inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+    // Bitwise operators can always be evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+ case Instruction::LShr: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
+
+
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+
+ // See if we can propagate this shift into the input, this covers the trivial
+ // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+  // shl i32 X, 32 = 0 and lshr i8 Y, 9 = 0, ... just don't eliminate
+  // a signed shift.
+ //
+ if (Op1->uge(TypeBits)) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ // ashr i32 X, 32 --> ashr i32 X, 31
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+ return &I;
+ }
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+ m_ConstantInt(CC))) &&
+ cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_Shr(m_Value(V1), m_Value(V2)),
+ m_ConstantInt(CC))) && V2 == Op1 &&
+ cast<BinaryOperator>(Op0BO->getOperand(0))
+ ->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+      // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = 0;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+
+ const IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ // If this is oversized composite shift, then unsigned shifts get 0, ashr
+ // saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X >>? C) << C), turn this into X & (-1 << C).
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
+ ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+ I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+ TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *V = commonShiftTransforms(I))
+ return V;
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the shifted-out value is known-zero, then this is a NUW shift.
+ if (!I.hasNoUnsignedWrap() &&
+ MaskedValueIsZero(I.getOperand(0),
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+ I.setHasNoUnsignedWrap();
+ return &I;
+ }
+
+ // If the shifted out value is all signbits, this is a NSW shift.
+ if (!I.hasNoSignedWrap() &&
+ ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+ I.setHasNoSignedWrap();
+ return &I;
+ }
+ }
+
+ // (C1 << A) << C2 -> (C1 << C2) << A
+ Constant *C1, *C2;
+ Value *A;
+ if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
+ match(I.getOperand(1), m_Constant(C2)))
+ return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ // ctlz.i32(x)>>5 --> zext(x == 0)
+ // cttz.i32(x)>>5 --> zext(x == 0)
+ // ctpop.i32(x)>>5 --> zext(x == -1)
+ if ((II->getIntrinsicID() == Intrinsic::ctlz ||
+ II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctpop) &&
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
+ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
+ Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ return new ZExtInst(Cmp, II->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
+ // have a sign-extend idiom.
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
+ // If the left shift is just shifting out partial signbits, delete the
+ // extension.
+ if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return ReplaceInstUsesWith(I, X);
+
+ // If the input is an extension from the shifted amount value, e.g.
+ // %x = zext i8 %A to i32
+ // %y = shl i32 %x, 24
+ // %z = ashr %y, 24
+ // then turn this into "z = sext i8 A to i32".
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) {
+ uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits();
+ uint32_t DestBits = ZI->getType()->getScalarSizeInBits();
+ if (Op1C->getZExtValue() == DestBits-SrcBits)
+ return new SExtInst(ZI->getOperand(0), ZI->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, Op1);
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
new file mode 100644
index 000000000000..8fea8eb7efb5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -0,0 +1,1153 @@
+//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains logic for simplifying instructions based on information
+// about how they are used.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/IntrinsicInst.h"
+
+using namespace llvm;
+
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+ return true;
+}
+
+
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about. See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+ KnownZero, KnownOne, 0);
+ if (V == 0) return false;
+ if (V == &Inst) return true;
+ ReplaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place. It returns
+/// true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+ KnownZero, KnownOne, Depth);
+ if (NewVal == 0) return false;
+ U = NewVal;
+ return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
+/// value based on the demanded bits. When this function is called, it is known
+/// that only the bits set in DemandedMask of the result of V are ever used
+/// downstream. Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function returns the simpler value. In all other cases, it analyzes the
+/// expression and sets KnownOne to all the bits that are known to be one in
+/// the expression and KnownZero to all the bits that are known to be zero in
+/// the expression. These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
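+///
+/// Illustrative example (with placeholder IR values): if V is
+/// "or i8 %x, -16" and DemandedMask is 0x0F, the bits contributed by the
+/// constant are never used, so V can be replaced by %x in that context.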
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ const Type *VTy = V->getType();
+ assert((TD || !VTy->isPointerTy()) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVectorTy() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clearAllBits();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
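+ // For example (hypothetical values): if %a = and i8 %x, %y has one user
+ // demanding only bit 0 and another demanding only bit 7, simplifying %a's
+ // operands for just one of those masks could break the other user.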
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in all User's
+ // context, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero = RHSKnownZero | LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne = RHSKnownOne | LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // all known
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ return InsertNewInstWith(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ InsertNewInstWith(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor =
+ BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ return InsertNewInstWith(NewXor, *I);
+ }
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
+ break;
+ }
+ case Instruction::Select:
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
+ return 0; // vector->int or fp->int?
+
+ if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (const VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return 0;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return 0;
+ } else if (I->getOperand(0)->getType()->isVectorTy())
+ // Don't touch a vector-to-scalar bitcast.
+ return 0;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.setBit(SrcBitWidth-1);
+
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded,
+ // convert this into a zero extension.
+ if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstWith(NewCast, *I);
+ } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ KnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+ // Figure out what the input bits are. If the top bits of the and result
+ // are not demanded, then the add doesn't demand them from its input
+ // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // A zero constant should have been simplified elsewhere; some of the xforms here
+ // won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ KnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this ADD to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this SUB to demand the most
+ // significant bit and all those below it.
+ uint32_t NLZ = DemandedMask.countLeadingZeros();
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // Logical (unsigned) shift right.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Unsigned shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), SA, I->getName());
+ return InsertNewInstWith(NewVal, *I);
+ } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X % -1 demands all the bits because we don't want to introduce
+ // INT_MIN % -1 (== undef) by accident.
+ if (Rem->isAllOnesValue())
+ break;
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
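+ // Illustrative example: for "srem i32 %x, 8", LowBits is 7 and only the
+ // low three bits of %x, plus its sign bit, can affect the result.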
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // The low bits of LHS are unchanged by the srem.
+ KnownZero = LHSKnownZero & LowBits;
+ KnownOne = LHSKnownOne & LowBits;
+
+ // If LHS is non-negative or has all low bits zero, then the upper bits
+ // are all zero.
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If LHS is negative and not all low bits are zero, then the upper bits
+ // are all one.
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+ APInt Mask2 = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return I;
+
+ unsigned Leaders = KnownZero2.countLeadingOnes();
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
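+ // Illustrative example: with an i32 bswap and DemandedMask == 0x0000FF00,
+ // NLZ is 16 and NTZ is 8, so exactly one byte is needed: it comes from bits
+ // 16..23 of the input, and the bswap can be replaced by a lshr of 8.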
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ return 0;
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, KnownOne);
+ return 0;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller. This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified, then the resultant value is
+/// returned. This returns null if no change was made.
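+/// For example (illustrative), if V is the constant vector
+/// <i32 1, i32 2, i32 3, i32 4> and only element 0 is demanded, the three
+/// undemanded lanes are reported in UndefElts and a new constant with undef
+/// in those lanes is returned.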
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return 0;
+ }
+
+ if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i)
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(CV->getOperand(i));
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCP = ConstantVector::get(Elts);
+ return NewCP != CV ? NewCP : 0;
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
+ // Simplify the CAZ to a ConstantVector where the non-demanded elements are
+ // set to undef.
+
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Zero = Constant::getNullValue(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = DemandedElts[i] ? Zero : Undef;
+ Elts.push_back(Elt);
+ }
+ UndefElts = DemandedElts ^ EltMask;
+ return ConstantVector::get(Elts);
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return 0;
+
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return 0;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+ // If this is a variable index, we don't know which element it overwrites.
+ // In that case, demand exactly the same input elements as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (Idx == 0) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clearBit(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.setBit(MaskVal);
+ else
+ RightDemanded.setBit(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.setBit(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ else
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.setBit(OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.setBit(InIdx);
+ }
+
+ // Simplify the source operand based on the elements demanded of the cast.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.setBit(OutIdx);
+ } else if (VWidth < InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operations work element-wise, so each operand only needs the result's
+ // demanded elements. (div/rem would demand all elements to avoid dividing by zero.)
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Binary vector operations that work column-wise. A dest element is a
+ // function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ // TODO: Lower MIN/MAX/ABS/etc
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
+ // Extract the element as scalars.
+ LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+ RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+ II->getName());
+ InsertNewInstWith(New, *II);
+ return New;
+ }
+ }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 000000000000..ad6a8d054ee7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,575 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+using namespace llvm;
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
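+/// For example (illustrative), extracting a lane of a single-use
+/// "add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>" is cheap, since it can be
+/// rewritten as a scalar add of the extracted lane of %v and 1.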
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (isa<ConstantAggregateZero>(V))
+ return true;
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (isConstant) return true;
+ // If all elts are the same, we can extract.
+ Constant *Op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != Op0)
+ return false;
+ return true;
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // An insertelement is simplified to the inserted element (or removed) when
+ // this is a constant-index extract and it is a constant-index insertelement.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (CheapToScalarize(CI->getOperand(0), isConstant) ||
+ CheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// getShuffleMask - Read and decode a shufflevector mask.
+/// Turn undef elements into negative values.
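+/// For example (illustrative), the mask <i32 0, i32 undef, i32 5> decodes to
+/// {0, -1, 5}.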
+static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
+ unsigned NElts = SVI->getType()->getNumElements();
+ if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+ return std::vector<int>(NElts, 0);
+ if (isa<UndefValue>(SVI->getOperand(2)))
+ return std::vector<int>(NElts, -1);
+
+ std::vector<int> Result;
+ const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
+ Result.push_back(-1); // undef
+ else
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+ return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
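+/// For example (illustrative), if V is
+/// "insertelement <4 x i32> %w, i32 %s, i32 1", then asking for element 1
+/// returns %s, while asking for element 0 recurses into %w.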
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ const VectorType *PTy = cast<VectorType>(V->getType());
+ unsigned Width = PTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(PTy->getElementType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(PTy->getElementType());
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(PTy->getElementType());
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
+ return CP->getOperand(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return 0;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ int InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < 0)
+ return UndefValue::get(PTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ // If vector val is undef, replace extract with scalar undef.
+ if (isa<UndefValue>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // If vector val is constant 0, replace extract with scalar 0.
+ if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
+ Constant *op0 = C->getOperand(0);
+ for (unsigned i = 1; i != C->getNumOperands(); ++i)
+ if (C->getOperand(i) != op0) {
+ op0 = 0;
+ break;
+ }
+ if (op0)
+ return ReplaceInstUsesWith(EI, op0);
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 0);
+ DemandedMask.setBit(IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+ DemandedMask, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+ return ReplaceInstUsesWith(EI, Elt);
+
+ // If this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (const VectorType *VT =
+ dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ Value *Src;
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < 0)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ if (SrcIdx < (int)LHSWidth)
+ Src = SVI->getOperand(0);
+ else {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ }
+ const Type *Int32Ty = Type::getInt32Ty(EI.getContext());
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Int32Ty,
+ SrcIdx, false));
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Canonicalize extractelement(cast) -> cast(extractelement)
+ // bitcasts can change the number of vector elements and they cost nothing
+ if (CI->hasOneUse() && EI.hasOneUse() &&
+ (CI->getOpcode() != Instruction::BitCast)) {
+ Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+ EI.getIndexOperand());
+ return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
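+/// For example (illustrative), if V inserts element 0 extracted from RHS into
+/// lane 2 of a four-element LHS, the mask becomes <0, 1, 4, 3> (lane 2 taken
+/// from the RHS side) and the function returns true.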
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return true;
+ }
+
+ if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return true;
+ }
+
+ if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ i+NumElts));
+ return true;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx+NumElts);
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction if it is not null. Fill in Mask with the
+/// shuffle mask that computes V, and return the value to use as the LHS of
+/// the shuffle.
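+/// For example (illustrative), for a chain of insertelements that writes
+/// lanes extracted from some vector %src into an undef vector, RHS becomes
+/// %src, the returned LHS is undef, and Mask selects the written lanes from
+/// the RHS side.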
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+ Value *&RHS) {
+ assert(V->getType()->isVectorTy() &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return V;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+ return V;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef value, or inserting into an undefined position, can be
+ // folded to the original vector; remove this instruction.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to) into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ std::vector<Constant*> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
+ }
+ }
+ }
+
+ unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (V != &IE)
+ return ReplaceInstUsesWith(IE, V);
+ return &IE;
+ }
+
+ return 0;
+}
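+
+// Illustrative example of the insert-of-extract transform above (the vectors
+// %A and %B and the element type are hypothetical, not taken from a test):
+//   %e = extractelement <4 x i32> %B, i32 2
+//   %r = insertelement <4 x i32> %A, i32 %e, i32 0
+// becomes, provided %r is not itself consumed by another insertelement,
+//   %r = shufflevector <4 x i32> %A, <4 x i32> %B,
+//                      <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+// where mask element 6 selects element 2 of the second operand.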
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ std::vector<int> Mask = getShuffleMask(&SVI);
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+ return 0;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return ReplaceInstUsesWith(SVI, V);
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask').
+ // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ return ReplaceInstUsesWith(SVI, LHS);
+ }
+
+ // Remap any references to RHS to use LHS.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0)
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ else {
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
+ }
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Analyze the shuffle: is it an identity shuffle of the LHS or RHS value?
+ bool isLHSID = true, isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle. Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is either a splat or one of the
+ // two input shuffle masks. In this case, merging the shuffles just removes
+ // one instruction, which we know is safe. This is good for things like
+ // turning: (splat(splat)) -> splat.
+ if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ if (isa<UndefValue>(RHS)) {
+ std::vector<int> LHSMask = getShuffleMask(LHSSVI);
+
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<int> NewMask;
+ bool isSplat = true;
+ int SplatElt = -1; // undef
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int MaskElt;
+ if (Mask[i] < 0 || Mask[i] >= (int)e)
+ MaskElt = -1; // undef
+ else
+ MaskElt = LHSMask[Mask[i]];
+ // Check if this could still be a splat.
+ if (MaskElt >= 0) {
+ if (SplatElt >=0 && SplatElt != MaskElt)
+ isSplat = false;
+ SplatElt = MaskElt;
+ }
+ NewMask.push_back(MaskElt);
+ }
+
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (isSplat || NewMask == LHSMask || NewMask == Mask) {
+ std::vector<Constant*> Elts;
+ const Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
+ }
+ }
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
+ }
+ }
+ }
+ }
+
+ return MadeChange ? &SVI : 0;
+}
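+
+// Illustrative example of the conservative shuffle-of-shuffle merge above
+// (names hypothetical): a splat of a splat collapses to a single splat.
+//   %s1 = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
+//   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> zeroinitializer
+// ==>
+//   %s2 = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer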
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 000000000000..32009c39ec25
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,106 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
+ SmallVector<Instruction*, 256> Worklist;
+ DenseMap<Instruction*, unsigned> WorklistMap;
+
+ void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT
+ InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
+public:
+ InstCombineWorklist() {}
+
+ bool isEmpty() const { return Worklist.empty(); }
+
+ /// Add - Add the specified instruction to the worklist if it isn't already
+ /// in it.
+ void Add(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+ DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ Worklist.push_back(I);
+ }
+ }
+
+ void AddValue(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Add(I);
+ }
+
+ /// AddInitialGroup - Add the specified batch of instructions in reverse
+ /// order, which should only be done when the worklist is empty and the group
+ /// has no duplicates. Because RemoveOne pops from the back of the worklist,
+ /// adding in reverse means instructions come back off in their original
+ /// order.
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ assert(Worklist.empty() && "Worklist must be empty to add initial group");
+ Worklist.reserve(NumEntries+16);
+ WorklistMap.resize(NumEntries);
+ DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ for (; NumEntries; --NumEntries) {
+ Instruction *I = List[NumEntries-1];
+ WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ Worklist.push_back(I);
+ }
+ }
+
+ /// Remove - Remove I from the worklist if it exists.
+ void Remove(Instruction *I) {
+ DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ if (It == WorklistMap.end()) return; // Not in worklist.
+
+ // Don't bother moving everything down, just null out the slot.
+ Worklist[It->second] = 0;
+
+ WorklistMap.erase(It);
+ }
+
+ Instruction *RemoveOne() {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ WorklistMap.erase(I);
+ return I;
+ }
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Add(cast<Instruction>(*UI));
+ }
+
+
+ /// Zap - check that the worklist is empty and nuke the backing store for
+ /// the map if it is large.
+ void Zap() {
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+ // Do an explicit clear; this shrinks the map if needed.
+ WorklistMap.clear();
+ }
+};
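+
+// A minimal sketch of how a driver is expected to use this class; the names
+// InstList, NumInsts and visit are hypothetical and not part of this header.
+//
+//   InstCombineWorklist WL;
+//   WL.AddInitialGroup(InstList, NumInsts); // whole function, in order
+//   while (!WL.isEmpty()) {
+//     Instruction *I = WL.RemoveOne();      // may be null if Remove()d
+//     if (I) visit(I);                      // visiting may Add() more work
+//   }
+//   WL.Zap();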
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 000000000000..ab98ef9fccf8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,1677 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simpler
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
+#include <algorithm>
+#include <climits>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc , "Number of reassociations");
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstCombine(*unwrap(R));
+}
+
+char InstCombiner::ID = 0;
+INITIALIZE_PASS(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+
+void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type, for example, or from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If the source is a legal integer type and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
+
+/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+/// operators which are associative or commutative:
+//
+// Commutative operators:
+//
+// 1. Order operands such that they are listed from right (least complex) to
+// left (most complex). This puts constants before unary operators before
+// binary operators.
+//
+// Associative operators:
+//
+// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+//
+// Associative and commutative operators:
+//
+// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+// if C1 and C2 are constants.
+//
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ bool Changed = false;
+
+ do {
+ // Order operands such that they are listed from right (least complex) to
+ // left (most complex). This puts constants before unary operators before
+ // binary operators.
+ if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+ getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+ if (I.isAssociative()) {
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+ // It simplifies to V. Form "A op V".
+ I.setOperand(0, A);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+ // It simplifies to V. Form "V op C".
+ I.setOperand(0, V);
+ I.setOperand(1, C);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+ }
+
+ if (I.isAssociative() && I.isCommutative()) {
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "V op B".
+ I.setOperand(0, V);
+ I.setOperand(1, B);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "B op V".
+ I.setOperand(0, B);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+ // if C1 and C2 are constants.
+ if (Op0 && Op1 &&
+ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+ isa<Constant>(Op0->getOperand(1)) &&
+ isa<Constant>(Op1->getOperand(1)) &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *A = Op0->getOperand(0);
+ Constant *C1 = cast<Constant>(Op0->getOperand(1));
+ Value *B = Op1->getOperand(0);
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, A, B);
+ InsertNewInstWith(New, I);
+ New->takeName(Op1);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ continue;
+ }
+ }
+
+ // No further simplifications.
+ return Changed;
+ } while (1);
+}
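+
+// Illustrative example of transform 2 in the comment above (%x is a
+// hypothetical value):
+//   %t = add i32 %x, 5
+//   %r = add i32 %t, -5    ; "(A op B) op C" where "B op C" folds to 0
+// is rewritten in place as
+//   %r = add i32 %x, 0
+// which the rest of instcombine then reduces to plain %x.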
+
+/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ switch (LOp) {
+ default:
+ return false;
+
+ case Instruction::And:
+ // And distributes over Or and Xor.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+
+ case Instruction::Mul:
+ // Multiplication distributes over addition and subtraction.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ }
+
+ case Instruction::Or:
+ // Or distributes over And.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::And:
+ return true;
+ }
+ }
+}
+
+/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ if (Instruction::isCommutative(ROp))
+ return LeftDistributesOverRight(ROp, LOp);
+ // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+ // but this requires knowing that the addition does not overflow and other
+ // such subtleties.
+ return false;
+}
+
+/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+/// which some other binary operation distributes over either by factorizing
+/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+/// a win). Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
+
+ // Factorization.
+ if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, A, V);
+ V->takeName(&I);
+ return V;
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, V, B);
+ V->takeName(&I);
+ return V;
+ }
+ }
+ }
+
+ // Expansion.
+ if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+ // The instruction has the form "(A op' B) op C". See if expanding it out
+ // to "(A op C) op' (B op C)" results in simplifications.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) ||
+ (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+ return Op0;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+ }
+
+ if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+ // The instruction has the form "A op (B op' C)". See if expanding it out
+ // to "(A op B) op' (A op C)" results in simplifications.
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) ||
+ (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+ return Op1;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+ }
+
+ return 0;
+}
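+
+// Illustrative example of the factorization path above (hypothetical values;
+// since "%b + %c" does not fold, both multiplies must be single-use):
+//   %p = mul i32 %a, %b
+//   %q = mul i32 %a, %c
+//   %r = add i32 %p, %q
+// ==>
+//   %s = add i32 %b, %c
+//   %r = mul i32 %a, %s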
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isIntegerTy())
+ return ConstantExpr::getNeg(C);
+
+ return 0;
+}
+
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+Value *InstCombiner::dyn_castFNegVal(Value *V) const {
+ if (BinaryOperator::isFNeg(V))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isFloatingPointTy())
+ return ConstantExpr::getFNeg(C);
+
+ return 0;
+}
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+ }
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return 0;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->isIntegerTy(1)) return 0;
+
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+ const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return 0;
+ }
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+ return SelectInst::Create(SI->getCondition(),
+ SelectTrueVal, SelectFalseVal);
+ }
+ return 0;
+}
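+
+// Illustrative example of folding an operation into a select (hypothetical
+// values; the select must have no other uses):
+//   %s = select i1 %c, i32 4, i32 8
+//   %r = add i32 %s, 1
+// ==>
+//   %r = select i1 %c, i32 5, i32 9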
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0)
+ return 0;
+
+ // We normally only transform phis with a single use. However, if a PHI has
+ // multiple uses and they are all the same operation, we can fold *all* of the
+ // uses into the PHI.
+ if (!PN->hasOneUse()) {
+ // Walk the use list for the instruction, comparing them to I.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User != &I && !I.isIdenticalTo(User))
+ return 0;
+ }
+ // Otherwise, we can replace *all* users with the new PHI we form.
+ }
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
+ BasicBlock *NonConstBB = 0;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ continue;
+
+ if (isa<PHINode>(InVal)) return 0; // Itself a phi.
+ if (NonConstBB) return 0; // More than one non-const value.
+
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the InVal is an invoke at the end of the pred block, then we can't
+ // insert a computation after it without breaking the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == NonConstBB)
+ return 0;
+
+ // If the incoming non-constant value is in I's block, we will remove one
+ // instruction, but insert another equivalent one, leading to infinite
+ // instcombine.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+ // inserting the computation on some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB != 0) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return 0;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // If we are going to have to insert a new computation, do so right before the
+ // predecessor's terminator.
+ if (NonConstBB)
+ Builder->SetInsertPoint(NonConstBB->getTerminator());
+
+ // Next, add all of the operands to the PHI.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We only currently try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ else
+ InV = Builder->CreateSelect(PN->getIncomingValue(i),
+ TrueVInPred, FalseVInPred, "phitmp");
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else if (isa<ICmpInst>(CI))
+ InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ else
+ InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ else
+ InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ const Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ else
+ InV = Builder->CreateCast(CI->getOpcode(),
+ PN->getIncomingValue(i), I.getType(), "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (User == &I) continue;
+ ReplaceInstUsesWith(*User, NewPN);
+ EraseInstFromFunction(*User);
+ }
+ return ReplaceInstUsesWith(I, NewPN);
+}
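+
+// Illustrative example of folding an operation into a phi (hypothetical block
+// and value names; %p must have no other uses):
+//   %p = phi i32 [ 3, %bb1 ], [ 5, %bb2 ]
+//   %r = add i32 %p, 1
+// ==> the add is replaced by a phi of the folded constants:
+//   %r = phi i32 [ 4, %bb1 ], [ 6, %bb2 ]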
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ if (!TD) return 0;
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, return null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
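+
+// Illustrative example (assuming TargetData with 64-bit pointers and 4-byte
+// i32): FindElementAtOffset([10 x i32], /*Offset=*/8, Idx) fills Idx with
+// (i64 0, i64 2) and returns i32, i.e. the element addressed by
+//   getelementptr [10 x i32]* %p, i64 0, i64 2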
+
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ return ReplaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ // Eliminate unneeded casts for indices, and replace any index over a
+ // zero-sized type with zero (such an index displaces the pointer by zero).
+ if (TD) {
+ bool MadeChange = false;
+ const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ // Skip indices into struct types.
+ const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy) continue;
+
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
+ MadeChange = true;
+ }
+
+ if ((*I)->getType() != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
+ }
+ }
+ if (MadeChange) return &GEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
+ !Src->hasOneUse())
+ return 0;
+
+ // Note that if our source is itself a gep chain, we wait for that chain to
+ // be resolved before performing this transformation. This avoids creating
+ // a ton of code in some cases.
+ //
+ if (GetElementPtrInst *SrcGEP =
+ dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !(*I)->isStructTy();
+
+ // Can we combine the two pointer arithmetic offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is zero.
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return (GEP.isInBounds() && Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ if (StrippedPtr != PtrOp &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res =
+ GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
+ Idx.end(), GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ return Res;
+ }
+
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ const Type *SrcElTy = StrippedPtrTy->getElementType();
+ const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ if (TD && SrcElTy->isArrayTy() &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = GEP.getOperand(1);
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+
+ // Transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+ if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+ // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
+ // allow either a mul, shift, or constant here.
+ Value *NewIdx = 0;
+ ConstantInt *Scale = 0;
+ if (ArrayEltSize == 1) {
+ NewIdx = GEP.getOperand(1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+ NewIdx = ConstantInt::get(CI->getType(), 1);
+ Scale = CI;
+ } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
+ if (Inst->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+ uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
+ NewIdx = Inst->getOperand(0);
+ } else if (Inst->getOpcode() == Instruction::Mul &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Scale = cast<ConstantInt>(Inst->getOperand(1));
+ NewIdx = Inst->getOperand(0);
+ }
+ }
+
+ // If the index will be to exactly the right offset with the scale taken
+ // out, perform the transformation. Note, we don't know whether Scale is
+ // signed or not. We'll use unsigned version of division/modulo
+ // operation after making sure Scale doesn't have the sign bit set.
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+ Scale->getZExtValue() % ArrayEltSize == 0) {
+ Scale = ConstantInt::get(Scale->getType(),
+ Scale->getZExtValue() / ArrayEltSize);
+ if (Scale->getZExtValue() != 1) {
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+ false /*ZExt*/);
+ NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
+ }
+
+ // Insert the new GEP instruction.
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()):
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
+ // Determine how much the GEP moves the pointer. We are guaranteed to get
+ // a constant back from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (Offset == 0) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
+ isMalloc(BCI->getOperand(0))) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ const Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Value *NGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end()) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
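+
+// Illustrative example of combining a GEP of a GEP (hypothetical values; %P is
+// not itself a two-operand GEP, and the indices are already canonicalized to
+// the pointer-sized integer type):
+//   %A = getelementptr i32* %P, i64 %i
+//   %B = getelementptr i32* %A, i64 %j
+// ==>
+//   %A.sum = add i64 %i, %j
+//   %B     = getelementptr i32* %P, i64 %A.sum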
+
+
+
+static bool IsOnlyNullComparedAndFreed(const Value &V) {
+ for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (isFreeCall(U))
+ continue;
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
+ if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Instruction *InstCombiner::visitMalloc(Instruction &MI) {
+ // If we have a malloc call whose only uses are null comparisons and free
+ // calls, delete the frees, replace the comparisons with true or false as
+ // appropriate, and then delete the malloc itself.
+ if (IsOnlyNullComparedAndFreed(MI)) {
+ for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
+ UI != UE;) {
+ // We can assume that every remaining use is a free call or an icmp eq/ne
+ // to null, so the cast is safe.
+ Instruction *I = cast<Instruction>(*UI);
+
+ // Early increment here, as we're about to get rid of the user.
+ ++UI;
+
+ if (isFreeCall(I)) {
+ EraseInstFromFunction(*cast<CallInst>(I));
+ continue;
+ }
+ // Again, the cast is safe.
+ ICmpInst *C = cast<ICmpInst>(I);
+ ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ EraseInstFromFunction(*C);
+ }
+ return EraseInstFromFunction(MI);
+ }
+ return 0;
+}
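+
+// Illustrative example (hypothetical IR): when a malloc's only uses are null
+// comparisons and frees, everything folds away.
+//   %m = call noalias i8* @malloc(i64 16)
+//   %c = icmp eq i8* %m, null
+//   call void @free(i8* %m)
+// ==> %c is replaced by 'false' (an 'icmp ne' would become 'true'), and both
+// the free call and the malloc call are erased.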
+
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store through an undef pointer, since we cannot modify the
+ // CFG here.
+ Builder->CreateStore(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null', delete the instruction. This can happen in STL code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ return &BI;
+ }
+
+ // Canonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Canonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ return 0;
+}
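+
+// Illustrative example of the first branch canonicalization above
+// (hypothetical values and labels):
+//   %notc = xor i1 %c, true
+//   br i1 %notc, label %T, label %F
+// ==>
+//   br i1 %c, label %F, label %T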
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+ if (I->getOpcode() == Instruction::Add)
+ if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+ SI.setOperand(i,
+ ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+ AddRHS));
+ SI.setOperand(0, I->getOperand(0));
+ Worklist.Add(I);
+ return &SI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element indexed by the first index out of the constant
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(V, EV.getIndices().slice(1));
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract both reference distinctly different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.getIndices());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.getIndices());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ ArrayRef<unsigned>(insi, inse));
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list.
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E = extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ ArrayRef<unsigned>(exti, exte));
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic, see if we're the only user, which
+ // allows us to simplify multiple result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+ return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+ ConstantExpr::getNot(CI));
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+ // If the (non-volatile) load only has one use, we can rewrite this to a
+ // load from a GEP. This reduces the size of the load.
+ // FIXME: If a load is used only by extractvalue instructions then this
+ // could be done regardless of having multiple uses.
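+    // For illustration, the rewrite is roughly:
+    //   %agg = load { i32, i32 }* %ptr
+    //   %elt = extractvalue { i32, i32 } %agg, 1
+    // -->
+    //   %gep = getelementptr inbounds { i32, i32 }* %ptr, i32 0, i32 1
+    //   %elt = load i32* %gep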
+ if (!L->isVolatile() && L->hasOneUse()) {
+ // extractvalue has integer indices, getelementptr has Value*s. Convert.
+ SmallVector<Value*, 4> Indices;
+ // Prefix an i32 0 since we need the first element.
+ Indices.push_back(Builder->getInt32(0));
+ for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+ I != E; ++I)
+ Indices.push_back(Builder->getInt32(*I));
+
+ // We need to insert these at the location of the old load, not at that of
+ // the extractvalue.
+ Builder->SetInsertPoint(L->getParent(), L);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
+ Indices.begin(), Indices.end());
+ // Returning the load directly will cause the main loop to insert it in
+ // the wrong spot, so use ReplaceInstUsesWith().
+ return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ }
+ // We could simplify extracts from other values. Note that nested extracts may
+ // already be simplified implicitly by the above: extract (extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate. Similarly for extracts from single-use
+ // loads: extract (extract (load)) will be translated to extract (load (gep))
+ // and if again single-use then via load (gep (gep)) to load (gep).
+ // However, double extracts from e.g. function arguments or return values
+ // aren't handled yet.
+ return 0;
+}
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+  // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const TargetData *TD) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst)) {
+ ++NumDeadInst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ Constant*& FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, TD);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if (SI->getCaseValue(i) == Cond) {
+ BasicBlock *ReachableBB = SI->getSuccessor(i);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getSuccessor(0));
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ MadeIRChange = false;
+
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Visited.count(BB)) {
+ Instruction *Term = BB->getTerminator();
+ while (Term != BB->begin()) { // Remove instrs bottom-up
+ BasicBlock::iterator I = Term; --I;
+
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ // A debug intrinsic shouldn't force another iteration if we weren't
+ // going to do one without it.
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+
+ // If I is not void type then replaceAllUsesWith undef.
+          // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+        // only has us as a predecessor (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+ Builder->SetCurrentDebugLocation(I->getDebugLoc());
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ if (!I->getDebugLoc().isUnknown())
+ Result->setDebugLoc(I->getDebugLoc());
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ if (!isa<PHINode>(Result)) // If combining a PHI, don't insert
+ while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
+ ++InsertPos;
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ EraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // if so, remove it.
+ if (isInstructionTriviallyDead(I)) {
+ EraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
+
+ bool EverMadeChange = false;
+
+ // Lower dbg.declare intrinsics otherwise their value may be clobbered
+ // by instcombiner.
+ EverMadeChange = LowerDbgDeclare(F);
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+
+ Builder = 0;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
new file mode 100644
index 000000000000..1d31fcc4df3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -0,0 +1,117 @@
+//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+// Note that this implementation is very naive. We insert a counter for *every*
+// edge in the program, instead of using control flow information to prune the
+// number of counters inserted.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-edge-profiling"
+
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class EdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ EdgeProfiler() : ModulePass(ID) {
+ initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Edge Profiler";
+ }
+ };
+}
+
+char EdgeProfiler::ID = 0;
+INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
+ "Insert instrumentation for edge profiling", false, false)
+
+ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
+
+bool EdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ std::set<BasicBlock*> BlocksToInstrument;
+ unsigned NumEdges = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ BlocksToInstrument.insert(BB);
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+
+ const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "EdgeProfCounters");
+ NumEdgesInserted = NumEdges;
+
+ // Instrument all of the edges...
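+  // Note: the index i below advances in the same function/block/successor
+  // order as the counting loop above, so it never exceeds the NumEdges slots
+  // reserved in Counters (blocks created by edge splitting are skipped via
+  // BlocksToInstrument).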
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Create counter for (0,entry) edge.
+ IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
+        // Okay, we have to add a counter for each outgoing edge. If the
+        // outgoing edge is not critical, don't split it; just insert the
+        // counter in the source or destination of the edge.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ // If the edge is critical, split it.
+ SplitCriticalEdge(TI, s, this);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If we
+ // only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(BB, i++, Counters, false);
+ } else {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
+ }
+ }
+ }
+ }
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
new file mode 100644
index 000000000000..3f2c4123882d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,672 @@
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to records the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+#include "ProfilingUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+#include <utility>
+using namespace llvm;
+
+namespace {
+ class GCOVProfiler : public ModulePass {
+ public:
+ static char ID;
+ GCOVProfiler()
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ }
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
+ Use402Format(use402Format) {
+ assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "GCOV Profiler";
+ }
+
+ private:
+ bool runOnModule(Module &M);
+
+ // Create the GCNO files for the Module based on DebugInfo.
+ void emitGCNO(DebugInfoFinder &DIF);
+
+ // Modify the program to track transitions along edges and call into the
+ // profiling runtime to emit .gcda files when run.
+ bool emitProfileArcs(DebugInfoFinder &DIF);
+
+ // Get pointers to the functions in the runtime library.
+ Constant *getStartFileFunc();
+ Constant *getIncrementIndirectCounterFunc();
+ Constant *getEmitFunctionFunc();
+ Constant *getEmitArcsFunc();
+ Constant *getEndFileFunc();
+
+ // Create or retrieve an i32 state value that is used to represent the
+ // pred block number for certain non-trivial edges.
+ GlobalVariable *getEdgeStateValue();
+
+ // Produce a table of pointers to counters, by predecessor and successor
+ // block number.
+ GlobalVariable *buildEdgeLookupTable(Function *F,
+ GlobalVariable *Counter,
+ const UniqueVector<BasicBlock *> &Preds,
+ const UniqueVector<BasicBlock *> &Succs);
+
+ // Add the function to write out all our counters to the global destructor
+ // list.
+ void insertCounterWriteout(DebugInfoFinder &,
+ SmallVector<std::pair<GlobalVariable *,
+ MDNode *>, 8> &);
+
+ std::string mangleName(DICompileUnit CU, std::string NewStem);
+
+ bool EmitNotes;
+ bool EmitData;
+ bool Use402Format;
+
+ Module *M;
+ LLVMContext *Ctx;
+ };
+}
+
+char GCOVProfiler::ID = 0;
+INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
+ "Insert instrumentation for GCOV profiling", false, false)
+
+ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
+ bool Use402Format) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+}
+
+static DISubprogram findSubprogram(DIScope Scope) {
+ while (!Scope.isSubprogram()) {
+ assert(Scope.isLexicalBlock() &&
+ "Debug location not lexical block or subprogram");
+ Scope = DILexicalBlock(Scope).getContext();
+ }
+ return DISubprogram(Scope);
+}
+
+namespace {
+ class GCOVRecord {
+ protected:
+ static const char *LinesTag;
+ static const char *FunctionTag;
+ static const char *BlockTag;
+ static const char *EdgeTag;
+
+ GCOVRecord() {}
+
+ void writeBytes(const char *Bytes, int Size) {
+ os->write(Bytes, Size);
+ }
+
+ void write(uint32_t i) {
+ writeBytes(reinterpret_cast<char*>(&i), 4);
+ }
+
+    // Returns the length, measured in 4-byte words, that will be used to
+    // represent this string in a GCOV file.
+ unsigned lengthOfGCOVString(StringRef s) {
+      // A GCOV string is a length word, followed by the string itself, a NUL,
+      // and then between 0 and 3 more NULs padding out to the next 4-byte
+      // word. The length is measured in 4-byte words including padding, not
+      // in bytes of actual string.
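+      // For illustration: "hello" has length word 2 and is emitted as
+      // 'h','e','l','l','o' plus three NUL bytes, i.e. two 4-byte words.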
+ return (s.size() / 4) + 1;
+ }
+
+ void writeGCOVString(StringRef s) {
+ uint32_t Len = lengthOfGCOVString(s);
+ write(Len);
+ writeBytes(s.data(), s.size());
+
+ // Write 1 to 4 bytes of NUL padding.
+ assert((unsigned)(4 - (s.size() % 4)) > 0);
+ assert((unsigned)(4 - (s.size() % 4)) <= 4);
+ writeBytes("\0\0\0\0", 4 - (s.size() % 4));
+ }
+
+ raw_ostream *os;
+ };
+ const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
+ const char *GCOVRecord::FunctionTag = "\0\0\0\1";
+ const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
+ const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
+
+ class GCOVFunction;
+ class GCOVBlock;
+
+ // Constructed only by requesting it from a GCOVBlock, this object stores a
+ // list of line numbers and a single filename, representing lines that belong
+ // to the block.
+ class GCOVLines : public GCOVRecord {
+ public:
+ void addLine(uint32_t Line) {
+ Lines.push_back(Line);
+ }
+
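+    // Size of this lines record in 4-byte words: one word for the zero tag,
+    // one for the filename's length word, the padded filename itself, and
+    // one word per line number (see GCOVBlock::writeOut below).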
+ uint32_t length() {
+ return lengthOfGCOVString(Filename) + 2 + Lines.size();
+ }
+
+ private:
+ friend class GCOVBlock;
+
+ GCOVLines(std::string Filename, raw_ostream *os)
+ : Filename(Filename) {
+ this->os = os;
+ }
+
+ std::string Filename;
+ SmallVector<uint32_t, 32> Lines;
+ };
+
+  // Represents a basic block in GCOV. Each block has a unique number in the
+  // function, a set of lines belonging to it, and a set of edges to other
+  // blocks.
+ class GCOVBlock : public GCOVRecord {
+ public:
+ GCOVLines &getFile(std::string Filename) {
+ GCOVLines *&Lines = LinesByFile[Filename];
+ if (!Lines) {
+ Lines = new GCOVLines(Filename, os);
+ }
+ return *Lines;
+ }
+
+ void addEdge(GCOVBlock &Successor) {
+ OutEdges.push_back(&Successor);
+ }
+
+ void writeOut() {
+ uint32_t Len = 3;
+ for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+ E = LinesByFile.end(); I != E; ++I) {
+ Len += I->second->length();
+ }
+
+ writeBytes(LinesTag, 4);
+ write(Len);
+ write(Number);
+ for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+ E = LinesByFile.end(); I != E; ++I) {
+ write(0);
+ writeGCOVString(I->second->Filename);
+ for (int i = 0, e = I->second->Lines.size(); i != e; ++i) {
+ write(I->second->Lines[i]);
+ }
+ }
+ write(0);
+ write(0);
+ }
+
+ ~GCOVBlock() {
+ DeleteContainerSeconds(LinesByFile);
+ }
+
+ private:
+ friend class GCOVFunction;
+
+ GCOVBlock(uint32_t Number, raw_ostream *os)
+ : Number(Number) {
+ this->os = os;
+ }
+
+ uint32_t Number;
+ StringMap<GCOVLines *> LinesByFile;
+ SmallVector<GCOVBlock *, 4> OutEdges;
+ };
+
+  // A function has a unique identifier, a checksum (which we leave as zero),
+  // and a set of blocks with a map of edges between them. This is the only
+  // GCOV object users can construct; the blocks and lines will be rooted here.
+ class GCOVFunction : public GCOVRecord {
+ public:
+ GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ this->os = os;
+
+ Function *F = SP.getFunction();
+ uint32_t i = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ Blocks[BB] = new GCOVBlock(i++, os);
+ }
+ ReturnBlock = new GCOVBlock(i++, os);
+
+ writeBytes(FunctionTag, 4);
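+      // Length of this record in 4-byte words: the ident, one checksum, the
+      // two strings (each a length word plus padded payload) and the line
+      // number; a second checksum word is added below unless the 4.02 format
+      // is used.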
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+ 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ if (!Use402Format)
+ ++BlockLen; // For second checksum.
+ write(BlockLen);
+ uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
+ write(Ident);
+ write(0); // checksum #1
+ if (!Use402Format)
+ write(0); // checksum #2
+ writeGCOVString(SP.getName());
+ writeGCOVString(SP.getFilename());
+ write(SP.getLineNumber());
+ }
+
+ ~GCOVFunction() {
+ DeleteContainerSeconds(Blocks);
+ delete ReturnBlock;
+ }
+
+ GCOVBlock &getBlock(BasicBlock *BB) {
+ return *Blocks[BB];
+ }
+
+ GCOVBlock &getReturnBlock() {
+ return *ReturnBlock;
+ }
+
+ void writeOut() {
+ // Emit count of blocks.
+ writeBytes(BlockTag, 4);
+ write(Blocks.size() + 1);
+ for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
+ write(0); // No flags on our blocks.
+ }
+
+ // Emit edges between blocks.
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ GCOVBlock &Block = *I->second;
+ if (Block.OutEdges.empty()) continue;
+
+ writeBytes(EdgeTag, 4);
+ write(Block.OutEdges.size() * 2 + 1);
+ write(Block.Number);
+ for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ write(Block.OutEdges[i]->Number);
+ write(0); // no flags
+ }
+ }
+
+ // Emit lines for each block.
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ I->second->writeOut();
+ }
+ }
+
+ private:
+ DenseMap<BasicBlock *, GCOVBlock *> Blocks;
+ GCOVBlock *ReturnBlock;
+ };
+}
+
+std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) {
+ if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+ for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+ MDNode *N = GCov->getOperand(i);
+ if (N->getNumOperands() != 2) continue;
+ MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+ MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1));
+ if (!GCovFile || !CompileUnit) continue;
+ if (CompileUnit == CU) {
+ SmallString<128> Filename = GCovFile->getString();
+ sys::path::replace_extension(Filename, NewStem);
+ return Filename.str();
+ }
+ }
+ }
+
+ SmallString<128> Filename = CU.getFilename();
+ sys::path::replace_extension(Filename, NewStem);
+ return sys::path::filename(Filename.str());
+}
+
+bool GCOVProfiler::runOnModule(Module &M) {
+ this->M = &M;
+ Ctx = &M.getContext();
+
+ DebugInfoFinder DIF;
+ DIF.processModule(M);
+
+ if (EmitNotes) emitGCNO(DIF);
+ if (EmitData) return emitProfileArcs(DIF);
+ return false;
+}
+
+void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
+ DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
+ for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
+ E = DIF.compile_unit_end(); I != E; ++I) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(*I);
+ raw_fd_ostream *&out = GcnoFiles[CU];
+ std::string ErrorInfo;
+ out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
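+    // The 12 header bytes, word-reversed, appear to encode the gcno magic
+    // ("gcno"), the gcov version ("404*" or "402*") and an "LLVM" stamp.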
+ if (!Use402Format)
+ out->write("oncg*404MVLL", 12);
+ else
+ out->write("oncg*402MVLL", 12);
+ }
+
+ for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+ SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+ DISubprogram SP(*SPI);
+ raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()];
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, os, Use402Format);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
+ }
+ }
+ Func.writeOut();
+ }
+
+ for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
+ I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
+ raw_fd_ostream *&out = I->second;
+ out->write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out->close();
+ delete out;
+ }
+}
+
+bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
+ if (DIF.subprogram_begin() == DIF.subprogram_end())
+ return false;
+
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+ for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+ SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+ DISubprogram SP(*SPI);
+ Function *F = SP.getFunction();
+ if (!F) continue;
+
+ unsigned Edges = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ ++Edges;
+ else
+ Edges += TI->getNumSuccessors();
+ }
+
+ const ArrayType *CounterTy =
+ ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+ GlobalVariable *Counters =
+ new GlobalVariable(*M, CounterTy, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(CounterTy),
+ "__llvm_gcov_ctr", 0, false, 0);
+ CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+
+ UniqueVector<BasicBlock *> ComplexEdgePreds;
+ UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors) {
+ IRBuilder<> Builder(TI);
+
+ if (Successors == 1) {
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
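+          // Two-way conditional branch: instead of splitting the edges,
+          // select counter index Edge or Edge + 1 based on the condition.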
+ Value *Sel = Builder.CreateSelect(
+ BI->getCondition(),
+ ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
+ ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+ SmallVector<Value *, 2> Idx;
+ Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Sel);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters,
+ Idx.begin(), Idx.end());
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else {
+ ComplexEdgePreds.insert(BB);
+ for (int i = 0; i != Successors; ++i)
+ ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ }
+ Edge += Successors;
+ }
+ }
+
+ if (!ComplexEdgePreds.empty()) {
+ GlobalVariable *EdgeTable =
+ buildEdgeLookupTable(F, Counters,
+ ComplexEdgePreds, ComplexEdgeSuccs);
+ GlobalVariable *EdgeState = getEdgeStateValue();
+
+ const Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+ IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ }
+ for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+ // call runtime to perform increment
+ IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI());
+ Value *CounterPtrArray =
+ Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
+ i * ComplexEdgePreds.size());
+ Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+ EdgeState, CounterPtrArray);
+ // clear the predecessor number
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ }
+ }
+ }
+
+ insertCounterWriteout(DIF, CountersBySP);
+
+ return true;
+}
+
+// Edges out of terminators that aren't simple branches are "complex", because
+// complex logic is required at run time to pick which counter to update.
+GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
+ Function *F,
+ GlobalVariable *Counters,
+ const UniqueVector<BasicBlock *> &Preds,
+ const UniqueVector<BasicBlock *> &Succs) {
+ // TODO: support invoke, threads. We rely on the fact that nothing can modify
+ // the whole-Module pred edge# between the time we set it and the time we next
+ // read it. Threads and invoke make this untrue.
+
+ // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
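+  // At run time the predecessor block stores its number into the edge state
+  // global and the successor block passes its row of this table to
+  // llvm_gcda_increment_indirect_counter, which is expected to bump the
+  // counter for that (pred, succ) pair.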
+ const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ const ArrayType *EdgeTableTy = ArrayType::get(
+ Int64PtrTy, Succs.size() * Preds.size());
+
+ Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()];
+ Constant *NullValue = Constant::getNullValue(Int64PtrTy);
+ for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i)
+ EdgeTable[i] = NullValue;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
+ for (int i = 0; i != Successors; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ IRBuilder<> builder(Succ);
+ Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge + i);
+ EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
+ (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+ }
+ }
+ Edge += Successors;
+ }
+
+ ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
+ GlobalVariable *EdgeTableGV =
+ new GlobalVariable(
+ *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
+ ConstantArray::get(EdgeTableTy, V),
+ "__llvm_gcda_edge_table");
+ EdgeTableGV->setUnnamedAddr(true);
+ return EdgeTableGV;
+}
+
+Constant *GCOVProfiler::getStartFileFunc() {
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Type::getInt8PtrTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
+}
+
+Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
+ Type *Args[] = {
+ Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor
+ Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row
+ };
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Args, false);
+ return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy);
+}
+
+Constant *GCOVProfiler::getEmitFunctionFunc() {
+ Type *Args[2] = {
+ Type::getInt32Ty(*Ctx), // uint32_t ident
+ Type::getInt8PtrTy(*Ctx), // const char *function_name
+ };
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Args, false);
+ return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+}
+
+Constant *GCOVProfiler::getEmitArcsFunc() {
+ Type *Args[] = {
+ Type::getInt32Ty(*Ctx), // uint32_t num_counters
+ Type::getInt64PtrTy(*Ctx), // uint64_t *counters
+ };
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Args, false);
+ return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
+}
+
+Constant *GCOVProfiler::getEndFileFunc() {
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
+}
+
+GlobalVariable *GCOVProfiler::getEdgeStateValue() {
+ GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
+ if (!GV) {
+ GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::get(Type::getInt32Ty(*Ctx),
+ 0xffffffff),
+ "__llvm_gcov_global_state_pred");
+ GV->setUnnamedAddr(true);
+ }
+ return GV;
+}
+
+void GCOVProfiler::insertCounterWriteout(
+ DebugInfoFinder &DIF,
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
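+  // The generated __llvm_gcov_writeout calls llvm_gcda_start_file for each
+  // compile unit, emits every function's arc counters via
+  // llvm_gcda_emit_function/llvm_gcda_emit_arcs, ends the file, and is handed
+  // to InsertProfilingShutdownCall below so it runs at program shutdown.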
+ const FunctionType *WriteoutFTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *WriteoutF = Function::Create(WriteoutFTy,
+ GlobalValue::InternalLinkage,
+ "__llvm_gcov_writeout", M);
+ WriteoutF->setUnnamedAddr(true);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF);
+ IRBuilder<> Builder(BB);
+
+ Constant *StartFile = getStartFileFunc();
+ Constant *EmitFunction = getEmitFunctionFunc();
+ Constant *EmitArcs = getEmitArcsFunc();
+ Constant *EndFile = getEndFileFunc();
+
+ for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
+ CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
+ DICompileUnit compile_unit(*CUI);
+ std::string FilenameGcda = mangleName(compile_unit, "gcda");
+ Builder.CreateCall(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda));
+ for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+ I = CountersBySP.begin(), E = CountersBySP.end();
+ I != E; ++I) {
+ DISubprogram SP(I->second);
+ intptr_t ident = reinterpret_cast<intptr_t>(I->second);
+ Builder.CreateCall2(EmitFunction,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
+ Builder.CreateGlobalStringPtr(SP.getName()));
+
+ GlobalVariable *GV = I->first;
+ unsigned Arcs =
+ cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
+ Builder.CreateCall2(EmitArcs,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0));
+ }
+ Builder.CreateCall(EndFile);
+ }
+ Builder.CreateRetVoid();
+
+ InsertProfilingShutdownCall(WriteoutF, M);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 000000000000..71adc1ec6de0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,33 @@
+//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeInstrumentation - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeEdgeProfilerPass(Registry);
+ initializeOptimalEdgeProfilerPass(Registry);
+ initializePathProfilerPass(Registry);
+ initializeGCOVProfilerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+ initializeInstrumentation(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 000000000000..f76c77e1bdbf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,108 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module provides means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+
+  /// MaximumSpanningTree - An MST implementation.
+ /// The type parameter T determines the type of the nodes of the graph.
+ template <typename T>
+ class MaximumSpanningTree {
+
+ // A comparing class for comparing weighted edges.
+ template <typename CT>
+ struct EdgeWeightCompare {
+ bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
+ typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ return false;
+ }
+ };
+
+ public:
+ typedef std::pair<const T*, const T*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::vector<EdgeWeight> EdgeWeights;
+ protected:
+ typedef std::vector<Edge> MaxSpanTree;
+
+ MaxSpanTree MST;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+ /// spanning tree.
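+    /// The tree is built with Kruskal's algorithm: edges are visited in order
+    /// of decreasing weight and added unless they would connect two nodes
+    /// that are already in the same subtree (tracked with EquivalenceClasses).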
+ MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+
+ // Create spanning tree, Forest contains a special data structure
+ // that makes checking if two nodes are already in a common (sub-)tree
+ // fast and cheap.
+ EquivalenceClasses<const T*> Forest;
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ Forest.insert(e.first);
+ Forest.insert(e.second);
+ }
+
+ // Iterate over the sorted edges, biggest first.
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+ Forest.unionSets(e.first, e.second);
+ // So we know now that the edge is not already in a subtree, so we push
+ // the edge to the MST.
+ MST.push_back(e);
+ }
+ }
+ }
+
+ typename MaxSpanTree::iterator begin() {
+ return MST.begin();
+ }
+
+ typename MaxSpanTree::iterator end() {
+ return MST.end();
+ }
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 000000000000..e09f882aa323
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,225 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "MaximumSpanningTree.h"
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class OptimalEdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ OptimalEdgeProfiler() : ModulePass(ID) {
+ initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(ProfileEstimatorPassID);
+ AU.addRequired<ProfileInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Optimal Edge Profiler";
+ }
+ };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+ return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+ BasicBlock* b,
+ unsigned i) {
+ DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
+ << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+  // NumEdges counts all the edges that may be instrumented. Later on it is
+  // decided which edges to actually instrument, to achieve optimal profiling.
+ // For the entry block a virtual edge (0,entry) is reserved, for each block
+ // with no successors an edge (BB,0) is reserved. These edges are necessary
+ // to calculate a truly optimal maximum spanning tree and thus an optimal
+ // instrumentation.
+ unsigned NumEdges = 0;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ if (BB->getTerminator()->getNumSuccessors() == 0) {
+ // Reserve space for (BB,0) edge.
+ ++NumEdges;
+ } else {
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+ }
+
+  // In the profiling output a counter for each edge is reserved, but only a
+  // few are used. This is done to be able to read back in the profile without
+  // recalculating the maximum spanning tree: each edge counter that is not
+  // used is initialised with -1 to signal that it has to be derived from
+  // other edge counters when the profile info is read back in.
+
+ const Type *Int32 = Type::getInt32Ty(M.getContext());
+ const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "OptEdgeProfCounters");
+ NumEdgesInserted = 0;
+
+ std::vector<Constant*> Initializer(NumEdges);
+ Constant *Zero = ConstantInt::get(Int32, 0);
+ Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+ // Instrument all of the edges not in MST...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
+
+    // Calculate a Maximum Spanning Tree with the edge weights determined by
+    // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
+    // edges (0,entry) and (BB,0) (for blocks with no successors), and these
+    // edges also participate in the maximum spanning tree calculation.
+    // Only edges that are _not_ in the resulting MST are instrumented below;
+    // their counts can be reconstructed from the instrumented edges.
+
+ ProfileInfo::EdgeWeights ECs =
+ getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
+ std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+ MaximumSpanningTree<BasicBlock> MST(EdgeVector);
+ std::stable_sort(MST.begin(), MST.end());
+
+    // Check whether (0,entry) is in the MST. If it is not, instrument the
+    // edge (IncrementCounterInBlock()) and initialise the counter to zero;
+    // if the edge is in the MST, the counter is initialised to -1.
+
+ BasicBlock *entry = &(F->getEntryBlock());
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge, entry, i);
+ IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+    } else {
+ Initializer[i++] = (Uncounted);
+ }
+
+    // InsertedBlocks contains all blocks that were inserted for splitting an
+    // edge; these blocks do not have to be instrumented.
+ DenseSet<BasicBlock*> InsertedBlocks;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Check if block was not inserted and thus does not have to be
+ // instrumented.
+ if (InsertedBlocks.count(BB)) continue;
+
+      // Okay, we have to add a counter for each outgoing edge not in the MST.
+      // If the outgoing edge is not critical, don't split it; just insert the
+      // counter in the source or destination of the edge. Also, if the block
+      // has no successors, the virtual edge (BB,0) is processed.
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge, BB, i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+        } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ BasicBlock *Succ = TI->getSuccessor(s);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+ // If the edge is critical, split it.
+ bool wasInserted = SplitCriticalEdge(TI, s, this);
+ Succ = TI->getSuccessor(s);
+ if (wasInserted)
+ InsertedBlocks.insert(Succ);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If
+ // we only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge, BB, i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ } else {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge, Succ, i);
+ IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
+ }
+ Initializer[i++] = (Zero);
+ } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ }
+ }
+
+ // Check if the number of edges counted at first was the number of edges we
+ // considered for instrumentation.
+ assert(i == NumEdges && "the number of edges in counting array is wrong");
+
+ // Assign the now completely defined initialiser to the array.
+ Constant *init = ConstantArray::get(ATy, Initializer);
+ Counters->setInitializer(init);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
new file mode 100644
index 000000000000..75416637db4f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1425 @@
+//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
+// profiling converts the CFG into a DAG by replacing backedges with edges
+// from entry to the start block and from the end block to exit. The paths
+// along the new DAG are enumerated, i.e. each path is given a path number.
+// Edges are instrumented to increment the path number register, such that the
+// path number register will equal the path number of the path taken at the
+// exit.
+//
+// This file defines classes for building a CFG for use with different stages
+// in the Ball-Larus path profiling instrumentation [Ball96]. The
+// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
+// numbering, finding a spanning tree, moving increments from the spanning
+// tree to chords.
+//
+// Terms:
+// DAG - Directed Acyclic Graph.
+// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
+// removed in the following manner. For every backedge
+// v->w, insert edge ENTRY->w and edge v->EXIT.
+// Path Number - The number corresponding to a specific path through a
+// Ball-Larus DAG.
+// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
+// vertices and is a tree.
+// Chord - An edge not in the spanning tree.
+//
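+// For illustration: in a diamond-shaped DAG ENTRY->A, A->B, A->C, B->D,
+// C->D, D->EXIT there are two paths; assigning edge A->C an increment of 1
+// and every other edge 0 numbers them 0 and 1.
+//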
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+// [Ball94]
+// Thomas Ball. "Efficiently Counting Program Events with Support for
+// On-line queries."
+// ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
+// September 1994, Pages 1399-1410.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-path-profiling"
+
+#include "llvm/DerivedTypes.h"
+#include "ProfilingUtils.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <vector>
+
+#define HASH_THRESHHOLD 100000
+
+using namespace llvm;
+
+namespace {
+class BLInstrumentationNode;
+class BLInstrumentationEdge;
+class BLInstrumentationDag;
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationNode extends BallLarusNode with members used by the
+// instrumentation algorithms.
+// ---------------------------------------------------------------------------
+class BLInstrumentationNode : public BallLarusNode {
+public:
+ // Creates a new BLInstrumentationNode from a BasicBlock.
+ BLInstrumentationNode(BasicBlock* BB);
+
+ // Gets/sets the Value corresponding to the pathNumber register,
+ // constant or phinode. Used by the instrumentation code to remember
+ // path number Values.
+ Value* getStartingPathNumber();
+ void setStartingPathNumber(Value* pathNumber);
+
+ Value* getEndingPathNumber();
+ void setEndingPathNumber(Value* pathNumber);
+
+ // Get/set the PHINode Instruction for this node.
+ PHINode* getPathPHI();
+ void setPathPHI(PHINode* pathPHI);
+
+private:
+
+ Value* _startingPathNumber; // The Value for the current pathNumber.
+ Value* _endingPathNumber; // The Value for the current pathNumber.
+ PHINode* _pathPHI; // The PHINode for current pathNumber.
+};
+
+// --------------------------------------------------------------------------
+// BLInstrumentationEdge extends BallLarusEdge with data about the
+// instrumentation that will end up on each edge.
+// --------------------------------------------------------------------------
+class BLInstrumentationEdge : public BallLarusEdge {
+public:
+ BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target);
+
+ // Sets the target node of this edge. Required to split edges.
+ void setTarget(BallLarusNode* node);
+
+ // Get/set whether edge is in the spanning tree.
+ bool isInSpanningTree() const;
+ void setIsInSpanningTree(bool isInSpanningTree);
+
+ // Get/set whether this edge will be instrumented with a path number
+ // initialization.
+ bool isInitialization() const;
+ void setIsInitialization(bool isInitialization);
+
+ // Get/set whether this edge will be instrumented with a path counter
+ // increment. Notice this is incrementing the path counter
+ // corresponding to the path number register. The path number
+ // increment is determined by getIncrement().
+ bool isCounterIncrement() const;
+ void setIsCounterIncrement(bool isCounterIncrement);
+
+ // Get/set the path number increment that this edge will be instrumented
+ // with. This is distinct from the path counter increment and the
+ // weight. The counter increment counts the number of executions of
+ // some path, whereas the path number keeps track of which path number
+ // the program is on.
+ long getIncrement() const;
+ void setIncrement(long increment);
+
+ // Get/set whether the edge has been instrumented.
+ bool hasInstrumentation();
+ void setHasInstrumentation(bool hasInstrumentation);
+
+ // Returns the successor number of this edge in the source.
+ unsigned getSuccessorNumber();
+
+private:
+ // The increment that the code will be instrumented with.
+ long long _increment;
+
+ // Whether this edge is in the spanning tree.
+ bool _isInSpanningTree;
+
+ // Whether this edge is an initialization of the path number.
+ bool _isInitialization;
+
+ // Whether this edge is a path counter increment.
+ bool _isCounterIncrement;
+
+ // Whether this edge has been instrumented.
+ bool _hasInstrumentation;
+};
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationDag extends BallLarusDag with algorithms that
+// determine where instrumentation should be placed.
+// ---------------------------------------------------------------------------
+class BLInstrumentationDag : public BallLarusDag {
+public:
+ BLInstrumentationDag(Function &F);
+
+ // Returns the Exit->Root edge. This edge is required for creating
+ // directed cycles in the algorithm for moving instrumentation off of
+ // the spanning tree
+ BallLarusEdge* getExitRootEdge();
+
+ // Returns an array of phony edges which mark those nodes
+ // with function calls
+ BLEdgeVector getCallPhonyEdges();
+
+ // Gets/sets the path counter array
+ GlobalVariable* getCounterArray();
+ void setCounterArray(GlobalVariable* c);
+
+ // Calculates the increments for the chords, thereby removing
+ // instrumentation from the spanning tree edges. Implementation is based
+ // on the algorithm in Figure 4 of [Ball94]
+ void calculateChordIncrements();
+
+ // Updates the state when an edge has been split
+ void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
+
+ // Calculates a spanning tree of the DAG ignoring cycles. Whichever
+ // edges are in the spanning tree will not be instrumented, but this
+ // implementation does not try to minimize the instrumentation overhead
+ // by trying to find hot edges.
+ void calculateSpanningTree();
+
+ // Pushes initialization further down in order to group the first
+ // increment and initialization.
+ void pushInitialization();
+
+ // Pushes the path counter increments up in order to group the last path
+ // number increment.
+ void pushCounters();
+
+ // Removes phony edges from the successor list of the source, and the
+ // predecessor list of the target.
+ void unlinkPhony();
+
+ // Generate dot graph for the function
+ void generateDotGraph();
+
+protected:
+ // BLInstrumentationDag creates BLInstrumentationNode objects in this
+ // method overriding the creation of BallLarusNode objects.
+ //
+ // Allows subclasses to determine which type of Node is created.
+ // Override this method to produce subclasses of BallLarusNode if
+ // necessary.
+ virtual BallLarusNode* createNode(BasicBlock* BB);
+
+ // BLInstrumentationDag creates BLInstrumentationEdges.
+ //
+ // Allows subclasses to determine which type of Edge is created.
+ // Override this method to produce subclasses of BallLarusEdge if
+ // necessary. Parameters source and target will have been created by
+ // createNode and can be cast to the subclass of BallLarusNode*
+ // returned by createNode.
+ virtual BallLarusEdge* createEdge(
+ BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
+
+private:
+ BLEdgeVector _treeEdges; // All edges in the spanning tree.
+ BLEdgeVector _chordEdges; // All edges not in the spanning tree.
+ GlobalVariable* _counterArray; // Array to store path counters
+
+ // Removes the edge from the appropriate predecessor and successor lists.
+ void unlinkEdge(BallLarusEdge* edge);
+
+ // Makes an edge part of the spanning tree.
+ void makeEdgeSpanning(BLInstrumentationEdge* edge);
+
+ // Pushes initialization and calls itself recursively.
+ void pushInitializationFromEdge(BLInstrumentationEdge* edge);
+
+ // Pushes path counter increments up recursively.
+ void pushCountersFromEdge(BLInstrumentationEdge* edge);
+
+ // Depth-first algorithm for determining the chord increments.
+ void calculateChordIncrementsDfs(
+ long weight, BallLarusNode* v, BallLarusEdge* e);
+
+ // Determines the relative direction of two edges.
+ int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
+};
+
+// ---------------------------------------------------------------------------
+// PathProfiler is a module pass which instruments path profiling instructions
+// ---------------------------------------------------------------------------
+class PathProfiler : public ModulePass {
+private:
+ // Current context for multi threading support.
+ LLVMContext* Context;
+
+ // Which function are we currently instrumenting
+ unsigned currentFunctionNumber;
+
+ // The function prototype in the profiling runtime for incrementing a
+ // single path counter in a hash table.
+ Constant* llvmIncrementHashFunction;
+ Constant* llvmDecrementHashFunction;
+
+ // Instruments each function with path profiling. 'main' is instrumented
+ // with code to save the profile to disk.
+ bool runOnModule(Module &M);
+
+ // Analyzes the function for Ball-Larus path profiling, and inserts code.
+ void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
+
+ // Creates an increment constant representing incr.
+ ConstantInt* createIncrementConstant(long incr, int bitsize);
+
+ // Creates an increment constant representing the value in
+ // edge->getIncrement().
+ ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
+
+ // Finds the insertion point after pathNumber in block. PathNumber may
+ // be NULL.
+ BasicBlock::iterator getInsertionPoint(
+ BasicBlock* block, Value* pathNumber);
+
+ // Inserts source's pathNumber Value* into target. Target may or may not
+ // have multiple predecessors, and may or may not have its phiNode
+ // initialized.
+ void pushValueIntoNode(
+ BLInstrumentationNode* source, BLInstrumentationNode* target);
+
+ // Inserts source's pathNumber Value* into the appropriate slot of
+ // target's phiNode.
+ void pushValueIntoPHI(
+ BLInstrumentationNode* target, BLInstrumentationNode* source);
+
+ // The Value* in node, oldVal, is updated with a Value* corresponding to
+ // oldVal + addition.
+ void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
+ bool atBeginning);
+
+ // Creates a counter increment in the given node. The Value* in node is
+ // taken as the index into a hash table.
+ void insertCounterIncrement(
+ Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment = true);
+
+ // A PHINode is created in the node, and its values initialized to -1U.
+ void preparePHI(BLInstrumentationNode* node);
+
+ // Inserts instrumentation for the given edge
+ //
+ // Pre: The edge's source node has pathNumber set if the edge has a
+ // non-zero path number increment.
+ //
+ // Post: Edge's target node has a pathNumber set to the path number Value
+ // corresponding to the value of the path register after edge's
+ // execution.
+ void insertInstrumentationStartingAt(
+ BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag);
+
+ // If this edge is a critical edge, then inserts a node at this edge.
+ // This edge becomes the first edge, and a new BallLarusEdge is created.
+ bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
+
+ // Inserts instrumentation according to the marked edges in dag. Phony
+ // edges must be unlinked from the DAG, but accessible from the
+ // backedges. Dag must have initializations, path number increments, and
+ // counter increments present.
+ //
+ // Counter storage is created here.
+ void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfiler() : ModulePass(ID) {
+ initializePathProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Path Profiler";
+ }
+};
+} // end anonymous namespace
+
+// Should we print the dot-graphs
+static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
+ cl::desc("Output the path profiling DAG for each function."));
+
+// Register the path profiler as a pass
+char PathProfiler::ID = 0;
+INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
+ "Insert instrumentation for Ball-Larus path profiling",
+ false, false)
+
+ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
+
+namespace llvm {
+ class PathProfilingFunctionTable {};
+
+ // Type for global array storing references to hashes or arrays
+ template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
+ xcompile> {
+ public:
+ static const StructType *get(LLVMContext& C) {
+ return( StructType::get(
+ TypeBuilder<types::i<32>, xcompile>::get(C), // type
+ TypeBuilder<types::i<32>, xcompile>::get(C), // array size
+ TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
+ NULL));
+ }
+ };
+
+ typedef TypeBuilder<PathProfilingFunctionTable, true>
+ ftEntryTypeBuilder;
+
+ // BallLarusEdge << operator overloading
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge)
+ LLVM_ATTRIBUTE_USED;
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge) {
+ os << "[" << edge.getSource()->getName() << " -> "
+ << edge.getTarget()->getName() << "] init: "
+ << (edge.isInitialization() ? "yes" : "no")
+ << " incr:" << edge.getIncrement() << " cinc: "
+ << (edge.isCounterIncrement() ? "yes" : "no");
+ return(os);
+ }
+}
+
+// Creates a new BLInstrumentationNode from a BasicBlock.
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
+ BallLarusNode(BB),
+ _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
+
+// Constructor for BLInstrumentationEdge.
+BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target)
+ : BallLarusEdge(source, target, 0),
+ _increment(0), _isInSpanningTree(false), _isInitialization(false),
+ _isCounterIncrement(false), _hasInstrumentation(false) {}
+
+// Sets the target node of this edge. Required to split edges.
+void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
+ _target = node;
+}
+
+// Returns whether this edge is in the spanning tree.
+bool BLInstrumentationEdge::isInSpanningTree() const {
+ return(_isInSpanningTree);
+}
+
+// Sets whether this edge is in the spanning tree.
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
+ _isInSpanningTree = isInSpanningTree;
+}
+
+// Returns whether this edge will be instrumented with a path number
+// initialization.
+bool BLInstrumentationEdge::isInitialization() const {
+ return(_isInitialization);
+}
+
+// Sets whether this edge will be instrumented with a path number
+// initialization.
+void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
+ _isInitialization = isInitialization;
+}
+
+// Returns whether this edge will be instrumented with a path counter
+// increment. Notice this is incrementing the path counter
+// corresponding to the path number register. The path number
+// increment is determined by getIncrement().
+bool BLInstrumentationEdge::isCounterIncrement() const {
+ return(_isCounterIncrement);
+}
+
+// Sets whether this edge will be instrumented with a path counter
+// increment.
+void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
+ _isCounterIncrement = isCounterIncrement;
+}
+
+// Gets the path number increment that this edge will be instrumented
+// with. This is distinct from the path counter increment and the
+// weight.  The counter increment counts the number of executions of
+// some path, whereas the path number keeps track of which path number
+// the program is on.
+long BLInstrumentationEdge::getIncrement() const {
+ return(_increment);
+}
+
+// Set whether this edge will be instrumented with a path number
+// increment.
+void BLInstrumentationEdge::setIncrement(long increment) {
+ _increment = increment;
+}
+
+// True iff the edge has already been instrumented.
+bool BLInstrumentationEdge::hasInstrumentation() {
+ return(_hasInstrumentation);
+}
+
+// Set whether this edge has been instrumented.
+void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
+ _hasInstrumentation = hasInstrumentation;
+}
+
+// Returns the successor number of this edge in the source.
+unsigned BLInstrumentationEdge::getSuccessorNumber() {
+ BallLarusNode* sourceNode = getSource();
+ BallLarusNode* targetNode = getTarget();
+ BasicBlock* source = sourceNode->getBlock();
+ BasicBlock* target = targetNode->getBlock();
+
+ if(source == NULL || target == NULL)
+ return(0);
+
+ TerminatorInst* terminator = source->getTerminator();
+
+ unsigned i;
+ for(i=0; i < terminator->getNumSuccessors(); i++) {
+ if(terminator->getSuccessor(i) == target)
+ break;
+ }
+
+ return(i);
+}
+
+// BLInstrumentationDag constructor initializes a DAG for the given Function.
+BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
+ _counterArray(0) {
+}
+
+// Returns the Exit->Root edge. This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+ BLEdgeIterator erEdge = getExit()->succBegin();
+ return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+ BLEdgeVector callEdges;
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++ ) {
+ if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+ callEdges.push_back(*edge);
+ }
+
+ return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+ return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+ _counterArray = c;
+}
+
+// Calculates the increments for the chords, thereby removing
+// instrumentation from the spanning tree edges. Implementation is based on
+// the algorithm in Figure 4 of [Ball94]
+void BLInstrumentationDag::calculateChordIncrements() {
+ calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+ BLInstrumentationEdge* chord;
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ chord = (BLInstrumentationEdge*) *chordEdge;
+ chord->setIncrement(chord->getIncrement() + chord->getWeight());
+ }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+ BasicBlock* newBlock) {
+ BallLarusNode* oldTarget = formerEdge->getTarget();
+ BallLarusNode* newNode = addNode(newBlock);
+ formerEdge->setTarget(newNode);
+ newNode->addPredEdge(formerEdge);
+
+ DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
+
+ oldTarget->removePredEdge(formerEdge);
+ BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+ if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+ formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+ newEdge->setType(formerEdge->getType());
+ newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+ newEdge->setPhonyExit(formerEdge->getPhonyExit());
+ formerEdge->setType(BallLarusEdge::NORMAL);
+ formerEdge->setPhonyRoot(NULL);
+ formerEdge->setPhonyExit(NULL);
+ }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles. Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+ std::stack<BallLarusNode*> dfsStack;
+
+ for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+ nodeIt != end; nodeIt++) {
+ (*nodeIt)->setColor(BallLarusNode::WHITE);
+ }
+
+ dfsStack.push(getRoot());
+ while(dfsStack.size() > 0) {
+ BallLarusNode* node = dfsStack.top();
+ dfsStack.pop();
+
+ if(node->getColor() == BallLarusNode::WHITE)
+ continue;
+
+ BallLarusNode* nextNode;
+ bool forward = true;
+ BLEdgeIterator succEnd = node->succEnd();
+
+ node->setColor(BallLarusNode::WHITE);
+ // first iterate over successors then predecessors
+ for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+ edge != predEnd; edge++) {
+ if(edge == succEnd) {
+ edge = node->predBegin();
+ forward = false;
+ }
+
+ // Ignore split edges
+ if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+ if(nextNode->getColor() != BallLarusNode::WHITE) {
+ nextNode->setColor(BallLarusNode::WHITE);
+ makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+ }
+ }
+ }
+
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+    // safe since createEdge is overridden
+ if(!instEdge->isInSpanningTree() && (*edge)->getType()
+ != BallLarusEdge::SPLITEDGE)
+ _chordEdges.push_back(instEdge);
+ }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
+void BLInstrumentationDag::pushInitialization() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsInitialization(true);
+ pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
+void BLInstrumentationDag::pushCounters() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+ BallLarusEdge* edge;
+
+ for(BLEdgeIterator next = _edges.begin(),
+ end = _edges.end(); next != end; next++) {
+ edge = (*next);
+
+ if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+ unlinkEdge(edge);
+ }
+ }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+ std::string errorInfo;
+ std::string functionName = getFunction().getNameStr();
+ std::string filename = "pathdag." + functionName + ".dot";
+
+ DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+ raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+ if (!errorInfo.empty()) {
+ errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+ errs() << "\n";
+ return;
+ }
+
+ dotFile << "digraph " << functionName << " {\n";
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ std::string sourceName = (*edge)->getSource()->getName();
+ std::string targetName = (*edge)->getTarget()->getName();
+
+ dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+ << targetName.c_str() << "\" ";
+
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+ switch( (*edge)->getType() ) {
+ case BallLarusEdge::NORMAL:
+ dotFile << "[label=" << inc << "] [color=black];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE:
+ dotFile << "[color=cyan];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE_PHONY:
+ dotFile << "[label=" << inc
+ << "] [color=blue];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE:
+ dotFile << "[color=violet];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=red];\n";
+ break;
+
+ case BallLarusEdge::CALLEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=green];\n";
+ break;
+ }
+ }
+
+ dotFile << "}\n";
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+ return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target, unsigned edgeNumber) {
+ // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+  // is overridden to produce BLInstrumentationNode.
+ return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+ (BLInstrumentationNode*)target) );
+}
+
+// Gets the Value corresponding to the pathNumber register, constant,
+// or phinode. Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+ return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber. Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
+ pathNumber->getNameStr() : "unused") << "\n");
+ _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+ return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " EPN-" << getName() << " <-- "
+ << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+ _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node. Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+ return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node. Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+ _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+ if(edge == getExitRootEdge())
+ DEBUG(dbgs() << " Removing exit->root edge\n");
+
+ edge->getSource()->removeSuccEdge(edge);
+ edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+ edge->setIsInSpanningTree(true);
+ _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+ BLInstrumentationEdge* edge) {
+ BallLarusNode* target;
+
+ target = edge->getTarget();
+ if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+ return;
+ } else {
+ for(BLEdgeIterator next = target->succBegin(),
+ end = target->succEnd(); next != end; next++) {
+ BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+ // Skip split edges
+ if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ intoEdge->setIncrement(intoEdge->getIncrement() +
+ edge->getIncrement());
+ intoEdge->setIsInitialization(true);
+ pushInitializationFromEdge(intoEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsInitialization(false);
+ }
+}
+
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+ BallLarusNode* source;
+
+ source = edge->getSource();
+ if(source->getNumberSuccEdges() > 1 || source == getRoot()
+ || edge->isInitialization()) {
+ return;
+ } else {
+ for(BLEdgeIterator previous = source->predBegin(),
+ end = source->predEnd(); previous != end; previous++) {
+ BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+ // Skip split edges
+ if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ fromEdge->setIncrement(fromEdge->getIncrement() +
+ edge->getIncrement());
+ fromEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(fromEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsCounterIncrement(false);
+ }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+ BallLarusNode* v, BallLarusEdge* e) {
+ BLInstrumentationEdge* f;
+
+ for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+ end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+ f = (BLInstrumentationEdge*) *treeEdge;
+ if(e != f && v == f->getTarget()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getSource(), f);
+ }
+ if(e != f && v == f->getSource()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getTarget(), f);
+ }
+ }
+
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ f = (BLInstrumentationEdge*) *chordEdge;
+ if(v == f->getSource() || v == f->getTarget()) {
+ f->setIncrement(f->getIncrement() +
+ calculateChordIncrementsDir(e,f)*weight);
+ }
+ }
+}
+
+// Determines the relative direction of two edges.
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+ BallLarusEdge* f) {
+ if( e == NULL)
+ return(1);
+ else if(e->getSource() == f->getTarget()
+ || e->getTarget() == f->getSource())
+ return(1);
+
+ return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+ int bitsize) {
+ return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+ BLInstrumentationEdge* edge) {
+ return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block. PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+ pathNumber) {
+ if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+ || (((Instruction*)(pathNumber))->getParent()) != block) {
+ return(block->getFirstNonPHI());
+ } else {
+ Instruction* pathNumberInst = (Instruction*) (pathNumber);
+ BasicBlock::iterator insertPoint;
+ BasicBlock::iterator end = block->end();
+
+ for(insertPoint = block->begin();
+ insertPoint != end; insertPoint++) {
+ Instruction* insertInst = &(*insertPoint);
+
+ if(insertInst == pathNumberInst)
+ return(++insertPoint);
+ }
+
+ return(insertPoint);
+ }
+}
+
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+ BasicBlock* block = node->getBlock();
+ BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+ pred_iterator PB = pred_begin(node->getBlock()),
+ PE = pred_end(node->getBlock());
+ PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
+ std::distance(PB, PE), "pathNumber",
+ insertPoint );
+ node->setPathPHI(phi);
+ node->setStartingPathNumber(phi);
+ node->setEndingPathNumber(phi);
+
+ for(pred_iterator predIt = PB; predIt != PE; predIt++) {
+ BasicBlock* pred = (*predIt);
+
+ if(pred != NULL)
+ phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+ }
+}
+
+// Inserts source's pathNumber Value* into target. Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initialized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+ BLInstrumentationNode* target) {
+ if(target->getBlock() == NULL)
+ return;
+
+
+ if(target->getNumberPredEdges() <= 1) {
+ assert(target->getStartingPathNumber() == NULL &&
+ "Target already has path number");
+ target->setStartingPathNumber(source->getEndingPathNumber());
+ target->setEndingPathNumber(source->getEndingPathNumber());
+ DEBUG(dbgs() << " Passing path number"
+ << (source->getEndingPathNumber() ? "" : " (null)")
+ << " value through.\n");
+ } else {
+ if(target->getPathPHI() == NULL) {
+ DEBUG(dbgs() << " Initializing PHI node for block '"
+ << target->getName() << "'\n");
+ preparePHI(target);
+ }
+ pushValueIntoPHI(target, source);
+ DEBUG(dbgs() << " Passing number value into PHI for block '"
+ << target->getName() << "'\n");
+ }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+ BLInstrumentationNode* source) {
+ PHINode* phi = target->getPathPHI();
+ assert(phi != NULL && " Tried to push value into node with PHI, but node"
+ " actually had no PHI.");
+ phi->removeIncomingValue(source->getBlock(), false);
+ phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal, is updated with a Value* corresponding to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+ Value* addition, bool atBeginning) {
+ BasicBlock* block = node->getBlock();
+ assert(node->getStartingPathNumber() != NULL);
+ assert(node->getEndingPathNumber() != NULL);
+
+ BasicBlock::iterator insertPoint;
+
+ if( atBeginning )
+ insertPoint = block->getFirstNonPHI();
+ else
+ insertPoint = block->getTerminator();
+
+ DEBUG(errs() << " Creating addition instruction.\n");
+ Value* newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ addition, "pathNumber", insertPoint);
+
+ node->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ node->setStartingPathNumber(newpn);
+}
+
+// Creates a counter increment in the given node. The Value* in node is
+// taken as the index into an array or hash table. The hash table access
+// is a call to the runtime.
+void PathProfiler::insertCounterIncrement(Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment) {
+ // Counter increment for array
+ if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ // Get pointer to the array location
+ std::vector<Value*> gepIndices(2);
+ gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ gepIndices[1] = incValue;
+
+ GetElementPtrInst* pcPointer =
+ GetElementPtrInst::Create(dag->getCounterArray(),
+ gepIndices.begin(), gepIndices.end(),
+ "counterInc", insertPoint);
+
+ // Load from the array - call it oldPC
+ LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+ // Test to see whether adding 1 will overflow the counter
+ ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+ createIncrementConstant(0xffffffff, 32),
+ "isMax");
+
+ // Select increment for the path counter based on overflow
+ SelectInst* inc =
+ SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+ createIncrementConstant(0,32),
+ "pathInc", insertPoint);
+
+ // newPc = oldPc + inc
+ BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+ oldPc, inc, "newPC",
+ insertPoint);
+
+ // Store back in to the array
+ new StoreInst(newPc, pcPointer, insertPoint);
+ } else { // Counter increment for hash
+ std::vector<Value*> args(2);
+ args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+ currentFunctionNumber);
+ args[1] = incValue;
+
+ CallInst::Create(
+ increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+ args, "", insertPoint);
+ }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if the edge has a non-zero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ // Mark the edge as instrumented
+ edge->setHasInstrumentation(true);
+ DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+ // create a new node for this edge's instrumentation
+ splitCritical(edge, dag);
+
+ BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+ BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+ BLInstrumentationNode* instrumentNode;
+ BLInstrumentationNode* nextSourceNode;
+
+ bool atBeginning = false;
+
+ // Source node has only 1 successor so any information can be simply
+ // inserted in to it without splitting
+ if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << sourceNode->getName() << " (at end)\n");
+ instrumentNode = sourceNode;
+ nextSourceNode = targetNode; // ... since we never made any new nodes
+ }
+
+ // The target node only has one predecessor, so we can safely insert edge
+ // instrumentation into it. If there was splitting, it must have been
+ // successful.
+ else if( targetNode->getNumberPredEdges() == 1 ) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << targetNode->getName() << " (at beginning)\n");
+ pushValueIntoNode(sourceNode, targetNode);
+ instrumentNode = targetNode;
+ nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+ atBeginning = true;
+ }
+
+ // Somehow, splitting must have failed.
+ else {
+ errs() << "Instrumenting could not split a critical edge.\n";
+ DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
+ return;
+ }
+
+ // Insert instrumentation if this is a back or split edge
+ if( edge->getType() == BallLarusEdge::BACKEDGE ||
+ edge->getType() == BallLarusEdge::SPLITEDGE ) {
+ BLInstrumentationEdge* top =
+ (BLInstrumentationEdge*) edge->getPhonyRoot();
+ BLInstrumentationEdge* bottom =
+ (BLInstrumentationEdge*) edge->getPhonyExit();
+
+ assert( top->isInitialization() && " Top phony edge did not"
+ " contain a path number initialization.");
+ assert( bottom->isCounterIncrement() && " Bottom phony edge"
+ " did not contain a path counter increment.");
+
+ // split edge has yet to be initialized
+ if( !instrumentNode->getEndingPathNumber() ) {
+ instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+ instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+ }
+
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ // add information from the bottom edge, if it exists
+ if( bottom->getIncrement() ) {
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(bottom),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+ }
+
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+ instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+ // Check for path counter increments
+ if( top->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ instrumentNode->getBlock()->getTerminator(),dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Insert instrumentation if this is a normal edge
+ else {
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ if( edge->isInitialization() ) { // initialize path number
+ instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+ } else if( edge->getIncrement() ) {// increment path number
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(edge),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(newpn);
+ }
+
+ // Check for path counter increments
+ if( edge->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Push it along
+ if (nextSourceNode && instrumentNode->getEndingPathNumber())
+ pushValueIntoNode(instrumentNode, nextSourceNode);
+
+ // Add all the successors
+ for( BLEdgeIterator next = targetNode->succBegin(),
+ end = targetNode->succEnd(); next != end; next++ ) {
+ // So long as it is un-instrumented, add it to the list
+ if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+ insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+ else
+ DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
+ << " already instrumented.\n");
+ }
+}
+
+// Inserts instrumentation according to the marked edges in dag. Phony edges
+// must be unlinked from the DAG, but accessible from the backedges. Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+ BLInstrumentationDag& dag, Module &M) {
+
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) dag.getExitRootEdge();
+ insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+ // Iterate through each call edge and apply the appropriate hash increment
+ // and decrement functions
+ BLEdgeVector callEdges = dag.getCallPhonyEdges();
+ for( BLEdgeIterator edge = callEdges.begin(),
+ end = callEdges.end(); edge != end; edge++ ) {
+ BLInstrumentationNode* node =
+ (BLInstrumentationNode*)(*edge)->getSource();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+
+ // Find the first function call
+ while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+ insertPoint++;
+
+ DEBUG(dbgs() << "\nInstrumenting method call block '"
+ << node->getBlock()->getNameStr() << "'\n");
+ DEBUG(dbgs() << " Path number initialized: "
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+ Value* newpn;
+ if( node->getStartingPathNumber() ) {
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+ if ( inc )
+ newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ createIncrementConstant(inc,32),
+ "pathNumber", insertPoint);
+ else
+ newpn = node->getStartingPathNumber();
+ } else {
+ newpn = (Value*)createIncrementConstant(
+ ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+ }
+
+ insertCounterIncrement(newpn, insertPoint, &dag);
+ insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+ &dag, false);
+ }
+}
+
+// Analyzes one function and inserts the path profiling instrumentation for it.
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+ Function &F, Module &M) {
+ // Build DAG from CFG
+ BLInstrumentationDag dag = BLInstrumentationDag(F);
+ dag.init();
+
+ // give each path a unique integer value
+ dag.calculatePathNumbers();
+
+ // modify path increments to increase the efficiency
+ // of instrumentation
+ dag.calculateSpanningTree();
+ dag.calculateChordIncrements();
+ dag.pushInitialization();
+ dag.pushCounters();
+ dag.unlinkPhony();
+
+ // potentially generate .dot graph for the dag
+ if (DotPathDag)
+ dag.generateDotGraph ();
+
+ // Should we store the information in an array or hash
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+ dag.getNumberOfPaths());
+
+ dag.setCounterArray(new GlobalVariable(M, t, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(t), ""));
+ }
+
+ insertInstrumentation(dag, M);
+
+ // Add to global function reference table
+ unsigned type;
+ const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+ type = ProfilingArray;
+ else
+ type = ProfilingHash;
+
+ std::vector<Constant*> entryArray(3);
+ entryArray[0] = createIncrementConstant(type,32);
+ entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+ entryArray[2] = dag.getCounterArray() ?
+ ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+ Constant::getNullValue(voidPtr);
+
+ const StructType* at = ftEntryTypeBuilder::get(*Context);
+ ConstantStruct* functionEntry =
+ (ConstantStruct*)ConstantStruct::get(at, entryArray);
+ ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changes
+#define PRINT_MODULE dbgs() << \
+ "\n\n============= MODULE BEGIN ===============\n" << M << \
+ "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+ Context = &M.getContext();
+
+ DEBUG(dbgs()
+ << "****************************************\n"
+ << "****************************************\n"
+ << "** **\n"
+ << "** PATH PROFILING INSTRUMENTATION **\n"
+ << "** **\n"
+ << "****************************************\n"
+ << "****************************************\n");
+
+ // No main, no instrumentation!
+ Function *Main = M.getFunction("main");
+
+  // Using Fortran? ... this kind of works
+ if (!Main)
+ Main = M.getFunction("MAIN__");
+
+ if (!Main) {
+ errs() << "WARNING: cannot insert path profiling into a module"
+ << " with no main function!\n";
+ return false;
+ }
+
+ llvmIncrementHashFunction = M.getOrInsertFunction(
+ "llvm_increment_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ llvmDecrementHashFunction = M.getOrInsertFunction(
+ "llvm_decrement_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ std::vector<Constant*> ftInit;
+ unsigned functionNumber = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+
+ DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+ functionNumber++;
+
+ // set function number
+ currentFunctionNumber = functionNumber;
+ runOnFunction(ftInit, *F, M);
+ }
+
+ const Type *t = ftEntryTypeBuilder::get(*Context);
+ const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+ Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+ DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+ GlobalVariable* functionTable =
+ new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+ ftInitConstant, "functionPathTable");
+ const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+ InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+ PointerType::getUnqual(eltType));
+
+ DEBUG(PRINT_MODULE);
+
+ return true;
+}
+
+// If this edge is a critical edge, then inserts a node at this edge.
+// This edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split
+bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ unsigned succNum = edge->getSuccessorNumber();
+ BallLarusNode* sourceNode = edge->getSource();
+ BallLarusNode* targetNode = edge->getTarget();
+ BasicBlock* sourceBlock = sourceNode->getBlock();
+ BasicBlock* targetBlock = targetNode->getBlock();
+
+ if(sourceBlock == NULL || targetBlock == NULL
+ || sourceNode->getNumberSuccEdges() <= 1
+ || targetNode->getNumberPredEdges() == 1 ) {
+ return(false);
+ }
+
+ TerminatorInst* terminator = sourceBlock->getTerminator();
+
+ if( SplitCriticalEdge(terminator, succNum, this, false)) {
+ BasicBlock* newBlock = terminator->getSuccessor(succNum);
+ dag->splitUpdate(edge, newBlock);
+ return(true);
+ } else
+ return(false);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
new file mode 100644
index 000000000000..445a5b6f6074
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -0,0 +1,170 @@
+//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+
+void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Array,
+ PointerType *arrayType) {
+ LLVMContext &Context = MainFn->getContext();
+ const Type *ArgVTy =
+ PointerType::getUnqual(Type::getInt8PtrTy(Context));
+ const PointerType *UIntPtr = arrayType ? arrayType :
+ Type::getInt32PtrTy(Context);
+ Module &M = *MainFn->getParent();
+ Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ ArgVTy, UIntPtr,
+ Type::getInt32Ty(Context),
+ (Type *)0);
+
+ // This could force argc and argv into programs that wouldn't otherwise have
+ // them, but instead we just pass null values in.
+ std::vector<Value*> Args(4);
+ Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Args[1] = Constant::getNullValue(ArgVTy);
+
+ // Skip over any allocas in the entry block.
+ BasicBlock *Entry = MainFn->begin();
+ BasicBlock::iterator InsertPos = Entry->begin();
+ while (isa<AllocaInst>(InsertPos)) ++InsertPos;
+
+ std::vector<Constant*> GEPIndices(2,
+ Constant::getNullValue(Type::getInt32Ty(Context)));
+ unsigned NumElements = 0;
+ if (Array) {
+ Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
+ GEPIndices.size());
+ NumElements =
+ cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
+ } else {
+ // If this profiling instrumentation doesn't have a constant array, just
+ // pass null.
+ Args[2] = ConstantPointerNull::get(UIntPtr);
+ }
+ Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
+
+ CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
+
+ // If argc or argv are not available in main, just pass null values in.
+ Function::arg_iterator AI;
+ switch (MainFn->arg_size()) {
+ default:
+ case 2:
+ AI = MainFn->arg_begin(); ++AI;
+ if (AI->getType() != ArgVTy) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ false);
+ InitCall->setArgOperand(1,
+ CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
+ } else {
+ InitCall->setArgOperand(1, AI);
+ }
+ /* FALL THROUGH */
+
+ case 1:
+ AI = MainFn->arg_begin();
+ // If the program looked at argc, have it look at the return value of the
+ // init call instead.
+ if (!AI->getType()->isIntegerTy(32)) {
+ Instruction::CastOps opcode;
+ if (!AI->use_empty()) {
+ opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
+ AI->replaceAllUsesWith(
+ CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
+ }
+ opcode = CastInst::getCastOpcode(AI, true,
+ Type::getInt32Ty(Context), true);
+ InitCall->setArgOperand(0,
+ CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
+ "argc.cast", InitCall));
+ } else {
+ AI->replaceAllUsesWith(InitCall);
+ InitCall->setArgOperand(0, AI);
+ }
+
+ case 0: break;
+ }
+}
+
+void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray, bool beginning) {
+ // Insert the increment after any alloca or PHI instructions...
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+ BB->getTerminator();
+ while (isa<AllocaInst>(InsertPos))
+ ++InsertPos;
+
+ LLVMContext &Context = BB->getContext();
+
+ // Create the getelementptr constant expression
+ std::vector<Constant*> Indices(2);
+ Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
+ Constant *ElementPtr =
+ ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+
+ // Load, increment and store the value back.
+ Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
+ Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ "NewFuncCounter", InsertPos);
+ new StoreInst(NewVal, ElementPtr, InsertPos);
+}
+
+void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
+ // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
+ // types.
+ Type *GlobalDtorElems[2] = {
+ Type::getInt32Ty(Mod->getContext()),
+ FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
+ };
+ const StructType *GlobalDtorElemTy =
+ StructType::get(Mod->getContext(), GlobalDtorElems, false);
+
+ // Construct the new element we'll be adding.
+ Constant *Elem[2] = {
+ ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
+ ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
+ };
+
+ // If llvm.global_dtors exists, make a copy of the things in its list and
+ // delete it, to replace it with one that has a larger array type.
+ std::vector<Constant *> dtors;
+ if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
+ if (ConstantArray *InitList =
+ dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
+ for (unsigned i = 0, e = InitList->getType()->getNumElements();
+ i != e; ++i)
+ dtors.push_back(cast<Constant>(InitList->getOperand(i)));
+ }
+ GlobalDtors->eraseFromParent();
+ }
+
+ // Build up llvm.global_dtors with our new item in it.
+ GlobalVariable *GlobalDtors = new GlobalVariable(
+ *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
+ GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
+
+ dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
+ GlobalDtors->setInitializer(ConstantArray::get(
+ cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
new file mode 100644
index 000000000000..09b22171ff04
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -0,0 +1,36 @@
+//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILINGUTILS_H
+#define PROFILINGUTILS_H
+
+namespace llvm {
+ class BasicBlock;
+ class Function;
+ class GlobalValue;
+ class Module;
+ class PointerType;
+
+ void InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Arr = 0,
+ PointerType *arrayType = 0);
+ void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray,
+ bool beginning = true);
+ void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 000000000000..a5adb5e7cefe
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,97 @@
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass. This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
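+//
+// A hedged, C-like sketch of the kind of code this catches (not taken from
+// this pass):
+//
+//   int sum = 0;
+//   for (int i = 0; i != n; ++i)
+//     sum += a[i];          // sum is never read afterwards
+//   return 0;
+//
+// The additions into sum form a cycle (each one feeds the next iteration), so
+// a use-count based DCE keeps them all.  Seeding liveness only from
+// terminators, debug intrinsics, and instructions that may have side effects,
+// and assuming everything else dead, lets this pass delete the whole chain
+// while the loop control stays live through the branch.
+//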
+
+#define DEBUG_TYPE "adce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+ struct ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function& F);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.setPreservesCFG();
+ }
+
+ };
+}
+
+char ADCE::ID = 0;
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
+
+bool ADCE::runOnFunction(Function& F) {
+ SmallPtrSet<Instruction*, 128> alive;
+ SmallVector<Instruction*, 128> worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isa<TerminatorInst>(I.getInstructionIterator()) ||
+ isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ I->mayHaveSideEffects()) {
+ alive.insert(I.getInstructionIterator());
+ worklist.push_back(I.getInstructionIterator());
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!worklist.empty()) {
+ Instruction* curr = worklist.pop_back_val();
+
+ for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
+ OI != OE; ++OI)
+ if (Instruction* Inst = dyn_cast<Instruction>(OI))
+ if (alive.insert(Inst))
+ worklist.push_back(Inst);
+ }
+
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the worklist vector here for memory efficiency.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (!alive.count(I.getInstructionIterator())) {
+ worklist.push_back(I.getInstructionIterator());
+ I->dropAllReferences();
+ }
+
+ for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
+ E = worklist.end(); I != E; ++I) {
+ ++NumRemoved;
+ (*I)->eraseFromParent();
+ }
+
+ return !worklist.empty();
+}
+
+FunctionPass *llvm::createAggressiveDCEPass() {
+ return new ADCE();
+}
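
The liveness propagation above can be read in isolation as a small mark-and-sweep over a dependence graph. The following standalone sketch (plain C++, not LLVM code, with made-up node names) mirrors the same optimistic assumption: nothing is live until a root, such as a terminator or a side-effecting instruction, proves it so. That is what lets a self-feeding dead loop value disappear.

#include <cstdio>
#include <set>
#include <vector>

struct Node {
  const char *Name;
  bool IsRoot;                  // stands in for "terminator or side effects"
  std::vector<Node*> Operands;  // values this node depends on
  Node(const char *N, bool R) : Name(N), IsRoot(R) {}
};

// Optimistic liveness: only roots are live up front; liveness then flows
// backwards over operand edges, just like the worklist loop in ADCE above.
static void markLive(const std::vector<Node*> &Roots, std::set<Node*> &Alive) {
  std::vector<Node*> Worklist(Roots.begin(), Roots.end());
  Alive.insert(Roots.begin(), Roots.end());
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    for (size_t i = 0; i != N->Operands.size(); ++i)
      if (Alive.insert(N->Operands[i]).second)   // newly proven live
        Worklist.push_back(N->Operands[i]);
  }
}

int main() {
  Node A("a = load", false);
  Node B("b = a + 1", false);              B.Operands.push_back(&A);
  Node C("c = dead loop counter", false);  C.Operands.push_back(&C);
  Node R("ret b", true);                   R.Operands.push_back(&B);
  Node *All[] = { &A, &B, &C, &R };

  std::vector<Node*> Roots;
  for (size_t i = 0; i != 4; ++i)
    if (All[i]->IsRoot)
      Roots.push_back(All[i]);

  std::set<Node*> Alive;
  markLive(Roots, Alive);

  for (size_t i = 0; i != 4; ++i)
    if (!Alive.count(All[i]))
      std::printf("remove: %s\n", All[i]->Name);  // only the self-feeding "c"
  return 0;
}
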
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
new file mode 100644
index 000000000000..cee550265622
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -0,0 +1,152 @@
+//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a very simple profile guided basic block placement
+// algorithm. The idea is to put frequently executed blocks together at the
+// start of the function, and hopefully increase the number of fall-through
+// conditional branches. If there is no profile information for a particular
+// function, this pass basically orders blocks in depth-first order.
+//
+// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+// Positioning" by Pettis and Hansen, except that it uses basic block counts
+// instead of edge counts. This should be improved in many ways, but is very
+// simple for now.
+//
+// Basically we "place" the entry block, then loop over all successors in a DFO,
+// placing the most frequently executed successor until we run out of blocks. I
+// told you this was _extremely_ simplistic. :) This is also much slower than it
+// could be. When it becomes important, this pass will be rewritten to use a
+// better algorithm, and then we can worry about efficiency.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Scalar.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumMoved, "Number of basic blocks moved");
+
+namespace {
+ struct BlockPlacement : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BlockPlacement() : FunctionPass(ID) {
+ initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+}
+
+char BlockPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+
+FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
+
+bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+}
+
+
+/// PlaceBlocks - Recursively place the specified blocks and any unplaced
+/// successors.
+void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function then readd it, which causes a bunch of
+ // symbol table traffic that is entirely pointless.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ double MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ double Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+}
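
The greedy loop in PlaceBlocks above amounts to: place a block, then repeatedly pull in its hottest not-yet-placed successor. Below is a standalone sketch of that idea on a toy CFG (plain C++; the block names and counts are made up and stand in for ProfileInfo::getExecutionCount(), and the real pass splices blocks inside the Function instead of building a layout vector).

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::map<std::string, std::vector<std::string> > SuccMap;
typedef std::map<std::string, double> CountMap;   // stand-in for ProfileInfo

// Place BB, then keep placing its most frequently executed unplaced successor.
static void place(const std::string &BB, const SuccMap &Succs,
                  const CountMap &Count, std::set<std::string> &Placed,
                  std::vector<std::string> &Layout) {
  Placed.insert(BB);
  Layout.push_back(BB);
  while (true) {
    const std::vector<std::string> &S = Succs.find(BB)->second;
    std::string Best;
    double BestCount = -1.0;
    for (size_t i = 0; i != S.size(); ++i)
      if (!Placed.count(S[i]) && Count.find(S[i])->second > BestCount) {
        Best = S[i];
        BestCount = Count.find(S[i])->second;
      }
    if (Best.empty()) return;          // no unplaced successors left
    place(Best, Succs, Count, Placed, Layout);
  }
}

int main() {
  SuccMap Succs;
  Succs["entry"].push_back("cold");
  Succs["entry"].push_back("hot");
  Succs["hot"].push_back("exit");
  Succs["cold"].push_back("exit");
  Succs["exit"];                       // ensure an (empty) successor entry

  CountMap Count;
  Count["entry"] = 100; Count["hot"] = 95; Count["cold"] = 5; Count["exit"] = 100;

  std::set<std::string> Placed;
  std::vector<std::string> Layout;
  place("entry", Succs, Count, Placed, Layout);

  for (size_t i = 0; i != Layout.size(); ++i)
    std::printf("%s ", Layout[i].c_str());   // entry hot exit cold
  std::printf("\n");
  return 0;
}
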
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
new file mode 100644
index 000000000000..0af14ed17bf6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -0,0 +1,1161 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegenprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
+
+namespace {
+ class CodeGenPrepare : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *TLI;
+ DominatorTree *DT;
+ ProfileInfo *PFI;
+
+ /// CurInstIterator - As we scan instructions optimizing them, this is the
+ /// next instruction to optimize. Xforms that can invalidate this should
+ /// update it.
+ BasicBlock::iterator CurInstIterator;
+
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
+ /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
+ /// be updated.
+ bool ModifiedDT;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ }
+
+ private:
+ bool EliminateMostlyEmptyBlocks(Function &F);
+ bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void EliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool OptimizeBlock(BasicBlock &BB);
+ bool OptimizeInst(Instruction *I);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeInlineAsmInst(CallInst *CS);
+ bool OptimizeCallInst(CallInst *CI);
+ bool MoveExtToFormExtLoad(Instruction *I);
+ bool OptimizeExtUses(Instruction *I);
+ bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ };
+}
+
+char CodeGenPrepare::ID = 0;
+INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
+ return new CodeGenPrepare(TLI);
+}
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ bool EverMadeChange = false;
+
+ ModifiedDT = false;
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ PFI = getAnalysisIfAvailable<ProfileInfo>();
+
+ // First pass, eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ MadeChange |= OptimizeBlock(*BB);
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ SunkAddrs.clear();
+
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ MadeChange |= ConstantFoldTerminator(BB, true);
+
+ if (MadeChange)
+ ModifiedDT = true;
+ EverMadeChange |= MadeChange;
+ }
+
+ if (ModifiedDT && DT)
+ DT->DT->recalculate(F);
+
+ return EverMadeChange;
+}
+
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch. Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel. Start by eliminating these blocks so we can split them the way we
+/// want them.
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+ bool MadeChange = false;
+ // Note that this intentionally skips the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ continue;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI;
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ continue;
+
+ if (!CanMergeBlocks(BB, DestBB))
+ continue;
+
+ EliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
+/// single uncond branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+  // the successor.  If there are more complex conditions (e.g. preheaders),
+ // don't mess around with them.
+ BasicBlock::const_iterator BBI = BB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != DestBB || !isa<PHINode>(User))
+ return false;
+ // If User is inside DestBB block and it is a PHINode then check
+ // incoming value. If incoming value is not from BB then this is
+ // a complex condition (e.g. preheaders) we want to avoid here.
+ if (User->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(User))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN) return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ const Value *V1 = PN->getIncomingValueForBlock(Pred);
+ const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2) return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
+/// an unconditional branch in it.
+void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = DestBB->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN->removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ PN->addIncoming(InVal, *PI);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ if (DT && !ModifiedDT) {
+ BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
+ BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+ DT->changeImmediateDominator(DestBB, NewIDom);
+ DT->eraseNode(BB);
+ }
+ if (PFI) {
+ PFI->replaceAllUses(BB, DestBB);
+ PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+ }
+ BB->eraseFromParent();
+ ++NumBlocksElim;
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+ // If this is a noop copy,
+ EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(CI->getType());
+
+  // If this is an fp<->int conversion, it is not a noop copy.
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT)) return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(UI);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCast =
+ CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+ InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ ++NumCastUses;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced. This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCmp - Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCmp =
+ CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(), CI->getOperand(0),
+ CI->getOperand(1), "", InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ ++NumCmpUses;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return MadeChange;
+}
+
+namespace {
+class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+protected:
+ void replaceCall(Value *With) {
+ CI->replaceAllUsesWith(With);
+ CI->eraseFromParent();
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
+ return false;
+ }
+};
+} // end anonymous namespace
+
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+  // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (OptimizeInlineAsmInst(CI))
+ return true;
+ }
+
+ // Lower all uses of llvm.objectsize.*
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ const Type *ReturnTy = CI->getType();
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ ModifiedDT ? 0 : DT);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ return true;
+ }
+
+ // From here on out we're working with named functions.
+ if (CI->getCalledFunction() == 0) return false;
+
+  // If llvm.dbg.value is far away from the value, then iSel may not be able
+  // to handle it properly.  iSel will drop llvm.dbg.value if it cannot
+  // find a node corresponding to the value.
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
+ if (!VI->isTerminator() &&
+ (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstNonPHI());
+ else
+ DVI->insertAfter(VI);
+ return true;
+ }
+
+ // We'll need TargetData from here on out.
+ const TargetData *TD = TLI ? TLI->getTargetData() : 0;
+ if (!TD) return false;
+
+ // Lower all default uses of _chk calls. This is very similar
+ // to what InstCombineCalls does, but here we are only lowering calls
+ // that have the default "don't know" as the objectsize. Anything else
+ // should be left alone.
+ CodeGenPrepareFortifiedLibCalls Simplifier;
+ return Simplifier.fold(CI, TD);
+}
+
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+///
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+ if (!TLI)
+ return false;
+
+ Value *V = RI->getReturnValue();
+ PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
+ if (V && !PN)
+ return false;
+
+ BasicBlock *BB = RI->getParent();
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Make sure there are no instructions between the PHI and return, or that the
+ // return is the first instruction in the block.
+ if (PN) {
+ BasicBlock::iterator BI = BB->begin();
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI != RI)
+ return false;
+ } else {
+ BasicBlock::iterator BI = BB->begin();
+ while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+ if (&*BI != RI)
+ return false;
+ }
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ /// call.
+ SmallVector<CallInst*, 4> TailCalls;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ } else {
+ SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (!VisitedBBs.insert(*PI))
+ continue;
+
+ BasicBlock::InstListType &InstList = (*PI)->getInstList();
+ BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+ BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+ do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+ if (RI == RE)
+ continue;
+
+ CallInst *CI = dyn_cast<CallInst>(&*RI);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of the
+ // return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ ModifiedDT = Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && pred_begin(BB) == pred_end(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+/// IsNonLocalValue - Return true if the specified value is defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// OptimizeMemoryInst - Load and Store Instructions often have
+/// addressing modes that can do significant amounts of computation. As such,
+/// instruction selection will try to get the load or store to do as much
+/// computation as possible for the program. The problem is that isel can only
+/// see within a single block. As such, we sink as much legal addressing mode
+/// stuff into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ const Type *AccessTy) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+ // the addressing mode obtained from the non-PHI roots of the graph
+ // are equivalent.
+ Value *Consensus = 0;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode;
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (Visited.count(V)) {
+ Consensus = 0;
+ break;
+ }
+
+ Visited.insert(V);
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode =
+ AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ NewAddrModeInsts, *TLI);
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
+ Consensus = V;
+ NumUsesConsensus = NumUses;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = 0;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) return false;
+
+  // Check to see if any of the instructions subsumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+  // scanning from the top of the BB to the bottom, reuses of the expr are
+ // guaranteed to happen later.
+ BasicBlock::iterator InsertPt = MemoryInst;
+
+  // Now that we've determined the addressing expression we want to use and
+  // know that we have to sink it into this block, check to see if we have
+  // already done this for some other load/store instr in this block.  If so,
+  // reuse the computation.
+ Value *&SunkAddr = SunkAddrs[Addr];
+ if (SunkAddr) {
+ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ } else {
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ const Type *IntPtrTy =
+ TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
+
+ Value *Result = 0;
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType()->isPointerTy())
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ if (V->getType() != IntPtrTy)
+ V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
+ "sunkaddr", InsertPt);
+ Result = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (V->getType()->isPointerTy()) {
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else {
+ V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ }
+ if (AddrMode.Scale != 1)
+ V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
+ AddrMode.Scale),
+ "sunkaddr", InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
+ InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ if (Result == 0)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ }
+
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
+ if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ WeakVH IterHandle(CurInstIterator);
+ BasicBlock *BB = CurInstIterator->getParent();
+
+ RecursivelyDeleteTriviallyDeadInstructions(Repl);
+
+ if (IterHandle != CurInstIterator) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ } else {
+ // This address is now available for reassignment, so erase the table
+ // entry; we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
+ }
+ ++NumMemoryInsts;
+ return true;
+}
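// Illustrative only (not taken from this file): a rough before/after of what
// OptimizeMemoryInst above does when a load's address is computed in another
// block.  The block and value names are made up, and the exact decomposition
// depends on what AddressingModeMatcher accepts for the target.
//
//   Before:
//     bb1:
//       %addr = getelementptr i32* %p, i64 %i
//       br label %bb2
//     bb2:
//       %v = load i32* %addr
//
//   After (the address is rematerialized as integer math next to the load, so
//   instruction selection can fold it into the memory operand):
//     bb2:
//       %sunkaddr = ptrtoint i32* %p to i64
//       %sunkaddr1 = mul i64 %i, 4
//       %sunkaddr2 = add i64 %sunkaddr, %sunkaddr1
//       %sunkaddr3 = inttoptr i64 %sunkaddr2 to i32*
//       %v = load i32* %sunkaddr3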
+
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeMemoryInst to sink their address computing into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+ bool MadeChange = false;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI->ParseConstraints(CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
+ }
+
+ return MadeChange;
+}
+
+/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
+/// basic block as the load, unless conditions are unfavorable. This allows
+/// SelectionDAG to fold the extend into the load.
+///
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+ // Look for a load being extended.
+ LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI) return false;
+
+ // If they're already in the same block, there's nothing to do.
+ if (LI->getParent() == I->getParent())
+ return false;
+
+ // If the load has other users and the truncate is not free, this probably
+ // isn't worthwhile.
+ if (!LI->hasOneUse() &&
+ TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+ !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType()))
+ return false;
+
+ // Check whether the target supports casts folded into loads.
+ unsigned LType;
+ if (isa<ZExtInst>(I))
+ LType = ISD::ZEXTLOAD;
+ else {
+ assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ LType = ISD::SEXTLOAD;
+ }
+ if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ return false;
+
+ // Move the extend into the same block as the load, so that SelectionDAG
+ // can fold it.
+ I->removeFromParent();
+ I->insertAfter(LI);
+ ++NumExtsMoved;
+ return true;
+}
+
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
+  // other uses of the source with the result of the extension.
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+  // Make sure none of the uses are PHI nodes.
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
+ return false;
+ }
+
+ // InsertedTruncs - Only insert one trunc in each block once.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ TheUse = InsertedTrunc;
+ ++NumExtUses;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than the one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ bool MadeChange = MoveExtToFormExtLoad(I);
+ return MadeChange | OptimizeExtUses(I);
+ }
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return OptimizeCmpExpression(CI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType());
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ OptimizeInst(NC);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return OptimizeCallInst(CI);
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ return DupRetToEnableTailCallOpts(RI);
+
+ return false;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+ SunkAddrs.clear();
+ bool MadeChange = false;
+
+ CurInstIterator = BB.begin();
+ for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+ MadeChange |= OptimizeInst(CurInstIterator++);
+
+ return MadeChange;
+}
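
As a usage sketch, the pass is intended to run right before instruction selection, with the target's TargetLowering supplied so the profitability checks above have something to consult. The driver fragment below is hypothetical: the FunctionPassManager/TargetMachine wiring is assumed era-appropriate API rather than taken from this commit, and passing a null TargetLowering is also valid and simply disables the target-dependent transforms.

#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

// Hypothetical hook called while setting up the pre-isel pipeline.
static void addPreISelPasses(FunctionPassManager &FPM, TargetMachine *TM) {
  const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
  // One pass covers the block merging, cast/cmp sinking, addressing-mode
  // sinking and ret duplication implemented above.
  FPM.add(createCodeGenPreparePass(TLI));
}
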
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
new file mode 100644
index 000000000000..664c3f6a222f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -0,0 +1,91 @@
+//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+//    * Converts instructions like "add i32 1, 2" into 3
+//
+// Notice that:
+// * This pass has a habit of making definitions be dead. It is a good idea
+// to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+ struct ConstantPropagation : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantPropagation() : FunctionPass(ID) {
+ initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char ConstantPropagation::ID = 0;
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+
+FunctionPass *llvm::createConstantPropagationPass() {
+ return new ConstantPropagation();
+}
+
+
+bool ConstantPropagation::runOnFunction(Function &F) {
+ // Initialize the worklist to all of the instructions ready to process...
+ std::set<Instruction*> WorkList;
+ for(inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+ WorkList.insert(&*i);
+ }
+ bool Changed = false;
+
+ while (!WorkList.empty()) {
+ Instruction *I = *WorkList.begin();
+ WorkList.erase(WorkList.begin()); // Get an element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I)) {
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ WorkList.insert(cast<Instruction>(*UI));
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ // Remove the dead instruction.
+ WorkList.erase(I);
+ I->eraseFromParent();
+
+ // We made a change to the function...
+ Changed = true;
+ ++NumInstKilled;
+ }
+ }
+ return Changed;
+}
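
The note above about leaving dead definitions behind translates directly into pass scheduling: constprop rewrites the users of a foldable instruction but may leave that instruction's operands dead, so a dead-instruction sweep is typically queued right after it. A hypothetical pipeline fragment using only the factory functions declared in llvm/Transforms/Scalar.h (the PassManager wiring itself is assumed, not taken from this commit):

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static void addConstantFolding(PassManager &PM) {
  PM.add(createConstantPropagationPass());   // fold "add i32 1, 2" and the like
  PM.add(createDeadInstEliminationPass());   // sweep up the now-dead operands
}
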
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 000000000000..e275268fc4ea
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,207 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) {
+ initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ Value *ReplaceWith = S->getOperand(1);
+ Value *Other = S->getOperand(2);
+ if (!CI->isOne()) std::swap(ReplaceWith, Other);
+ if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+ S->replaceAllUsesWith(ReplaceWith);
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
+ P->getIncomingBlock(i),
+ BB);
+ if (!C) continue;
+
+ P->setIncomingValue(i, C);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
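
The rule in processCmp above, fold a comparison only when every incoming edge yields the same known verdict, can be stated in a few lines. A standalone sketch follows (plain C++, not LazyValueInfo itself; the Tristate enum only mirrors LazyValueInfo::Tristate in spirit, and the edge verdicts are made up).

#include <cstdio>
#include <vector>

enum Tristate { False = 0, True = 1, Unknown = -1 };   // like LVI's Tristate

// Fold the compare only when every incoming edge yields the same known answer.
static Tristate foldAcrossEdges(const std::vector<Tristate> &EdgeVerdicts) {
  if (EdgeVerdicts.empty()) return Unknown;
  Tristate Result = EdgeVerdicts[0];
  if (Result == Unknown) return Unknown;
  for (size_t i = 1, e = EdgeVerdicts.size(); i != e; ++i)
    if (EdgeVerdicts[i] != Result) return Unknown;
  return Result;
}

int main() {
  std::vector<Tristate> Agree;   // both predecessors prove the compare true
  Agree.push_back(True); Agree.push_back(True);
  std::vector<Tristate> Mixed;   // one edge can't decide: leave the compare alone
  Mixed.push_back(True); Mixed.push_back(Unknown);

  std::printf("%d %d\n", foldAcrossEdges(Agree), foldAcrossEdges(Mixed));  // 1 -1
  return 0;
}
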
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
new file mode 100644
index 000000000000..8dbcc23d7ec8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,135 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead. Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
+STATISTIC(DCEEliminated, "Number of insts removed");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadInstElimination pass implementation
+ //
+ struct DeadInstElimination : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+ DeadInstElimination() : BasicBlockPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
+ Instruction *Inst = DI++;
+ if (isInstructionTriviallyDead(Inst)) {
+ Inst->eraseFromParent();
+ Changed = true;
+ ++DIEEliminated;
+ }
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DeadInstElimination::ID = 0;
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false)
+
+Pass *llvm::createDeadInstEliminationPass() {
+ return new DeadInstElimination();
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadCodeElimination pass implementation
+ //
+ struct DCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DCE() : FunctionPass(ID) {
+ initializeDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DCE::ID = 0;
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+
+bool DCE::runOnFunction(Function &F) {
+ // Start out with all of the instructions in the worklist...
+ std::vector<Instruction*> WorkList;
+ for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+ WorkList.push_back(&*i);
+
+ // Loop over the worklist, finding instructions that are dead. If they are
+ // dead, make them drop all of their uses, making other instructions
+ // potentially dead, and work until the worklist is empty.
+ //
+ bool MadeChange = false;
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ // Loop over all of the values that the instruction uses; if any of them
+ // are instructions, add them to the worklist because they might become
+ // dead after this one is removed.
+ //
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *Used = dyn_cast<Instruction>(*OI))
+ WorkList.push_back(Used);
+
+ // Remove the instruction.
+ I->eraseFromParent();
+
+ // Remove the instruction from the worklist if it still exists in it.
+ for (std::vector<Instruction*>::iterator WI = WorkList.begin();
+ WI != WorkList.end(); ) {
+ if (*WI == I)
+ WI = WorkList.erase(WI);
+ else
+ ++WI;
+ }
+
+ MadeChange = true;
+ ++DCEEliminated;
+ }
+ }
+ return MadeChange;
+}
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+ return new DCE();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
new file mode 100644
index 000000000000..cb9b5bebc5c7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -0,0 +1,727 @@
+//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a trivial dead store elimination that only considers
+// basic-block local redundant stores.
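+//
+// For example (illustrative IR): the first store below is overwritten before
+// %p can be read again, so it is deleted:
+//
+//   store i32 0, i32* %p
+//   store i32 1, i32* %p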
+//
+// FIXME: This should eventually be extended to be a post-dominator tree
+// traversal. Doing so would be pretty trivial.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumFastStores, "Number of stores deleted");
+STATISTIC(NumFastOther , "Number of other instrs removed");
+
+namespace {
+ struct DSE : public FunctionPass {
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+
+ static char ID; // Pass identification, replacement for typeid
+ DSE() : FunctionPass(ID), AA(0), MD(0) {
+ initializeDSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ AA = &getAnalysis<AliasAnalysis>();
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ // Only check non-dead blocks. Dead blocks may have strange pointer
+ // cycles that will confuse alias analysis.
+ if (DT.isReachableFromEntry(I))
+ Changed |= runOnBasicBlock(*I);
+
+ AA = 0; MD = 0;
+ return Changed;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB);
+ bool HandleFree(CallInst *F);
+ bool handleEndBlock(BasicBlock &BB);
+ void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+ };
+}
+
+char DSE::ID = 0;
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
+
+FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+ MemoryDependenceAnalysis &MD,
+ SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MD.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+
+/// hasMemoryWrite - Does this instruction write some memory? This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ return false;
+}
+
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return AA.getLocation(SI);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ // memcpy/memmove/memset.
+ AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // memset/memcpy, which write more than an i8.
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+ return AliasAnalysis::Location();
+ return Loc;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II == 0) return AliasAnalysis::Location();
+
+ switch (II->getIntrinsicID()) {
+ default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ case Intrinsic::init_trampoline:
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // init.trampoline, which writes more than an i8.
+ if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+
+ // FIXME: We don't know the size of the trampoline, so we can't really
+ // handle it here.
+ return AliasAnalysis::Location(II->getArgOperand(0));
+ case Intrinsic::lifetime_end: {
+ uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ }
+ }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+ assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+
+ // The only instructions that both read and write are the mem transfer
+ // instructions (memcpy/memmove).
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ return AA.getLocationForSource(MTI);
+ return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// are unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+ // Don't remove volatile stores.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because one may be followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
+
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
+}
+
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getDest();
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ }
+}
+
+static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+ const TargetData *TD = AA.getTargetData();
+ if (TD == 0)
+ return AliasAnalysis::UnknownSize;
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ return AliasAnalysis::UnknownSize;
+ }
+
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+}
+
+/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
+/// pointing to an object with a pointer size we can trust.
+static bool isObjectPointerWithTrustworthySize(const Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return !AI->isArrayAllocation();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->mayBeOverridden();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+}
+
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA) {
+ const Value *P1 = Earlier.Ptr->stripPointerCasts();
+ const Value *P2 = Later.Ptr->stripPointerCasts();
+
+ // If the start pointers are the same, we just have to compare sizes to see if
+ // the later store was larger than the earlier store.
+ if (P1 == P2) {
+ // If we don't know the sizes of either access, then we can't do a
+ // comparison.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize) {
+ // If we have no TargetData information around, then the size of the store
+ // is inferrable from the pointee type. If both pointers have the same
+ // pointee type, the later store completely overwrites the earlier one.
+ if (AA.getTargetData() == 0)
+ return Later.Ptr->getType() == Earlier.Ptr->getType();
+ return false;
+ }
+
+ // Make sure that the Later size is >= the Earlier size.
+ if (Later.Size < Earlier.Size)
+ return false;
+ return true;
+ }
+
+ // Otherwise, we have to have size information, and the later store has to be
+ // larger than the earlier one.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize ||
+ Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+ return false;
+
+ // Check to see if the later store is to the entire object (either a global,
+ // an alloca, or a byval argument). If so, then it clearly overwrites any
+ // other store to the same object.
+ const TargetData &TD = *AA.getTargetData();
+
+ const Value *UO1 = GetUnderlyingObject(P1, &TD),
+ *UO2 = GetUnderlyingObject(P2, &TD);
+
+ // If we can't resolve the same pointers to the same object, then we can't
+ // analyze them at all.
+ if (UO1 != UO2)
+ return false;
+
+ // If the "Later" store is to a recognizable object, get its size.
+ if (isObjectPointerWithTrustworthySize(UO2)) {
+ uint64_t ObjectSize =
+ TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
+ if (ObjectSize == Later.Size)
+ return true;
+ }
+
+ // Okay, we have stores to two completely different pointers. Try to
+ // decompose the pointer into a "base + constant_offset" form. If the base
+ // pointers are equal, then we can reason about the two stores.
+ int64_t EarlierOff = 0, LaterOff = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
+
+ // If the base pointers still differ, we have two completely different stores.
+ if (BP1 != BP2)
+ return false;
+
+ // The later store completely overlaps the earlier store if:
+ //
+ // 1. Both start at the same offset and the later one's size is greater than
+ // or equal to the earlier one's, or
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // 2. The earlier store has an offset greater than the later offset, but which
+ // still lies completely within the later store.
+ //
+ // |--earlier--|
+ // |----- later ------|
+ //
+ // We have to be careful here as *Off is signed while *.Size is unsigned.
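+ // For example (illustrative numbers): if the earlier store covers bytes
+ // [4, 8) (EarlierOff = 4, Size = 4) and the later store covers bytes
+ // [0, 16) (LaterOff = 0, Size = 16), then (4 - 0) + 4 = 8 <= 16 and the
+ // earlier store is completely overwritten.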
+ if (EarlierOff >= LaterOff &&
+ uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ return true;
+
+ // Otherwise, they don't completely overlap.
+ return false;
+}
+
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense. Consider this case:
+///
+/// memcpy(A <- B)
+/// memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
+///
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+ const AliasAnalysis::Location &InstStoreLoc,
+ Instruction *DepWrite, AliasAnalysis &AA) {
+ // Self reads can only happen for instructions that read memory. Get the
+ // location read.
+ AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+
+ // If the read and written loc obviously don't alias, it isn't a read.
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
+
+ // Okay, 'Inst' may copy over itself. However, we can still remove the
+ // DepWrite instruction if we can prove that it reads from the same location
+ // as Inst. This handles useful cases like:
+ // memcpy(A <- B)
+ // memcpy(A <- B)
+ // Here we don't know if A/B may alias, but we do know that B/B are must
+ // aliases, so removing the first memcpy is safe (assuming it writes <= the
+ // number of bytes written by the second one).
+ AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
+
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+
+ // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+ // then it can't be considered dead.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Do a top-down walk on the BB.
+ for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
+ Instruction *Inst = BBI++;
+
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= HandleFree(F);
+ continue;
+ }
+
+ // If we find something that writes memory, get its memory dependence.
+ if (!hasMemoryWrite(Inst))
+ continue;
+
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal() || InstDep.isUnknown())
+ continue;
+
+ // If we're storing the same value back to a pointer that we just
+ // loaded from, then the store can be removed.
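+ // For example (illustrative IR):
+ //   %v = load i32* %p
+ //   store i32 %v, i32* %p   ; a no-op store, so it is removed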
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
+ << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
+
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI, *MD);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
+ // Figure out what location is being stored to.
+ AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (Loc.Ptr == 0)
+ continue;
+
+ while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+ // away the store and we bail out. However, if we depend on something
+ // that overwrites the memory location, we *can* potentially optimize it.
+ //
+ // Find out what memory location the dependent instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, bail out.
+ if (DepLoc.Ptr == 0)
+ break;
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know that
+ // 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases; reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
+ }
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+ }
+ }
+
+ // If this block ends in a return, unwind, or unreachable, all allocas are
+ // dead at its end, which means stores to them are also dead.
+ if (BB.getTerminator()->getNumSuccessors() == 0)
+ MadeChange |= handleEndBlock(BB);
+
+ return MadeChange;
+}
+
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+ bool MadeChange = false;
+
+ MemDepResult Dep = MD->getDependency(F);
+
+ while (!Dep.isNonLocal() && !Dep.isUnknown()) {
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ return MadeChange;
+
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
+
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ return MadeChange;
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // Inst's old Dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getDependency(F);
+ }
+
+ return MadeChange;
+}
+
+/// handleEndBlock - Remove dead stores to stack-allocated locations in the
+/// function end block. Ex:
+/// %A = alloca i32
+/// ...
+/// store i32 1, i32* %A
+/// ret void
+bool DSE::handleEndBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Keep track of all of the stack objects that are dead at the end of the
+ // function.
+ SmallPtrSet<Value*, 16> DeadStackObjects;
+
+ // Find all of the alloca'd pointers in the entry block.
+ BasicBlock *Entry = BB.getParent()->begin();
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ DeadStackObjects.insert(AI);
+
+ // Treat byval arguments the same, stores to them are dead at the end of the
+ // function.
+ for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
+ AE = BB.getParent()->arg_end(); AI != AE; ++AI)
+ if (AI->hasByValAttr())
+ DeadStackObjects.insert(AI);
+
+ // Scan the basic block backwards
+ for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ) {
+ --BBI;
+
+ // If we find a store, check to see if it points into a dead stack value.
+ if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+
+ // Stores to stack values are valid candidates for removal.
+ if (DeadStackObjects.count(Pointer)) {
+ Instruction *Dead = BBI++;
+
+ DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
+ << *Dead << "\n Object: " << *Pointer << '\n');
+
+ // DCE instructions only used to calculate that store.
+ DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Remove any dead non-memory-mutating instructions.
+ if (isInstructionTriviallyDead(BBI)) {
+ Instruction *Inst = BBI++;
+ DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+ DeadStackObjects.erase(A);
+ continue;
+ }
+
+ if (CallSite CS = cast<Value>(BBI)) {
+ // If this call does not access memory, it can't be loading any of our
+ // pointers.
+ if (AA->doesNotAccessMemory(CS))
+ continue;
+
+ // If the call might load from any of our allocas, then any store above
+ // the call is live.
+ SmallVector<Value*, 8> LiveAllocas;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the call site touches it.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
+
+ if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
+ LiveAllocas.push_back(*I);
+ }
+
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.erase(*I);
+
+ // If all of the allocas were clobbered by the call then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ return MadeChange;
+
+ continue;
+ }
+
+ AliasAnalysis::Location LoadedLoc;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ LoadedLoc = AA->getLocation(L);
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ LoadedLoc = AA->getLocation(V);
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+ LoadedLoc = AA->getLocationForSource(MTI);
+ } else {
+ // Not a loading instruction.
+ continue;
+ }
+
+ // Remove any allocas from the DeadStackObjects set that are loaded, as this
+ // makes any stores above the access live.
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
+
+ // If all of the allocas were clobbered by the access then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
+ }
+
+ return MadeChange;
+}
+
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set. If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
+
+ // If the kill pointer can be easily reduced to an alloca, don't bother doing
+ // extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
+
+ SmallVector<Value*, 16> NowLive;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
+ if (!AA->isNoAlias(StackLoc, LoadedLoc))
+ NowLive.push_back(*I);
+ }
+
+ for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
+ I != E; ++I)
+ DeadStackObjects.erase(*I);
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 000000000000..3d3f17b26fc6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
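+//
+// For example (illustrative IR), a repeated pure expression that is dominated
+// by an identical one is folded onto the earlier value:
+//
+//   %a = add i32 %x, %y
+//   %b = add i32 %x, %y    ; all uses of %b are replaced with %a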
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
+STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumDSE, "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+ return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SimpleValue - Instances of this struct represent available values in the
+ /// scoped hash table.
+ struct SimpleValue {
+ Instruction *Inst;
+
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+ };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+ static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+ static inline SimpleValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline SimpleValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(SimpleValue Val);
+ static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+ Instruction *Inst = Val.Inst;
+
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ Res ^= getHash(Inst->getOperand(i)) << i;
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ Res ^= getHash(CI->getType());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
+ Res ^= CI->getPredicate();
+ else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
+ for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
+ E = EVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
+ for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
+ E = IVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else {
+ // nothing extra to hash in.
+ assert((isa<CallInst>(Inst) ||
+ isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+
+ if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CallValue - Instances of this struct represent available call values in
+ /// the scoped hash table.
+ struct CallValue {
+ Instruction *Inst;
+
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (CI == 0 || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+ };
+}
+
+namespace llvm {
+ // CallValue is POD.
+ template<> struct isPodLike<CallValue> {
+ static const bool value = true;
+ };
+
+ template<> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+ };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+ assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+ "Cannot value number calls with metadata operands");
+ Res ^= getHash(Inst->getOperand(i)) << i;
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes. It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective. It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+ const TargetData *TD;
+ DominatorTree *DT;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ AllocatorTy> ScopedHTType;
+
+ /// AvailableValues - This scoped hash table contains the current values of
+ /// all of our simple scalar expressions. As we walk down the domtree, we
+ /// look to see if instructions are in this table: if so, we replace them
+ /// with what we find; otherwise we insert them so that dominated values can
+ /// succeed in their lookup.
+ ScopedHTType *AvailableValues;
+
+ /// AvailableLoads - This scoped hash table contains the current values
+ /// of loads. This allows us to get efficient access to dominating loads when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is
+ /// incremented after every possibly writing memory operation, which ensures
+ /// that we only CSE loads with other loads that have no intervening store.
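+ /// For example (illustrative): a load of %p recorded at generation 0 can
+ /// satisfy a later load of %p only while CurrentGeneration is still 0; any
+ /// instruction that may write memory increments the generation, so loads
+ /// separated by such an instruction are not CSE'd with each other.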
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+ typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+ DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+ LoadHTType *AvailableLoads;
+
+ /// AvailableCalls - This scoped hash table contains the current values
+ /// of read-only call values. It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+ CallHTType *AvailableCalls;
+
+ /// CurrentGeneration - This is the current generation of the memory value.
+ unsigned CurrentGeneration;
+
+ static char ID;
+ explicit EarlyCSE() : FunctionPass(ID) {
+ initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+private:
+
+ bool processNode(DomTreeNode *Node);
+
+ // This transformation requires dominator tree info.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+ return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+ // Define a scope in the scoped hash table. When we are done processing this
+ // domtree node and recurse back up to our parent domtree node, this will pop
+ // off all the values we install.
+ ScopedHTType::ScopeTy Scope(*AvailableValues);
+
+ // Define a scope for the load values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ LoadHTType::ScopeTy LoadScope(*AvailableLoads);
+
+ // Define a scope for the call values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ CallHTType::ScopeTy CallScope(*AvailableCalls);
+
+ BasicBlock *BB = Node->getBlock();
+
+ // If this block has a single predecessor, then the predecessor is the parent
+ // of the domtree node and all of the live-out memory values are still current
+ // in this block. If this block has multiple predecessors, then they could
+ // have invalidated the live-out memory values of our parent block. For now,
+ // just be conservative and invalidate memory if this block has multiple
+ // predecessors.
+ if (BB->getSinglePredecessor() == 0)
+ ++CurrentGeneration;
+
+ /// LastStore - Keep track of the last non-volatile store that we saw... for
+ /// as long as there is no instruction that reads memory. If we see a store
+ /// to the same location, we delete the dead store. This zaps trivial dead
+ /// stores which can occur in bitfield code among other things.
+ StoreInst *LastStore = 0;
+
+ bool Changed = false;
+
+ // See if any instructions in the block can be eliminated. If so, do it. If
+ // not, add them to AvailableValues.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ // Dead instructions should just be removed.
+ if (isInstructionTriviallyDead(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+ // its simpler value.
+ if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If this is a simple instruction that we can value number, process it.
+ if (SimpleValue::canHandle(Inst)) {
+ // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableValues->insert(Inst, Inst);
+ continue;
+ }
+
+ // If this is a non-volatile load, process it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Ignore volatile loads.
+ if (LI->isVolatile()) {
+ LastStore = 0;
+ continue;
+ }
+
+ // If we have an available version of this load, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal =
+ AvailableLoads->lookup(Inst->getOperand(0));
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableLoads->insert(Inst->getOperand(0),
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ LastStore = 0;
+ continue;
+ }
+
+ // If this instruction may read from memory, forget LastStore.
+ if (Inst->mayReadFromMemory())
+ LastStore = 0;
+
+ // If this is a read-only call, process it.
+ if (CallValue::canHandle(Inst)) {
+ // If we have an available version of this call, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSECall;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableCalls->insert(Inst,
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ continue;
+ }
+
+ // Okay, this isn't something we can CSE at all. Check to see if it is
+ // something that could modify memory. If so, our available memory values
+ // cannot be used so bump the generation count.
+ if (Inst->mayWriteToMemory()) {
+ ++CurrentGeneration;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // We do a trivial form of DSE if there are two stores to the same
+ // location with no intervening loads. Delete the earlier store.
+ if (LastStore &&
+ LastStore->getPointerOperand() == SI->getPointerOperand()) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
+ << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = 0;
+ continue;
+ }
+
+ // Okay, we just invalidated anything we knew about loaded values. Try
+ // to salvage *something* by remembering that the stored value is a live
+ // version of the pointer. It is safe to forward from volatile stores
+ // to non-volatile loads, so we don't have to check for volatility of
+ // the store.
+ AvailableLoads->insert(SI->getPointerOperand(),
+ std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+
+ // Remember that this was the last store we saw for DSE.
+ if (!SI->isVolatile())
+ LastStore = SI;
+ }
+ }
+ }
+
+ unsigned LiveOutGeneration = CurrentGeneration;
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
+ Changed |= processNode(*I);
+ // Pop any generation changes off the stack from the recursive walk.
+ CurrentGeneration = LiveOutGeneration;
+ }
+ return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Tables that the pass uses when walking the domtree.
+ ScopedHTType AVTable;
+ AvailableValues = &AVTable;
+ LoadHTType LoadTable;
+ AvailableLoads = &LoadTable;
+ CallHTType CallTable;
+ AvailableCalls = &CallTable;
+
+ CurrentGeneration = 0;
+ return processNode(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
new file mode 100644
index 000000000000..87b7317ad2dd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -0,0 +1,2287 @@
+//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global value numbering to eliminate fully redundant
+// instructions. It also performs simple dead load elimination.
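+//
+// For example (illustrative IR), a load that is fully redundant with a
+// dominating store to the same pointer is forwarded and removed:
+//
+//   store i32 %v, i32* %p
+//   %w = load i32* %p      ; %w is replaced by %v and the load is deleted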
+//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvn"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
+
+static cl::opt<bool> EnablePRE("enable-pre",
+ cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+/// This class holds the mapping between values and value numbers. It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
+namespace {
+ struct Expression {
+ uint32_t opcode;
+ const Type *type;
+ SmallVector<uint32_t, 4> varargs;
+
+ Expression(uint32_t o = ~2U) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ if (opcode == ~0U || opcode == ~1U)
+ return true;
+ if (type != other.type)
+ return false;
+ if (varargs != other.varargs)
+ return false;
+ return true;
+ }
+ };
+
+ class ValueTable {
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ uint32_t nextValueNumber;
+
+ Expression create_expression(Instruction* I);
+ Expression create_extractvalue_expression(ExtractValueInst* EI);
+ uint32_t lookup_or_add_call(CallInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ void add(Value *V, uint32_t num);
+ void clear();
+ void erase(Value *v);
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return ~0U;
+ }
+
+ static inline Expression getTombstoneKey() {
+ return ~1U;
+ }
+
+ static unsigned getHashValue(const Expression e) {
+ unsigned hash = e.opcode;
+
+ hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+ (unsigned)((uintptr_t)e.type >> 9));
+
+ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+ E = e.varargs.end(); I != E; ++I)
+ hash = *I + hash * 37;
+
+ return hash;
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+};
+
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+
+Expression ValueTable::create_expression(Instruction *I) {
+ Expression e;
+ e.type = I->getType();
+ e.opcode = I->getOpcode();
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ }
+
+ return e;
+}
+
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+ assert(EI != 0 && "Not an ExtractValueInst?");
+ Expression e;
+ e.type = EI->getType();
+ e.opcode = 0;
+
+ IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+ if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ // EI might be an extract from one of our recognised intrinsics. If it
+ // is, we'll synthesize a semantically equivalent expression instead of
+ // an extract value expression.
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ e.opcode = Instruction::Add;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ e.opcode = Instruction::Sub;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ e.opcode = Instruction::Mul;
+ break;
+ default:
+ break;
+ }
+
+ if (e.opcode != 0) {
+ // Intrinsic recognized. Grab its args to finish building the expression.
+ assert(I->getNumArgOperands() == 2 &&
+ "Expect two args for recognised intrinsics.");
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+ return e;
+ }
+ }
+
+ // Not a recognised intrinsic. Fall back to producing an extract value
+ // expression.
+ e.opcode = EI->getOpcode();
+ for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value *V, uint32_t num) {
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
+uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+ if (AA->doesNotAccessMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ } else if (AA->onlyReadsMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+
+ MemDepResult local_dep = MD->getDependency(C);
+
+ if (!local_dep.isDef() && !local_dep.isNonLocal()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (local_dep.isDef()) {
+ CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering[C] = v;
+ return v;
+ }
+
+ // Non-local case.
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(CallSite(C));
+ // FIXME: Move the checking logic to MemDep!
+ CallInst* cdep = 0;
+
+ // Check to see if we have a single dominating call instruction that is
+ // identical to C.
+ for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+ const NonLocalDepEntry *I = &deps[i];
+ if (I->getResult().isNonLocal())
+ continue;
+
+ // We don't handle non-definitions. If we already have a call, reject
+ // instruction dependencies.
+ if (!I->getResult().isDef() || cdep != 0) {
+ cdep = 0;
+ break;
+ }
+
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
+ // FIXME: All duplicated with non-local case.
+ if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
+ cdep = NonLocalDepCall;
+ continue;
+ }
+
+ cdep = 0;
+ break;
+ }
+
+ if (!cdep) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(cdep);
+ valueNumbering[C] = v;
+ return v;
+
+ } else {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+}
+
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::lookup_or_add(Value *V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction* I = cast<Instruction>(V);
+ Expression exp;
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ return lookup_or_add_call(cast<CallInst>(I));
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or :
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertValue:
+ case Instruction::GetElementPtr:
+ exp = create_expression(I);
+ break;
+ case Instruction::ExtractValue:
+ exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ break;
+ default:
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
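+  // DenseMap's operator[] default-constructs missing entries to 0, so a zero
+  // here means this expression has not been numbered yet; give it a fresh
+  // value number.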
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[V] = e;
+ return e;
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value *V) const {
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+}
+
+/// clear - Remove all entries from the ValueTable.
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
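+  // Value number 0 is reserved to mean "not yet numbered", so restart at 1.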
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering.
+void ValueTable::erase(Value *V) {
+ valueNumbering.erase(V);
+}
+
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+ for (DenseMap<Value*, uint32_t>::const_iterator
+ I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+ assert(I->first != V && "Inst still occurs in value numbering map!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GVN Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class GVN : public FunctionPass {
+ bool NoLoads;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+ const TargetData *TD;
+
+ ValueTable VN;
+
+ /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// have that value number. Use findLeader to query it.
+ struct LeaderTableEntry {
+ Value *Val;
+ BasicBlock *BB;
+ LeaderTableEntry *Next;
+ };
+ DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+ BumpPtrAllocator TableAllocator;
+
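+    /// InstrsToErase - Instructions marked for deletion by
+    /// markInstructionForDeletion; processBlock erases them once it is safe
+    /// to update its iterator.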
+ SmallVector<Instruction*, 8> InstrsToErase;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVN(bool noloads = false)
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ /// markInstructionForDeletion - This removes the specified instruction from
+ /// our various maps and marks it for deletion.
+ void markInstructionForDeletion(Instruction *I) {
+ VN.erase(I);
+ InstrsToErase.push_back(I);
+ }
+
+ const TargetData *getTargetData() const { return TD; }
+ DominatorTree &getDominatorTree() const { return *DT; }
+ AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
+ MemoryDependenceAnalysis &getMemDep() const { return *MD; }
+ private:
+ /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
+ /// its value number.
+ void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry &Curr = LeaderTable[N];
+ if (!Curr.Val) {
+ Curr.Val = V;
+ Curr.BB = BB;
+ return;
+ }
+
+ LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
+ Node->Val = V;
+ Node->BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+ }
+
+ /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// value number, and remove the given value if encountered.
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Curr = &LeaderTable[N];
+
+ while (Curr->Val != V || Curr->BB != BB) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
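+      // The head entry lives directly in the DenseMap, so it cannot simply be
+      // unlinked; either clear it or copy the next entry into it.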
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Val = 0;
+ Curr->BB = 0;
+ } else {
+ LeaderTableEntry* Next = Curr->Next;
+ Curr->Val = Next->Val;
+ Curr->BB = Next->BB;
+ Curr->Next = Next->Next;
+ }
+ }
+ }
+
+ // List of critical edges to be split between iterations.
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+
+    // This transformation requires dominator information
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+
+    // Helper functions
+ // FIXME: eliminate or document these better
+ bool processLoad(LoadInst *L);
+ bool processInstruction(Instruction *I);
+ bool processNonLocalLoad(LoadInst *L);
+ bool processBlock(BasicBlock *BB);
+ void dump(DenseMap<uint32_t, Value*> &d);
+ bool iterateOnFunction(Function &F);
+ bool performPRE(Function &F);
+ Value *findLeader(BasicBlock *BB, uint32_t num);
+ void cleanupGlobalSets();
+ void verifyRemoved(const Instruction *I) const;
+ bool splitCriticalEdges();
+ };
+
+ char GVN::ID = 0;
+}
+
+// createGVNPass - The public interface to this file...
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVN(NoLoads);
+}
+
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
+
+void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+ errs() << "{\n";
+ for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
+ E = d.end(); I != E; ++I) {
+ errs() << I->first << "\n";
+ I->second->dump();
+ }
+ errs() << "}\n";
+}
+
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block. As we go, keep
+/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
+/// map records one of the following states for each block:
+/// 0) we know the block *is not* fully available.
+/// 1) we know the block *is* fully available.
+/// 2) we do not know whether the block is fully available or not, but we are
+/// currently speculating that it will be.
+/// 3) we are speculating for this block and have used that to speculate for
+/// other blocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+ DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
+ // Optimistically assume that the block is fully available and check to see
+ // if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ FullyAvailableBlocks.insert(std::make_pair(BB, 2));
+
+ // If the entry already existed for this block, return the precomputed value.
+ if (!IV.second) {
+ // If this is a speculative "available" value, mark it as being used for
+ // speculation of other blocks.
+ if (IV.first->second == 2)
+ IV.first->second = 3;
+ return IV.first->second != 0;
+ }
+
+ // Otherwise, see if it is fully available in all predecessors.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // If this block has no predecessors, it isn't live-in here.
+ if (PI == PE)
+ goto SpeculationFailure;
+
+ for (; PI != PE; ++PI)
+ // If the value isn't fully available in one of our predecessors, then it
+ // isn't fully available in this block either. Undo our previous
+ // optimistic assumption and bail out.
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ goto SpeculationFailure;
+
+ return true;
+
+// SpeculationFailure - If we get here, we found out that this is not, after
+// all, a fully-available block. We have a problem if we speculated on this and
+// used the speculation to mark other blocks as available.
+SpeculationFailure:
+ char &BBVal = FullyAvailableBlocks[BB];
+
+ // If we didn't speculate on this, just return with it set to false.
+ if (BBVal == 2) {
+ BBVal = 0;
+ return false;
+ }
+
+ // If we did speculate on this value, we could have blocks set to 1 that are
+ // incorrect. Walk the (transitive) successors of this block and mark them as
+ // 0 if set to one.
+ SmallVector<BasicBlock*, 32> BBWorklist;
+ BBWorklist.push_back(BB);
+
+ do {
+ BasicBlock *Entry = BBWorklist.pop_back_val();
+ // Note that this sets blocks to 0 (unavailable) if they happen to not
+ // already be in FullyAvailableBlocks. This is safe.
+ char &EntryVal = FullyAvailableBlocks[Entry];
+ if (EntryVal == 0) continue; // Already unavailable.
+
+ // Mark as unavailable.
+ EntryVal = 0;
+
+ for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
+ BBWorklist.push_back(*I);
+ } while (!BBWorklist.empty());
+
+ return false;
+}
+
+
+/// CanCoerceMustAliasedValueToLoad - Return true if
+/// CoerceAvailableValueToLoadType will succeed.
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
+ const Type *LoadTy,
+ const TargetData &TD) {
+  // If the loaded or stored value is a first-class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (TD.getTypeSizeInBits(StoredVal->getType()) <
+ TD.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+
+/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace and
+/// InsertPt is the place to insert new instructions.
+///
+/// If we can't do it, return null.
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+ const Type *LoadedTy,
+ Instruction *InsertPt,
+ const TargetData &TD) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+ return 0;
+
+ // If this is already the right type, just return it.
+ const Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoreSize == LoadSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
+ return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ const Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->isPointerTy())
+ TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->isPointerTy())
+ StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+
+ return StoredVal;
+ }
+
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+ StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (TD.isBigEndian()) {
+ Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
+ StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+ }
+
+ // Truncate the integer to the right size now.
+ const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+
+ if (LoadedTy == NewIntTy)
+ return StoredVal;
+
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->isPointerTy())
+ return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+
+ // Otherwise, bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+}
+
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove). This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const TargetData &TD) {
+  // If the loaded or stored value is a first-class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+#if 0
+ if (LoadOffset == StoreOffset) {
+ dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+ }
+#endif
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
+
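+  // We can only reason about whole bytes; give up if either size is not a
+  // multiple of 8 bits.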
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
+ LoadSize >>= 3;
+
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure) {
+#if 0
+ dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+#endif
+ return -1;
+ }
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset+StoreSize < LoadOffset+LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset-StoreOffset;
+}
+
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI,
+ const TargetData &TD) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ StorePtr, StoreSize, TD);
+}
+
+/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+ LoadInst *DepLI, const TargetData &TD){
+  // Cannot handle forwarding from a load of a first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ if (R != -1) return R;
+
+  // If we have a load/load clobber and DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ if (Size == 0) return -1;
+
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+}
+
+
+
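+/// AnalyzeLoadFromClobberingMemInst - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by a mem intrinsic
+/// (memset, memcpy, memmove). Returns -1 if nothing can be done, or the byte
+/// offset into the written memory that feeds the load.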
+static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI,
+ const TargetData &TD) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (SizeCst == 0) return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+  // If this is a memset, we just need to see if the offset is valid within the
+  // size of the memset.
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, TD);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (Src == 0) return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
+ if (GV == 0 || !GV->isConstant()) return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ MI->getDest(), MemSizeInBits, TD);
+ if (Offset == -1)
+ return Offset;
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ return Offset;
+ return -1;
+}
+
+
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store. This means
+/// that the store provides bits used by the load but the pointers don't
+/// mustalias. Check this case to see if there is anything more we can do
+/// before we give up.
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+ const Type *LoadTy,
+ Instruction *InsertPt, const TargetData &TD){
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+ "tmp");
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (TD.isLittleEndian())
+ ShiftAmt = Offset*8;
+ else
+ ShiftAmt = (StoreSize-LoadSize-Offset)*8;
+
+ if (ShiftAmt)
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
+
+ if (LoadSize != StoreSize)
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+ "tmp");
+
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+}
+
+/// GetLoadValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load. This means
+/// that the clobbering load *may* provide bits used by this load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ GVN &gvn) {
+ const TargetData &TD = *gvn.getTargetData();
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+ // widen SrcVal out to a larger load.
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ if (Offset+LoadSize > SrcValSize) {
+ assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+ assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    // If we have a load/load clobber and SrcVal can be widened to cover this
+    // load, then we should widen it to the next power of 2 size big enough!
+ unsigned NewLoadSize = Offset+LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ const Type *DestPTy =
+ IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+ DestPTy = PointerType::get(DestPTy,
+ cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (TD.isBigEndian())
+ RV = Builder.CreateLShr(RV,
+ NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+    // but then all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(SrcVal);
+ SrcVal = NewLoad;
+ }
+
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ const TargetData &TD){
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ Value *Val = MSI->getValue();
+ if (LoadSize != 1)
+ Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+
+ Value *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet*2 <= LoadSize) {
+ Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+ Val = Builder.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ Value *ShVal = Builder.CreateShl(Val, 1*8);
+ Val = Builder.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ return ConstantFoldLoadFromConstPtr(Src, &TD);
+}
+
+namespace {
+
+struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const TargetData *TD = gvn.getTargetData();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const TargetData *TD = gvn.getTargetData();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+ }
+};
+
+} // end anonymous namespace
+
+/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// construct SSA form, allowing us to eliminate LI. This returns the value
+/// that should be used at LI's definition site.
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
+ SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
+ GVN &gvn) {
+ // Check for the fully redundant, dominating load case. In this case, we can
+ // just use the dominating value directly.
+ if (ValuesPerBlock.size() == 1 &&
+ gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+ LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+
+ // Otherwise, we have to construct SSA form.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSAUpdate(&NewPHIs);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
+
+ const Type *LoadTy = LI->getType();
+
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ BasicBlock *BB = AV.BB;
+
+ if (SSAUpdate.HasValueForBlock(BB))
+ continue;
+
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
+ }
+
+ // Perform PHI construction.
+ Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
+
+ // If new PHI nodes were created, notify alias analysis.
+ if (V->getType()->isPointerTy()) {
+ AliasAnalysis *AA = gvn.getAliasAnalysis();
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ AA->copyValue(LI, NewPHIs[i]);
+
+ // Now that we've copied information to the new PHIs, scan through
+ // them again and inform alias analysis that we've added potentially
+ // escaping uses to any values that are operands to these PHIs.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+ PHINode *P = NewPHIs[i];
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ AA->addEscapingUse(P->getOperandUse(jj));
+ }
+ }
+ }
+
+ return V;
+}
+
+static bool isLifetimeStart(const Instruction *Inst) {
+ if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+}
+
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Find the non-local dependencies of the load.
+ SmallVector<NonLocalDepResult, 64> Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+ //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
+ // << Deps.size() << *LI << '\n');
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ if (Deps.size() > 100)
+ return false;
+
+  // If we had a phi translation failure, we'll have a single entry which is an
+  // unknown dependence in the current block. Reject this early.
+ if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Filter out useless results (non-locals, etc). Keep track of the blocks
+ // where we have a value available in repl, also keep track of whether we see
+ // dependencies that produce an unknown value for the load (such as a call
+ // that could potentially clobber the load).
+ SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
+ SmallVector<BasicBlock*, 16> UnavailableBlocks;
+
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ BasicBlock *DepBB = Deps[i].getBB();
+ MemDepResult DepInfo = Deps[i].getResult();
+
+ if (DepInfo.isUnknown()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ if (DepInfo.isClobber()) {
+ // The address being loaded in this non-local block may not be the same as
+ // the pointer operand of the load if PHI translation occurs. Make sure
+ // to consider the right address.
+ Value *Address = Deps[i].getAddress();
+
+ // If the dependence is to a store that writes to a superset of the bits
+ // read by the load, we can extract the bits we need for the load from the
+ // stored value.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+ DepSI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ DepSI->getValueOperand(),
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI != LI && Address && TD) {
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
+ LI->getPointerOperand(),
+ DepLI, *TD);
+
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can
+ // forward a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ DepMI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ assert(DepInfo.isDef() && "Expecting def here");
+
+ Instruction *DepInst = DepInfo.getInst();
+
+ // Loading the allocation -> undef.
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+ // Loading immediately after lifetime begin -> undef.
+ isLifetimeStart(DepInst)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ UndefValue::get(LI->getType())));
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types if we have to.
+ if (S->getValueOperand()->getType() != LI->getType()) {
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *TD)) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ S->getValueOperand()));
+ continue;
+ }
+
+ if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+ // If the types mismatch and we can't handle it, reject reuse of the load.
+ if (LD->getType() != LI->getType()) {
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
+ continue;
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty()) return false;
+
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ // Okay, we have *some* definitions of the value. This means that the value
+  // is available in some of our (transitive) predecessors. Let's think about
+ // doing PRE of this load. This will involve inserting a new load into the
+ // predecessor when it's not available. We could do this in general, but
+ // prefer to not increase code size. As such, we only do this when we know
+ // that we only have to insert *one* load (which means we're basically moving
+ // the load, not inserting a new one).
+
+ SmallPtrSet<BasicBlock *, 4> Blockers;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ Blockers.insert(UnavailableBlocks[i]);
+
+  // Let's find the first basic block with more than one predecessor. Walk backwards
+ // through predecessors if needed.
+ BasicBlock *LoadBB = LI->getParent();
+ BasicBlock *TmpBB = LoadBB;
+
+ bool isSinglePred = false;
+ bool allSingleSucc = true;
+ while (TmpBB->getSinglePredecessor()) {
+ isSinglePred = true;
+ TmpBB = TmpBB->getSinglePredecessor();
+ if (TmpBB == LoadBB) // Infinite (unreachable) loop.
+ return false;
+ if (Blockers.count(TmpBB))
+ return false;
+
+ // If any of these blocks has more than one successor (i.e. if the edge we
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
+ // above this block would be adding the load to execution paths along
+ // which it was not previously executed.
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ return false;
+ }
+
+ assert(TmpBB);
+ LoadBB = TmpBB;
+
+ // FIXME: It is extremely unclear what this loop is doing, other than
+ // artificially restricting loadpre.
+ if (isSinglePred) {
+ bool isHot = false;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ if (AV.isSimpleValue())
+ // "Hot" Instruction is in some loop (because it dominates its dep.
+ // instruction).
+ if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+ if (DT->dominates(LI, I)) {
+ isHot = true;
+ break;
+ }
+ }
+
+ // We are interested only in "hot" instructions. We don't want to do any
+ // mis-optimizations here.
+ if (!isHot)
+ return false;
+ }
+
+ // Check to see how many predecessors have the loaded value fully
+ // available.
+ DenseMap<BasicBlock*, Value*> PredLoads;
+ DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+ PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
+ continue;
+ }
+ PredLoads[Pred] = 0;
+
+ if (Pred->getTerminator()->getNumSuccessors() != 1) {
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+ unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
+ NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ }
+ }
+ if (!NeedToSplit.empty()) {
+ toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
+ return false;
+ }
+
+ // Decide whether PRE is profitable for this load.
+ unsigned NumUnavailablePreds = PredLoads.size();
+ assert(NumUnavailablePreds != 0 &&
+ "Fully available value should be eliminated above!");
+
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
+ return false;
+
+ // Check if the load can safely be moved to all the unavailable predecessors.
+ bool CanDoPRE = true;
+ SmallVector<Instruction*, 8> NewInsts;
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+
+ // Do PHI translation to get its value in the predecessor if necessary. The
+ // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
+
+ // If all preds have a single successor, then we know it is safe to insert
+ // the load on the pred (?!?), so we can insert code to materialize the
+ // pointer if it is not available.
+ PHITransAddr Address(LI->getPointerOperand(), TD);
+ Value *LoadPtr = 0;
+ if (allSingleSucc) {
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
+ } else {
+ Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
+ LoadPtr = Address.getAddr();
+ }
+
+ // If we couldn't find or insert a computation of this phi translated value,
+ // we fail PRE.
+ if (LoadPtr == 0) {
+ DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+ << *LI->getPointerOperand() << "\n");
+ CanDoPRE = false;
+ break;
+ }
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can
+ // be 0, as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+        // FIXME: REEVALUATE THIS.
+ !isSafeToLoadUnconditionally(LoadPtr,
+ UnavailablePred->getTerminator(),
+ LI->getAlignment(), TD)) {
+ CanDoPRE = false;
+ break;
+ }
+
+ I->second = LoadPtr;
+ }
+
+ if (!CanDoPRE) {
+ while (!NewInsts.empty()) {
+ Instruction *I = NewInsts.pop_back_val();
+ if (MD) MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
+ return false;
+ }
+
+ // Okay, we can eliminate this load by inserting a reload in the predecessor
+ // and using PHI construction to get the value in the other predecessors, do
+ // it.
+ DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+ DEBUG(if (!NewInsts.empty())
+ dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
+ << *NewInsts.back() << '\n');
+
+ // Assign value numbers to the new instructions.
+ for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ // FIXME: We really _ought_ to insert these value numbers into their
+ // parent's availability map. However, in doing so, we risk getting into
+ // ordering issues. If a block hasn't been processed yet, we would be
+ // marking a value as AVAIL-IN, which isn't what we intend.
+ VN.lookup_or_add(NewInsts[i]);
+ }
+
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+ Value *LoadPtr = I->second;
+
+ Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Transfer the old load's TBAA tag to the new load.
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+ NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+
+ // Transfer DebugLoc.
+ NewLoad->setDebugLoc(LI->getDebugLoc());
+
+ // Add the newly created load.
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
+ NewLoad));
+ MD->invalidateCachedPointerInfo(LoadPtr);
+ DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
+ }
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumPRELoad;
+ return true;
+}
+
+/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// locally, and then attempting non-local elimination if that fails.
+bool GVN::processLoad(LoadInst *L) {
+ if (!MD)
+ return false;
+
+ if (L->isVolatile())
+ return false;
+
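+  // If the load is dead (its value is never used), just mark it for deletion.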
+ if (L->use_empty()) {
+ markInstructionForDeletion(L);
+ return true;
+ }
+
+ // ... to a pointer that has been loaded from before...
+ MemDepResult Dep = MD->getDependency(L);
+
+ // If we have a clobber and target data is around, see if this is a clobber
+ // that we can fix up through code synthesis.
+ if (Dep.isClobber() && TD) {
+ // Check to see if we have something like this:
+ // store i32 123, i32* %P
+ // %A = bitcast i32* %P to i8*
+ // %B = gep i8* %A, i32 1
+ // %C = load i8* %B
+ //
+ // We could do that by recognizing if the clobber instructions are obviously
+ // a common base + constant offset, and if the previous store (or memset)
+ // completely covers this load. This sort of thing can happen in bitfield
+ // access code.
+ Value *AvailVal = 0;
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
+ L->getType(), L, *TD);
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI == L)
+ return false;
+
+ int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
+ L->getPointerOperand(),
+ DepLI, *TD);
+ if (Offset != -1)
+ AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+ // a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
+ }
+
+ if (AvailVal) {
+ DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (AvailVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(AvailVal);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ // If the value isn't available, don't do anything!
+ if (Dep.isClobber()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ Instruction *I = Dep.getInst();
+ dbgs() << " is clobbered by " << *I << '\n';
+ );
+ return false;
+ }
+
+ if (Dep.isUnknown()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ dbgs() << " has unknown dependence\n";
+ );
+ return false;
+ }
+
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L);
+
+ assert(Dep.isDef() && "Expecting def here");
+
+ Instruction *DepInst = Dep.getInst();
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ Value *StoredVal = DepSI->getValueOperand();
+
+ // The store and load are to a must-aliased pointer, but they may not
+ // actually have the same type. See if we know how to reuse the stored
+ // value (depending on its type).
+ if (StoredVal->getType() != L->getType()) {
+ if (TD) {
+ StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+ L, *TD);
+ if (StoredVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(StoredVal);
+ if (StoredVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(StoredVal);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ Value *AvailableVal = DepLI;
+
+ // The loads are of a must-aliased pointer, but they may not actually have
+ // the same type. See if we know how to reuse the previously loaded value
+ // (depending on its type).
+ if (DepLI->getType() != L->getType()) {
+ if (TD) {
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
+ L, *TD);
+ if (AvailableVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(AvailableVal);
+ if (DepLI->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(DepLI);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // If this load really doesn't depend on anything, then we must be loading an
+  // undef value. This can happen when loading from a fresh allocation with no
+ // intervening stores, for example.
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+  // If this load occurs right after a lifetime begin, then the loaded value
+  // is undefined.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// findLeader - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question. This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+ LeaderTableEntry Vals = LeaderTable[num];
+ if (!Vals.Val) return 0;
+
+ Value *Val = 0;
+ if (DT->dominates(Vals.BB, BB)) {
+ Val = Vals.Val;
+ if (isa<Constant>(Val)) return Val;
+ }
+
+ LeaderTableEntry* Next = Vals.Next;
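+  // Walk the remaining entries; a constant leader is always preferred, and
+  // the first dominating non-constant value is remembered as a fallback.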
+ while (Next) {
+ if (DT->dominates(Next->BB, BB)) {
+ if (isa<Constant>(Next->Val)) return Next->Val;
+ if (!Val) Val = Next->Val;
+ }
+
+ Next = Next->Next;
+ }
+
+ return Val;
+}
+
+
+/// processInstruction - When calculating availability, handle an instruction
+/// by inserting it into the appropriate sets.
+bool GVN::processInstruction(Instruction *I) {
+ // Ignore dbg info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // If the instruction can be easily simplified then do so now in preference
+ // to value numbering it. Value numbering often exposes redundancies, for
+ // example if it determines that %y is equal to %x then the instruction
+ // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ I->replaceAllUsesWith(V);
+ if (MD && V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(I);
+ return true;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (processLoad(LI))
+ return true;
+
+ unsigned Num = VN.lookup_or_add(LI);
+ addToLeaderTable(Num, LI, LI->getParent());
+ return false;
+ }
+
+  // For conditional branches, we can perform simple conditional propagation on
+ // the condition value itself.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ return false;
+
+ Value *BranchCond = BI->getCondition();
+ uint32_t CondVN = VN.lookup_or_add(BranchCond);
+
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+
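+    // In a successor reached only through this branch, the condition is known
+    // to be a constant, so record that constant as a leader for the
+    // condition's value number in that block.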
+ if (TrueSucc->getSinglePredecessor())
+ addToLeaderTable(CondVN,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
+ if (FalseSucc->getSinglePredecessor())
+ addToLeaderTable(CondVN,
+ ConstantInt::getFalse(TrueSucc->getContext()),
+ FalseSucc);
+
+ return false;
+ }
+
+ // Instructions with void type don't return a value, so there's
+  // no point in trying to find redundancies in them.
+ if (I->getType()->isVoidTy()) return false;
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
+
+  // Allocations, terminators, and PHI nodes are always uniquely numbered, so
+  // we can save time and memory by fast-failing them.
+ if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // If the number we were assigned was a brand new VN, then we don't
+ // need to do a lookup to see if the number already exists
+ // somewhere in the domtree: it can't!
+ if (Num == NextNum) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // Perform fast-path value-number based elimination of values inherited from
+ // dominators.
+ Value *repl = findLeader(I->getParent(), Num);
+ if (repl == 0) {
+ // Failure, just remember this instance for future use.
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // Remove it!
+ I->replaceAllUsesWith(repl);
+ if (MD && repl->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ markInstructionForDeletion(I);
+ return true;
+}
+
+/// runOnFunction - This is the main transformation entry point for a function.
+bool GVN::runOnFunction(Function& F) {
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setMemDep(MD);
+ VN.setDomTree(DT);
+
+ bool Changed = false;
+ bool ShouldContinue = true;
+
+ // Merge unconditional branches, allowing PRE to catch more
+ // optimization opportunities.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
+ BasicBlock *BB = FI++;
+
+ bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ if (removedBlock) ++NumGVNBlocks;
+
+ Changed |= removedBlock;
+ }
+
+ unsigned Iteration = 0;
+ while (ShouldContinue) {
+ DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
+ ShouldContinue = iterateOnFunction(F);
+ if (splitCriticalEdges())
+ ShouldContinue = true;
+ Changed |= ShouldContinue;
+ ++Iteration;
+ }
+
+ if (EnablePRE) {
+ bool PREChanged = true;
+ while (PREChanged) {
+ PREChanged = performPRE(F);
+ Changed |= PREChanged;
+ }
+ }
+ // FIXME: Should perform GVN again after PRE does something. PRE can move
+ // computations into blocks where they become fully redundant. Note that
+ // we can't do this until PRE's critical edge splitting updates memdep.
+ // Actually, when this happens, we should just fully integrate PRE into GVN.
+
+ cleanupGlobalSets();
+
+ return Changed;
+}
+
+
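+/// processBlock - Run processInstruction over every instruction in BB,
+/// erasing any instructions that were marked for deletion along the way.
+/// Returns true if the block was changed.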
+bool GVN::processBlock(BasicBlock *BB) {
+  // FIXME: Kill off InstrsToErase by erasing eagerly in a helper function
+ // (and incrementing BI before processing an instruction).
+ assert(InstrsToErase.empty() &&
+ "We expect InstrsToErase to be empty across iterations");
+ bool ChangedFunction = false;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ ChangedFunction |= processInstruction(BI);
+ if (InstrsToErase.empty()) {
+ ++BI;
+ continue;
+ }
+
+ // If we need some instructions deleted, do it now.
+ NumGVNInstr += InstrsToErase.size();
+
+ // Avoid iterator invalidation.
+ bool AtStart = BI == BB->begin();
+ if (!AtStart)
+ --BI;
+
+ for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ E = InstrsToErase.end(); I != E; ++I) {
+ DEBUG(dbgs() << "GVN removed: " << **I << '\n');
+ if (MD) MD->removeInstruction(*I);
+ (*I)->eraseFromParent();
+ DEBUG(verifyRemoved(*I));
+ }
+ InstrsToErase.clear();
+
+ if (AtStart)
+ BI = BB->begin();
+ else
+ ++BI;
+ }
+
+ return ChangedFunction;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function &F) {
+ bool Changed = false;
+ DenseMap<BasicBlock*, Value*> predMap;
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *CurrentBlock = *DI;
+
+ // Nothing to PRE in the entry block.
+ if (CurrentBlock == &F.getEntryBlock()) continue;
+
+ for (BasicBlock::iterator BI = CurrentBlock->begin(),
+ BE = CurrentBlock->end(); BI != BE; ) {
+ Instruction *CurInst = BI++;
+
+ if (isa<AllocaInst>(CurInst) ||
+ isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+ CurInst->getType()->isVoidTy() ||
+ CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+ isa<DbgInfoIntrinsic>(CurInst))
+ continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
+
+ uint32_t ValNo = VN.lookup(CurInst);
+
+ // Look for the predecessors for PRE opportunities. We're
+ // only trying to solve the basic diamond case, where
+ // a value is computed in the successor and one predecessor,
+ // but not the other. We also explicitly disallow cases
+ // where the successor is its own predecessor, because they're
+ // more complicated to get right.
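+ // As a rough sketch: the value computed by CurInst is already available in
+ // every predecessor of CurrentBlock except one. PRE then inserts the
+ // expression into that one predecessor and merges all copies with a phi in
+ // CurrentBlock, making the computation here fully redundant.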
+ unsigned NumWith = 0;
+ unsigned NumWithout = 0;
+ BasicBlock *PREPred = 0;
+ predMap.clear();
+
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (P == CurrentBlock) {
+ NumWithout = 2;
+ break;
+ } else if (!DT->dominates(&F.getEntryBlock(), P)) {
+ NumWithout = 2;
+ break;
+ }
+
+ Value* predV = findLeader(P, ValNo);
+ if (predV == 0) {
+ PREPred = P;
+ ++NumWithout;
+ } else if (predV == CurInst) {
+ NumWithout = 2;
+ } else {
+ predMap[P] = predV;
+ ++NumWith;
+ }
+ }
+
+ // Don't do PRE when it might increase code size, i.e. when
+ // we would need to insert instructions in more than one pred.
+ if (NumWithout != 1 || NumWith == 0)
+ continue;
+
+ // Don't do PRE across an indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ continue;
+
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+ continue;
+ }
+
+ // Instantiate the expression in the predecessor that lacked it.
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ Instruction *PREInstr = CurInst->clone();
+ bool success = true;
+ for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+ Value *Op = PREInstr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
+ PREInstr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success) {
+ delete PREInstr;
+ DEBUG(verifyRemoved(PREInstr));
+ continue;
+ }
+
+ PREInstr->insertBefore(PREPred->getTerminator());
+ PREInstr->setName(CurInst->getName() + ".pre");
+ PREInstr->setDebugLoc(CurInst->getDebugLoc());
+ predMap[PREPred] = PREInstr;
+ VN.add(PREInstr, ValNo);
+ ++NumGVNPRE;
+
+ // Update the availability map to include the new instruction.
+ addToLeaderTable(ValNo, PREInstr, PREPred);
+
+ // Create a PHI to make the value available in this block.
+ pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
+ PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
+ CurInst->getName() + ".pre-phi",
+ CurrentBlock->begin());
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ Phi->addIncoming(predMap[P], P);
+ }
+
+ VN.add(Phi, ValNo);
+ addToLeaderTable(ValNo, Phi, CurrentBlock);
+ Phi->setDebugLoc(CurInst->getDebugLoc());
+ CurInst->replaceAllUsesWith(Phi);
+ if (Phi->getType()->isPointerTy()) {
+ // Because we have added a PHI-use of the pointer value, it has now
+ // "escaped" from alias analysis' perspective. We need to inform
+ // AA of this.
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+ ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+ }
+
+ if (MD)
+ MD->invalidateCachedPointerInfo(Phi);
+ }
+ VN.erase(CurInst);
+ removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
+
+ DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
+ if (MD) MD->removeInstruction(CurInst);
+ CurInst->eraseFromParent();
+ DEBUG(verifyRemoved(CurInst));
+ Changed = true;
+ }
+ }
+
+ if (splitCriticalEdges())
+ Changed = true;
+
+ return Changed;
+}
+
+/// splitCriticalEdges - Split critical edges found during the previous
+/// iteration that may enable further optimization.
+bool GVN::splitCriticalEdges() {
+ if (toSplit.empty())
+ return false;
+ do {
+ std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ SplitCriticalEdge(Edge.first, Edge.second, this);
+ } while (!toSplit.empty());
+ if (MD) MD->invalidateCachedPredecessors();
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
+ cleanupGlobalSets();
+
+ // Top-down walk of the dominator tree
+ bool Changed = false;
+#if 0
+ // Needed for value numbering with phi construction to work.
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
+ RE = RPOT.end(); RI != RE; ++RI)
+ Changed |= processBlock(*RI);
+#else
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI)
+ Changed |= processBlock(DI->getBlock());
+#endif
+
+ return Changed;
+}
+
+void GVN::cleanupGlobalSets() {
+ VN.clear();
+ LeaderTable.clear();
+ TableAllocator.Reset();
+}
+
+/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// internal data structures.
+void GVN::verifyRemoved(const Instruction *Inst) const {
+ VN.verifyRemoved(Inst);
+
+ // Walk through the value number scope to make sure the instruction isn't
+ // ferreted away in it.
+ for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+ I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+ const LeaderTableEntry *Node = &I->second;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+
+ while (Node->Next) {
+ Node = Node->Next;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
new file mode 100644
index 000000000000..dee3d38d72af
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -0,0 +1,1871 @@
+//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into simpler forms suitable for subsequent
+// analysis and transformation.
+//
+// This transformation makes the following changes to each loop with an
+// identifiable induction variable:
+// 1. All loops are transformed to have a SINGLE canonical induction variable
+// which starts at zero and steps by one.
+// 2. The canonical induction variable is guaranteed to be the first PHI node
+// in the loop header block.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
+//
+// If the trip count of a loop is computable, this pass also makes the following
+// changes:
+// 1. The exit condition for the loop is canonicalized to compare the
+// induction value against the exit value. This turns loops like:
+// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
+// 2. Any use outside of the loop of an expression derived from the indvar
+// is changed to compute the derived value outside of the loop, eliminating
+// the dependence on the exit value of the induction variable. If the only
+// purpose of the loop is to compute the exit value of some derived
+// expression, this transformation will make the loop dead.
+//
+// This transformation should be followed by strength reduction after all of the
+// desired loop transformations have been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumWidened , "Number of indvars widened");
+STATISTIC(NumInserted , "Number of canonical indvars added");
+STATISTIC(NumReplaced , "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+ "disable-iv-rewrite", cl::Hidden,
+ cl::desc("Disable canonical induction variable rewriting"));
+
+namespace {
+ class IndVarSimplify : public LoopPass {
+ IVUsers *IU;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ TargetData *TD;
+
+ SmallVector<WeakVH, 16> DeadInsts;
+ bool Changed;
+ public:
+
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ Changed(false) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ if (!DisableIVRewrite)
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ if (!DisableIVRewrite)
+ AU.addPreserved<IVUsers>();
+ AU.setPreservesCFG();
+ }
+
+ private:
+ virtual void releaseMemory() {
+ DeadInsts.clear();
+ }
+
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
+
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
+ void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+ bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void EliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned);
+
+ void SimplifyCongruentIVs(Loop *L);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
+
+ ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter);
+
+ void SinkUnusedInvariants(Loop *L);
+ };
+}
+
+char IndVarSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+ "Induction Variable Simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+ "Induction Variable Simplification", false, false)
+
+Pass *llvm::createIndVarSimplifyPass() {
+ return new IndVarSimplify();
+}
+
+/// isValidRewrite - Return true if the SCEV expansion generated by the
+/// rewriter can replace the original value. SCEV guarantees that it
+/// produces the same value, but the way it is produced may be illegal IR.
+/// Ideally, this function will only be called for verification.
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
+ // If an SCEV expression subsumed multiple pointers, its expansion could
+ // reassociate the GEP changing the base pointer. This is illegal because the
+ // final address produced by a GEP chain must be inbounds relative to its
+ // underlying object. Otherwise basic alias analysis, among other things,
+ // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+ // producing an expression involving multiple pointers. Until then, we must
+ // bail out here.
+ //
+ // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // because it understands lcssa phis while SCEV does not.
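+ // As a hypothetical sketch: if FromVal is "getelementptr i8* %A, i64 %i" and
+ // the expansion produces "getelementptr i8* %B, i64 %j", the two GEPs have
+ // different SCEV pointer bases (%A vs. %B), so the rewrite is rejected below.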
+ Value *FromPtr = FromVal;
+ Value *ToPtr = ToVal;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ FromPtr = GEP->getPointerOperand();
+ }
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ ToPtr = GEP->getPointerOperand();
+ }
+ if (FromPtr != FromVal || ToPtr != ToVal) {
+ // Quickly check the common case
+ if (FromPtr == ToPtr)
+ return true;
+
+ // SCEV may have rewritten an expression that produces the GEP's pointer
+ // operand. That's ok as long as the pointer operand has the same base
+ // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // base of a recurrence. This handles the case in which SCEV expansion
+ // converts a pointer type recurrence into a nonrecurrent pointer base
+ // indexed by an integer recurrence.
+ const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+ const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+ if (FromBase == ToBase)
+ return true;
+
+ DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
+ << *FromBase << " != " << *ToBase << "\n");
+
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToSInt - Convert APF to an integer, if possible.
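+/// For example, 100.0 converts exactly to 100, while 2.5 is rejected because
+/// the conversion to an integer is not exact.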
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
+ return false;
+ IntVal = UIntVal;
+ return true;
+}
+
+/// HandleFloatingPointIV - If the loop has a floating-point induction variable
+/// then insert a corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
+///
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
+
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
+
+ // Check the IV increment. Reject this PN if the increment operation is not
+ // an add or the increment value cannot be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
+ }
+
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison will not overflow in a way
+ // that won't be trapped by the integer equivalent operations. Check for this
+ // now.
+ // TODO: We could use i64 if it is native and the range requires it.
+
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value and an equality or less than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ if (NewPred == CmpInst::ICMP_SLE) {
+ // Normalize SLE -> SLT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
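+ // For example (a sketch): with InitValue 0, ExitValue 10, IncValue 3 and an
+ // ICMP_NE exit test, the IV visits 0, 3, 6, 9, 12, ... and never equals 10,
+ // so the i32 IV would eventually wrap where the fp IV would not; such cases
+ // are rejected here.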
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
+ } else {
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value and an equality or greater than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ if (NewPred == CmpInst::ICMP_SGE) {
+ // Normalize SGE -> SGT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
+ }
+
+ const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
+
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
+
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
+
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
+
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstNonPHI());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ if (IU)
+ IU->AddUsersIfInteresting(NewPHI);
+}
+
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had a floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
+/// RewriteLoopExitValues - Check to see if this loop has a computable
+/// loop-invariant execution count. If so, this means that we can compute the
+/// final value of any expressions that are recurrent in the loop, and
+/// substitute the exit values from the loop into any instructions outside of
+/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
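+///
+/// For instance (a sketch), a use of "i*2" after a loop known to run exactly
+/// 10 times, with i stepping from 0 by 1, can be rewritten outside the loop as
+/// the constant 20, potentially leaving the loop itself dead.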
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+ // Verify that the input to the pass is already in LCSSA form.
+ assert(L->isLCSSAForm(*DT));
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBB = ExitBlocks[i];
+
+ // If there are no PHI nodes in this exit block, then no values defined
+ // inside the loop are used on this path, skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+
+ // SCEV only supports integer and pointer expressions for now.
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // If the value being merged in is not integer or is not defined
+ // in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst))
+ continue;
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible.
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (!SE->isLoopInvariant(ExitValue, L))
+ continue;
+
+ Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
+
+ DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+ << " LoopVal = " << *Inst << "\n");
+
+ if (!isValidRewrite(Inst, ExitVal)) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+ Changed = true;
+ ++NumReplaced;
+
+ PN->setIncomingValue(i, ExitVal);
+
+ // If this instruction is dead now, delete it.
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
+ if (NumPreds == 1) {
+ // Completely replace a single-pred PHI. This is safe, because the
+ // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+ // node anymore.
+ PN->replaceAllUsesWith(ExitVal);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+ }
+ if (NumPreds != 1) {
+ // Clone the PHI and delete the original one. This lets IVUsers and
+ // any other maps purge the original user from their records.
+ PHINode *NewPN = cast<PHINode>(PN->clone());
+ NewPN->takeName(PN);
+ NewPN->insertBefore(PN);
+ PN->replaceAllUsesWith(NewPN);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // The insertion point instruction may have been deleted; clear it out
+ // so that the rewriter doesn't trip over it later.
+ Rewriter.clearInsertPoint();
+}
+
+//===----------------------------------------------------------------------===//
+// Rewrite IV users based on a canonical IV.
+// To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the strength reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
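+//
+// For example, the affine addrec {0,+,4} is treated as safe below, while a
+// quadratic addrec such as {0,+,1,+,2} is rejected.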
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+ // Loop-invariant values are safe.
+ if (SE->isLoopInvariant(S, L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe if all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L, SE)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L, SE);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
+ //
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
+
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
+
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (SE->isLoopInvariant(ExitVal, L))
+ AR = ExitVal;
+ }
+
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the strength reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!isSafe(AR, L, SE))
+ continue;
+
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
+ // Inform ScalarEvolution that this value is changing. The change doesn't
+ // affect its value, but it does potentially affect which use lists the
+ // value will be on after the replacement, which affects ScalarEvolution's
+ // ability to walk use lists and drop dangling pointers when a value is
+ // deleted.
+ SE->forgetValue(User);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+ NewValI->setDebugLoc(User->getDebugLoc());
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ // Collect information about induction variables that are used by sign/zero
+ // extend operations. This information is recorded by CollectExtend and
+ // provides the input to WidenIV.
+ struct WideIVInfo {
+ const Type *WidestNativeType; // Widest integer type created by [sz]ext
+ bool IsSigned; // Was an sext user seen before a zext?
+
+ WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+ };
+}
+
+/// CollectExtend - Update information about the induction variable that is
+/// extended by this sign or zero extend operation. This is used to determine
+/// the final width of the IV before actually widening it.
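+///
+/// For example (a sketch), if an i32 IV is used by a sext to i64 and a sext to
+/// i128, and the target considers both widths legal, WidestNativeType becomes
+/// i128; a zext user with the opposite signedness would simply be ignored.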
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+ ScalarEvolution *SE, const TargetData *TD) {
+ const Type *Ty = Cast->getType();
+ uint64_t Width = SE->getTypeSizeInBits(Ty);
+ if (TD && !TD->isLegalInteger(Width))
+ return;
+
+ if (!WI.WidestNativeType) {
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ WI.IsSigned = IsSigned;
+ return;
+ }
+
+ // We extend the IV to satisfy the sign of its first user, arbitrarily.
+ if (WI.IsSigned != IsSigned)
+ return;
+
+ if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+}
+
+namespace {
+/// WidenIV - The goal of this transform is to remove sign and zero extends
+/// without creating any new induction variables. To do this, it creates a new
+/// phi of the wider type and redirects all users, either removing extends or
+/// inserting truncs whenever we stop propagating the type.
+///
+class WidenIV {
+ // Parameters
+ PHINode *OrigPhi;
+ const Type *WideType;
+ bool IsSigned;
+
+ // Context
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ // Result
+ PHINode *WidePhi;
+ Instruction *WideInc;
+ const SCEV *WideIncExpr;
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ SmallPtrSet<Instruction*,16> Widened;
+ SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
+
+public:
+ WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ ScalarEvolution *SEv, DominatorTree *DTree,
+ SmallVectorImpl<WeakVH> &DI) :
+ OrigPhi(PN),
+ WideType(WI.WidestNativeType),
+ IsSigned(WI.IsSigned),
+ LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())),
+ SE(SEv),
+ DT(DTree),
+ WidePhi(0),
+ WideInc(0),
+ WideIncExpr(0),
+ DeadInsts(DI) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ }
+
+ PHINode *CreateWideIV(SCEVExpander &Rewriter);
+
+protected:
+ Instruction *CloneIVUser(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef);
+
+ const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+
+ Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
+ Instruction *WideDef);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
+};
+} // anonymous namespace
+
+static Value *getExtend( Value *NarrowOper, const Type *WideType,
+ bool IsSigned, IRBuilder<> &Builder) {
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// CloneIVUser - Instantiate a wide operation to replace a narrow
+/// operation. This only needs to handle operations that can evaluate to
+/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
+Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef) {
+ unsigned Opcode = NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return 0;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
+
+ IRBuilder<> Builder(NarrowUse);
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know
+ // anything about the narrow operand yet so must insert a [sz]ext. It is
+ // probably loop invariant and will be folded or hoisted. If it actually
+ // comes from a widened IV, it should be removed during a future call to
+ // WidenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+
+ BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
+ LHS, RHS,
+ NarrowBO->getName());
+ Builder.Insert(WideBO);
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
+ return WideBO;
+ }
+ llvm_unreachable(0);
+}
+
+/// HoistStep - Attempt to hoist an IV increment above a potential use.
+///
+/// To successfully hoist, two criteria must be met:
+/// - IncV operands dominate InsertPos and
+/// - InsertPos dominates IncV
+///
+/// Meeting the second condition means that we don't need to check all of IncV's
+/// existing uses (it's moving up in the domtree).
+///
+/// This does not yet recursively hoist the operands, although that would
+/// not be difficult.
+static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
+ const DominatorTree *DT) {
+ if (DT->dominates(IncV, InsertPos))
+ return true;
+
+ if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ // Attempt to hoist IncV
+ for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
+ OI != OE; ++OI) {
+ Instruction *OInst = dyn_cast<Instruction>(OI);
+ if (OInst && !DT->dominates(OInst, InsertPos))
+ return false;
+ }
+ IncV->moveBefore(InsertPos);
+ return true;
+}
+
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening its type? In other words, can the extend be
+// safely hoisted out of the loop, with SCEV reducing the value to a recurrence
+// on the same loop? If so, return the sign- or zero-extended recurrence.
+// Otherwise return NULL.
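+//
+// For example (a sketch), sign-extending the i32 addrec {0,+,1}<%L> to i64
+// yields an i64 addrec for the same loop when SCEV can prove the extension is
+// safe; if it cannot, the result is a plain sext expression and null is
+// returned.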
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
+/// WidenIVUse - Determine whether an individual user of the narrow IV can be
+/// widened. If so, return the wide clone of the user.
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
+ Instruction *WideDef) {
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
+
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
+ return 0;
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
+ Value *NewDef = WideDef;
+ if (NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(NarrowDefUse);
+ NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *NarrowUse << "\n");
+ NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
+ NewDef = NarrowUse;
+ }
+ }
+ if (NewDef != NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
+ << " replaced by " << *WideDef << "\n");
+ ++NumElimExt;
+ NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(NarrowUse);
+ }
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return 0;
+ }
+
+ // Does this user itself evaluate to a recurrence after widening?
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
+ if (!WideAddRec) {
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ IRBuilder<> Builder(NarrowDefUse);
+ Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
+ NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
+ return 0;
+ }
+ // We assume that block terminators are not SCEVable. We wouldn't want to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = 0;
+ if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) {
+ WideUse = WideInc;
+ }
+ else {
+ WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef);
+ if (!WideUse)
+ return 0;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec != SE->getSCEV(WideUse)) {
+ DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
+ << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+ DeadInsts.push_back(WideUse);
+ return 0;
+ }
+
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ for (Value::use_iterator UI = NarrowDef->use_begin(),
+ UE = NarrowDef->use_end(); UI != UE; ++UI) {
+ Use &U = UI.getUse();
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(cast<Instruction>(U.getUser())))
+ continue;
+
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ }
+}
+
+/// CreateWideIV - Process a single induction variable. First use the
+/// SCEVExpander to create a wide induction variable that evaluates to the same
+/// recurrence as the original narrow IV. Then use a worklist to forward
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
+/// interesting IV users, the narrow IV will be isolated for removal by
+/// DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+///
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return NULL;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = IsSigned ?
+ SE->getSignExtendExpr(AddRec, WideType) :
+ SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return NULL;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec is
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
+ && "Loop header phi recurrence inputs do not dominate the loop");
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+ // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+ // of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = L->getHeader()->begin();
+ WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ }
+
+ DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
+
+ while (!NarrowIVUsers.empty()) {
+ Use *UsePtr;
+ Instruction *WideDef;
+ tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+ Use &NarrowDefUse = *UsePtr;
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+ Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse)
+ pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+
+ // WidenIVUse may have removed the def-use edge.
+ if (NarrowDef->use_empty())
+ DeadInsts.push_back(NarrowDef);
+ }
+ return WidePhi;
+}
+
+//===----------------------------------------------------------------------===//
+// Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
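+/// EliminateIVComparison - Replace an IV comparison with a constant when SCEV
+/// can prove that the predicate (or its inverse) always holds at this point.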
+void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
+ Changed = true;
+ DeadInsts.push_back(ICmp);
+}
+
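+/// EliminateIVRemainder - Simplify a srem/urem whose numerator is an IV that
+/// SCEV can prove lies in a small enough range relative to the divisor.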
+void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned) {
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+ // Get the SCEVs for the Rem operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return;
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return;
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1),
+ "tmp");
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ // Inform IVUsers about the new users.
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.push_back(Rem);
+}
+
+/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // We assume that terminators are not SCEVable.
+ assert((!S || I != I->getParent()->getTerminator()) &&
+ "can't fold terminators");
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encounters a clear
+/// optimization opportunity. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+ std::map<PHINode *, WideIVInfo> WideIVMap;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final. So for the most predictable results, we delay
+ // evaluation of sign/zero extensions until needed, and avoid running other
+ // SCEV-based analyses prior to SimplifyIVUsersNoRewrite.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVInfo WI;
+
+ // Instructions processed by SimplifyIVUsers for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ Instruction *UseInst, *Operand;
+ tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ if (EliminateIVUser(UseInst, Operand)) {
+ pushIVUsers(Operand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, IsSigned, WI, SE, TD);
+ }
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+ }
+ }
+ if (WI.WidestNativeType) {
+ WideIVMap[CurrIV] = WI;
+ }
+ } while(!LoopPhis.empty());
+
+ for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+ E = WideIVMap.end(); I != E; ++I) {
+ WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ WideIVMap.clear();
+ }
+}
+
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// eliminate the duplicates, keying each phi by its SCEV in ExprToIVMap as it
+/// goes.
+///
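+/// Illustrative example (not part of the original source): two header phis
+///   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
+///   %j = phi i64 [ 0, %preheader ], [ %j.next, %latch ]
+/// whose SCEV is the same expression, {0,+,1}<%loop>, are congruent; %j is
+/// replaced by %i (and %j.next by %i.next when the increment can be hoisted).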
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEV *S = SE->getSCEV(Phi);
+ DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+ bool Inserted;
+ tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+ if (Inserted)
+ continue;
+ PHINode *OrigPhi = Pos->second;
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's almost certain to be the head of an IV
+ // user cycle that is isomorphic with the original phi. So it's worth
+ // eagerly cleaning up the common case of a single IV increment.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+ if (OrigInc != IsomorphicInc &&
+ SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+ HoistStep(OrigInc, IsomorphicInc, DT)) {
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ IsomorphicInc->replaceAllUsesWith(OrigInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElimIV;
+ Phi->replaceAllUsesWith(OrigPhi);
+ DeadInsts.push_back(Phi);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ BackedgeTakenCount->isZero())
+ return false;
+
+ if (!L->getExitingBlock())
+ return false;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return false;
+
+ // Special case: If the backedge-taken count is a UDiv, it's very likely a
+ // UDiv that ScalarEvolution produced in order to compute a precise
+ // expression, rather than a UDiv from the user's code. If we can't find a
+ // UDiv in the code with some simple searching, assume the former and forego
+ // rewriting the loop.
+ if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return false;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != BackedgeTakenCount) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != BackedgeTakenCount)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+ if (!L->getExitingBlock())
+ return 0;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return 0;
+
+ const Type *Ty = 0;
+ for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+ OI != OE; ++OI) {
+ assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+ TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+ if (!Trunc)
+ continue;
+
+ return Trunc->getSrcTy();
+ }
+ return Ty;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where
+/// SCEV analysis can determine a loop-invariant trip count, which covers a
+/// much broader class of loops than just those with linear exit tests.
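+///
+/// Illustrative sketch (not part of the original source): when the exiting
+/// block is also the loop latch and SCEV computes the backedge-taken count
+/// as %n - 1, the trip count is %n and the exit branch is rewritten to
+/// compare the incremented canonical IV against it, e.g.
+///   %exitcond = icmp ne i64 %indvar.next, %n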
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter) {
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+
+ // If the exiting block is not the same as the backedge block, we must compare
+ // against the pre-incremented value; otherwise we prefer to compare against
+ // the post-incremented value.
+ Value *CmpIndVar;
+ const SCEV *RHS = BackedgeTakenCount;
+ if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+ const SCEV *N =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+ } else {
+ // Potential overflow. Cast before doing the add.
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ RHS = SE->getAddExpr(RHS,
+ SE->getConstant(IndVar->getType(), 1));
+ }
+
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ } else {
+ // We have to use the preincremented value...
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ CmpIndVar = IndVar;
+ }
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(RHS, L) &&
+ "Computed iteration count is not loop invariant!");
+ Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ ICmpInst::Predicate Opcode;
+ if (L->contains(BI->getSuccessor(0)))
+ Opcode = ICmpInst::ICMP_NE;
+ else
+ Opcode = ICmpInst::ICMP_EQ;
+
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n");
+
+ ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ Cond->setDebugLoc(BI->getDebugLoc());
+ Value *OrigCond = BI->getCondition();
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+ // will make old comparison dead.
+ BI->setCondition(Cond);
+ DeadInsts.push_back(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
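+///
+/// Illustrative example (not part of the original source): given
+///   preheader:
+///     %inv = mul i64 %a, %b    ; loop-invariant, unused inside the loop
+///     br label %header
+///   ...
+///   exit:
+///     call void @use(i64 %inv)
+/// the multiply is moved out of the preheader to the start of the exit block.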
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // If LoopSimplify form is not available, stay out of trouble. Some notes:
+ // - LSR currently only supports LoopSimplify-form loops. Indvars'
+ // canonicalization can be a pessimization without LSR to "clean up"
+ // afterwards.
+ // - We depend on having a preheader; in particular,
+ // Loop::getCanonicalInductionVariable only supports loops with preheaders,
+ // and we're in trouble if we can't find the induction variable even when
+ // we've manually inserted one.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ if (!DisableIVRewrite)
+ IU = &getAnalysis<IVUsers>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ DeadInsts.clear();
+ Changed = false;
+
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ RewriteNonIntegerIVs(L);
+
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+ // Create a rewriter object which we'll use to transform the code with.
+ SCEVExpander Rewriter(*SE, "indvars");
+
+ // Eliminate redundant IV users.
+ //
+ // Simplification works best when run before other consumers of SCEV. We
+ // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+ // other expressions involving loop IVs have been evaluated. This helps SCEV
+ // set no-wrap flags before normalizing sign/zero extension.
+ if (DisableIVRewrite) {
+ Rewriter.disableCanonicalMode();
+ SimplifyIVUsersNoRewrite(L, Rewriter);
+ }
+
+ // Check to see if this loop has a computable loop-invariant execution count.
+ // If so, this means that we can compute the final value of any expressions
+ // that are recurrent in the loop, and substitute the exit values from the
+ // loop into any instructions outside of the loop that use the final values of
+ // the current expressions.
+ //
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ RewriteLoopExitValues(L, Rewriter);
+
+ // Eliminate redundant IV users.
+ if (!DisableIVRewrite)
+ SimplifyIVUsers(Rewriter);
+
+ // Eliminate redundant IV cycles.
+ if (DisableIVRewrite)
+ SimplifyCongruentIVs(L);
+
+ // Compute the type of the largest recurrence expression, and decide whether
+ // a canonical induction variable should be inserted.
+ const Type *LargestType = 0;
+ bool NeedCannIV = false;
+ bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
+ if (ExpandBECount) {
+ // If we have a known trip count and a single exit block, we'll be
+ // rewriting the loop exit test condition below, which requires a
+ // canonical induction variable.
+ NeedCannIV = true;
+ const Type *Ty = BackedgeTakenCount->getType();
+ if (DisableIVRewrite) {
+ // In this mode, SimplifyIVUsers may have already widened the IV used by
+ // the backedge test and inserted a Trunc on the compare's operand. Get
+ // the wider type to avoid creating a redundant narrow IV only used by the
+ // loop test.
+ LargestType = getBackedgeIVType(L);
+ }
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = SE->getEffectiveSCEVType(Ty);
+ }
+ if (!DisableIVRewrite) {
+ for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ NeedCannIV = true;
+ const Type *Ty =
+ SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = Ty;
+ }
+ }
+
+ // Now that we know the largest of the induction variable expressions
+ // in this loop, insert a canonical induction variable of the largest size.
+ PHINode *IndVar = 0;
+ if (NeedCannIV) {
+ // Check to see if the loop already has any canonical-looking induction
+ // variables. If any are present and wider than the planned canonical
+ // induction variable, temporarily remove them, so that the Rewriter
+ // doesn't attempt to reuse them.
+ SmallVector<PHINode *, 2> OldCannIVs;
+ while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
+ if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+ SE->getTypeSizeInBits(LargestType))
+ OldCannIV->removeFromParent();
+ else
+ break;
+ OldCannIVs.push_back(OldCannIV);
+ }
+
+ IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
+
+ ++NumInserted;
+ Changed = true;
+ DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
+
+ // Now that the official induction variable is established, reinsert
+ // any old canonical-looking variables after it so that the IR remains
+ // consistent. They will be deleted as part of the dead-PHI deletion at
+ // the end of the pass.
+ while (!OldCannIVs.empty()) {
+ PHINode *OldCannIV = OldCannIVs.pop_back_val();
+ OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ }
+ }
+
+ // If we have a trip count expression, rewrite the loop's exit condition
+ // using it. We can currently only handle loops with a single exit.
+ ICmpInst *NewICmp = 0;
+ if (ExpandBECount) {
+ assert(canExpandBackedgeTakenCount(L, SE) &&
+ "canonical IV disrupted BackedgeTaken expansion");
+ assert(NeedCannIV &&
+ "LinearFunctionTestReplace requires a canonical induction variable");
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ NewICmp =
+ LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
+ }
+ // Rewrite IV-derived expressions.
+ if (!DisableIVRewrite)
+ RewriteIVExpressions(L, Rewriter);
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
+ // The Rewriter may not be used from this point on.
+
+ // Loop-invariant instructions in the preheader that aren't used in the
+ // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L);
+
+ // For completeness, inform IVUsers of the IV use in the newly-created
+ // loop exit test instruction.
+ if (NewICmp && IU)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
+
+ // Clean up dead instructions.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+ // Check a post-condition.
+ assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
new file mode 100644
index 000000000000..b500d5b4fdff
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -0,0 +1,1594 @@
+//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Jump Threading pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jump-threading"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumThreads, "Number of jumps threaded");
+STATISTIC(NumFolds, "Number of terminators folded");
+STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
+
+static cl::opt<unsigned>
+Threshold("jump-threading-threshold",
+ cl::desc("Max block size to duplicate for jump threading"),
+ cl::init(6), cl::Hidden);
+
+namespace {
+ // These are at global scope so static functions can use them too.
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+ typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find.
+ enum ConstantPreference {
+ WantInteger,
+ WantBlockAddress
+ };
+
+ /// This pass performs 'jump threading', which looks at blocks that have
+ /// multiple predecessors and multiple successors. If one or more of the
+ /// predecessors of the block can be proven to always jump to one of the
+ /// successors, we forward the edge from the predecessor to the successor by
+ /// duplicating the contents of this block.
+ ///
+ /// An example of when this can occur is code like this:
+ ///
+ /// if () { ...
+ /// X = 4;
+ /// }
+ /// if (X < 3) {
+ ///
+ /// In this case, the unconditional branch at the end of the first if can be
+ /// revectored to the false side of the second if.
+ ///
+ class JumpThreading : public FunctionPass {
+ TargetData *TD;
+ LazyValueInfo *LVI;
+#ifdef NDEBUG
+ SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+#else
+ SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
+ public:
+ static char ID; // Pass identification
+ JumpThreading() : FunctionPass(ID) {
+ initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
+ bool ProcessBlock(BasicBlock *BB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
+ bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
+
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
+
+ bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ };
+}
+
+char JumpThreading::ID = 0;
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+
+// Public interface to the Jump Threading pass
+FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+
+/// runOnFunction - Top level algorithm.
+///
+bool JumpThreading::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
+ TD = getAnalysisIfAvailable<TargetData>();
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ FindLoopHeaders(F);
+
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
+ while (ProcessBlock(BB))
+ Changed = true;
+
+ ++I;
+
+ // If the block is trivially dead, zap it. This eliminates the successor
+ // edges which simplifies the CFG.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
+ << "' with terminator: " << *BB->getTerminator() << '\n');
+ LoopHeaders.erase(BB);
+ LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ Changed = true;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this block dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
+ EverChanged |= Changed;
+ } while (Changed);
+
+ LoopHeaders.clear();
+ return EverChanged;
+}
+
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+ // Ignore PHI nodes; these will be flattened when duplication happens.
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
+ // Sum up the cost of each instruction until we get to the terminator. Don't
+ // include the terminator because the copy won't include it.
+ unsigned Size = 0;
+ for (; !isa<TerminatorInst>(I); ++I) {
+ // Debugger intrinsics don't incur code size.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this is a pointer->pointer bitcast, it is free.
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
+ continue;
+
+ // All other instructions count for at least one unit.
+ ++Size;
+
+ // Calls are more expensive. If they are non-intrinsic calls, we model them
+ // as having cost of 4. If they are a non-vector intrinsic, we model them
+ // as having cost of 2 total, and if they are a vector intrinsic, we model
+ // them as having cost 1.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(CI))
+ Size += 3;
+ else if (!CI->getType()->isVectorTy())
+ Size += 1;
+ }
+ }
+
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(I))
+ Size = Size > 6 ? Size-6 : 0;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
+ return Size;
+}
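+
+// For illustration of the cost model above (not part of the original source):
+// a block with two ordinary instructions plus one non-intrinsic call costs
+// 2 + 4 = 6; if that block also ends in a switch, the final cost becomes
+// max(6 - 6, 0) = 0, making it very likely to be threaded.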
+
+/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// structures into irreducible loops. Doing this breaks up the loop nesting
+/// hierarchy and pessimizes later transformations. To prevent this from
+/// happening, we first have to find the loop headers. Here we approximate this
+/// by finding targets of backedges in the CFG.
+///
+/// Note that there definitely are cases when we want to allow threading of
+/// edges across a loop header. For example, threading a jump from outside the
+/// loop (the preheader) to an exit block of the loop is definitely profitable.
+/// It is also almost always profitable to thread backedges from within the loop
+/// to exit blocks, and is often profitable to thread backedges to other blocks
+/// within the loop (forming a nested loop). This simple analysis is not rich
+/// enough to track all of these properties and keep it up-to-date as the CFG
+/// mutates, so we don't allow any of these transformations.
+///
+void JumpThreading::FindLoopHeaders(Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+ LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+}
+
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+ if (!Val)
+ return 0;
+
+ // Undef is "known" enough.
+ if (UndefValue *U = dyn_cast<UndefValue>(Val))
+ return U;
+
+ if (Preference == WantBlockAddress)
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+ return dyn_cast<ConstantInt>(Val);
+}
+
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
+///
+/// This returns true if there were any known values.
+///
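+/// Illustrative example (not part of the original source): for
+///   %p = phi i1 [ true, %bb1 ], [ %x, %bb2 ]
+/// queried with WantInteger, the result would contain (true, %bb1), plus
+/// (%x's constant, %bb2) whenever LazyValueInfo knows %x is constant on
+/// that edge.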
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them.
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII helper to remove this pair from the recursion set once the
+ // recursion stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
+ // If V is a constant, then it is known in all predecessors.
+ if (Constant *KC = getKnownConstant(V, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
+ Result.push_back(std::make_pair(KC, P));
+ }
+
+ return !Result.empty();
+ }
+
+ /// If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ } else {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ PredValueInfoTy LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(LHSVals[i].first)) {
+ Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
+ }
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(RHSVals[i].first)) {
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ Result[i].first = ConstantExpr::getNot(Result[i].first);
+
+ return true;
+ }
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(ResC, P));
+ }
+
+ return !Result.empty();
+ }
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+ }
+
+ return !Result.empty();
+}
+
+
+
+/// GetBestDestForJumpOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors. This should reduce the in-degree of the others.
+///
+static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+ TerminatorInst *BBTerm = BB->getTerminator();
+ unsigned MinSucc = 0;
+ BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+ // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ TestBB = BBTerm->getSuccessor(i);
+ unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ if (NumPreds < MinNumPreds) {
+ MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
+ }
+
+ return MinSucc;
+}
+
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
+/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// through to a successor, transform them now.
+bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If the block is trivially dead, just return and let the caller nuke it.
+ // This simplifies other transformations.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock())
+ return false;
+
+ // If this block has a single predecessor, and if that pred has a single
+ // successor, merge the blocks. This encourages recursive jump threading
+ // because now the condition in this block can be threaded through
+ // predecessors of our predecessor block.
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
+ if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ LVI->eraseBlock(SinglePred);
+ MergeBasicBlockIntoOnlyPred(BB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+ return true;
+ }
+ }
+
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
+ Value *Condition;
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
+ // Can't thread an unconditional jump.
+ if (BI->isUnconditional()) return false;
+ Condition = BI->getCondition();
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
+ Condition = SI->getCondition();
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
+ return false; // Must be an invoke.
+ }
+
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
+
+ // If the terminator is branching on an undef, we can pick any of the
+ // successors to branch to. Let GetBestDestForJumpOnUndef decide.
+ if (isa<UndefValue>(Condition)) {
+ unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
+
+ // Fold the branch/switch.
+ TerminatorInst *BBTerm = BB->getTerminator();
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ if (i == BestSucc) continue;
+ BBTerm->getSuccessor(i)->removePredecessor(BB, true);
+ }
+
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding undef terminator: " << *BBTerm << '\n');
+ BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
+ BBTerm->eraseFromParent();
+ return true;
+ }
+
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (getKnownConstant(Condition, Preference)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB, true);
+ return true;
+ }
+
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
+ // All the rest of our checks depend on the condition being an instruction.
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (ProcessThreadableEdges(Condition, BB, Preference))
+ return true;
+ return false;
+ }
+
+
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Use LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For each predecessor edge, determine whether the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
+ }
+
+ // Check for some cases that are worth simplifying. Right now we want to look
+ // for loads that are used by a switch or by the condition for the branch. If
+ // we see one, check to see if it's partially redundant. If so, insert a PHI
+ // which can then be used to thread the values.
+ //
+ Value *SimplifyValue = CondInst;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
+ if (isa<Constant>(CondCmp->getOperand(1)))
+ SimplifyValue = CondCmp->getOperand(0);
+
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
+ if (SimplifyPartiallyRedundantLoad(LI))
+ return true;
+
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
+ return true;
+
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
+
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
+
+ return false;
+}
+
+
+/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
+/// load instruction, eliminate it by replacing it with a PHI node. This is an
+/// important optimization that encourages jump threading, and needs to be run
+/// interlaced with other jump threading tasks.
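+///
+/// Illustrative sketch (not part of the original source): given
+///   pred1:  store i32 1, i32* %p
+///           br label %bb
+///   pred2:  br label %bb
+///   bb:     %v = load i32* %p
+/// the load is available in %pred1 (value 1) but not in %pred2, so a reload
+/// is inserted at the end of %pred2 and %v is replaced by a phi of the two
+/// values.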
+bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
+ // Don't hack volatile loads.
+ if (LI->isVolatile()) return false;
+
+ // If the load is defined in a block with exactly one predecessor, it can't be
+ // partially redundant.
+ BasicBlock *LoadBB = LI->getParent();
+ if (LoadBB->getSinglePredecessor())
+ return false;
+
+ Value *LoadedPtr = LI->getOperand(0);
+
+ // If the loaded operand is defined in the LoadBB, it can't be available.
+ // TODO: Could do simple PHI translation, that would be fun :)
+ if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
+ if (PtrOp->getParent() == LoadBB)
+ return false;
+
+ // Scan a few instructions up from the load, to see if it is obviously live at
+ // the entry to its block.
+ BasicBlock::iterator BBIt = LI;
+
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
+ // If the value of the load is locally available within the block, just use
+ // it. This frequently occurs for reg2mem'd allocas.
+ //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
+
+ // If the returned value is the load itself, replace with an undef. This can
+ // only happen in dead loops.
+ if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(AvailableVal);
+ LI->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, if we scanned the whole block and got to the top of the block,
+ // we know the block is locally transparent to the load. If not, something
+ // might clobber its value.
+ if (BBIt != LoadBB->begin())
+ return false;
+
+
+ SmallPtrSet<BasicBlock*, 8> PredsScanned;
+ typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+ AvailablePredsTy AvailablePreds;
+ BasicBlock *OneUnavailablePred = 0;
+
+ // If we got here, the loaded value is transparent through to the start of the
+ // block. Check to see if it is available in any of the predecessor blocks.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *PredBB = *PI;
+
+ // If we already scanned this predecessor, skip it.
+ if (!PredsScanned.insert(PredBB))
+ continue;
+
+ // Scan the predecessor to see if the value is available in the pred.
+ BBIt = PredBB->end();
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ if (!PredAvailable) {
+ OneUnavailablePred = PredBB;
+ continue;
+ }
+
+ // If so, this load is partially redundant. Remember this info so that we
+ // can create a PHI node.
+ AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+ }
+
+ // If the loaded value isn't available in any predecessor, it isn't partially
+ // redundant.
+ if (AvailablePreds.empty()) return false;
+
+ // Okay, the loaded value is available in at least one (and maybe all!)
+ // predecessors. If the value is unavailable in more than one unique
+ // predecessor, we want to insert a merge block for those common predecessors.
+ // This ensures that we only have to insert one reload, thus not increasing
+ // code size.
+ BasicBlock *UnavailablePred = 0;
+
+ // If there is exactly one predecessor where the value is unavailable, the
+ // already computed 'OneUnavailablePred' block is it. If it ends in an
+ // unconditional branch, we know that it isn't a critical edge.
+ if (PredsScanned.size() == AvailablePreds.size()+1 &&
+ OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
+ UnavailablePred = OneUnavailablePred;
+ } else if (PredsScanned.size() != AvailablePreds.size()) {
+ // Otherwise, we had multiple unavailable predecessors or we had a critical
+ // edge from the one.
+ SmallVector<BasicBlock*, 8> PredsToSplit;
+ SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
+
+ for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
+ AvailablePredSet.insert(AvailablePreds[i].first);
+
+ // Add all the unavailable predecessors to the PredsToSplit list.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
+
+ // Split them out to their own block.
+ UnavailablePred =
+ SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
+ "thread-pre-split", this);
+ }
+
+ // If the value isn't available in all predecessors, then there will be
+ // exactly one where it isn't available. Insert a load on that edge and add
+ // it to the AvailablePreds list.
+ if (UnavailablePred) {
+ assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
+ "Can't handle critical edge here!");
+ LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+ NewVal->setDebugLoc(LI->getDebugLoc());
+ AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
+ }
+
+ // Now we know that each predecessor of this block has a value in
+ // AvailablePreds, sort them for efficient access as we're walking the preds.
+ array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
+
+ // Create a PHI node at the start of the block for the PRE'd load value.
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
+ PN->takeName(LI);
+ PN->setDebugLoc(LI->getDebugLoc());
+
+ // Insert new entries into the PHI for each predecessor. A single block may
+ // have multiple entries here.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ AvailablePredsTy::iterator I =
+ std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
+ std::make_pair(P, (Value*)0));
+
+ assert(I != AvailablePreds.end() && I->first == P &&
+ "Didn't find entry for predecessor!");
+
+ PN->addIncoming(I->second, I->first);
+ }
+
+ //cerr << "PRE: " << *LI << *PN << "\n";
+
+ LI->replaceAllUsesWith(PN);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer to thread
+ // blocks with known and real destinations to threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
+
+  // Okay, now we know the most popular destination.  If several destinations
+  // are tied for most popular, we need to pick one; the choice is arbitrary,
+  // but it must be deterministic.  Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
+
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
+
+ MostPopularDest = TI->getSuccessor(i);
+ break;
+ }
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ PredValueInfoTy PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
+ return false;
+
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(dbgs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
+ << *PredValues[i].first
+ << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
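+  // For example, if BB ends in 'br i1 %cond, label %T, label %F', then a
+  // predecessor where %cond is known true maps to %T, one where it is known
+  // false maps to %F, and one where it is undef maps to a null destination.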
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
+
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ Constant *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (isa<UndefValue>(Val))
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
+ else {
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
+ }
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (PredToDestList.empty())
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
+ }
+
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+  // to the most popular destination.  If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+      // edges to the block.  Factor each of these edges by adding one entry
+      // to PredsToFactor per edge into BB.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. See if there are any simplifications we
+/// can do based on inputs to the phi node.
+///
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // TODO: We could make use of this to do it once for blocks with common PHI
+ // values.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1);
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
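+  // Illustrative sketch (names made up): given
+  //
+  //   Pred:
+  //     br label %BB
+  //   BB:
+  //     %p = phi i1 [ true, %Pred ], [ %x, %Other ]
+  //     br i1 %p, label %T, label %F
+  //
+  // duplicating BB's branch into Pred turns it into 'br i1 true, ...', which
+  // can then be folded to an unconditional branch to %T, while BB still
+  // handles the edge from %Other.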
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If the first instruction in BB isn't a phi, we won't be able to infer
+ // anything special about any particular predecessor.
+ if (!isa<PHINode>(BB->front()))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+  //   br i1 %Y, ...
+
+ PredValueInfoTy XorOpValues;
+ bool isLHS = true;
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+  // value in each predecessor is true, false, or undef.
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (isa<UndefValue>(XorOpValues[i].first))
+ // Ignore undefs for the count.
+ continue;
+ if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on, true, false, or undef if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal &&
+ !isa<UndefValue>(XorOpValues[i].first))
+ continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // If we inferred a value for all of the predecessors, then duplication won't
+ // help us. However, we can just replace the LHS or RHS with the constant.
+ if (BlocksToFoldInto.size() ==
+ cast<PHINode>(BB->front()).getNumIncomingValues()) {
+ if (SplitVal == 0) {
+ // If all preds provide undef, just nuke the xor, because it is undef too.
+ BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
+ BO->eraseFromParent();
+ } else if (SplitVal->isZero()) {
+ // If all preds provide 0, replace the xor with the other input.
+ BO->replaceAllUsesWith(BO->getOperand(isLHS));
+ BO->eraseFromParent();
+ } else {
+ // If all preds provide 1, set the computed value to 1.
+ BO->setOperand(!isLHS, SplitVal);
+ }
+
+ return true;
+ }
+
+ // Try to duplicate BB into PredBB.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
+
+/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
+/// NewPred using the entries from OldPred (suitably mapped).
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred,
+ DenseMap<Instruction*, Value*> &ValueMap) {
+ for (BasicBlock::iterator PNI = PHIBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(OldPred);
+
+ // Remap the value if necessary.
+ if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+ if (I != ValueMap.end())
+ IV = I->second;
+ }
+
+ PN->addIncoming(IV, NewPred);
+ }
+}
+
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
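/// Schematically (names made up): if PredBB branches to BB, and BB ends in
/// 'br i1 %c, label %SuccBB, label %Other', then after threading PredBB
/// branches to a clone 'BB.thread' of BB's non-PHI instructions, and
/// 'BB.thread' ends in an unconditional 'br label %SuccBB'.  The original BB
/// is kept for its remaining predecessors.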
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB) {
+  // Threading to the same block that we come from would create an infinite
+  // loop.
+ if (SuccBB == BB) {
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
+ << "' to dest BB '" << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << JumpThreadCost << "\n");
+ return false;
+ }
+
+  // And finally, do it!  Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
+ // And finally, do it!
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ << SuccBB->getName() << "' with cost: " << JumpThreadCost
+ << ", across block:\n "
+ << *BB << "\n");
+
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
+ BB->getParent(), BB);
+ NewBB->moveAfter(PredBB);
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; !isa<TerminatorInst>(BI); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ // We didn't copy the terminator from BB over to NewBB, because there is now
+ // an unconditional jump to SuccBB. Insert the unconditional jump.
+  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
+ NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
+ // PHI nodes for NewBB now.
+ AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+  // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+    // its block to be uses of the appropriate PHI node etc., seeding the
+    // SSAUpdater with the two values we know (the value in BB and in NewBB).
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+
+ // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
+ // NewBB instead of BB. This eliminates predecessors from BB, which requires
+ // us to simplify any PHI nodes in BB.
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredTerm->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB, true);
+ PredTerm->setSuccessor(i, NewBB);
+ }
+
+ // At this point, the IR is fully up to date and consistent. Do a quick scan
+ // over the new instructions and zap any that are constants or dead. This
+ // frequently happens because of phi translation.
+ SimplifyInstructionsInBlock(NewBB, TD);
+
+ // Threaded an edge!
+ ++NumThreads;
+ return true;
+}
+
+/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
+/// If we can duplicate the contents of BB up into PredBB do so now, this
+/// improves the odds that the branch will be on an analyzable instruction like
+/// a compare.
+bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
+ // If BB is a loop header, then duplicating this block outside the loop would
+  // cause us to transform this into an irreducible loop; don't do this.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ if (DuplicationCost > Threshold) {
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
+ << "' - Cost is too high: " << DuplicationCost << "\n");
+ return false;
+ }
+
+  // And finally, do it!  Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
+ // Okay, we decided to do this! Clone all the instructions in BB onto the end
+ // of PredBB.
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ << PredBB->getName() << "' to eliminate branch on phi. Cost: "
+ << DuplicationCost << " block is:" << *BB << "\n");
+
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
+ // We are going to have to map operands from the original BB block into the
+ // PredBB block. Evaluate PHI nodes in BB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into PredBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; BI != BB->end(); ++BI) {
+ Instruction *New = BI->clone();
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
+ }
+
+ // Check to see if the targets of the branch had PHI nodes. If so, we need to
+ // add entries to the PHI nodes for branch from PredBB now.
+ BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+  // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+    // its block to be uses of the appropriate PHI node etc., seeding the
+    // SSAUpdater with the two values we know (the value in BB and in PredBB).
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+ // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
+ // that we nuked.
+ BB->removePredecessor(PredBB, true);
+
+ // Remove the unconditional branch at the end of the PredBB block.
+ OldPredBranch->eraseFromParent();
+
+ ++NumDupes;
+ return true;
+}
+
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
new file mode 100644
index 000000000000..66add6ca01ee
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -0,0 +1,808 @@
+//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
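+// For example (schematically), LICM rewrites
+//
+//   for (i = 0; i != n; ++i) A[i] = x + y;
+//
+// so that the loop-invariant expression 'x + y' is computed once in the loop
+// preheader rather than on every iteration.
+//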
+// This pass uses alias analysis for two purposes:
+//
+// 1. Moving loop invariant loads and calls out of loops. If we can determine
+// that a load or call inside of a loop never aliases anything stored to,
+// we can hoist it or sink it like any other instruction.
+// 2. Scalar Promotion of Memory - If there is a store instruction inside of
+// the loop, we try to move the store to happen AFTER the loop instead of
+// inside of the loop. This can only happen if a few conditions are true:
+// A. The pointer stored through is loop invariant
+// B. There are no stores or loads in the loop which _may_ alias the
+// pointer. There are no calls in the loop which mod/ref the pointer.
+// If these conditions are true, we can promote the loads and stores in the
+// loop of the pointer to use a temporary alloca'd variable. We then use
+// the SSAUpdater to construct the appropriate SSA form for the value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSunk , "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
+STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
+STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
+
+namespace {
+ struct LICM : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreservedID(LoopSimplifyID);
+ }
+
+ bool doFinalization() {
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+ return false;
+ }
+
+ private:
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop.
+
+ // State that is updated as we process loops.
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop...
+ Loop *CurLoop; // The current loop we are working on...
+ AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
+
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L);
+
+ /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+ /// dominated by the specified block, and that are in the current loop) in
+ /// reverse depth first order w.r.t the DominatorTree. This allows us to
+ /// visit uses before definitions, allowing us to sink a loop body in one
+ /// pass without iteration.
+ ///
+ void SinkRegion(DomTreeNode *N);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(DomTreeNode *N);
+
+ /// inSubLoop - Little predicate that returns true if the specified basic
+ /// block is in a subloop of the current one, not the current one itself.
+ ///
+ bool inSubLoop(BasicBlock *BB) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ return LI->getLoopFor(BB) != CurLoop;
+ }
+
+ /// sink - When an instruction is found to only be used outside of the loop,
+ /// this function moves it to the exit blocks and patches up SSA form as
+ /// needed.
+ ///
+ void sink(Instruction &I);
+
+    /// hoist - When an instruction that uses only loop invariant operands is
+    /// found to be safe to hoist, this function is called to do the dirty work.
+ ///
+ void hoist(Instruction &I);
+
+ /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
+ /// is not a trapping instruction or if it is a trapping instruction and is
+ /// guaranteed to execute.
+ ///
+ bool isSafeToExecuteUnconditionally(Instruction &I);
+
+ /// pointerInvalidatedByLoop - Return true if the body of this loop may
+ /// store into the memory location pointed to by V.
+ ///
+ bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const MDNode *TBAAInfo) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
+ }
+
+ bool canSinkOrHoistInst(Instruction &I);
+ bool isNotUsedInLoop(Instruction &I);
+
+ void PromoteAliasSet(AliasSet &AS);
+ };
+}
+
+char LICM::ID = 0;
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
+
+Pass *llvm::createLICMPass() { return new LICM(); }
+
+/// Hoist expressions out of the specified loop.  Note that alias info for the
+/// inner loop is not preserved, so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ Changed = false;
+
+ // Get our Loop and Alias Analysis information...
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ CurAST = new AliasSetTracker(*AA);
+ // Collect Alias info from subloops.
+ for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
+ LoopItr != LoopItrE; ++LoopItr) {
+ Loop *InnerL = *LoopItr;
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
+
+    // What if InnerLoop was modified by other passes?
+ CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
+ }
+
+ CurLoop = L;
+
+ // Get the preheader block to move instructions into...
+ Preheader = L->getLoopPreheader();
+
+ // Loop over the body of this loop, looking for calls, invokes, and stores.
+ // Because subloops have already been incorporated into AST, we skip blocks in
+ // subloops.
+ //
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
+ CurAST->add(*BB); // Incorporate the specified basic block
+ }
+
+  // We want to visit all of the instructions in this loop... that are not part
+ // of our subloops (they have already had their invariants hoisted out of
+ // their loop, into this loop, so there is no need to process the BODIES of
+ // the subloops).
+ //
+ // Traverse the body of the loop in depth first order on the dominator tree so
+ // that we are guaranteed to see definitions before we see uses. This allows
+ // us to sink instructions in one pass, without iteration. After sinking
+ // instructions, we perform another pass to hoist them out of the loop.
+ //
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
+
+ // Now that all loop invariants have been removed from the loop, promote any
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
+  // Clear out loop state information for the next iteration.
+ CurLoop = 0;
+ Preheader = 0;
+
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
+ return Changed;
+}
+
+/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in
+/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
+/// uses before definitions, allowing us to sink a loop body in one pass without
+/// iteration.
+///
+void LICM::SinkRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // We are processing blocks in reverse dfo, so process children first.
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ SinkRegion(Children[i]);
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (inSubLoop(BB)) return;
+
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ Instruction &I = *--II;
+
+    // If the instruction is dead, we would normally try to sink it because it
+    // isn't used in the loop; instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can sink this instruction to the exit blocks
+    // of the loop.  We can do this if all of the users of the instruction are
+ // outside of the loop. In this case, it doesn't even matter if the
+ // operands of the instruction are loop invariant.
+ //
+ if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ ++II;
+ sink(I);
+ }
+ }
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void LICM::HoistRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (!inSubLoop(BB))
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ Instruction &I = *II++;
+
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
+ // Try hoisting the instruction out to the preheader. We can only do this
+ // if all of the operands of the instruction are loop invariant and if it
+ // is safe to hoist the instruction.
+ //
+ if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
+ isSafeToExecuteUnconditionally(I))
+ hoist(I);
+ }
+
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ HoistRegion(Children[i]);
+}
+
+/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
+/// instruction.
+///
+bool LICM::canSinkOrHoistInst(Instruction &I) {
+ // Loads have extra constraints we have to verify before we can hoist them.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isVolatile())
+ return false; // Don't hoist volatile loads!
+
+ // Loads from constant memory are always safe to move, even if they end up
+ // in the same alias set as something that ends up being modified.
+ if (AA->pointsToConstantMemory(LI->getOperand(0)))
+ return true;
+
+    // Don't hoist loads which have may-aliased stores in the loop.
+ uint64_t Size = 0;
+ if (LI->getType()->isSized())
+ Size = AA->getTypeStoreSize(LI->getType());
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+ LI->getMetadata(LLVMContext::MD_tbaa));
+ } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Don't sink or hoist dbg info; it's legal, but not useful.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Handle simple cases by querying alias analysis.
+ AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return true;
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ // If this call only reads from memory and there are no writes to memory
+ // in the loop, we can hoist or sink the call as appropriate.
+ bool FoundMod = false;
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I) {
+ AliasSet &AS = *I;
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ FoundMod = true;
+ break;
+ }
+ }
+ if (!FoundMod) return true;
+ }
+
+ // FIXME: This should use mod/ref information to see if we can hoist or sink
+ // the call.
+
+ return false;
+ }
+
+ // Otherwise these instructions are hoistable/sinkable
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I);
+}
+
+/// isNotUsedInLoop - Return true if the only users of this instruction are
+/// outside of the loop. If this is true, we can sink the instruction to the
+/// exit blocks of the loop.
+///
+bool LICM::isNotUsedInLoop(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // PHI node uses occur in predecessor blocks!
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I)
+ if (CurLoop->contains(PN->getIncomingBlock(i)))
+ return false;
+ } else if (CurLoop->contains(User)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// sink - When an instruction is found to only be used outside of the loop,
+/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// This method is guaranteed to remove the original instruction from its
+/// position, and may either delete it or move it to outside of the loop.
+///
+void LICM::sink(Instruction &I) {
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumSunk;
+ Changed = true;
+
+ // The case where there is only a single exit node of this loop is common
+ // enough that we handle it as a special (more efficient) case. It is more
+ // efficient to handle because there are no PHI nodes that need to be placed.
+ if (ExitBlocks.size() == 1) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
+ // Instruction is not used, just delete it.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ } else {
+ // Move the instruction to the start of the exit block, after any PHI
+ // nodes in it.
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
+ }
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
+ // The instruction is actually dead if there ARE NO exit blocks.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!DT->dominates(InstOrigBB, ExitBlock))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
+ }
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
+ }
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
+}
+
+/// hoist - When an instruction that uses only loop invariant operands is found
+/// to be safe to hoist, this function is called to do the dirty work.
+///
+void LICM::hoist(Instruction &I) {
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
+ << I << "\n");
+
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumHoisted;
+ Changed = true;
+}
+
+/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
+/// not a trapping instruction or if it is a trapping instruction and is
+/// guaranteed to execute.
+///
+bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
+ // If it is not a trapping instruction, it is always safe to hoist.
+ if (Inst.isSafeToSpeculativelyExecute())
+ return true;
+
+ // Otherwise we have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ return true;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ return false;
+
+ return true;
+}
+
+namespace {
+ class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ AliasSetTracker &AST;
+ DebugLoc DL;
+ int Alignment;
+ public:
+ LoopPromoter(Value *SP,
+ const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ SmallPtrSet<Value*, 4> &PMA,
+ SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
+ DebugLoc dl, int alignment)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+ Alignment(alignment) {}
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
+
+ virtual void doExtraRewritesBeforeFinalDeletion() const {
+      // Insert stores in the loop exit blocks.  Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setAlignment(Alignment);
+ NewSI->setDebugLoc(DL);
+ }
+ }
+
+ virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ virtual void instructionDeleted(Instruction *I) const {
+ AST.deleteValue(I);
+ }
+ };
+} // end anon namespace
+
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
+/// stores out of the loop and moving loads to before the loop. We do this by
+/// looping over the stores in the loop, looking for stores to Must pointers
+/// which are loop invariant.
+///
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
+
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
+ //
+ // for () { if (c) *P += 1; }
+ //
+ // into:
+ //
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // We start with an alignment of one and try to find instructions that allow
+ // us to prove better alignment.
+ unsigned Alignment = 1;
+
+  // Examine each pointer in the alias set, and each use of those pointers
+  // inside the loop.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
+ UI != UE; ++UI) {
+ // Ignore instructions that are outside the loop.
+ Instruction *Use = dyn_cast<Instruction>(*UI);
+ if (!Use || !CurLoop->contains(Use))
+ continue;
+
+      // If there is a non-load/store instruction in the loop, we can't promote
+ // it.
+ unsigned InstAlignment;
+ if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ InstAlignment = load->getAlignment();
+ } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
+ // Stores *of* the pointer are not interesting, only stores *to* the
+ // pointer.
+ if (Use->getOperand(1) != ASIV)
+ continue;
+ InstAlignment = store->getAlignment();
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ } else
+ return; // Not a load or store.
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isSafeToExecuteUnconditionally(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
+ }
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+  // Otherwise, this is safe to promote; let's do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
+
+ // Grab a debug location for the inserted loads/stores; given that the
+ // inserted loads/stores have little relation to the original loads/stores,
+ // this code just arbitrarily picks a location from one, since any debug
+ // location is better than none.
+ DebugLoc DL = LoopUses[0]->getDebugLoc();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ *CurAST, DL, Alignment);
+
+  // Set up the preheader to have a definition of the value.  It is the value
+  // flowing in from the preheader that uses in the loop will see.
+ LoadInst *PreheaderLoad =
+ new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ PreheaderLoad->setAlignment(Alignment);
+ PreheaderLoad->setDebugLoc(DL);
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Rewrite all the loads in the loop and remember all the definitions from
+ // stores in the loop.
+ Promoter.run(LoopUses);
+
+ // If the SSAUpdater didn't use the load in the preheader, just zap it now.
+ if (PreheaderLoad->use_empty())
+ PreheaderLoad->eraseFromParent();
+}
+
+
+/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->copyValue(From, To);
+}
+
+/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+/// set.
+void LICM::deleteAnalysisValue(Value *V, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->deleteValue(V);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
new file mode 100644
index 000000000000..f7f32981baa7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,248 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass. This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+ class LoopDeletion : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletion() : LoopPass(ID) {
+ initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop* L, LPPassManager& LPM);
+
+ bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ }
+ };
+}
+
+char LoopDeletion::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+
+Pass* llvm::createLoopDeletionPass() {
+ return new LoopDeletion();
+}
+
+/// IsLoopDead - Determines if a loop is dead.  This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::IsLoopDead(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader) {
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Make sure that all PHI entries coming from the loop are loop invariant.
+ // Because the code is in LCSSA form, any values used outside of the loop
+ // must pass through a PHI in the exit block, meaning that this check is
+ // sufficient to guarantee that no loop-variant values are used outside
+ // of the loop.
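+  // For example (names made up), a value %v defined in the loop and used
+  // outside of it appears in LCSSA form as:
+  //
+  //   exit:
+  //     %v.lcssa = phi i32 [ %v, %exiting ]
+  //
+  // so inspecting the exit block's PHI nodes covers every escaping value.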
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+
+ // Make sure all exiting blocks produce the same incoming value for the exit
+ // block. If there are different incoming values for different exiting
+ // blocks, then it is impossible to statically determine which value should
+ // be used.
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
+ return false;
+ }
+
+ if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
+ return false;
+
+ ++BI;
+ }
+
+  // Make sure that no instructions in the loop have potential side-effects.
+ // This includes instructions that could write to memory, and loads that are
+ // marked volatile. This could be made more aggressive by using aliasing
+ // information to identify readonly and readnone calls.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI) {
+ if (BI->mayHaveSideEffects())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
+/// observable behavior of the program other than finite running time. Note
+/// that we ensure this never removes a loop that might be infinite, as doing
+/// so could change the halting/non-halting nature of a program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
+bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+ // We can only remove the loop if there is a preheader that we can
+ // branch from after removing it.
+ BasicBlock* preheader = L->getLoopPreheader();
+ if (!preheader)
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
+ // We can't remove loops that contain subloops. If the subloops were dead,
+ // they would already have been removed in earlier executions of this pass.
+ if (L->begin() != L->end())
+ return false;
+
+ SmallVector<BasicBlock*, 4> exitingBlocks;
+ L->getExitingBlocks(exitingBlocks);
+
+ SmallVector<BasicBlock*, 4> exitBlocks;
+ L->getUniqueExitBlocks(exitBlocks);
+
+ // We require that the loop only have a single exit block. Otherwise, we'd
+ // be in the situation of needing to be able to solve statically which exit
+ // block will be branched to, or trying to preserve the branching logic in
+ // a loop invariant manner.
+ if (exitBlocks.size() != 1)
+ return false;
+
+ // Finally, we have to check that the loop really is dead.
+ bool Changed = false;
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ return Changed;
+
+ // Don't remove loops for which we can't solve the trip count.
+ // They could be infinite, in which case we'd be changing program behavior.
+ ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ const SCEV *S = SE.getMaxBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(S))
+ return Changed;
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues. Don't
+ // mess with this unless you have good reason and know what you're doing.
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ SE.forgetLoop(L);
+
+ // Connect the preheader directly to the exit block.
+ TerminatorInst* TI = preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+
+ // Rewrite phis in the exit block to get their inputs from
+ // the preheader instead of the exiting block.
+ BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ int j = P->getBasicBlockIndex(exitingBlock);
+ assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+ P->setIncomingBlock(j, preheader);
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i)
+ P->removeIncomingValue(exitingBlocks[i]);
+ ++BI;
+ }
+
+ // Update the dominator tree and remove the instructions and blocks that will
+ // be deleted from the reference counting scheme.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ SmallVector<DomTreeNode*, 8> ChildNodes;
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ // Move all of the block's children to be children of the preheader, which
+ // allows us to remove the domtree entry for the block.
+ ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ DE = ChildNodes.end(); DI != DE; ++DI) {
+ DT.changeImmediateDominator(*DI, DT[preheader]);
+ }
+
+ ChildNodes.clear();
+ DT.eraseNode(*LI);
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ (*LI)->dropAllReferences();
+ }
+
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove its
+ // entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ (*LI)->eraseFromParent();
+
+  // Finally, remove the blocks from LoopInfo.  This has to happen late because
+ // otherwise our loop iterators won't work.
+ LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ SmallPtrSet<BasicBlock*, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
+ E = blocks.end(); I != E; ++I)
+ loopInfo.removeBlock(*I);
+
+ // The last step is to inform the loop pass manager that we've
+ // eliminated this loop.
+ LPM.deleteLoopFromQueue(L);
+ Changed = true;
+
+ ++NumDeleted;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 000000000000..a0e41d9a9772
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,634 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form. In cases that this kicks in, it can be a significant
+// performance win.
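+//
+// For example (an illustrative sketch), a loop such as
+//
+//   for (i = 0; i != n; ++i)
+//     A[i] = 0;
+//
+// is rewritten into a single memset covering n * sizeof(A[0]) bytes.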
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+// memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+// fpowi
+// Future integer operation idioms to recognize:
+// ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set. It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
+// We should enhance this to handle negative strides through memory.
+// Alternatively (and perhaps better) we could rely on an earlier pass to force
+// forward iteration through memory, which is generally better for cache
+// behavior. Negative strides *do* happen for memset/memcpy loops.
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+ class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ const TargetData *TD;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// deleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+
+ // Before we touch this instruction, remove it from SE!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // SCEV.
+ SE.forgetValue(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ } while (!NowDeadInsts.empty());
+}
+
+/// deleteIfDeadInstruction - If the specified value is a dead instruction,
+/// delete it and any recursively used instructions.
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (isInstructionTriviallyDead(I))
+ deleteDeadInstruction(I, SE);
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+  // Disable loop idiom recognition if the function's name is a common idiom;
+  // otherwise we could end up replacing the loop in, say, an implementation of
+  // memset with a call to memset itself.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
+ return false;
+
+ // The trip count of the loop must be analyzable.
+ SE = &getAnalysis<ScalarEvolution>();
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return false;
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount)) return false;
+
+ // If this loop executes exactly one time, then it should be peeled, not
+ // optimized by this pass.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ if (BECst->getValue()->getValue() == 0)
+ return false;
+
+ // We require target data for now.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD == 0) return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << L->getHeader()->getParent()->getName()
+ << "] Loop %" << L->getHeader()->getName() << "\n");
+
+ bool MadeChange = false;
+ // Scan all the blocks in the loop that are not in subloops.
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI) {
+ // Ignore blocks in subloops.
+ if (LI.getLoopFor(*BI) != CurLoop)
+ continue;
+
+ MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ }
+ return MadeChange;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, ExitBlocks[i]))
+ return false;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopStore(SI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the store invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (SI->isVolatile()) return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+  // Reject stores that are not a whole number of bytes in size, or that are so
+  // large that they overflow an unsigned.
+ uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+
+ if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+    // TODO: Could also handle negative stride here someday; that will require
+    // the validity check in mayLoopAccessLocation to be updated, though.
+ // Enable this to print exact negative strides.
+ if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
+ dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
+ dbgs() << "BB: " << *SI->getParent();
+ }
+
+ return false;
+ }
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
+
+ // If the stored value is a strided load in the same loop with the same stride
+  // then this may be transformable into a memcpy.  This kicks in for stuff like
+ // for (i) A[i] = B[i];
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+ if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ return true;
+ }
+ //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+ return false;
+}
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ // If we're not allowed to hack on memset, we fail.
+ if (!TLI->has(LibFunc::memset))
+ return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+  // TODO: Could also handle negative stride here someday; that will require
+  // the validity check in mayLoopAccessLocation to be updated, though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+ Loop *L, const SCEV *BECount,
+ unsigned StoreSize, AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
+ // Get the location that may be stored across the loop. Since the access is
+ // strided positively through memory, we say that the modified location starts
+ // at the pointer and has infinite size.
+ uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access size
+ // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. Store to &A[i] of 100 will always return may alias
+  // with store of &A[100]; we need StoreLoc to be "A" with size of 100,
+ // which will then no-alias a store to &A[100].
+ AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI)
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore &&
+ (AA.getModRefInfo(I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
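+///
+/// For example (illustrative), a strided store of the i32 constant 0x01020304
+/// cannot become a plain memset (its bytes differ), but it can become a
+/// memset_pattern16 whose pattern is that 4-byte constant replicated four
+/// times to fill 16 bytes.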
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (TD.isBigEndian())
+ return 0;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+  // TODO: If C is larger than 16 bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16) return 0;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16) return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16/Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
+
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+    // Otherwise, this isn't an idiom we can transform.  For example, we can't
+    // do anything with a 3-byte store.
+ return false;
+ }
+
+  // The trip count of the loop and the base pointer of the addrec SCEV are
+  // guaranteed to be loop invariant, which means that they should dominate the
+  // header.  This allows us to insert code for them in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, "loop-idiom");
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for any overlap by generating the
+ // base pointer and checking the region.
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());
+
+
+ if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(BasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything looks good, insert the memset.
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ CallInst *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+    // a constant array of 16 bytes.  Plop the value into a mergeable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
+
+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ NewCall->setDebugLoc(TheStore->getDebugLoc());
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ deleteDeadInstruction(TheStore, *SE);
+ ++NumMemSet;
+ return true;
+}
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount) {
+ // If we're not allowed to form memcpy, we fail.
+ if (!TLI->has(LibFunc::memcpy))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+
+  // The trip count of the loop and the base pointer of the addrec SCEV are
+  // guaranteed to be loop invariant, which means that they should dominate the
+  // header.  This allows us to insert code for them in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, "loop-idiom");
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write the memory region we're storing to. This includes the load that
+ // feeds the stores. Check for an alias by generating the base address and
+ // checking everything.
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
+ return false;
+ }
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ Value *LoadBasePtr =
+ Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(LoadBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything is safe, we can transform this!
+
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ CallInst *NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
+ NewCall->setDebugLoc(SI->getDebugLoc());
+
+ DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+
+
+  // Okay, the memcpy has been formed.  Zap the original store and anything that
+ // feeds into it.
+ deleteDeadInstruction(SI, *SE);
+ ++NumMemCpy;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 000000000000..af25c5c1a661
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,170 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
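+//
+// For example (an illustrative sketch), an instruction in the loop body such
+// as
+//
+//   %a = add i32 %x, 0
+//
+// is replaced by %x at all of its uses and then deleted once it becomes
+// trivially dead.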
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+ class LoopInstSimplify : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplify() : LoopPass(ID) {
+ initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop*, LPPassManager&);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved("scalar-evolution");
+ }
+ };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplify();
+}
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+ // The bit we are stealing from the pointer represents whether this basic
+ // block is the header of a subloop, in which case we only process its phis.
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ SmallVector<WorklistItem, 16> VisitStack;
+ SmallPtrSet<BasicBlock*, 32> Visited;
+
+ bool Changed = false;
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ VisitStack.clear();
+ Visited.clear();
+
+ VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+ while (!VisitStack.empty()) {
+ WorklistItem Item = VisitStack.pop_back_val();
+ BasicBlock *BB = Item.getPointer();
+ bool IsSubloopHeader = Item.getInt();
+
+ // Simplify instructions in the current basic block.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = BI++;
+
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't bother simplifying unused instructions.
+ if (!I->use_empty()) {
+ Value *V = SimplifyInstruction(I, TD, DT);
+ if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+
+ I->replaceAllUsesWith(V);
+ LocalChanged = true;
+ ++NumSimplified;
+ }
+ }
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ if (IsSubloopHeader && !isa<PHINode>(I))
+ break;
+ }
+
+ // Add all successors to the worklist, except for loop exit blocks and the
+ // bodies of subloops. We visit the headers of loops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
+ // edges leading back to the original loop.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ BasicBlock *SuccBB = *SI;
+ if (!Visited.insert(SuccBB))
+ continue;
+
+ const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB
+ && L->contains(SuccLoop)) {
+ VisitStack.push_back(WorklistItem(SuccBB, true));
+
+ SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+ for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+ BasicBlock *ExitBB = SubLoopExitBlocks[i];
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ VisitStack.push_back(WorklistItem(ExitBB, false));
+ }
+
+ continue;
+ }
+
+ bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+ ExitBlocks.end(), SuccBB);
+ if (IsExitBlock)
+ continue;
+
+ VisitStack.push_back(WorklistItem(SuccBB, false));
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
new file mode 100644
index 000000000000..9fd0958fd4c3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,353 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop Rotation Pass.
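+//
+// Rotation turns a loop that tests its exit condition in the header (a
+// "while"-style loop) into one that tests it in the latch (a "do-while"-style
+// loop), with a copy of the test guarding entry from the preheader.  As a
+// rough illustrative sketch:
+//
+//   while (cond) { body; }
+//
+// becomes
+//
+//   if (cond)                     // cloned test, now in the old preheader
+//     do { body; } while (cond);  // test moved to the bottom of the loop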
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+ class LoopRotate : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate() : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ }
+
+ // LCSSA form makes instruction renaming easier.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool rotateLoop(Loop *L);
+
+ private:
+ LoopInfo *LI;
+ };
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// the loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LI = &getAnalysis<LoopInfo>();
+
+ // One loop can be rotated multiple times.
+ bool MadeChange = false;
+ while (rotateLoop(L))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instructions that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+
+    // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+}
+
+/// Rotate loop L. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
+ // If the loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ BasicBlock *OrigHeader = L->getHeader();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (BI == 0 || BI->isUnconditional())
+ return false;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return false;
+
+ // Updating PHInodes in loops with multiple exits adds complexity.
+ // Keep it simple, and restrict loop rotation to loops with one exit only.
+  // In the future, lift this restriction and add support for multiple exits
+  // if required.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() > 1)
+ return false;
+
+ // Check size of original header and reject loop if it is very big.
+ {
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader);
+ if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ return false;
+ }
+
+ // Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+
+ // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it; just give up.
+ if (OrigPreheader == 0 || OrigLatch == 0)
+ return false;
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated.
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+  // Find the new loop header.  NewHeader is the header's one and only
+  // successor that is inside the loop; the header's other successor is outside
+  // the loop.  Otherwise the loop is not suitable for rotation.
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ ValueToValueMapTy ValueMap;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifiable.  This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ delete C;
+ ValueMap[Inst] = V;
+ } else {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[Inst] = C;
+ }
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+
+ // If there were any uses of instructions in the duplicated block outside the
+  // loop, update them, inserting PHI nodes as required.
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+ != NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Update DominatorTree to reflect the CFG change we just made. Then split
+ // edges as necessary to preserve LoopSimplify form.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Since OrigPreheader now has the conditional branch to Exit block, it is
+ // the dominator of Exit.
+ DT->changeImmediateDominator(Exit, OrigPreheader);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore. Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only one
+ // predecessor.
+ BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+ ExitSplit->moveBefore(Exit);
+ } else {
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+ }
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, this);
+
+ ++NumRotated;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
new file mode 100644
index 000000000000..509d0264f10b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -0,0 +1,3911 @@
+//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs strength reduction on array references inside loops that
+// have the loop induction variable as one or more of their components.  It
+// rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
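+//
+// As a rough illustrative sketch (not taken from any particular test), a loop
+// like
+//
+//   for (i = 0; i != n; ++i)
+//     sum += A[i];            // address recomputed as A + i*sizeof(A[0])
+//
+// may be rewritten to step a pointer directly:
+//
+//   for (p = A; p != A + n; ++p)
+//     sum += *p;              // one pointer increment per iteration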
+//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+/// RegSortData - This class holds data which is used to order reuse candidates.
+class RegSortData {
+public:
+ /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// a particular register.
+ SmallBitVector UsedByIndices;
+
+ RegSortData() {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void RegSortData::print(raw_ostream &OS) const {
+ OS << "[NumUses=" << UsedByIndices.count() << ']';
+}
+
+void RegSortData::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// RegUseTracker - Map register candidates to information about how they are
+/// used.
+class RegUseTracker {
+ typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
+
+ RegUsesTy RegUsesMap;
+ SmallVector<const SCEV *, 16> RegSequence;
+
+public:
+ void CountRegister(const SCEV *Reg, size_t LUIdx);
+ void DropRegister(const SCEV *Reg, size_t LUIdx);
+ void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
+
+ bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
+
+ const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
+
+ void clear();
+
+ typedef SmallVectorImpl<const SCEV *>::iterator iterator;
+ typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+ iterator begin() { return RegSequence.begin(); }
+ iterator end() { return RegSequence.end(); }
+ const_iterator begin() const { return RegSequence.begin(); }
+ const_iterator end() const { return RegSequence.end(); }
+};
+
+}
+
+void
+RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+ std::pair<RegUsesTy::iterator, bool> Pair =
+ RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
+ RegSortData &RSD = Pair.first->second;
+ if (Pair.second)
+ RegSequence.push_back(Reg);
+ RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
+ RSD.UsedByIndices.set(LUIdx);
+}
+
+void
+RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+ RegUsesTy::iterator It = RegUsesMap.find(Reg);
+ assert(It != RegUsesMap.end());
+ RegSortData &RSD = It->second;
+ assert(RSD.UsedByIndices.size() > LUIdx);
+ RSD.UsedByIndices.reset(LUIdx);
+}
+
+void
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+ assert(LUIdx <= LastLUIdx);
+
+ // Update RegUses. The data structure is not optimized for this purpose;
+ // we must iterate through it and update each of the bit vectors.
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
+ I != E; ++I) {
+ SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ if (LUIdx < UsedByIndices.size())
+ UsedByIndices[LUIdx] =
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+ }
+}
+
+bool
+RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ int i = UsedByIndices.find_first();
+ if (i == -1) return false;
+ if ((size_t)i != LUIdx) return true;
+ return UsedByIndices.find_next(i) != -1;
+}
+
+const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ assert(I != RegUsesMap.end() && "Unknown register!");
+ return I->second.UsedByIndices;
+}
+
+void RegUseTracker::clear() {
+ RegUsesMap.clear();
+ RegSequence.clear();
+}
+
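+// Illustrative sketch of the tracker's semantics (RegUses, R, and the indices
+// below are hypothetical):
+//   RegUses.CountRegister(R, 0);             // use 0 references R
+//   RegUses.CountRegister(R, 2);             // use 2 references R
+//   RegUses.getUsedByIndices(R);             // bit vector with bits 0 and 2 set
+//   RegUses.isRegUsedByUsesOtherThan(R, 0);  // true; use 2 also references R
+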
+namespace {
+
+/// Formula - This class holds information that describes a formula for
+/// computing a value that satisfies a use. It may include broken-out
+/// immediates and scaled registers.
+struct Formula {
+ /// AM - This is used to represent complex addressing, as well as other kinds
+ /// of interesting uses.
+ TargetLowering::AddrMode AM;
+
+ /// BaseRegs - The list of "base" registers for this use. When this is
+ /// non-empty, AM.HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 2> BaseRegs;
+
+ /// ScaledReg - The 'scaled' register for this use. This should be non-null
+ /// when AM.Scale is not zero.
+ const SCEV *ScaledReg;
+
+  /// UnfoldedOffset - An additional constant offset which is added near the
+ /// use. This requires a temporary register, but the offset itself can
+ /// live in an add immediate field rather than a register.
+ int64_t UnfoldedOffset;
+
+ Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+
+ void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
+
+ unsigned getNumRegs() const;
+ const Type *getType() const;
+
+ void DeleteBaseReg(const SCEV *&S);
+
+ bool referencesReg(const SCEV *S) const;
+ bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// DoInitialMatch - Recursion helper for InitialMatch.
+static void DoInitialMatch(const SCEV *S, Loop *L,
+ SmallVectorImpl<const SCEV *> &Good,
+ SmallVectorImpl<const SCEV *> &Bad,
+ ScalarEvolution &SE) {
+ // Collect expressions which properly dominate the loop header.
+ if (SE.properlyDominates(S, L->getHeader())) {
+ Good.push_back(S);
+ return;
+ }
+
+ // Look at add operands.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ DoInitialMatch(*I, L, Good, Bad, SE);
+ return;
+ }
+
+ // Look at addrec operands.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ if (!AR->getStart()->isZero()) {
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
+ DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ // FIXME: AR->getNoWrapFlags()
+ AR->getLoop(), SCEV::FlagAnyWrap),
+ L, Good, Bad, SE);
+ return;
+ }
+
+ // Handle a multiplication by -1 (negation) if it didn't fold.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
+ if (Mul->getOperand(0)->isAllOnesValue()) {
+ SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *NewMul = SE.getMulExpr(Ops);
+
+ SmallVector<const SCEV *, 4> MyGood;
+ SmallVector<const SCEV *, 4> MyBad;
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
+ const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
+ SE.getEffectiveSCEVType(NewMul->getType())));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
+ E = MyGood.end(); I != E; ++I)
+ Good.push_back(SE.getMulExpr(NegOne, *I));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
+ E = MyBad.end(); I != E; ++I)
+ Bad.push_back(SE.getMulExpr(NegOne, *I));
+ return;
+ }
+
+ // Ok, we can't do anything interesting. Just stuff the whole thing into a
+ // register and hope for the best.
+ Bad.push_back(S);
+}
+
+/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
+/// attempting to keep all loop-invariant and loop-computable values in a
+/// single base register.
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
+ SmallVector<const SCEV *, 4> Good;
+ SmallVector<const SCEV *, 4> Bad;
+ DoInitialMatch(S, L, Good, Bad, SE);
+ if (!Good.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Good);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ AM.HasBaseReg = true;
+ }
+ if (!Bad.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Bad);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ AM.HasBaseReg = true;
+ }
+}
+
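+// For example (illustrative, with a hypothetical loop %loop and base pointer
+// %base): matching S = {%base,+,4}<%loop> places the loop-invariant start
+// %base into Good and the remaining {0,+,4}<%loop> into Bad, so the initial
+// formula gets two base registers, printed by Formula::print below as
+//   reg(%base) + reg({0,+,4}<%loop>)
+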
+/// getNumRegs - Return the total number of register operands used by this
+/// formula. This does not include register uses implied by non-constant
+/// addrec strides.
+unsigned Formula::getNumRegs() const {
+ return !!ScaledReg + BaseRegs.size();
+}
+
+/// getType - Return the type of this formula, if it has one, or null
+/// otherwise. This type is meaningless except for the bit size.
+const Type *Formula::getType() const {
+ return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+ ScaledReg ? ScaledReg->getType() :
+ AM.BaseGV ? AM.BaseGV->getType() :
+ 0;
+}
+
+/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
+void Formula::DeleteBaseReg(const SCEV *&S) {
+ if (&S != &BaseRegs.back())
+ std::swap(S, BaseRegs.back());
+ BaseRegs.pop_back();
+}
+
+/// referencesReg - Test if this formula references the given register.
+bool Formula::referencesReg(const SCEV *S) const {
+ return S == ScaledReg ||
+ std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
+}
+
+/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
+/// which are used by uses other than the use with the given index.
+bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const {
+ if (ScaledReg)
+ if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
+ return true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I)
+ if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ return true;
+ return false;
+}
+
+void Formula::print(raw_ostream &OS) const {
+ bool First = true;
+ if (AM.BaseGV) {
+ if (!First) OS << " + "; else First = false;
+ WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ }
+ if (AM.BaseOffs != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.BaseOffs;
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I) {
+ if (!First) OS << " + "; else First = false;
+ OS << "reg(" << **I << ')';
+ }
+ if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: HasBaseReg**";
+ } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: !HasBaseReg**";
+ }
+ if (AM.Scale != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.Scale << "*reg(";
+ if (ScaledReg)
+ OS << *ScaledReg;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ }
+ if (UnfoldedOffset != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << "imm(" << UnfoldedOffset << ')';
+ }
+}
+
+void Formula::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
+}
+
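+// Note on the three helpers above: they ask ScalarEvolution to sign-extend the
+// expression to a wider type and then check whether the result still has the
+// same form (addrec, add, or mul). That form is only preserved when
+// ScalarEvolution can push the extension into the operands, i.e. when it can
+// show that the narrower operation doesn't lose value to overflow.
+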
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to Y, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
+ // Handle the trivial case, which works for any SCEV type.
+ if (LHS == RHS)
+ return SE.getConstant(LHS->getType(), 1);
+
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
+
+ // Check for a division of a constant by a constant.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
+ if (!RC)
+ return 0;
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
+ return 0;
+ return SE.getConstant(LA.sdiv(RA));
+ }
+
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+      // FlagNW is independent of the start value and step direction, and is
+      // preserved with smaller-magnitude steps.
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
+ }
+ return 0;
+ }
+
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
+ }
+ return SE.getAddExpr(Ops);
+ }
+ return 0;
+ }
+
+ // Check for a multiply operand that we can pull RHS out of.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
+ SmallVector<const SCEV *, 4> Ops;
+ bool Found = false;
+ for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
+ I != E; ++I) {
+ const SCEV *S = *I;
+ if (!Found)
+ if (const SCEV *Q = getExactSDiv(S, RHS, SE,
+ IgnoreSignificantBits)) {
+ S = Q;
+ Found = true;
+ }
+ Ops.push_back(S);
+ }
+ return Found ? SE.getMulExpr(Ops) : 0;
+ }
+ return 0;
+ }
+
+ // Otherwise we don't know.
+ return 0;
+}
+
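+// A few illustrative cases (i32 operands, hypothetical %x): dividing
+// {0,+,4}<%loop> by 4 can yield {0,+,1}<%loop> when the addrec is known not to
+// overflow; dividing (8 + %x) by 4 yields null, since %x is not known to be
+// divisible by 4; and with IgnoreSignificantBits, (4 * %x) /s 4 simplifies to
+// %x even though the multiplication might overflow.
+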
+/// ExtractImmediate - If S involves the addition of a constant integer value,
+/// return that integer value, and mutate S to point to a new SCEV with that
+/// value excluded.
+static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ S = SE.getConstant(C->getType(), 0);
+ return C->getValue()->getSExtValue();
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ return Result;
+ }
+ return 0;
+}
+
+/// ExtractSymbol - If S involves the addition of a GlobalValue address,
+/// return that symbol, and mutate S to point to a new SCEV with that
+/// value excluded.
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
+ S = SE.getConstant(GV->getType(), 0);
+ return GV;
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ return Result;
+ }
+ return 0;
+}
+
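+// For example (illustrative, with a hypothetical global @gv): applying
+// ExtractImmediate to S = (8 + @gv) returns 8 and leaves S = @gv; applying
+// ExtractSymbol to that returns @gv and leaves S = 0. This is how the
+// SCEV-based isAlwaysFoldable below peels the foldable parts off of an
+// expression.
+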
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getArgOperand(0) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
+
+/// getAccessType - Return the type of the memory being accessed.
+static const Type *getAccessType(const Instruction *Inst) {
+ const Type *AccessTy = Inst->getType();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ AccessTy = SI->getOperand(0)->getType();
+ else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ AccessTy = II->getArgOperand(0)->getType();
+ break;
+ }
+ }
+
+ // All pointers have the same requirements, so canonicalize them to an
+ // arbitrary pointer type to minimize variation.
+ if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
+
+ return AccessTy;
+}
+
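+// For example (illustrative): for "store float %v, float* %p" the access type
+// is float, and for "load i32* %p" it is i32; if the accessed type is itself a
+// pointer, it is canonicalized to i1* in the same address space so that all
+// pointer-typed accesses compare equal when uses are grouped.
+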
+/// DeleteTriviallyDeadInstructions - If any of the instructions in the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+static bool
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+ bool Changed = false;
+
+ while (!DeadInsts.empty()) {
+ Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+namespace {
+
+/// Cost - This class is used to measure and compare candidate formulae.
+class Cost {
+  /// TODO: Some of these could be merged. Also, a lexicographic ordering
+ /// isn't always optimal.
+ unsigned NumRegs;
+ unsigned AddRecCost;
+ unsigned NumIVMuls;
+ unsigned NumBaseAdds;
+ unsigned ImmCost;
+ unsigned SetupCost;
+
+public:
+ Cost()
+ : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
+ SetupCost(0) {}
+
+ bool operator<(const Cost &Other) const;
+
+ void Loose();
+
+ void RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+private:
+ void RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+ void RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+};
+
+}
+
+/// RateRegister - Tally up interesting quantities from the given register.
+void Cost::RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ if (AR->getLoop() == L)
+      AddRecCost += 1; // TODO: This should be a function of the stride.
+
+ // If this is an addrec for a loop that's already been visited by LSR,
+ // don't second-guess its addrec phi nodes. LSR isn't currently smart
+ // enough to reason about more than one loop at a time. Consider these
+ // registers free and leave them alone.
+ else if (L->contains(AR->getLoop()) ||
+ (!AR->getLoop()->contains(L) &&
+ DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return;
+
+ // If this isn't one of the addrecs that the loop already has, it
+ // would require a costly new phi and add. TODO: This isn't
+ // precisely modeled right now.
+ ++NumBaseAdds;
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getStart(), Regs, L, SE, DT);
+ }
+
+ // Add the step value register, if it needs one.
+ // TODO: The non-affine case isn't precisely modeled here.
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
+      if (!Regs.count(AR->getOperand(1)))
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ }
+ ++NumRegs;
+
+ // Rough heuristic; favor registers which don't require extra setup
+ // instructions in the preheader.
+ if (!isa<SCEVUnknown>(Reg) &&
+ !isa<SCEVConstant>(Reg) &&
+ !(isa<SCEVAddRecExpr>(Reg) &&
+ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
+ isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
+ ++SetupCost;
+
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
+}
+
+/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
+/// before, rate it.
+void Cost::RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (Regs.insert(Reg))
+ RateRegister(Reg, Regs, L, SE, DT);
+}
+
+void Cost::RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Tally up the registers.
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (VisitedRegs.count(ScaledReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (VisitedRegs.count(BaseReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ }
+
+ // Determine how many (unfolded) adds we'll need inside the loop.
+ size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ if (NumBaseParts > 1)
+ NumBaseAdds += NumBaseParts - 1;
+
+ // Tally up the non-zero immediates.
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
+ if (F.AM.BaseGV)
+ ImmCost += 64; // Handle symbolic values conservatively.
+ // TODO: This should probably be the pointer size.
+ else if (Offset != 0)
+ ImmCost += APInt(64, Offset, true).getMinSignedBits();
+ }
+}
+
+/// Loose - Set this cost to a losing value.
+void Cost::Loose() {
+ NumRegs = ~0u;
+ AddRecCost = ~0u;
+ NumIVMuls = ~0u;
+ NumBaseAdds = ~0u;
+ ImmCost = ~0u;
+ SetupCost = ~0u;
+}
+
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+ if (NumRegs != Other.NumRegs)
+ return NumRegs < Other.NumRegs;
+ if (AddRecCost != Other.AddRecCost)
+ return AddRecCost < Other.AddRecCost;
+ if (NumIVMuls != Other.NumIVMuls)
+ return NumIVMuls < Other.NumIVMuls;
+ if (NumBaseAdds != Other.NumBaseAdds)
+ return NumBaseAdds < Other.NumBaseAdds;
+ if (ImmCost != Other.ImmCost)
+ return ImmCost < Other.ImmCost;
+ if (SetupCost != Other.SetupCost)
+ return SetupCost < Other.SetupCost;
+ return false;
+}
+
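+// For illustration, the comparison above is purely lexicographic: a solution
+// rated { NumRegs=2, ImmCost=10 } always beats one rated
+// { NumRegs=3, ImmCost=0 }, because register pressure is compared before any
+// of the other components are consulted.
+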
+void Cost::print(raw_ostream &OS) const {
+ OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+ if (AddRecCost != 0)
+ OS << ", with addrec cost " << AddRecCost;
+ if (NumIVMuls != 0)
+ OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+ if (NumBaseAdds != 0)
+ OS << ", plus " << NumBaseAdds << " base add"
+ << (NumBaseAdds == 1 ? "" : "s");
+ if (ImmCost != 0)
+ OS << ", plus " << ImmCost << " imm cost";
+ if (SetupCost != 0)
+ OS << ", plus " << SetupCost << " setup cost";
+}
+
+void Cost::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// LSRFixup - An operand value in an instruction which is to be replaced
+/// with some equivalent, possibly strength-reduced, replacement.
+struct LSRFixup {
+ /// UserInst - The instruction which will be updated.
+ Instruction *UserInst;
+
+ /// OperandValToReplace - The operand of the instruction which will
+ /// be replaced. The operand may be used more than once; every instance
+ /// will be replaced.
+ Value *OperandValToReplace;
+
+ /// PostIncLoops - If this user is to use the post-incremented value of an
+  /// induction variable, this set is non-empty and holds the loops
+  /// associated with the induction variable.
+ PostIncLoopSet PostIncLoops;
+
+ /// LUIdx - The index of the LSRUse describing the expression which
+ /// this fixup needs, minus an offset (below).
+ size_t LUIdx;
+
+ /// Offset - A constant offset to be added to the LSRUse expression.
+ /// This allows multiple fixups to share the same LSRUse with different
+ /// offsets, for example in an unrolled loop.
+ int64_t Offset;
+
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
+ LSRFixup();
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+LSRFixup::LSRFixup()
+ : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+ // PHI nodes use their value in their incoming blocks.
+ if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == OperandValToReplace &&
+ L->contains(PN->getIncomingBlock(i)))
+ return false;
+ return true;
+ }
+
+ return !L->contains(UserInst);
+}
+
+void LSRFixup::print(raw_ostream &OS) const {
+ OS << "UserInst=";
+ // Store is common and interesting enough to be worth special-casing.
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
+ OS << "store ";
+ WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ } else if (UserInst->getType()->isVoidTy())
+ OS << UserInst->getOpcodeName();
+ else
+ WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+
+ OS << ", OperandValToReplace=";
+ WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+
+ for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+ E = PostIncLoops.end(); I != E; ++I) {
+ OS << ", PostIncLoop=";
+ WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
+ }
+
+ if (LUIdx != ~size_t(0))
+ OS << ", LUIdx=" << LUIdx;
+
+ if (Offset != 0)
+ OS << ", Offset=" << Offset;
+}
+
+void LSRFixup::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+struct UniquifierDenseMapInfo {
+ static SmallVector<const SCEV *, 2> getEmptyKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-1));
+ return V;
+ }
+
+ static SmallVector<const SCEV *, 2> getTombstoneKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-2));
+ return V;
+ }
+
+ static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ unsigned Result = 0;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+ E = V.end(); I != E; ++I)
+ Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+ return Result;
+ }
+
+ static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
+ const SmallVector<const SCEV *, 2> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+ DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+ /// KindType - An enum for a kind of use, indicating what types of
+ /// scaled and immediate operands it might support.
+ enum KindType {
+ Basic, ///< A normal use, with no folding.
+ Special, ///< A special case of basic, allowing -1 scales.
+ Address, ///< An address use; folding according to TargetLowering
+ ICmpZero ///< An equality icmp with both operands folded into one.
+ // TODO: Add a generic icmp too?
+ };
+
+ KindType Kind;
+ const Type *AccessTy;
+
+ SmallVector<int64_t, 8> Offsets;
+ int64_t MinOffset;
+ int64_t MaxOffset;
+
+ /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+ /// LSRUse are outside of the loop, in which case some special-case heuristics
+ /// may be used.
+ bool AllFixupsOutsideLoop;
+
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ const Type *WidestFixupType;
+
+ /// Formulae - A list of ways to build a value that can satisfy this user.
+ /// After the list is populated, one of these is selected heuristically and
+ /// used to formulate a replacement for OperandValToReplace in UserInst.
+ SmallVector<Formula, 12> Formulae;
+
+ /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ SmallPtrSet<const SCEV *, 4> Regs;
+
+ LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ MinOffset(INT64_MAX),
+ MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
+
+ bool HasFormulaWithSameRegs(const Formula &F) const;
+ bool InsertFormula(const Formula &F);
+ void DeleteFormula(Formula &F);
+  void RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// HasFormulaWithSameRegs - Test whether this use has a formula which has the
+/// same registers as the given formula.
+bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+ return Uniquifier.count(Key);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(const Formula &F) {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ if (!Uniquifier.insert(Key).second)
+ return false;
+
+ // Using a register to hold the value of 0 is not profitable.
+ assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+ "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+ for (SmallVectorImpl<const SCEV *>::const_iterator I =
+ F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+ assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
+
+ // Add the formula to the list.
+ Formulae.push_back(F);
+
+ // Record registers now being used by this use.
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+
+ return true;
+}
+
+/// DeleteFormula - Remove the given formula from this use's list.
+void LSRUse::DeleteFormula(Formula &F) {
+ if (&F != &Formulae.back())
+ std::swap(F, Formulae.back());
+ Formulae.pop_back();
+ assert(!Formulae.empty() && "LSRUse has no formulae left!");
+}
+
+/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ Regs.clear();
+ for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
+ E = Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ }
+
+ // Update the RegTracker.
+ for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
+ E = OldRegs.end(); I != E; ++I)
+ if (!Regs.count(*I))
+ RegUses.DropRegister(*I, LUIdx);
+}
+
+void LSRUse::print(raw_ostream &OS) const {
+ OS << "LSR Use: Kind=";
+ switch (Kind) {
+ case Basic: OS << "Basic"; break;
+ case Special: OS << "Special"; break;
+ case ICmpZero: OS << "ICmpZero"; break;
+ case Address:
+ OS << "Address of ";
+ if (AccessTy->isPointerTy())
+ OS << "pointer"; // the full pointer type could be really verbose
+ else
+ OS << *AccessTy;
+ }
+
+ OS << ", Offsets={";
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ OS << *I;
+ if (llvm::next(I) != E)
+ OS << ',';
+ }
+ OS << '}';
+
+ if (AllFixupsOutsideLoop)
+ OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
+}
+
+void LSRUse::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
+/// be completely folded into the user instruction at isel time. This includes
+/// address-mode folding and special icmp tricks.
+static bool isLegalUse(const TargetLowering::AddrMode &AM,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ switch (Kind) {
+ case LSRUse::Address:
+ // If we have low-level target information, ask the target if it can
+ // completely fold this address.
+ if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+
+ // Otherwise, just guess that reg+reg addressing is legal.
+ return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+
+ case LSRUse::ICmpZero:
+ // There's not even a target hook for querying whether it would be legal to
+ // fold a GV into an ICmp.
+ if (AM.BaseGV)
+ return false;
+
+ // ICmp only has two operands; don't allow more than two non-trivial parts.
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ return false;
+
+ // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
+ // putting the scaled register in the other operand of the icmp.
+ if (AM.Scale != 0 && AM.Scale != -1)
+ return false;
+
+ // If we have low-level target information, ask the target if it can fold an
+ // integer immediate on an icmp.
+ if (AM.BaseOffs != 0) {
+ if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ return false;
+ }
+
+ return true;
+
+ case LSRUse::Basic:
+ // Only handle single-register values.
+ return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+
+ case LSRUse::Special:
+ // Only handle -1 scales, or no scale.
+ return AM.Scale == 0 || AM.Scale == -1;
+ }
+
+ return false;
+}
+
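+// As a rough illustration for the Address kind on an x86-like target,
+// "reg + 4*reg + imm" is typically foldable while "reg + 3*reg" is not, so a
+// formula with AM.Scale == 3 would normally be rejected by the target hook;
+// without target info, only the conservative reg+reg guess above is accepted.
+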
+static bool isLegalUse(TargetLowering::AddrMode AM,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ (MinOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
+ if (isLegalUse(AM, Kind, AccessTy, TLI)) {
+ AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
+ (MaxOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+ }
+ return false;
+}
+
+static bool isAlwaysFoldable(int64_t BaseOffs,
+ GlobalValue *BaseGV,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+  // Conservatively, create an address with an immediate, a base, and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ // Canonicalize a scale of 1 to a base register if the formula doesn't
+ // already have a base register.
+ if (!AM.HasBaseReg && AM.Scale == 1) {
+ AM.Scale = 0;
+ AM.HasBaseReg = true;
+ }
+
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+}
+
+static bool isAlwaysFoldable(const SCEV *S,
+ int64_t MinOffset, int64_t MaxOffset,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI,
+ ScalarEvolution &SE) {
+ // Fast-path: zero is always foldable.
+ if (S->isZero()) return true;
+
+  // Conservatively, create an address with an immediate, a base, and a scale.
+ int64_t BaseOffs = ExtractImmediate(S, SE);
+ GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+ // If there's anything else involved, it's not foldable.
+ if (!S->isZero()) return false;
+
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+  // Conservatively, create an address with an immediate, a base, and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+}
+
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+ IVUsers &IU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+ LoopInfo &LI;
+ const TargetLowering *const TLI;
+ Loop *const L;
+ bool Changed;
+
+  /// IVIncInsertPos - This is the insert position at which the current loop's
+ /// induction variable increment should be placed. In simple loops, this is
+ /// the latch block's terminator. But in more complicated cases, this is a
+ /// position which will dominate all the in-loop post-increment users.
+ Instruction *IVIncInsertPos;
+
+ /// Factors - Interesting factors between use strides.
+ SmallSetVector<int64_t, 8> Factors;
+
+ /// Types - Interesting use types, to facilitate truncation reuse.
+ SmallSetVector<const Type *, 4> Types;
+
+ /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups;
+
+ /// Uses - The list of interesting uses.
+ SmallVector<LSRUse, 16> Uses;
+
+ /// RegUses - Track which uses use which register candidates.
+ RegUseTracker RegUses;
+
+ void OptimizeShadowIV();
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
+ ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
+ void OptimizeLoopTermCond();
+
+ void CollectInterestingTypesAndFactors();
+ void CollectFixupsAndInitialFormulae();
+
+ LSRFixup &getNewFixup() {
+ Fixups.push_back(LSRFixup());
+ return Fixups.back();
+ }
+
+ // Support for sharing of LSRUses between LSRFixups.
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
+ UseMapTy UseMap;
+
+ bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy);
+
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ const Type *AccessTy);
+
+ void DeleteUse(LSRUse &LU, size_t LUIdx);
+
+ LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
+
+public:
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+ void CollectLoopInvariantFixupsAndFormulae();
+
+ void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+ unsigned Depth = 0);
+ void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateCrossUseConstantOffsets();
+ void GenerateAllReuseFormulae();
+
+ void FilterOutUndesirableDedicatedRegisters();
+
+ size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
+ void NarrowSearchSpaceUsingHeuristics();
+
+ void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const;
+ void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+ BasicBlock::iterator
+ HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs) const;
+ BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const;
+
+ Value *Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P);
+
+ LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+
+ bool getChanged() const { return Changed; }
+
+ void print_factors_and_types(raw_ostream &OS) const;
+ void print_fixups(raw_ostream &OS) const;
+ void print_uses(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// OptimizeShadowIV - If IV is used in an int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+ UI != E; /* empty */) {
+ IVUsers::const_iterator CandidateUI = UI;
+ ++UI;
+ Instruction *ShadowUse = CandidateUI->getUser();
+ const Type *DestTy = NULL;
+
+    /* If shadow use is an int->float cast then insert a second IV
+ to eliminate this cast.
+
+ for (unsigned i = 0; i < n; ++i)
+ foo((double)i);
+
+ is transformed into
+
+ double d = 0.0;
+ for (unsigned i = 0; i < n; ++i, ++d)
+ foo(d);
+ */
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ DestTy = UCast->getDestTy();
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ DestTy = SCast->getDestTy();
+ if (!DestTy) continue;
+
+ if (TLI) {
+      // If the target does not support DestTy natively then do not apply
+      // this transformation.
+ EVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
+ }
+
+ PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+ if (!PH) continue;
+ if (PH->getNumIncomingValues() != 2) continue;
+
+ const Type *SrcTy = PH->getType();
+ int Mantissa = DestTy->getFPMantissaWidth();
+ if (Mantissa == -1) continue;
+ if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
+ continue;
+
+ unsigned Entry, Latch;
+ if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+ Entry = 0;
+ Latch = 1;
+ } else {
+ Entry = 1;
+ Latch = 0;
+ }
+
+ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+ if (!Init) continue;
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+ if (!Incr) continue;
+ if (Incr->getOpcode() != Instruction::Add
+ && Incr->getOpcode() != Instruction::Sub)
+ continue;
+
+ /* Initialize new IV, double d = 0.0 in above example. */
+ ConstantInt *C = NULL;
+ if (Incr->getOperand(0) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(1));
+ else if (Incr->getOperand(1) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(0));
+ else
+ continue;
+
+ if (!C) continue;
+
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
+
+ /* Add new PHINode. */
+ PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
+
+ /* create new increment. '++d' in above example. */
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ BinaryOperator *NewIncr =
+ BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+ Instruction::FAdd : Instruction::FSub,
+ NewPH, CFP, "IV.S.next.", Incr);
+
+ NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
+ NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+
+ /* Remove cast operation */
+ ShadowUse->replaceAllUsesWith(NewPH);
+ ShadowUse->eraseFromParent();
+ Changed = true;
+ break;
+ }
+}
+
+/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
+/// set the IV user and stride information and return true, otherwise return
+/// false.
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
+ for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
+      // NOTE: we could handle setcc instructions with multiple uses here, but
+      // InstCombine does it as well for simple uses; it's not clear that this
+      // occurs often enough in real life to be worth handling.
+ CondUse = UI;
+ return true;
+ }
+ return false;
+}
+
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
+///
+/// This is a narrow solution to a specific, but acute, problem. For loops
+/// like this:
+///
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+///
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
+///
+/// if (n > 0) {
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+/// }
+///
+/// and then it's possible for subsequent optimization to obscure the if
+/// test in such a way that indvars can't find it.
+///
+/// When indvars can't find the if test in loops like this, it creates a
+/// max expression, which allows it to give the loop a canonical
+/// induction variable:
+///
+/// i = 0;
+/// max = n < 1 ? 1 : n;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i != max);
+///
+/// Canonical induction variables are necessary because the loop passes
+/// are designed around them. The most obvious example of this is the
+/// LoopInfo analysis, which doesn't remember trip count values. It
+/// expects to be able to rediscover the trip count each time it is
+/// needed, and it does this using a simple analysis that only succeeds if
+/// the loop has a canonical induction variable.
+///
+/// However, when it comes time to generate code, the maximum operation
+/// can be quite costly, especially if it's inside of an outer loop.
+///
+/// This function solves this problem by detecting this type of loop and
+/// rewriting its condition from ICMP_NE back to ICMP_SLT, and deleting
+/// the instructions for the maximum computation.
+///
+ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
+ // Check that the loop matches the pattern we're looking for.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
+ Cond->getPredicate() != CmpInst::ICMP_NE)
+ return Cond;
+
+ SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
+ if (!Sel || !Sel->hasOneUse()) return Cond;
+
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return Cond;
+ const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
+
+ // Add one to the backedge-taken count to get the trip count.
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
+ if (IterationCount != SE.getSCEV(Sel)) return Cond;
+
+ // Check for a max calculation that matches the pattern. There's no check
+ // for ICMP_ULE here because the comparison would be with zero, which
+ // isn't interesting.
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEVNAryExpr *Max = 0;
+ if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
+ Pred = ICmpInst::ICMP_SLE;
+ Max = S;
+ } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_SLT;
+ Max = S;
+ } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_ULT;
+ Max = U;
+ } else {
+ // No match; bail.
+ return Cond;
+ }
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
+
+ const SCEV *MaxLHS = Max->getOperand(0);
+ const SCEV *MaxRHS = Max->getOperand(1);
+
+ // ScalarEvolution canonicalizes constants to the left. For < and >, look
+ // for a comparison with 1. For <= and >=, a comparison with zero.
+ if (!MaxLHS ||
+ (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
+ return Cond;
+
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine() ||
+ AR->getStart() != One ||
+ AR->getStepRecurrence(SE) != One)
+ return Cond;
+
+ assert(AR->getLoop() == L &&
+ "Loop condition operand is an addrec in a different loop!");
+
+ // Check the right operand of the select, and remember it, as it will
+ // be used in the new comparison instruction.
+ Value *NewRHS = 0;
+ if (ICmpInst::isTrueWhenEqual(Pred)) {
+ // Look for n+1, and grab n.
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (!NewRHS)
+ return Cond;
+ } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
+ NewRHS = Sel->getOperand(1);
+ else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
+ NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
+ else
+ // Max doesn't match expected pattern.
+ return Cond;
+
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ // Ok, everything looks ok to change the condition into an SLT or SGE and
+ // delete the max calculation.
+ ICmpInst *NewCond =
+ new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
+
+ // Delete the max calculation instructions.
+ Cond->replaceAllUsesWith(NewCond);
+ CondUse->setUser(NewCond);
+ Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Cond->eraseFromParent();
+ Sel->eraseFromParent();
+ if (Cmp->use_empty())
+ Cmp->eraseFromParent();
+ return NewCond;
+}
+
+/// OptimizeLoopTermCond - Change the loop's terminating condition to use the
+/// post-incremented induction variable when possible.
+void
+LSRInstance::OptimizeLoopTermCond() {
+ SmallPtrSet<Instruction *, 4> PostIncs;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+
+ // Get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
+
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+    // FIXME: Overly conservative; the termination condition could be an 'or', etc.
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
+
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse))
+ continue;
+
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ // One consequence of doing this now is that it disrupts the count-down
+ // optimization. That's not always a bad thing though, because in such
+ // cases it may still be worthwhile to avoid a max.
+ Cond = OptimizeMax(Cond, CondUse);
+
+ // If this exiting block dominates the latch block, it may also use
+ // the post-inc value if it won't be shared with other uses.
+ // Check for dominance.
+ if (!DT.dominates(ExitingBlock, LatchBlock))
+ continue;
+
+ // Conservatively avoid trying to use the post-inc value in non-latch
+ // exits if there may be pre-inc users in intervening blocks.
+ if (LatchBlock != ExitingBlock)
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ // Test if the use is reachable from the exiting block. This dominator
+ // query is a conservative approximation of reachability.
+ if (&*UI != CondUse &&
+ !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
+ // Conservatively assume there may be reuse if the quotient of their
+ // strides could be a legal scale.
+ const SCEV *A = IU.getStride(*CondUse, L);
+ const SCEV *B = IU.getStride(*UI, L);
+ if (!A || !B) continue;
+ if (SE.getTypeSizeInBits(A->getType()) !=
+ SE.getTypeSizeInBits(B->getType())) {
+ if (SE.getTypeSizeInBits(A->getType()) >
+ SE.getTypeSizeInBits(B->getType()))
+ B = SE.getSignExtendExpr(B, A->getType());
+ else
+ A = SE.getSignExtendExpr(A, B->getType());
+ }
+ if (const SCEVConstant *D =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+ const ConstantInt *C = D->getValue();
+ // Stride of one or negative one can have reuse with non-addresses.
+ if (C->isOne() || C->isAllOnesValue())
+ goto decline_post_inc;
+ // Avoid weird situations.
+ if (C->getValue().getMinSignedBits() >= 64 ||
+ C->getValue().isMinSignedValue())
+ goto decline_post_inc;
+ // Without TLI, assume that any stride might be valid, and so any
+ // use might be shared.
+ if (!TLI)
+ goto decline_post_inc;
+ // Check for possible scaled-address reuse.
+ const Type *AccessTy = getAccessType(UI->getUser());
+ TargetLowering::AddrMode AM;
+ AM.Scale = C->getSExtValue();
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ AM.Scale = -AM.Scale;
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ }
+ }
+
+ DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
+
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != TermBr) {
+ if (Cond->hasOneUse()) {
+ Cond->moveBefore(TermBr);
+ } else {
+        // Clone the terminating condition and insert it before the branch.
+ ICmpInst *OldCond = Cond;
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
+ TermBr->replaceUsesOfWith(OldCond, Cond);
+ }
+ }
+
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->transformToPostInc(L);
+ Changed = true;
+
+ PostIncs.insert(Cond);
+ decline_post_inc:;
+ }
+
+ // Determine an insertion point for the loop induction variable increment. It
+ // must dominate all the post-inc comparisons we just set up, and it must
+ // dominate the loop latch edge.
+ IVIncInsertPos = L->getLoopLatch()->getTerminator();
+ for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
+ E = PostIncs.end(); I != E; ++I) {
+ BasicBlock *BB =
+ DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
+ (*I)->getParent());
+ if (BB == (*I)->getParent())
+ IVIncInsertPos = *I;
+ else if (BB != IVIncInsertPos->getParent())
+ IVIncInsertPos = BB->getTerminator();
+ }
+}
+
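+// Conceptually (illustrative IR), an exit test of the form
+//   %i.next = add i32 %i, 1
+//   %cond = icmp ne i32 %i, %n
+// is transformed so that the compare is rewritten against the post-incremented
+// value %i.next (with the bound adjusted when the use is rewritten), which
+// keeps the pre-increment value from being live across the increment.
+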
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
+bool
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy) {
+ int64_t NewMinOffset = LU.MinOffset;
+ int64_t NewMaxOffset = LU.MaxOffset;
+ const Type *NewAccessTy = AccessTy;
+
+  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
+  // something conservative; however, this can pessimize the case in which one
+  // of the uses has all of its fixups outside the loop, for example.
+ if (LU.Kind != Kind)
+ return false;
+ // Conservatively assume HasBaseReg is true for now.
+ if (NewOffset < LU.MinOffset) {
+ if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMinOffset = NewOffset;
+ } else if (NewOffset > LU.MaxOffset) {
+ if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMaxOffset = NewOffset;
+ }
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+ // Update the use.
+ LU.MinOffset = NewMinOffset;
+ LU.MaxOffset = NewMaxOffset;
+ LU.AccessTy = NewAccessTy;
+ if (NewOffset != LU.Offsets.back())
+ LU.Offsets.push_back(NewOffset);
+ return true;
+}
+
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind, const Type *AccessTy) {
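+  // Peel a constant offset off the expression, so that uses differing only by
+  // an immediate can share the same LSRUse.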
+ const SCEV *Copy = Expr;
+ int64_t Offset = ExtractImmediate(Expr, SE);
+
+ // Basic uses can't accept any offset, for example.
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ Expr = Copy;
+ Offset = 0;
+ }
+
+ std::pair<UseMapTy::iterator, bool> P =
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
+ if (!P.second) {
+ // A use already existed with this base.
+ size_t LUIdx = P.first->second;
+ LSRUse &LU = Uses[LUIdx];
+ if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
+ // Reuse this use.
+ return std::make_pair(LUIdx, Offset);
+ }
+
+ // Create a new use.
+ size_t LUIdx = Uses.size();
+ P.first->second = LUIdx;
+ Uses.push_back(LSRUse(Kind, AccessTy));
+ LSRUse &LU = Uses[LUIdx];
+
+ // We don't need to track redundant offsets, but we don't need to go out
+ // of our way here to avoid them.
+ if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+ LU.Offsets.push_back(Offset);
+
+ LU.MinOffset = Offset;
+ LU.MaxOffset = Offset;
+ return std::make_pair(LUIdx, Offset);
+}
+
+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
+ if (&LU != &Uses.back())
+ std::swap(LU, Uses.back());
+ Uses.pop_back();
+
+ // Update RegUses.
+ RegUses.SwapAndDropUse(LUIdx, Uses.size());
+}
+
+/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
+/// a formula with the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+ const LSRUse &OrigLU) {
+ // Search all uses for the formula. This could be more clever.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
+ if (&LU != &OrigLU &&
+ LU.Kind != LSRUse::ICmpZero &&
+ LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
+ LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
+ if (F.BaseRegs == OrigF.BaseRegs &&
+ F.ScaledReg == OrigF.ScaledReg &&
+ F.AM.BaseGV == OrigF.AM.BaseGV &&
+ F.AM.Scale == OrigF.AM.Scale &&
+ F.UnfoldedOffset == OrigF.UnfoldedOffset) {
+ if (F.AM.BaseOffs == 0)
+ return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+            // can skip the rest of the formulae and proceed to the next LSRUse.
+ break;
+ }
+ }
+ }
+ }
+
+ // Nothing looked good.
+ return 0;
+}
+
+void LSRInstance::CollectInterestingTypesAndFactors() {
+ SmallSetVector<const SCEV *, 4> Strides;
+
+ // Collect interesting types and strides.
+ SmallVector<const SCEV *, 4> Worklist;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ const SCEV *Expr = IU.getExpr(*UI);
+
+ // Collect interesting types.
+ Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
+
+ // Add strides for mentioned loops.
+ Worklist.push_back(Expr);
+ do {
+ const SCEV *S = Worklist.pop_back_val();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ Strides.insert(AR->getStepRecurrence(SE));
+ Worklist.push_back(AR->getStart());
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ Worklist.append(Add->op_begin(), Add->op_end());
+ }
+ } while (!Worklist.empty());
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
+ for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
+ const SCEV *NewStride = *NewStrideIter;
+
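+      // If the strides have different widths, sign-extend the narrower one so
+      // the two can be divided.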
+ if (SE.getTypeSizeInBits(OldStride->getType()) !=
+ SE.getTypeSizeInBits(NewStride->getType())) {
+ if (SE.getTypeSizeInBits(OldStride->getType()) >
+ SE.getTypeSizeInBits(NewStride->getType()))
+ NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
+ else
+ OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
+ }
+ if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ } else if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ }
+ }
+
+ // If all uses use the same type, don't bother looking for truncation-based
+ // reuse.
+ if (Types.size() == 1)
+ Types.clear();
+
+ DEBUG(print_factors_and_types(dbgs()));
+}
+
+void LSRInstance::CollectFixupsAndInitialFormulae() {
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ // Record the uses.
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = UI->getUser();
+ LF.OperandValToReplace = UI->getOperandValToReplace();
+ LF.PostIncLoops = UI->getPostIncLoops();
+
+ LSRUse::KindType Kind = LSRUse::Basic;
+ const Type *AccessTy = 0;
+ if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ Kind = LSRUse::Address;
+ AccessTy = getAccessType(LF.UserInst);
+ }
+
+ const SCEV *S = IU.getExpr(*UI);
+
+ // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
+ // (N - i == 0), and this allows (N - i) to be the expression that we work
+ // with rather than just N or i, so we can consider the register
+ // requirements for both N and i at the same time. Limiting this code to
+ // equality icmps is not a problem because all interesting loops use
+ // equality icmps, thanks to IndVarSimplify.
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (CI->isEquality()) {
+ // Swap the operands if needed to put the OperandValToReplace on the
+ // left, for consistency.
+ Value *NV = CI->getOperand(1);
+ if (NV == LF.OperandValToReplace) {
+ CI->setOperand(1, CI->getOperand(0));
+ CI->setOperand(0, NV);
+ NV = CI->getOperand(1);
+ Changed = true;
+ }
+
+ // x == y --> x - y == 0
+ const SCEV *N = SE.getSCEV(NV);
+ if (SE.isLoopInvariant(N, L)) {
+ // S is normalized, so normalize N before folding it into S
+ // to keep the result normalized.
+ N = TransformForPostIncUse(Normalize, N, CI, 0,
+ LF.PostIncLoops, SE, DT);
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ }
+
+ // -1 and the negations of all interesting strides (except the negation
+ // of -1) are now also interesting.
+ for (size_t i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i] != -1)
+ Factors.insert(-(uint64_t)Factors[i]);
+ Factors.insert(-1);
+ }
+
+ // Set up the initial formula for this use.
+ std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+
+ // If this is the first use of this LSRUse, give it a formula.
+ if (LU.Formulae.empty()) {
+ InsertInitialFormula(S, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), LF.LUIdx);
+ }
+ }
+
+ DEBUG(print_fixups(dbgs()));
+}
+
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
+void
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.InitialMatch(S, L, SE);
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Initial formula already exists!"); (void)Inserted;
+}
+
+/// InsertSupplementalFormula - Insert a simple single-register formula for
+/// the given expression into the given use.
+void
+LSRInstance::InsertSupplementalFormula(const SCEV *S,
+ LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.BaseRegs.push_back(S);
+ F.AM.HasBaseReg = true;
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
+}
+
+/// CountRegisters - Note which registers are used by the given formula,
+/// updating RegUses.
+void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
+ if (F.ScaledReg)
+ RegUses.CountRegister(F.ScaledReg, LUIdx);
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I)
+ RegUses.CountRegister(*I, LUIdx);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ if (!LU.InsertFormula(F))
+ return false;
+
+ CountRegisters(F, LUIdx);
+ return true;
+}
+
+/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
+/// loop-invariant values which we're tracking. These other uses will pin these
+/// values in registers, making them less profitable for elimination.
+/// TODO: This currently misses non-constant addrec step registers.
+/// TODO: Should this give more weight to users inside the loop?
+void
+LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
+ SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
+ SmallPtrSet<const SCEV *, 8> Inserted;
+
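+  // Walk the SCEVs of all tracked registers, drilling down to their
+  // SCEVUnknown leaves; loop-invariant leaves get extra fixups below.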
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.append(N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(U)) continue;
+ const Value *V = U->getValue();
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Look for instructions defined outside the loop.
+ if (L->contains(Inst)) continue;
+ } else if (isa<UndefValue>(V))
+ // Undef doesn't have a live range, so it doesn't matter.
+ continue;
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ // Ignore non-instructions.
+ if (!UserInst)
+ continue;
+ // Ignore instructions in other functions (as can happen with
+ // Constants).
+ if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
+ continue;
+ // Ignore instructions not dominated by the loop.
+ const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
+ UserInst->getParent() :
+ cast<PHINode>(UserInst)->getIncomingBlock(
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ if (!DT.dominates(L->getHeader(), UseBB))
+ continue;
+ // Ignore uses which are part of other SCEV expressions, to avoid
+ // analyzing them multiple times.
+ if (SE.isSCEVable(UserInst->getType())) {
+ const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
+ // If the user is a no-op, look through to its uses.
+ if (!isa<SCEVUnknown>(UserS))
+ continue;
+ if (UserS == U) {
+ Worklist.push_back(
+ SE.getUnknown(const_cast<Instruction *>(UserInst)));
+ continue;
+ }
+ }
+ // Ignore icmp instructions which are already being analyzed.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
+ unsigned OtherIdx = !UI.getOperandNo();
+ Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
+ if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
+ continue;
+ }
+
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = const_cast<Instruction *>(UserInst);
+ LF.OperandValToReplace = UI.getUse();
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+ InsertSupplementalFormula(U, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), Uses.size() - 1);
+ break;
+ }
+ }
+ }
+}
+
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers. If C is non-null, multiply each subexpression by C.
+static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ // Break out add operands.
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ CollectSubexprs(*I, C, Ops, L, SE);
+ return;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Split a non-zero base out of an addrec.
+ if (!AR->getStart()->isZero()) {
+ CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap),
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
+ return;
+ }
+ } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ // Break (C * (a + b + c)) into C*a + C*b + C*c.
+ if (Mul->getNumOperands() == 2)
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ CollectSubexprs(Mul->getOperand(1),
+ C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
+ Ops, L, SE);
+ return;
+ }
+ }
+
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+}
+
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base,
+ unsigned Depth) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *BaseReg = Base.BaseRegs[i];
+
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+
+ if (AddOps.size() == 1) continue;
+
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
+
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
+
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
+
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
+
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (TLI && InnerSumSC &&
+ SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue();
+ F.BaseRegs.erase(F.BaseRegs.begin() + i);
+ } else
+ F.BaseRegs[i] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+ }
+ }
+}
+
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // This method is only interesting on a plurality of registers.
+ if (Base.BaseRegs.size() <= 1) return;
+
+ Formula F = Base;
+ F.BaseRegs.clear();
+ SmallVector<const SCEV *, 4> Ops;
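+  // Partition the base registers: registers that dominate the loop and have
+  // no computable evolution in it are collected in Ops to be summed into a
+  // single register; the rest are kept as separate base registers.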
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+ !SE.hasComputableLoopEvolution(BaseReg, L))
+ Ops.push_back(BaseReg);
+ else
+ F.BaseRegs.push_back(BaseReg);
+ }
+ if (Ops.size() > 1) {
+ const SCEV *Sum = SE.getAddExpr(Ops);
+ // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+ // opportunity to fold something. For now, just ignore such cases
+ // rather than proceed with zero in a register.
+ if (!Sum->isZero()) {
+ F.BaseRegs.push_back(Sum);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // We can't add a symbolic offset if the address already contains one.
+ if (Base.AM.BaseGV) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ continue;
+ Formula F = Base;
+ F.AM.BaseGV = GV;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // TODO: For now, just add the min and max offset, because it usually isn't
+  // worthwhile looking at everything in between.
+ SmallVector<int64_t, 2> Worklist;
+ Worklist.push_back(LU.MinOffset);
+ if (LU.MaxOffset != LU.MinOffset)
+ Worklist.push_back(LU.MaxOffset);
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
+ if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
+ LU.Kind, LU.AccessTy, TLI)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ std::swap(F.BaseRegs[i], F.BaseRegs.back());
+ F.BaseRegs.pop_back();
+ } else
+ F.BaseRegs[i] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ continue;
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ if (LU.Kind != LSRUse::ICmpZero) return;
+
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+ if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+ // Don't do this if there is more than one offset.
+ if (LU.MinOffset != LU.MaxOffset) return;
+
+ assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ // Check that the multiplication doesn't overflow.
+ if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ continue;
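+    // Detect overflow by checking that dividing the product back out recovers
+    // the original base offset.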
+ int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ continue;
+
+ // Check that multiplying with the use offset doesn't overflow.
+ int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
+ Offset = (uint64_t)Offset * Factor;
+ if (Offset / Factor != LU.MinOffset)
+ continue;
+
+ Formula F = Base;
+ F.AM.BaseOffs = NewBaseOffs;
+
+ // Check that this scale is legal.
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ continue;
+
+ // Compensate for the use having MinOffset built into it.
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+
+ // Check that multiplying with each base register doesn't overflow.
+ for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
+ F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ goto next;
+ }
+
+ // Check that multiplying with the scaled register doesn't overflow.
+ if (F.ScaledReg) {
+ F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ continue;
+ }
+
+ // Check that multiplying with the unfolded offset doesn't overflow.
+ if (F.UnfoldedOffset != 0) {
+ if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+ continue;
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
+ if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+ continue;
+ }
+
+ // If we make it here and it's legal, add it.
+ (void)InsertFormula(LU, LUIdx, F);
+ next:;
+ }
+}
+
+/// GenerateScales - Generate stride factor reuse formulae by making use of
+/// scaled-offset address modes, for example.
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+
+ // If this Formula already has a scaled register, we can't add another one.
+ if (Base.AM.Scale != 0) return;
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ Base.AM.Scale = Factor;
+ Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ // Check whether this scale is going to be legal.
+ if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI)) {
+      // As a special case, handle out-of-loop Basic users specially.
+ // TODO: Reconsider this special case.
+ if (LU.Kind == LSRUse::Basic &&
+ isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LSRUse::Special, LU.AccessTy, TLI) &&
+ LU.AllFixupsOutsideLoop)
+ LU.Kind = LSRUse::Special;
+ else
+ continue;
+ }
+ // For an ICmpZero, negating a solitary base register won't lead to
+ // new solutions.
+ if (LU.Kind == LSRUse::ICmpZero &&
+ !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ continue;
+ // For each addrec base reg, apply the scale, if possible.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+ if (FactorS->isZero())
+ continue;
+ // Divide out the factor, ignoring high bits, since we'll be
+ // scaling the value back up in the end.
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ F.DeleteBaseReg(F.BaseRegs[i]);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+ }
+}
+
+/// GenerateTruncates - Generate reuse formulae from different IV types.
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // This requires TargetLowering to tell us which truncates are free.
+ if (!TLI) return;
+
+ // Don't bother truncating symbolic values.
+ if (Base.AM.BaseGV) return;
+
+ // Determine the integer type for the base formula.
+ const Type *DstTy = Base.getType();
+ if (!DstTy) return;
+ DstTy = SE.getEffectiveSCEVType(DstTy);
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ const Type *SrcTy = *I;
+ if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ Formula F = Base;
+
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+ for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J)
+ *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+ // TODO: This assumes we've done basic processing on all uses and
+ // have an idea what the register usage is.
+ if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+ continue;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+ size_t LUIdx;
+ int64_t Imm;
+ const SCEV *OrigReg;
+
+ WorkItem(size_t LI, int64_t I, const SCEV *R)
+ : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+ OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+ << " , add offset " << Imm;
+}
+
+void WorkItem::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+ // Group the registers by their value without any added constant offset.
+ typedef std::map<int64_t, const SCEV *> ImmMapTy;
+ typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+ RegMapTy Map;
+ DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+ SmallVector<const SCEV *, 8> Sequence;
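+  // Bucket each register by its base value (the register with its constant
+  // offset stripped), recording every immediate seen with that base and which
+  // uses reference it.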
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ int64_t Imm = ExtractImmediate(Reg, SE);
+ std::pair<RegMapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Reg, ImmMapTy()));
+ if (Pair.second)
+ Sequence.push_back(Reg);
+ Pair.first->second.insert(std::make_pair(Imm, *I));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ }
+
+ // Now examine each set of registers with the same base value. Build up
+ // a list of work to do and do the work in a separate step so that we're
+ // not adding formulae and register counts while we're searching.
+ SmallVector<WorkItem, 32> WorkItems;
+ SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+ E = Sequence.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ const ImmMapTy &Imms = Map.find(Reg)->second;
+
+ // It's not worthwhile looking for reuse if there's only one offset.
+ if (Imms.size() == 1)
+ continue;
+
+ DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J)
+ dbgs() << ' ' << J->first;
+ dbgs() << '\n');
+
+ // Examine each offset.
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J) {
+ const SCEV *OrigReg = J->second;
+
+ int64_t JImm = J->first;
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+ if (!isa<SCEVConstant>(OrigReg) &&
+ UsedByIndicesMap[Reg].count() == 1) {
+ DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+ continue;
+ }
+
+      // Conservatively examine offsets between this orig reg and a few
+      // selected other orig regs.
+ ImmMapTy::const_iterator OtherImms[] = {
+ Imms.begin(), prior(Imms.end()),
+ Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ };
+ for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+ ImmMapTy::const_iterator M = OtherImms[i];
+ if (M == J || M == JE) continue;
+
+ // Compute the difference between the two.
+ int64_t Imm = (uint64_t)JImm - M->first;
+ for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+ LUIdx = UsedByIndices.find_next(LUIdx))
+ // Make a memo of this use, offset, and register tuple.
+ if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+ WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
+ }
+ }
+ }
+
+ Map.clear();
+ Sequence.clear();
+ UsedByIndicesMap.clear();
+ UniqueItems.clear();
+
+ // Now iterate through the worklist and add new formulae.
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+ E = WorkItems.end(); I != E; ++I) {
+ const WorkItem &WI = *I;
+ size_t LUIdx = WI.LUIdx;
+ LSRUse &LU = Uses[LUIdx];
+ int64_t Imm = WI.Imm;
+ const SCEV *OrigReg = WI.OrigReg;
+
+ const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+ unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+ // TODO: Use a more targeted data structure.
+ for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+ const Formula &F = LU.Formulae[L];
+ // Use the immediate in the scaled register.
+ if (F.ScaledReg == OrigReg) {
+ int64_t Offs = (uint64_t)F.AM.BaseOffs +
+ Imm * (uint64_t)F.AM.Scale;
+ // Don't create 50 + reg(-50).
+ if (F.referencesReg(SE.getSCEV(
+ ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = Offs;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+ // If the new scale is a constant in a register, and adding the constant
+ // value to the immediate would produce a value closer to zero than the
+ // immediate itself, then the formula isn't worthwhile.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+ if (C->getValue()->isNegative() !=
+ (NewF.AM.BaseOffs < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
+ .ule(abs64(NewF.AM.BaseOffs)))
+ continue;
+
+ // OK, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ } else {
+ // Use the immediate in a base register.
+ for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+ const SCEV *BaseReg = F.BaseRegs[N];
+ if (BaseReg != OrigReg)
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI)) {
+ if (!TLI ||
+ !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ continue;
+ NewF = F;
+ NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
+ }
+ NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+ // If the new formula has a constant in a register, and adding the
+ // constant value to the immediate would produce a value closer to
+ // zero than the immediate itself, then the formula isn't worthwhile.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+ J != JE; ++J)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
+ abs64(NewF.AM.BaseOffs)) &&
+ (C->getValue()->getValue() +
+ NewF.AM.BaseOffs).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.AM.BaseOffs))
+ goto skip_formula;
+
+ // Ok, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ break;
+ skip_formula:;
+ }
+ }
+ }
+ }
+}
+
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+ // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+ // queries are more precise.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateScales(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+ }
+
+ GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
+}
+
+/// FilterOutUndesirableDedicatedRegisters - If there are multiple formulae
+/// with the same set of registers used by other uses, pick the best one and
+/// delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
+#ifndef NDEBUG
+ bool ChangedFormulae = false;
+#endif
+
+ // Collect the best formula for each unique set of shared registers. This
+ // is reset for each use.
+ typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ BestFormulaeTy;
+ BestFormulaeTy BestFormulae;
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+
+ bool Any = false;
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+ FIdx != NumForms; ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
+
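+      // Build a key from the registers in this formula which are also used by
+      // other uses; formulae with identical keys compete and only the cheaper
+      // one is kept.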
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+      // An unstable sort by host pointer order is fine here, because the key
+      // is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (!P.second) {
+ Formula &Best = LU.Formulae[P.first->second];
+
+ Cost CostF;
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ Cost CostBest;
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ if (CostF < CostBest)
+ std::swap(F, Best);
+ DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula "; Best.print(dbgs());
+ dbgs() << '\n');
+#ifndef NDEBUG
+ ChangedFormulae = true;
+#endif
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
+ continue;
+ }
+ }
+
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+
+ // Reset this to prepare for the next use.
+ BestFormulae.clear();
+ }
+
+ DEBUG(if (ChangedFormulae) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
+}
+
+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prunes the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+ size_t Power = 1;
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ size_t FSize = I->Formulae.size();
+ if (FSize >= ComplexityLimit) {
+ Power = ComplexityLimit;
+ break;
+ }
+ Power *= FSize;
+ if (Power >= ComplexityLimit)
+ break;
+ }
+ return Power;
+}
+
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+ "which use a superset of registers used by other "
+ "formulae.\n");
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ // Look for a formula with a constant or GV in a register. If the use
+ // also has a formula with that same value in an immediate field,
+ // delete the one that uses a register.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
+ Formula NewF = F;
+ NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
+ if (!F.AM.BaseGV) {
+ Formula NewF = F;
+ NewF.AM.BaseGV = GV;
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
+ "separated by a constant offset will use the same "
+ "registers.\n");
+
+ // This is especially useful for unrolled loops.
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+ if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
+ if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+ /*HasBaseReg=*/false,
+ LU.Kind, LU.AccessTy)) {
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
+ dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
+ }
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(F.AM,
+ LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
+ // With all other options exhausted, loop until the system is simple
+ // enough to handle.
+ SmallPtrSet<const SCEV *, 4> Taken;
+ while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+    // Ok, we have too many formulae on our hands to handle conveniently.
+ // Use a rough heuristic to thin out the list.
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Pick the register which is used by the most LSRUses, which is likely
+ // to be a good reuse register candidate.
+ const SCEV *Best = 0;
+ unsigned BestNum = 0;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ if (Taken.count(Reg))
+ continue;
+ if (!Best)
+ Best = Reg;
+ else {
+ unsigned Count = RegUses.getUsedByIndices(Reg).count();
+ if (Count > BestNum) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+ << " will yield profitable reuse.\n");
+ Taken.insert(Best);
+
+    // In any use with formulae which reference this register, delete the
+    // formulae which don't reference it.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ if (!LU.Regs.count(Best)) continue;
+
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ if (!F.referencesReg(Best)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --e;
+ --i;
+ Any = true;
+ assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
+ continue;
+ }
+ }
+
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const {
+ // Some ideas:
+ // - prune more:
+ // - use more aggressive filtering
+ // - sort the formula so that the most profitable solutions are found first
+ // - sort the uses too
+ // - search faster:
+ // - don't compute a cost, and then compare. compare while computing a cost
+ // and bail early.
+ // - track register sets with SmallBitVector
+
+ const LSRUse &LU = Uses[Workspace.size()];
+
+ // If this use references any register that's already a part of the
+ // in-progress solution, consider it a requirement that a formula must
+ // reference that register in order to be considered. This prunes out
+ // unprofitable searching.
+ SmallSetVector<const SCEV *, 4> ReqRegs;
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+ E = CurRegs.end(); I != E; ++I)
+ if (LU.Regs.count(*I))
+ ReqRegs.insert(*I);
+
+ bool AnySatisfiedReqRegs = false;
+ SmallPtrSet<const SCEV *, 16> NewRegs;
+ Cost NewCost;
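+  // The retry label is re-entered, with ReqRegs cleared, if no formula
+  // satisfied all of the required registers.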
+retry:
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+
+ // Ignore formulae which do not use any of the required registers.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+ JE = ReqRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ F.BaseRegs.end())
+ goto skip;
+ }
+ AnySatisfiedReqRegs = true;
+
+ // Evaluate the cost of the current formula. If it's already worse than
+ // the current best, prune the search at that point.
+ NewCost = CurCost;
+ NewRegs = CurRegs;
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ if (NewCost < SolutionCost) {
+ Workspace.push_back(&F);
+ if (Workspace.size() != Uses.size()) {
+ SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+ NewRegs, VisitedRegs);
+ if (F.getNumRegs() == 1 && Workspace.size() == 1)
+ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+ } else {
+ DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+ dbgs() << ". Regs:";
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator
+ I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+ dbgs() << ' ' << **I;
+ dbgs() << '\n');
+
+ SolutionCost = NewCost;
+ Solution = Workspace;
+ }
+ Workspace.pop_back();
+ }
+ skip:;
+ }
+
+ // If none of the formulae had all of the required registers, relax the
+ // constraint so that we don't exclude all formulae.
+ if (!AnySatisfiedReqRegs) {
+ assert(!ReqRegs.empty() && "Solver failed even without required registers");
+ ReqRegs.clear();
+ goto retry;
+ }
+}
+
+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+ SmallVector<const Formula *, 8> Workspace;
+ Cost SolutionCost;
+ SolutionCost.Loose();
+ Cost CurCost;
+ SmallPtrSet<const SCEV *, 16> CurRegs;
+ DenseSet<const SCEV *> VisitedRegs;
+ Workspace.reserve(Uses.size());
+
+ // SolveRecurse does all the work.
+ SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+ CurRegs, VisitedRegs);
+
+ // Ok, we've now made all our decisions.
+ DEBUG(dbgs() << "\n"
+ "The chosen solution requires "; SolutionCost.print(dbgs());
+ dbgs() << ":\n";
+ for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+ dbgs() << " ";
+ Uses[i].print(dbgs());
+ dbgs() << "\n"
+ " ";
+ Solution[i]->print(dbgs());
+ dbgs() << '\n';
+ });
+
+ assert(Solution.size() == Uses.size() && "Malformed solution!");
+}
+
+/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
+/// the dominator tree as far as we can go while still being dominated by the
+/// input positions. This helps canonicalize the insert position, which
+/// encourages sharing.
+BasicBlock::iterator
+LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs)
+ const {
+ for (;;) {
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
+ bool AllDominate = true;
+ Instruction *BetterPos = 0;
+ Instruction *Tentative = IDom->getTerminator();
+ for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+ E = Inputs.end(); I != E; ++I) {
+ Instruction *Inst = *I;
+ if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+ AllDominate = false;
+ break;
+ }
+ // Attempt to find an insert position in the middle of the block,
+ // instead of at the end, so that it can be used for other expansions.
+ if (IDom == Inst->getParent() &&
+ (!BetterPos || DT.dominates(BetterPos, Inst)))
+ BetterPos = llvm::next(BasicBlock::iterator(Inst));
+ }
+ if (!AllDominate)
+ break;
+ if (BetterPos)
+ IP = BetterPos;
+ else
+ IP = Tentative;
+ }
+
+ return IP;
+}
+
+/// AdjustInsertPositionForExpand - Determine an input position which will be
+/// dominated by the operands and which will dominate the result.
+BasicBlock::iterator
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const {
+ // Collect some instructions which must be dominated by the
+ // expanding replacement. These must be dominated by any operands that
+ // will be required in the expansion.
+ SmallVector<Instruction *, 4> Inputs;
+ if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+ Inputs.push_back(I);
+ if (LU.Kind == LSRUse::ICmpZero)
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+ Inputs.push_back(I);
+ if (LF.PostIncLoops.count(L)) {
+ if (LF.isUseFullyOutsideLoop(L))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
+ }
+ // The expansion must also be dominated by the increment positions of any
+  // loops for which it is using post-inc mode.
+ for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
+ E = LF.PostIncLoops.end(); I != E; ++I) {
+ const Loop *PIL = *I;
+ if (PIL == L) continue;
+
+ // Be dominated by the loop exit.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ PIL->getExitingBlocks(ExitingBlocks);
+ if (!ExitingBlocks.empty()) {
+ BasicBlock *BB = ExitingBlocks[0];
+ for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
+ BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
+ Inputs.push_back(BB->getTerminator());
+ }
+ }
+
+ // Then, climb up the immediate dominator tree as far as we can go while
+ // still being dominated by the input positions.
+ IP = HoistInsertPosition(IP, Inputs);
+
+ // Don't insert instructions before PHI nodes.
+ while (isa<PHINode>(IP)) ++IP;
+
+ // Ignore debug intrinsics.
+ while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+
+ return IP;
+}
+
+/// Expand - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding").
+Value *LSRInstance::Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
+ const LSRUse &LU = Uses[LF.LUIdx];
+
+ // Determine an input position which will be dominated by the operands and
+ // which will dominate the result.
+ IP = AdjustInsertPositionForExpand(IP, LF, LU);
+
+ // Inform the Rewriter if we have a post-increment use, so that it can
+ // perform an advantageous expansion.
+ Rewriter.setPostInc(LF.PostIncLoops);
+
+ // This is the type that the user actually needs.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ // This will be the type that we'll initially expand to.
+ const Type *Ty = F.getType();
+ if (!Ty)
+ // No type known; just expand directly to the ultimate type.
+ Ty = OpTy;
+ else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+ // Expand directly to the ultimate type if it's the right size.
+ Ty = OpTy;
+ // This is the type to do integer arithmetic in.
+ const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+ // Build up a list of operands to add together to form the full base.
+ SmallVector<const SCEV *, 8> Ops;
+
+ // Expand the BaseRegs portion.
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ Reg = TransformForPostIncUse(Denormalize, Reg,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ }
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the ScaledReg portion.
+ Value *ICmpScaledV = 0;
+ if (F.AM.Scale != 0) {
+ const SCEV *ScaledS = F.ScaledReg;
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.AM.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ } else {
+ // Otherwise just expand the scaled register and an explicit scale,
+ // which is expected to be matched as part of the address.
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+ ScaledS = SE.getMulExpr(ScaledS,
+ SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ Ops.push_back(ScaledS);
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+ }
+
+ // Expand the GV portion.
+ if (F.AM.BaseGV) {
+ Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the immediate portion.
+ int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ if (Offset != 0) {
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // The other interesting way of "folding" with an ICmpZero is to use a
+ // negated immediate.
+ if (!ICmpScaledV)
+ ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ else {
+ Ops.push_back(SE.getUnknown(ICmpScaledV));
+ ICmpScaledV = ConstantInt::get(IntTy, Offset);
+ }
+ } else {
+ // Just add the immediate values. These again are expected to be matched
+ // as part of the address.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
+ }
+ }
+
+ // Expand the unfolded offset portion.
+ int64_t UnfoldedOffset = F.UnfoldedOffset;
+ if (UnfoldedOffset != 0) {
+ // Just add the immediate values.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
+ UnfoldedOffset)));
+ }
+
+ // Emit instructions summing all the operands.
+ const SCEV *FullS = Ops.empty() ?
+ SE.getConstant(IntTy, 0) :
+ SE.getAddExpr(Ops);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+ // We're done expanding now, so reset the rewriter.
+ Rewriter.clearPostInc();
+
+ // An ICmpZero Formula represents an ICmp which we're handling as a
+ // comparison against zero. Now that we've expanded an expression for that
+ // form, update the ICmp's other operand.
+ if (LU.Kind == LSRUse::ICmpZero) {
+ ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+ DeadInsts.push_back(CI->getOperand(1));
+ assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ if (F.AM.Scale == -1) {
+ if (ICmpScaledV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+ OpTy, false),
+ ICmpScaledV, OpTy, "tmp", CI);
+ ICmpScaledV = Cast;
+ }
+ CI->setOperand(1, ICmpScaledV);
+ } else {
+ assert(F.AM.Scale == 0 &&
+ "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+ -(uint64_t)Offset);
+ if (C->getType() != OpTy)
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ OpTy, false),
+ C, OpTy);
+
+ CI->setOperand(1, C);
+ }
+ }
+
+ return FullV;
+}
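+
+// Illustrative sketch (not from the original source; the formula values are
+// hypothetical): for a use whose winning formula has BaseRegs = {%i},
+// ScaledReg = %j with Scale = 2, and BaseOffs = 4, Expand materializes roughly
+//   %t0   = expansion of %i
+//   %t1   = %t0 + 2 * (expansion of %j)
+//   %full = %t1 + 4
+// flushing the operand list after each piece so that SCEVExpander does not
+// hoist and re-associate the partial sums.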
+
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ Loop *PNLoop = LI.getLoopFor(PN->getParent());
+ if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
+ }
+
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
+ }
+ }
+}
+
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ // First, find an insertion point that dominates UserInst. For PHI nodes,
+ // find the nearest block which dominates all the relevant uses.
+ if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ } else {
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+ FullV, OpTy, "tmp", LF.UserInst);
+ FullV = Cast;
+ }
+
+ // Update the user. ICmpZero is handled specially here (for now) because
+ // Expand may have updated one of the operands of the icmp already, and
+ // its new value may happen to be equal to LF.OperandValToReplace, in
+ // which case doing replaceUsesOfWith leads to replacing both operands
+ // with the same value. TODO: Reorganize this.
+ if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+ LF.UserInst->setOperand(0, FullV);
+ else
+ LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
+ }
+
+ DeadInsts.push_back(LF.OperandValToReplace);
+}
+
+/// ImplementSolution - Rewrite all the fixup locations with new values,
+/// following the chosen solution.
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P) {
+ // Keep track of instructions we may have made dead, so that
+ // we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ SCEVExpander Rewriter(SE, "lsr");
+ Rewriter.disableCanonicalMode();
+ Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+
+ // Expand the new value definitions and update the users.
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ const LSRFixup &Fixup = *I;
+
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+
+ Changed = true;
+ }
+
+ // Clean up after ourselves. This must be done before deleting any
+ // instructions.
+ Rewriter.clear();
+
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
+
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()),
+ SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()),
+ LI(P->getAnalysis<LoopInfo>()),
+ TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm()) return;
+
+ // If there's no interesting work to be done, bail early.
+ if (IU.empty()) return;
+
+ DEBUG(dbgs() << "\nLSR on loop ";
+ WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ dbgs() << ":\n");
+
+ // First, perform some low-level loop optimizations.
+ OptimizeShadowIV();
+ OptimizeLoopTermCond();
+
+ // Start collecting data and preparing for the solver.
+ CollectInterestingTypesAndFactors();
+ CollectFixupsAndInitialFormulae();
+ CollectLoopInvariantFixupsAndFormulae();
+
+ DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+ print_uses(dbgs()));
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
+ FilterOutUndesirableDedicatedRegisters();
+ NarrowSearchSpaceUsingHeuristics();
+
+ SmallVector<const Formula *, 8> Solution;
+ Solve(Solution);
+
+ // Release memory that is no longer needed.
+ Factors.clear();
+ Types.clear();
+ RegUses.clear();
+
+#ifndef NDEBUG
+ // Formulae should be legal.
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J)
+ assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI) &&
+ "Illegal formula generated!");
+ }
+#endif
+
+ // Now that we've decided what we want, make it so.
+ ImplementSolution(Solution, P);
+}
+
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+ if (Factors.empty() && Types.empty()) return;
+
+ OS << "LSR has identified the following interesting factors and types: ";
+ bool First = true;
+
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '*' << *I;
+ }
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '(' << **I << ')';
+ }
+ OS << '\n';
+}
+
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+ OS << "LSR is examining the following fixup sites:\n";
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ OS << "  ";
+ I->print(OS);
+ OS << '\n';
+ }
+}
+
+void LSRInstance::print_uses(raw_ostream &OS) const {
+ OS << "LSR is examining the following uses:\n";
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ OS << "  ";
+ LU.print(OS);
+ OS << '\n';
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J) {
+ OS << " ";
+ J->print(OS);
+ OS << '\n';
+ }
+ }
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+ print_factors_and_types(OS);
+ print_fixups(OS);
+ print_uses(OS);
+}
+
+void LSRInstance::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+ /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *const TLI;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
+}
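+
+// Usage note (illustrative, not part of the original file): code generators of
+// this era typically schedule LSR from their common pass setup, e.g. something
+// like PM.add(createLoopStrengthReducePass(getTargetLowering())). Passing the
+// TargetLowering lets isLegalUse query real addressing-mode constraints, while
+// passing null (as plain opt does) falls back to conservative heuristics.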
+
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+ : LoopPass(ID), TLI(tli) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ // Requiring LoopSimplify a second time here prevents IVUsers from running
+ // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ bool Changed = false;
+
+ // Run the main LSR transformation.
+ Changed |= LSRInstance(TLI, L, this).getChanged();
+
+ // At this point, it is worth checking to see if any recurrence PHIs are also
+ // dead, so that we can remove them as well.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
new file mode 100644
index 000000000000..fef6bc31c7b6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -0,0 +1,193 @@
+//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop unroller. It works best when loops have
+// been canonicalized by the -indvars pass, allowing it to determine the trip
+// counts of loops easily.
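+//
+// For illustration (the example loop is hypothetical, not from this file):
+// given a loop with a known trip count of 3, such as
+//   for (i = 0; i != 3; ++i) body(i);
+// a full unroll rewrites it as body(0); body(1); body(2); with the back-edge
+// and the induction variable removed, at the cost of a larger code size.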
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
+
+namespace {
+ class LoopUnroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
+ CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+
+ UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A magic value for use with the Threshold parameter to indicate
+ /// that the loop unroll should be performed regardless of how much
+ /// code expansion would result.
+ static const unsigned NoThreshold = UINT_MAX;
+
+ // Threshold to use when optsize is specified (and there is no
+ // explicit -unroll-threshold).
+ static const unsigned OptSizeUnrollThreshold = 50;
+
+ unsigned CurrentCount;
+ unsigned CurrentThreshold;
+ bool CurrentAllowPartial;
+ bool UserThreshold; // CurrentThreshold is user-specified.
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ }
+ };
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
+ return new LoopUnroll(Threshold, Count, AllowPartial);
+}
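+
+// Usage note (illustrative, not part of the original file): a driver would
+// typically add this pass through the legacy pass manager, e.g. something like
+// PM.add(createLoopUnrollPass(/*Threshold=*/200, /*Count=*/-1,
+// /*AllowPartial=*/-1)); the -1 sentinels make the pass fall back to the
+// -unroll-threshold, -unroll-count and -unroll-allow-partial options above.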
+
+/// ApproximateLoopSize - Approximate the size of the loop.
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+ NumCalls = Metrics.NumInlineCandidates;
+
+ unsigned LoopSize = Metrics.NumInsts;
+
+ // Don't allow an estimate of size zero. This would allow unrolling of loops
+ // with huge iteration counts, which is a compile time problem even if it's
+ // not a problem for code quality.
+ if (LoopSize == 0) LoopSize = 1;
+
+ return LoopSize;
+}
+
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ (void)Header;
+
+ // Determine the current unrolling threshold. While this is normally set
+ // from UnrollThreshold, it is overridden to a smaller value if the current
+ // function is marked as optimize-for-size, and the unroll threshold was
+ // not user specified.
+ unsigned Threshold = CurrentThreshold;
+ if (!UserThreshold &&
+ Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Threshold = OptSizeUnrollThreshold;
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ unsigned Count = CurrentCount;
+
+ // Automatically select an unroll count.
+ if (Count == 0) {
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise,
+ // try to find the largest unroll count that evenly divides the trip count
+ // while keeping the unrolled loop size under the threshold.
+ if (TripCount == 0)
+ return false;
+ Count = TripCount;
+ }
+
+ // Enforce the threshold.
+ if (Threshold != NoThreshold) {
+ unsigned NumInlineCandidates;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
+ uint64_t Size = (uint64_t)LoopSize*Count;
+ if (TripCount != 1 && Size > Threshold) {
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+ if (!CurrentAllowPartial) {
+ DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return false;
+ }
+ // Reduce the unroll count so that it evenly divides TripCount for partial unrolling.
+ Count = Threshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0) {
+ Count--;
+ }
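+ // Worked example with illustrative numbers (not taken from the source):
+ // Threshold = 150, LoopSize = 40, TripCount = 10 gives Count = 150/40 = 3;
+ // 10 % 3 != 0, so Count drops to 2, which divides 10, and the body is
+ // emitted twice per iteration with the trip count effectively halved.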
+ if (Count < 2) {
+ DEBUG(dbgs() << " could not unroll partially\n");
+ return false;
+ }
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ }
+ }
+
+ // Unroll the loop.
+ Function *F = L->getHeader()->getParent();
+ if (!UnrollLoop(L, Count, LI, &LPM))
+ return false;
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*F);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
new file mode 100644
index 000000000000..840c4b69cf06
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -0,0 +1,1063 @@
+//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops that contain branches on loop-invariant conditions
+// to have multiple loops. For example, it turns the left into the right code:
+//
+// for (...) if (lic)
+// A for (...)
+// if (lic) A; B; C
+// B else
+// C for (...)
+// A; C
+//
+// This can increase the size of the code exponentially (doubling it every time
+// a loop is unswitched) so we only unswitch if the resultant code will be
+// smaller than a threshold.
+//
+// This pass expects LICM to be run before it to hoist invariant conditions out
+// of the loop, to make the unswitching opportunity obvious.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unswitch"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects , "Number of selects unswitched");
+STATISTIC(NumTrivial , "Number of unswitches that are trivial");
+STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+
+// The specific value of 50 here was chosen based only on intuition and a
+// few specific examples.
+static cl::opt<unsigned>
+Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
+ cl::init(50), cl::Hidden);
+
+namespace {
+ class LoopUnswitch : public LoopPass {
+ LoopInfo *LI; // Loop information
+ LPPassManager *LPM;
+
+ // LoopProcessWorklist - Used to check if the second loop needs processing
+ // after RewriteLoopBodyWithConditionConstant rewrites the first loop.
+ std::vector<Loop*> LoopProcessWorklist;
+ SmallPtrSet<Value *,8> UnswitchedVals;
+
+ bool OptimizeForSize;
+ bool redoLoop;
+
+ Loop *currentLoop;
+ DominatorTree *DT;
+ BasicBlock *loopHeader;
+ BasicBlock *loopPreheader;
+
+ // LoopBlocks contains all of the basic blocks of the loop, including the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
+ // loop, in that order.
+ std::vector<BasicBlock*> LoopBlocks;
+ // NewBlocks contains the cloned copies of the basic blocks from LoopBlocks.
+ std::vector<BasicBlock*> NewBlocks;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {
+ initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool processCurrentLoop();
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ private:
+
+ virtual void releaseMemory() {
+ UnswitchedVals.clear();
+ }
+
+ /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
+ /// remove it.
+ void RemoveLoopFromWorklist(Loop *L) {
+ std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
+ LoopProcessWorklist.end(), L);
+ if (I != LoopProcessWorklist.end())
+ LoopProcessWorklist.erase(I);
+ }
+
+ void initLoopData() {
+ loopHeader = currentLoop->getHeader();
+ loopPreheader = currentLoop->getLoopPreheader();
+ }
+
+ /// Split all of the edges from inside the loop to their exit blocks.
+ /// Update the appropriate Phi nodes as we do so.
+ void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
+
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+ BasicBlock *ExitBlock);
+ void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+
+ void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val, bool isEqual);
+
+ void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt);
+
+ void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+ void RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist, Loop *l);
+ void RemoveLoopFromHierarchy(Loop *L);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
+ BasicBlock **LoopExit = 0);
+
+ };
+}
+char LoopUnswitch::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
+}
+
+/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
+/// invariant in the loop, or has an invariant piece, return the invariant.
+/// Otherwise, return null.
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+ // We can never unswitch on vector conditions.
+ if (Cond->getType()->isVectorTy())
+ return 0;
+
+ // Constants should be folded, not unswitched on!
+ if (isa<Constant>(Cond)) return 0;
+
+ // TODO: Handle: br (VARIANT|INVARIANT).
+
+ // Hoist simple values out.
+ if (L->makeLoopInvariant(Cond, Changed))
+ return Cond;
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
+ if (BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or) {
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
+ return LHS;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
+ return RHS;
+ }
+
+ return 0;
+}
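+
+// Illustrative example (hypothetical IR, not from this file): for a header
+// branch on %c where %c = and i1 %inv, %var and only %inv is loop-invariant,
+// the recursion above returns %inv. Unswitching on %inv lets the 'and' fold to
+// false (and the branch disappear) in the clone where %inv is false, and
+// reduces %c to %var in the clone where %inv is true.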
+
+bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ LI = &getAnalysis<LoopInfo>();
+ LPM = &LPM_Ref;
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ currentLoop = L;
+ Function *F = currentLoop->getHeader()->getParent();
+ bool Changed = false;
+ do {
+ assert(currentLoop->isLCSSAForm(*DT));
+ redoLoop = false;
+ Changed |= processCurrentLoop();
+ } while(redoLoop);
+
+ if (Changed) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DT)
+ DT->runOnFunction(*F);
+ }
+ return Changed;
+}
+
+/// processCurrentLoop - Do the actual work: unswitch the loop if it is possible
+/// and profitable to do so.
+bool LoopUnswitch::processCurrentLoop() {
+ bool Changed = false;
+ LLVMContext &Context = currentLoop->getHeader()->getContext();
+
+ // Loop over all of the basic blocks in the loop. If we find an interior
+ // block that is branching on a loop-invariant condition, we can unswitch this
+ // loop.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ // If this isn't branching on an invariant condition, we can't unswitch
+ // it.
+ if (BI->isConditional()) {
+ // See if this, or some part of it, is loop invariant. If so, we can
+ // unswitch on it if we desire.
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumBranches;
+ return true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && SI->getNumCases() > 1) {
+ // Find a value to unswitch on:
+ // FIXME: this should choose the most expensive case!
+ // FIXME: scan for a case with a non-critical edge?
+ Constant *UnswitchVal = SI->getCaseValue(1);
+ // Do not process same value again and again.
+ if (!UnswitchedVals.insert(UnswitchVal))
+ continue;
+
+ if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
+ ++NumSwitches;
+ return true;
+ }
+ }
+ }
+
+ // Scan the instructions to check for unswitchable values.
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ BBI != E; ++BBI)
+ if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumSelects;
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
+///
+/// If so, return true and set ExitBB to the block the loop exits through.
+///
+static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
+ BasicBlock *&ExitBB,
+ std::set<BasicBlock*> &Visited) {
+ if (!Visited.insert(BB).second) {
+ // Already visited. Without more analysis, this could indicate an infinite loop.
+ return false;
+ } else if (!L->contains(BB)) {
+ // Otherwise, this is a loop exit; that is fine so long as it is the
+ // first exit.
+ if (ExitBB != 0) return false;
+ ExitBB = BB;
+ return true;
+ }
+
+ // Otherwise, this is an unvisited intra-loop node. Check all successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+ // Check to see if the successor is a trivial loop exit.
+ if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
+ return false;
+ }
+
+ // Okay, everything after this looks good, check to make sure that this block
+ // doesn't include any side effects.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+
+ return true;
+}
+
+/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
+/// leads to an exit from the specified loop, and has no side-effects in the
+/// process. If so, return the block that is exited to, otherwise return null.
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
+ std::set<BasicBlock*> Visited;
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
+ BasicBlock *ExitBB = 0;
+ if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
+ return ExitBB;
+ return 0;
+}
+
+/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
+/// trivial: that is, that the condition controls whether or not the loop does
+/// anything at all. If this is a trivial condition, unswitching produces no
+/// code duplications (equivalently, it produces a simpler loop and a new empty
+/// loop, which gets deleted).
+///
+/// If this is a trivial condition, return true, otherwise return false. When
+/// returning true, this sets Cond and Val to the condition that controls the
+/// trivial condition: when Cond dynamically equals Val, the loop is known to
+/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
+/// Cond == Val.
+///
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+ BasicBlock **LoopExit) {
+ BasicBlock *Header = currentLoop->getHeader();
+ TerminatorInst *HeaderTerm = Header->getTerminator();
+ LLVMContext &Context = Header->getContext();
+
+ BasicBlock *LoopExitBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
+ // If the header block doesn't end with a conditional branch on Cond, we
+ // can't handle it.
+ if (!BI->isConditional() || BI->getCondition() != Cond)
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ if (Val) *Val = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ if (Val) *Val = ConstantInt::getFalse(Context);
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
+ // If this isn't a switch on Cond, we can't handle it.
+ if (SI->getCondition() != Cond) return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+ // latch block or exit through a single exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this. Note that we can't trivially unswitch on the default case.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ SI->getSuccessor(i)))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ if (Val) *Val = SI->getCaseValue(i);
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit block
+ // contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ if (LoopExit) *LoopExit = LoopExitBB;
+
+ // We already know that nothing uses any scalar values defined inside of this
+ // loop. As such, we just have to check to see if this loop will execute any
+ // side-effecting instructions (e.g. stores, calls, volatile loads) in the
+ // part of the loop that the code *would* execute. We already checked the
+ // tail, check the header now.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+ return true;
+}
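+
+// Illustrative example (hypothetical loop, not from this file): in
+//   for (...) { if (!flag) break; ...body... }
+// where the header tests the invariant 'flag' and the exit path has no side
+// effects, the condition is trivial: unswitching only hoists the 'flag' test in
+// front of the loop and short-circuits to the exit block, duplicating no code.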
+
+/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
+/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// unswitch the loop, reprocess the pieces, then return true.
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+
+ initLoopData();
+
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
+ Function *F = loopHeader->getParent();
+
+ Constant *CondVal = 0;
+ BasicBlock *ExitBlock = 0;
+ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
+ // If the condition is trivial, always unswitch. There is no code growth
+ // for this case.
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+ return true;
+ }
+
+ // Check to see if it would be profitable to unswitch current loop.
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+ if (Metrics.NumInsts > Threshold ||
+ Metrics.NumBlocks * 5 > Threshold ||
+ Metrics.containsIndirectBr || Metrics.isRecursive) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n");
+ return false;
+ }
+
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+ return true;
+}
+
+/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// mapping the blocks with the specified map.
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop *New = new Loop();
+ LPM->insertLoop(New, PL);
+
+ // Add all of the blocks in L to the new loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+
+ // Add all of the subloops to the new loop.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ CloneLoop(*I, New, VM, LI, LPM);
+
+ return New;
+}
+
+/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values:
+/// if LIC == Val, branch to TrueDest, otherwise branch to FalseDest. Insert the
+/// code immediately before InsertPt.
+void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt) {
+ // Insert a conditional branch on LIC to the two preheaders. The original
+ // code is the true version and the new code is the false version.
+ Value *BranchVal = LIC;
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ else if (Val != ConstantInt::getTrue(Val->getContext()))
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+
+ // Insert the new branch.
+ BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+ // If either edge is critical, split it. This helps preserve LoopSimplify
+ // form for enclosing loops.
+ SplitCriticalEdge(BI, 0, this);
+ SplitCriticalEdge(BI, 1, this);
+}
+
+/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
+/// condition in it (a cond branch from its header block to its latch block,
+/// where the path through the loop that doesn't execute its body has no
+/// side-effects), unswitch it. This doesn't involve any code duplication, just
+/// moving the conditional branch outside of the loop and updating loop info.
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
+ BasicBlock *ExitBlock) {
+ DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << L->getHeader()->getParent()->getName()
+ << " on cond: " << *Val << " == " << *Cond << "\n");
+
+ // First step, split the preheader, so that we know that there is a safe place
+ // to insert the conditional branch. We will change loopPreheader to have a
+ // conditional branch on Cond.
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+
+ // Now that we have a place to insert the conditional branch, create a place
+ // to branch to: this is the exit block out of the loop that we should
+ // short-circuit to.
+
+ // Split this block now, so that the loop maintains its exit block, and so
+ // that the jump from the preheader can execute the contents of the exit block
+ // without actually branching to it (the exit block should be dominated by the
+ // loop header, not the preheader).
+ assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+
+ // Okay, now we have a position to branch from and a position to branch to,
+ // insert the new conditional branch.
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ loopPreheader->getTerminator());
+ LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
+ loopPreheader->getTerminator()->eraseFromParent();
+
+ // We need to reprocess this loop, it could be unswitched again.
+ redoLoop = true;
+
+ // Now that we know that the loop is never entered when this condition is a
+ // particular value, rewrite the loop with this info. We know that this will
+ // at least eliminate the old branch.
+ RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+ ++NumTrivial;
+}
+
+/// SplitExitEdges - Split all of the edges from inside the loop to their exit
+/// blocks. Update the appropriate Phi nodes as we do so.
+void LoopUnswitch::SplitExitEdges(Loop *L,
+ const SmallVector<BasicBlock *, 8> &ExitBlocks){
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ // Although SplitBlockPredecessors doesn't preserve loop-simplify in
+ // general, if we call it on all predecessors of all exits then it does.
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
+ }
+}
+
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equals Val. Split it into two loop versions and emit a
+/// test of the condition outside of both loops to select between them.
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+ Loop *L) {
+ Function *F = loopHeader->getParent();
+ DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << F->getName()
+ << " when '" << *Val << "' == " << *LIC << "\n");
+
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ LoopBlocks.clear();
+ NewBlocks.clear();
+
+ // First step, split the preheader and exit blocks, and add these blocks to
+ // the LoopBlocks list.
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ LoopBlocks.push_back(NewPreheader);
+
+ // We want the loop to come after the preheader, but before the exit blocks.
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Split all of the edges from inside the loop to their exit blocks. Update
+ // the appropriate Phi nodes as we do so.
+ SplitExitEdges(L, ExitBlocks);
+
+ // The exit blocks may have been changed due to edge splitting, recompute.
+ ExitBlocks.clear();
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Add exit blocks to the loop blocks.
+ LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+ // Next step, clone all of the basic blocks that make up the loop (including
+ // the loop preheader and exit blocks), keeping track of the mapping between
+ // the instructions and blocks.
+ NewBlocks.reserve(LoopBlocks.size());
+ ValueToValueMapTy VMap;
+ for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+ NewBlocks.push_back(NewBB);
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
+ }
+
+ // Splice the newly inserted blocks into the function right before the
+ // original preheader.
+ F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Now we create the new Loop object for the versioned loop.
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
+ Loop *ParentLoop = L->getParentLoop();
+ if (ParentLoop) {
+ // Make sure to add the cloned preheader and exit blocks to the parent loop
+ // as well.
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ }
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
+ // The new exit block should be in the same loop as the old one.
+ if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
+ ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+
+ assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
+ "Exit block should have been split to have one successor!");
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+
+ // If the successor of the exit block had PHI nodes, add an entry for
+ // NewExit.
+ PHINode *PN;
+ for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
+ PN = cast<PHINode>(I);
+ Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+ ValueToValueMapTy::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ PN->addIncoming(V, NewExit);
+ }
+ }
+
+ // Rewrite the code to refer to itself.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // Rewrite the original preheader to select between versions of the loop.
+ BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
+ "Preheader splitting did not work correctly!");
+
+ // Emit the new branch that selects between the two versions of this loop.
+ EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+ LPM->deleteSimpleAnalysisValue(OldBR, L);
+ OldBR->eraseFromParent();
+
+ LoopProcessWorklist.push_back(NewLoop);
+ redoLoop = true;
+
+ // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // deletes the instruction (for example by simplifying a PHI that feeds into
+ // the condition that we're unswitching on), we don't rewrite the second
+ // iteration.
+ WeakVH LICHandle(LIC);
+
+ // Now we rewrite the original code to know that the condition is true and the
+ // new code to know that the condition is false.
+ RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If it's a constant, don't simplify
+ // it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
+}
+
+/// RemoveFromWorklist - Remove all instances of I from the worklist vector
+/// specified.
+static void RemoveFromWorklist(Instruction *I,
+ std::vector<Instruction*> &Worklist) {
+ std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
+ Worklist.end(), I);
+ while (WI != Worklist.end()) {
+ unsigned Offset = WI-Worklist.begin();
+ Worklist.erase(WI);
+ WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
+ }
+}
+
+/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// program, replacing all uses with V and update the worklist.
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
+ std::vector<Instruction*> &Worklist,
+ Loop *L, LPPassManager *LPM) {
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+
+ // Add users to the worklist which may be simplified now.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
+ ++NumSimplify;
+}
+
+/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
+/// information, and remove any dead successors it has.
+///
+void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist,
+ Loop *L) {
+ if (pred_begin(BB) != pred_end(BB)) {
+ // This block isn't dead. Since an edge to BB was just removed, see if there
+ // are any easy simplifications we can do now.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If it has one pred, fold phi nodes in BB.
+ while (isa<PHINode>(BB->begin()))
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
+ Worklist, L, LPM);
+
+ // If this is the header of a loop and the only pred is the latch, we now
+ // have an unreachable loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ if (loopHeader == BB && L->contains(Pred)) {
+ // Remove the branch from the latch to the header block, this makes
+ // the header dead, which will make the latch dead (because the header
+ // dominates the latch).
+ LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
+ Pred->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), Pred);
+
+ // The loop is now broken, remove it from LI.
+ RemoveLoopFromHierarchy(L);
+
+ // Reprocess the header, which now IS dead.
+ RemoveBlockIfDead(BB, Worklist, L);
+ return;
+ }
+
+ // If the pred ends in an uncond branch, add the uncond branch to the
+ // worklist so that the two blocks will get merged.
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isUnconditional())
+ Worklist.push_back(BI);
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Nuking dead block: " << *BB);
+
+ // Remove the instructions in the basic block from the worklist.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ RemoveFromWorklist(I, Worklist);
+
+ // Anything that uses the instructions in this basic block should have its
+ // uses replaced with undefs.
+ // If I is not of void type, then replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // If this is the edge to the header block for a loop, remove the loop and
+ // promote all subloops.
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+ if (BBLoop->getLoopLatch() == BB) {
+ RemoveLoopFromHierarchy(BBLoop);
+ if (currentLoop == BBLoop) {
+ currentLoop = 0;
+ redoLoop = false;
+ }
+ }
+ }
+
+ // Remove the block from the loop info, which removes it from any loops it
+ // was in.
+ LI->removeBlock(BB);
+
+
+ // Remove phi node entries in successors for this block.
+ TerminatorInst *TI = BB->getTerminator();
+ SmallVector<BasicBlock*, 4> Succs;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Succs.push_back(TI->getSuccessor(i));
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+
+ // Unique the successors, removing any duplicate entries.
+ array_pod_sort(Succs.begin(), Succs.end());
+ Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
+
+ // Remove the basic block, including all of the instructions contained in it.
+ LPM->deleteSimpleAnalysisValue(BB, L);
+ BB->eraseFromParent();
+ // Remove successor blocks here that are not dead, so that we know we only
+ // have dead blocks in this list. Nondead blocks have a way of becoming dead,
+ // then getting removed before we revisit them, which is badness.
+ //
+ for (unsigned i = 0; i != Succs.size(); ++i)
+ if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
+ // One exception is loop headers. If this block was the preheader for a
+ // loop, then we DO want to visit the loop so the loop gets deleted.
+ // We know that if the successor is a loop header, that this loop had to
+ // be the preheader: the case where this was the latch block was handled
+ // above and headers can only have two predecessors.
+ if (!LI->isLoopHeader(Succs[i])) {
+ Succs.erase(Succs.begin()+i);
+ --i;
+ }
+ }
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ RemoveBlockIfDead(Succs[i], Worklist, L);
+}
+
+/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
+/// become unwrapped, either because the backedge was deleted, or because the
+/// edge into the header was removed. If the edge into the header from the
+/// latch block was removed, the loop is unwrapped but its subloops are still
+/// alive, so this just reparents the subloops. If the loops are actually dead,
+/// they will be removed later.
+void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
+ LPM->deleteLoopFromQueue(L);
+ RemoveLoopFromWorklist(L);
+}
+
+// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
+// the value specified by Val in the specified loop, or we know it does NOT have
+// that value. Rewrite any uses of LIC or of properties correlated to it.
+void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val,
+ bool IsEqual) {
+ assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+ // FIXME: Support correlated properties, like:
+ // for (...)
+ // if (li1 < li2)
+ // ...
+ // if (li1 > li2)
+ // ...
+
+ // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
+ // selects, switches.
+ std::vector<Instruction*> Worklist;
+ LLVMContext &Context = Val->getContext();
+
+
+ // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
+ // in the loop with the appropriate one directly.
+ if (IsEqual || (isa<ConstantInt>(Val) &&
+ Val->getType()->isIntegerTy(1))) {
+ Value *Replacement;
+ if (IsEqual)
+ Replacement = Val;
+ else
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ !cast<ConstantInt>(Val)->getZExtValue());
+
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+ U->replaceUsesOfWith(LIC, Replacement);
+ Worklist.push_back(U);
+ }
+ SimplifyCode(Worklist, L);
+ return;
+ }
+
+ // Otherwise, we don't know the precise value of LIC, but we do know that it
+ // is certainly NOT "Val". As such, simplify any uses in the loop that we
+ // can. This case occurs when we unswitch switch statements.
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+
+ Worklist.push_back(U);
+
+ // TODO: We could do other simplifications, for example, turning
+ // 'icmp eq LIC, Val' -> false.
+
+ // If we know that LIC is not Val, use this info to simplify code.
+ SwitchInst *SI = dyn_cast<SwitchInst>(U);
+ if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+
+ unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ if (DeadCase == 0) continue; // Default case is live for multiple values.
+
+ // Found a dead case value. Don't remove PHI nodes in the
+ // successor if they become single-entry; those PHI nodes may
+ // be in the Users list.
+
+ BasicBlock *Switch = SI->getParent();
+ BasicBlock *SISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
+ // If the DeadCase successor dominates the loop latch, then the
+ // transformation isn't safe since it will delete the sole predecessor edge
+ // to the latch.
+ if (Latch && DT->dominates(SISucc, Latch))
+ continue;
+
+ // FIXME: This is a hack. We need to keep the successor around
+ // and hooked up so as to preserve the loop structure, because
+ // trying to update it is complicated. So instead we preserve the
+ // loop structure and put the block on a dead code path.
+ SplitEdge(Switch, SISucc, this);
+ // Compute the successors instead of relying on the return value
+ // of SplitEdge, since it may have split the switch successor
+ // after PHI nodes.
+ BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+ // Create an "unreachable" destination.
+ BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+ Switch->getParent(),
+ OldSISucc);
+ new UnreachableInst(Context, Abort);
+ // Force the new case destination to branch to the "unreachable"
+ // block while maintaining a (dead) CFG edge to the old block.
+ NewSISucc->getTerminator()->eraseFromParent();
+ BranchInst::Create(Abort, OldSISucc,
+ ConstantInt::getTrue(Context), NewSISucc);
+ // Release the PHI operands for this edge.
+ for (BasicBlock::iterator II = NewSISucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(II); ++II)
+ PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+ UndefValue::get(PN->getType()));
+ // Tell the domtree about the new block. We don't fully update the
+ // domtree here -- instead we force it to do a full recomputation
+ // after the pass is complete -- but we do need to inform it of
+ // new blocks.
+ if (DT)
+ DT->addNewBlock(Abort, NewSISucc);
+ }
+
+ SimplifyCode(Worklist, L);
+}
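+
+// Illustrative sketch of the dead-case rewrite above (the switch value and
+// case label are hypothetical): given "switch i32 %LIC ... i32 7, label
+// %case7" inside the loop, where unswitching has established %LIC != 7, the
+// Switch->%case7 edge is split and the new block is pointed at a fresh
+// "us-unreachable" block while keeping a dead CFG edge to the old successor:
+//
+//   NewSISucc:
+//     br i1 true, label %us-unreachable, label %case7
+//   us-unreachable:
+//     unreachable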
+
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// loop, walk over it and constant prop, dce, and fold control flow where
+/// possible. Note that this is effectively a very simple loop-structure-aware
+/// optimizer. During processing of this loop, L could very well be deleted, so
+/// it must not be used.
+///
+/// FIXME: When the loop optimizer is more mature, separate this out to a new
+/// pass.
+///
+void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ // Simple DCE.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(dbgs() << "Remove dead instruction '" << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ // See if instruction simplification can hack this up. This is common for
+ // things like "select false, X, Y" after unswitching made the condition be
+ // 'false'.
+ if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (LI->replacementPreservesLCSSAForm(I, V)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
+ // Special case hacks that appear commonly in unswitched code.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (BI->isUnconditional()) {
+ // If BI's parent is the only pred of the successor, fold the two blocks
+ // together.
+ BasicBlock *Pred = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+ BasicBlock *SinglePred = Succ->getSinglePredecessor();
+ if (!SinglePred) continue; // Nothing to do.
+ assert(SinglePred == Pred && "CFG broken");
+
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ << Succ->getName() << "\n");
+
+ // Resolve any single entry PHI nodes in Succ.
+ while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
+ // Move all of the successor contents from Succ to Pred.
+ Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
+ Succ->end());
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+
+ // Remove Succ from the loop tree.
+ LI->removeBlock(Succ);
+ LPM->deleteSimpleAnalysisValue(Succ, L);
+ Succ->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+ // Conditional branch. Turn it into an unconditional branch, then
+ // remove dead blocks.
+ continue; // FIXME: Enable.
+
+ DEBUG(dbgs() << "Folded branch: " << *BI);
+ BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
+ BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
+ DeadSucc->removePredecessor(BI->getParent(), true);
+ Worklist.push_back(BranchInst::Create(LiveSucc, BI));
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+ ++NumSimplify;
+
+ RemoveBlockIfDead(DeadSucc, Worklist, L);
+ }
+ continue;
+ }
+ }
+}
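+
+// Illustrative sketch of the unconditional-branch folding above (labels
+// hypothetical): when a branch targets a block with a single predecessor,
+//
+//   pred:
+//     ...
+//     br label %succ
+//   succ:
+//     %x = add i32 %a, 1
+//     ret i32 %x
+//
+// the contents of %succ are spliced into %pred and %succ is deleted, leaving
+// a single block that ends in "ret i32 %x".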
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 000000000000..9087b46c138b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,139 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
+ IRBuilder<> Builder(II->getParent(), II);
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::memory_barrier:
+ break;
+
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin: {
+ Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+ switch (IID) {
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig, Delta);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+
+ II->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_swap: {
+ Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Builder.CreateStore(Val, Ptr);
+ II->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_cmp_swap: {
+ Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
+ Value *Val = II->getArgOperand(2);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+ II->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ assert(II->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ II->eraseFromParent();
+
+ return true;
+}
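+
+// Illustrative sketch of the lowering above, assuming i32 operands and the
+// old-style atomic intrinsics:
+//
+//   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %d)
+//
+// becomes the plain, non-atomic sequence
+//
+//   %old = load i32* %p
+//   %new = add i32 %old, %d
+//   store i32 %new, i32* %p
+//
+// with uses of the intrinsic's result rewritten to use the loaded value.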
+
+namespace {
+ struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {
+ initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
+ Changed |= LowerAtomicIntrinsic(II);
+ return Changed;
+ }
+ };
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false)
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
new file mode 100644
index 000000000000..7ed3db6cc1db
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -0,0 +1,986 @@
+//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs various transformations related to eliminating memcpy
+// calls, or transforming sets of stores into memset's.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memcpyopt"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
+
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+ bool &VariableIdxFound, const TargetData &TD){
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (OpC == 0)
+ return VariableIdxFound = true;
+ if (OpC->isZero()) continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
+/// constant offset, and return that constant offset. For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+ const TargetData &TD) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+
+ bool VariableIdxFound = false;
+
+ // If one pointer is a GEP and the other isn't, then see if the GEP is a
+ // constant offset from the base, as in "P" and "gep P, 1".
+ if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base.  After that base, they may have some number of common (and
+ // potentially variable) indices.  After those, each may have some constant
+ // offset, which determines their offset from each other.  We handle no other
+ // case at this point.
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return false;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ if (VariableIdxFound) return false;
+
+ Offset = Offset2-Offset1;
+ return true;
+}
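+
+// Worked example for the &A[42]/&A[40] case above, assuming A is an array of
+// i32:
+//
+//   %p1 = getelementptr [64 x i32]* %A, i64 0, i64 42    ; byte offset 168
+//   %p2 = getelementptr [64 x i32]* %A, i64 0, i64 40    ; byte offset 160
+//
+// The common leading index is skipped, the differing trailing indices give
+// Offset1 = 168 and Offset2 = 160, and the reported Offset is 160 - 168 = -8.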
+
+
+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+/// store 0 -> P+1
+/// store 0 -> P+0
+/// store 0 -> P+3
+/// store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc. When we see
+/// the first store, we make a range [1, 2). The second store extends the range
+/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
+/// two ranges into [0, 3) which is memset'able.
+namespace {
+struct MemsetRange {
+ // Start/End - A semi range that describes the span that this range covers.
+ // The range is closed at the start and open at the end: [Start, End).
+ int64_t Start, End;
+
+ /// StartPtr - The getelementptr instruction that points to the start of the
+ /// range.
+ Value *StartPtr;
+
+ /// Alignment - The known alignment of the first store.
+ unsigned Alignment;
+
+ /// TheStores - The actual stores that make up this range.
+ SmallVector<Instruction*, 16> TheStores;
+
+ bool isProfitableToUseMemset(const TargetData &TD) const;
+
+};
+} // end anon namespace
+
+bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
+ // If we found more than 8 stores to merge or 64 bytes, use memset.
+ if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // If there is nothing to merge, don't do anything.
+ if (TheStores.size() < 2) return false;
+
+ // If any of the stores are a memset, then it is always good to extend the
+ // memset.
+ for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+ if (!isa<StoreInst>(TheStores[i]))
+ return true;
+
+ // Assume that the code generator is capable of merging pairs of stores
+ // together if it wants to.
+ if (TheStores.size() == 2) return false;
+
+ // If we have fewer than 8 stores, it can still be worthwhile to do this.
+ // For example, merging 4 i8 stores into an i32 store is useful almost always.
+ // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
+ // memset will be split into 2 32-bit stores anyway) and doing so can
+ // pessimize the llvm optimizer.
+ //
+ // Since we don't have perfect knowledge here, make some assumptions: assume
+ // the maximum GPR width is the same size as the pointer size and assume that
+ // this width can be stored. If so, check to see whether we will end up
+ // actually reducing the number of stores used.
+ unsigned Bytes = unsigned(End-Start);
+ unsigned NumPointerStores = Bytes/TD.getPointerSize();
+
+ // Assume the remaining bytes if any are done a byte at a time.
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+
+ // If we will reduce the # stores (according to this heuristic), do the
+ // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
+ // etc.
+ return TheStores.size() > NumPointerStores+NumByteStores;
+}
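+
+// Worked example for the heuristic above, assuming a 4-byte pointer size: for
+// a 12-byte range, NumPointerStores = 3 and NumByteStores = 0, so merging four
+// smaller stores is profitable (4 > 3 + 0), while three i32 stores covering
+// the same bytes are left for the code generator (3 > 3 fails).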
+
+
+namespace {
+class MemsetRanges {
+ /// Ranges - A sorted list of the memset ranges. We use std::list here
+ /// because each element is relatively large and expensive to copy.
+ std::list<MemsetRange> Ranges;
+ typedef std::list<MemsetRange>::iterator range_iterator;
+ const TargetData &TD;
+public:
+ MemsetRanges(const TargetData &td) : TD(td) {}
+
+ typedef std::list<MemsetRange>::const_iterator const_iterator;
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
+ bool empty() const { return Ranges.empty(); }
+
+ void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ addStore(OffsetFromFirst, SI);
+ else
+ addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+ }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+ int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ addRange(OffsetFromFirst, StoreSize,
+ SI->getPointerOperand(), SI->getAlignment(), SI);
+ }
+
+ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+ int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ }
+
+ void addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst);
+
+};
+
+} // end anon namespace
+
+
+/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list. We keep the ranges sorted for
+/// simplicity here. This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst) {
+ int64_t End = Start+Size;
+ range_iterator I = Ranges.begin(), E = Ranges.end();
+
+ while (I != E && Start > I->End)
+ ++I;
+
+ // We now know that I == E, in which case we didn't find anything to merge
+ // with, or that Start <= I->End. If End < I->Start or I == E, then we need
+ // to insert a new range. Handle this now.
+ if (I == E || End < I->Start) {
+ MemsetRange &R = *Ranges.insert(I, MemsetRange());
+ R.Start = Start;
+ R.End = End;
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
+ R.TheStores.push_back(Inst);
+ return;
+ }
+
+ // This store overlaps with I, add it.
+ I->TheStores.push_back(Inst);
+
+ // At this point, we may have an interval that completely contains our store.
+ // If so, just add it to the interval and return.
+ if (I->Start <= Start && I->End >= End)
+ return;
+
+ // Now we know that Start <= I->End and End >= I->Start, so the new range
+ // overlaps I but is not entirely contained within it.
+
+ // See if the new store extends the start of the range.  In this case, it
+ // couldn't possibly cause it to join the prior range, because otherwise we
+ // would have stopped on *it*.
+ if (Start < I->Start) {
+ I->Start = Start;
+ I->StartPtr = Ptr;
+ I->Alignment = Alignment;
+ }
+
+ // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
+ // is in or right at the end of I), and that End >= I->Start. Extend I out to
+ // End.
+ if (End > I->End) {
+ I->End = End;
+ range_iterator NextI = I;
+ while (++NextI != E && End >= NextI->Start) {
+ // Merge the range in.
+ I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+ if (NextI->End > I->End)
+ I->End = NextI->End;
+ Ranges.erase(NextI);
+ NextI = I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class MemCpyOpt : public FunctionPass {
+ MemoryDependenceAnalysis *MD;
+ TargetLibraryInfo *TLI;
+ const TargetData *TD;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MemCpyOpt() : FunctionPass(ID) {
+ initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
+ TLI = 0;
+ TD = 0;
+ }
+
+ bool runOnFunction(Function &F);
+
+ private:
+ // This transformation requires dominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+
+ // Helper functions
+ bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
+ bool processMemCpy(MemCpyInst *M);
+ bool processMemMove(MemMoveInst *M);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
+ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+ Value *ByteVal);
+
+ bool iterateOnFunction(Function &F);
+ };
+
+ char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
+/// some other patterns to fold away. In particular, this looks for stores to
+/// neighboring locations of memory. If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr, Value *ByteVal) {
+ if (TD == 0) return 0;
+
+ // Okay, so we now have a single store with a splatable value.  Scan to find
+ // all subsequent stores of the same value at offsets from the same pointer.
+ // Join these together into ranges, so we can decide whether contiguous blocks
+ // are stored.
+ MemsetRanges Ranges(*TD);
+
+ BasicBlock::iterator BI = StartInst;
+ for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
+ // A[1] = 2; strlen(A); A[2] = 2; -> memset(A, ...); strlen(A).
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
+
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
+
+ // Check to see if this store writes the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+ Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+ !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ break;
+
+ Ranges.addMemSet(Offset, MSI);
+ }
+ }
+
+ // If we have no ranges, then we just had a single store with nothing that
+ // could be merged in. This is a very common case of course.
+ if (Ranges.empty())
+ return 0;
+
+ // If we had at least one store that could be merged in, add the starting
+ // store as well.  As a small compile-time optimization, we avoid doing this
+ // unless there is at least something interesting to merge with.
+ Ranges.addInst(0, StartInst);
+
+ // If we create any memsets, we put them right before the first instruction
+ // that isn't part of the memset block.  This ensures that the memset is
+ // dominated by any addressing instruction needed by the start of the block.
+ IRBuilder<> Builder(BI);
+
+ // Now that we have full information about ranges, loop over the ranges and
+ // emit memset's for anything big enough to be worthwhile.
+ Instruction *AMemSet = 0;
+ for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+ I != E; ++I) {
+ const MemsetRange &Range = *I;
+
+ if (Range.TheStores.size() == 1) continue;
+
+ // If it is profitable to lower this range to memset, do so now.
+ if (!Range.isProfitableToUseMemset(*TD))
+ continue;
+
+ // Otherwise, we do want to transform this! Create a new memset.
+ // Get the starting pointer of the block.
+ StartPtr = Range.StartPtr;
+
+ // Determine alignment
+ unsigned Alignment = Range.Alignment;
+ if (Alignment == 0) {
+ const Type *EltType =
+ cast<PointerType>(StartPtr->getType())->getElementType();
+ Alignment = TD->getABITypeAlignment(EltType);
+ }
+
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
+ DEBUG(dbgs() << "Replace stores:\n";
+ for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+
+ // Zap all the stores.
+ for (SmallVector<Instruction*, 16>::const_iterator
+ SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI) {
+ MD->removeInstruction(*SI);
+ (*SI)->eraseFromParent();
+ }
+ ++NumMemSetInfer;
+ }
+
+ return AMemSet;
+}
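+
+// Illustrative sketch of the merging above (pointer names hypothetical): a run
+// of neighboring byte stores such as
+//
+//   store i8 0, i8* %p
+//   %p1 = getelementptr i8* %p, i64 1
+//   store i8 0, i8* %p1
+//   %p2 = getelementptr i8* %p, i64 2
+//   store i8 0, i8* %p2
+//   %p3 = getelementptr i8* %p, i64 3
+//   store i8 0, i8* %p3
+//
+// is collected into a single [0, 4) range and, if judged profitable for the
+// target, replaced by one call:
+//
+//   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i32 1, i1 false)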
+
+
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (SI->isVolatile()) return false;
+
+ if (TD == 0) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
+ MemDepResult ldep = MD->getDependency(LI);
+ CallInst *C = 0;
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+
+ if (C) {
+ // Check that nothing touches the dest of the "copy" between
+ // the call and the store.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ C = 0;
+ break;
+ }
+ }
+ }
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+ // Ensure that the value being stored is something that can be memset'd a
+ // byte at a time, like "0" or "-1" of any width, as well as things like
+ // 0xA0A0A0A0 and 0.0.
+ if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+ ByteVal)) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+
+ return false;
+}
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+ // See if there is another memset or store neighboring this memset which
+ // allows us to widen out the memset to do a single larger store.
+ if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+ if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+ MSI->getValue())) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+ return false;
+}
+
+
+/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a call slot optimization by having
+/// the call write its result directly into the destination of the memcpy.
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C) {
+ // The general transformation to keep in mind is
+ //
+ // call @func(..., src, ...)
+ // memcpy(dest, src, ...)
+ //
+ // ->
+ //
+ // memcpy(dest, src, ...)
+ // call @func(..., dest, ...)
+ //
+ // Since moving the memcpy is technically awkward, we additionally check that
+ // src only holds uninitialized values at the moment of the call, meaning that
+ // the memcpy can be discarded rather than moved.
+
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
+ CallSite CS(C);
+
+ // Require that src be an alloca. This simplifies the reasoning considerably.
+ AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ if (!srcAlloca)
+ return false;
+
+ // Check that all of src is copied to dest.
+ if (TD == 0) return false;
+
+ ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+ if (!srcArraySize)
+ return false;
+
+ uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
+
+ if (cpyLen < srcSize)
+ return false;
+
+ // Check that accessing the first srcSize bytes of dest will not cause a
+ // trap. Otherwise the transform is invalid since it might cause a trap
+ // to occur earlier than it otherwise would.
+ if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
+ // The destination is an alloca. Check it is larger than srcSize.
+ ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+ if (!destArraySize)
+ return false;
+
+ uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
+
+ if (destSize < srcSize)
+ return false;
+ } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
+ // If the destination is an sret parameter then only accesses that are
+ // outside of the returned struct type can trap.
+ if (!A->hasStructRetAttr())
+ return false;
+
+ const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
+
+ if (destSize < srcSize)
+ return false;
+ } else {
+ return false;
+ }
+
+ // Check that src is not accessed except via the call and the memcpy. This
+ // guarantees that it holds only undefined values when passed in (so the final
+ // memcpy can be dropped), that it is not read or written between the call and
+ // the memcpy, and that writing beyond the end of it is undefined.
+ SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
+ srcAlloca->use_end());
+ while (!srcUseList.empty()) {
+ User *UI = srcUseList.pop_back_val();
+
+ if (isa<BitCastInst>(UI)) {
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
+ } else if (UI != C && UI != cpy) {
+ return false;
+ }
+ }
+
+ // Since we're changing the parameter to the callsite, we need to make sure
+ // that what would be the new parameter dominates the callsite.
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ if (!DT.dominates(cpyDestInst, C))
+ return false;
+
+ // In addition to knowing that the call does not access src in some
+ // unexpected manner, for example via a global, which we deduce from
+ // the use analysis, we also need to know that it does not sneakily
+ // access dest. We rely on AA to figure this out for us.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
+ return false;
+
+ // All the checks have passed, so do the transformation.
+ bool changedArgument = false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
+ if (cpySrc->getType() != cpyDest->getType())
+ cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+ cpyDest->getName(), C);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() == cpyDest->getType())
+ CS.setArgument(i, cpyDest);
+ else
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ }
+
+ if (!changedArgument)
+ return false;
+
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
+ MD->removeInstruction(C);
+
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
+ ++NumMemCpyInstr;
+
+ return true;
+}
+
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
+/// copy from MDep's input if we can. MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize) {
+ // We can only transform memcpys where the dest of one is the source of the
+ // other.
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
+ return false;
+
+ // If the dep instruction is reading from our current input, then it is a noop
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
+ // memcpy(a <- a)
+ // memcpy(b <- a)
+ if (M->getSource() == MDep->getSource())
+ return false;
+
+ // Second, the length of the memcpys must be the same, or the preceding one
+ // must be larger than the following one.
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ return false;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Verify that the copied-from memory doesn't change in between the two
+ // transfers. For example, in:
+ // memcpy(a <- b)
+ // *b = 42;
+ // memcpy(c <- a)
+ // It would be invalid to transform the second memcpy into memcpy(c <- b).
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ //
+ // NOTE: This is conservative; it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+ false, M, M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ UseMemMove = true;
+
+ // If all checks passed, then we can transform M.
+
+ // Make sure to use the lesser of the alignment of the source and the dest
+ // since we're changing where we're reading from, but don't want to increase
+ // the alignment past what can be read from or written to.
+ // TODO: Is this worth it if we're creating a less aligned memcpy? For
+ // example we could be moving from movaps -> movq on x86.
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+ IRBuilder<> Builder(M);
+ if (UseMemMove)
+ Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+ else
+ Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+
+ // Remove the instruction we're replacing.
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+}
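+
+// Illustrative sketch of the memcpy-memcpy forwarding above (sizes and
+// alignments hypothetical):
+//
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 32, i32 4, i1 false)
+//   ...
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %c, i8* %b, i64 32, i32 4, i1 false)
+//
+// The second copy is rewritten to read directly from %a (or turned into a
+// memmove if %c may alias %a), which exposes the first copy to dead store
+// elimination once %b has no other uses.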
+
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
+
+ // If the source and destination of the memcpy are the same, then zap it.
+ if (M->getSource() == M->getDest()) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return false;
+ }
+
+ // If copying from a constant, try to turn the memcpy into a memset.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ IRBuilder<> Builder(M);
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ M->getAlignment(), false);
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
+ // There are two possible optimizations we can do for memcpy:
+ //   a) the memcpy-memcpy xform, which exposes redundancy for DSE.
+ //   b) the call-memcpy xform for return slot optimization.
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (!DepInfo.isClobber())
+ return false;
+
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ if (!TLI->has(LibFunc::memmove))
+ return false;
+
+ // See if the pointers alias.
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
+ return false;
+
+ DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
+
+ // If not, then we know we can transform this.
+ Module *Mod = M->getParent()->getParent()->getParent();
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys));
+
+ // MemDep may have overly conservative information about this instruction;
+ // just conservatively flush it from the cache.
+ MD->removeInstruction(M);
+
+ ++NumMoveToCpy;
+ return true;
+}
+
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ if (TD == 0) return false;
+
+ // Find out what feeds this byval argument.
+ Value *ByValArg = CS.getArgument(ArgNo);
+ const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ MemDepResult DepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+ true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
+ // a memcpy, see if we can use the memcpy's source as the byval argument
+ // instead of its result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+ // The length of the memcpy must be greater than or equal to the size of the
+ // byval.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ return false;
+
+ // Get the alignment of the byval. If the call doesn't specify the alignment,
+ // then it is some target specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0) return false;
+
+ // If it is greater than the alignment of the memcpy, then we check to see if
+ // we can force the source of the memcpy to the alignment we need.  If we
+ // fail, we bail out.
+ if (MDep->getAlignment() < ByValAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the call into foo(*b).
+ //
+ // NOTE: This is conservative; it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+ false, CS.getInstruction(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+bool MemCpyOpt::iterateOnFunction(Function &F) {
+ bool MadeChange = false;
+
+ // Walk all instructions in the function.
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ // Avoid invalidating the iterator.
+ Instruction *I = BI++;
+
+ bool RepeatInstruction = false;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ MadeChange |= processStore(SI, BI);
+ else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ RepeatInstruction = processMemSet(M, BI);
+ else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+ RepeatInstruction = processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ RepeatInstruction = processMemMove(M);
+ else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ if (BI != BB->begin()) --BI;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+ bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // If we don't have at least memset and memcpy, there is little point in doing
+ // anything here. These are required by a freestanding implementation, so if
+ // even they are disabled, there is no point in trying hard.
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ return false;
+
+ while (1) {
+ if (!iterateOnFunction(F))
+ break;
+ MadeChange = true;
+ }
+
+ MD = 0;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 000000000000..ee132d3be4f5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc. Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MapVector - An associative container with fast insertion-order
+ /// (deterministic) iteration over its elements, plus the special
+ /// blot operation.
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map - Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ /// Vector - Keys and values.
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](KeyT Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector.back().second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool>
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(InsertPair);
+ return std::make_pair(llvm::prior(Vector.end()), true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(KeyT Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// blot - This is similar to erase, but instead of removing the element
+ /// from the vector, it just zeros out the key in the vector. This leaves
+ /// iterators intact, but clients must be prepared for zeroed-out keys when
+ /// iterating.
+ void blot(KeyT Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// ARC Utilities.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// InstructionClass - A simple classification for instructions.
+ enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+ };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+ // Pointers to static or stack storage are not reference-counted pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments are not reference-counted.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types, and not function pointers.
+ const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ return false;
+ // Conservatively assume anything else is a potential use.
+ return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialUse(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ const Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ const Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// GetInstructionClass - Determine what kind of construct V is.
+static InstructionClass GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than bitcast and gep with a pointer operand has a
+ // use of an objc pointer.  Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case 0: break;
+ case Intrinsic::bswap: case Intrinsic::ctpop:
+ case Intrinsic::ctlz: case Intrinsic::cttz:
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ for (Function::const_arg_iterator AI = F->arg_begin(),
+ AE = F->arg_end(); AI != AE; ++AI)
+ if (IsPotentialUse(AI))
+ return IC_User;
+ return IC_None;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialUse(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialUse(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return IC_User;
+}
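+
+// For example, a direct call such as "call i8* @objc_retain(i8* %x)" is
+// classified here via GetFunctionClass (IC_Retain), while an indirect call
+// through a function pointer has no callee Function and is conservatively
+// treated as IC_CallOrUser.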
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+ // objc_retainBlock technically doesn't always return its argument
+ // verbatim, but it doesn't matter for our purposes here.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// argument verbatim, so if it's such a call and the return value has users,
+/// replace them with the argument value.
+static void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
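+
+// For example (illustrative IR), erasing the retain in
+//   %y = call i8* @objc_retain(i8* %x)
+//   store i8* %y, i8** %slot
+// first rewrites the store to use %x directly and then deletes the call; if
+// the retain's result had no users, the argument's now-trivially-dead
+// computation is cleaned up instead.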
+
+/// GetUnderlyingObjCPtr - This is a wrapper around GetUnderlyingObject which
+/// also knows how to look through objc_retain and objc_autorelease calls, which
+/// we know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
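+
+// For example (illustrative IR), given
+//   %c = bitcast %struct.Foo* %obj to i8*
+//   %r = call i8* @objc_retain(i8* %c)
+// StripPointerCastsAndObjCCalls(%r) returns %obj, since both the forwarding
+// call and the no-op cast are looked through.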
+
+/// GetObjCArg - Assuming the given instruction is one of the special calls such
+/// as objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+/// IsObjCIdentifiedObject - This is similar to AliasAnalysis's
+/// isObjCIdentifiedObject, except that it uses special knowledge of ObjC
+/// conventions, such as the runtime-internal global variables recognized below.
+static bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// FindSingleUseIdentifiedObject - This is similar to
+/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
+/// with multiple uses.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) {
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object with multiple uses, but all of them
+ // are trivial, we can still consider this to be a single-use value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on.
+static bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer");
+}
+
+//===----------------------------------------------------------------------===//
+// ARC AliasAnalysis.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+
+namespace {
+ /// ObjCARCAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses knowledge of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+ /// tricks, such as knowing that ivars in the non-fragile ABI are non-aliasing
+ /// even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+ const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ AliasResult Result =
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+ Location(SB, LocB.Size, LocB.TBAATag));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
+ if (UA != SA || UB != SB) {
+ Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return an offsetted pointer value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S);
+ if (U != S)
+ return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // We have nothing to do. Just chain to the next AliasAnalysis.
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case IC_NoopCast:
+ return DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ switch (GetBasicInstructionClass(CS.getInstruction())) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_RetainBlock:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_NoopCast:
+ case IC_AutoreleasepoolPush:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ return NoModRef;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // TODO: Theoretically we could check for dependencies between objc_* calls
+ // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
+//===----------------------------------------------------------------------===//
+// ARC expansion.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Scalar.h"
+
+namespace {
+ /// ObjCARCExpand - Early ARC transformations.
+ class ObjCARCExpand : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ bool Changed = false;
+
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These calls return their argument verbatim, as a low-level
+ // optimization. However, this makes high-level optimizations
+ // harder. Undo any uses of this optimization that the front-end
+ // emitted here. We'll redo them in a later pass.
+ Changed = true;
+ Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+ /// uses many of the same techniques, except it uses special ObjC-specific
+ /// reasoning about pointer relationships.
+ class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair<const Value *, const Value *> ValuePairTy;
+ typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ // Do not implement.
+ void operator=(const ProvenanceAnalysis &);
+ ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+ public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+ };
+}
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+ if (A->getCondition() == SB->getCondition()) {
+ if (related(A->getTrueValue(), SB->getTrueValue()))
+ return true;
+ if (related(A->getFalseValue(), SB->getFalseValue()))
+ return true;
+ return false;
+ }
+
+ // Check both arms of the Select node individually.
+ if (related(A->getTrueValue(), B))
+ return true;
+ if (related(A->getFalseValue(), B))
+ return true;
+
+ // The arms both checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+ // If the values are PHIs in the same block, we can do a more precise as well
+ // as efficient check: just check for relations between the values on
+ // corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the arms checked out.
+ return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
+static bool isStoredObjCPointer(const Value *P) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa<StoreInst>(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointer is stored through.
+ continue;
+ }
+ if (isa<CallInst>(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa<PtrToIntInst>(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ if (BIsIdentified) {
+ // If both pointers have provenance, they can be directly compared.
+ if (A != B)
+ return false;
+ } else {
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
+ }
+ } else {
+ if (BIsIdentified && isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast<PHINode>(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast<PHINode>(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast<SelectInst>(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast<SelectInst>(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
+ if (A > B) std::swap(A, B);
+ std::pair<CachedResultsTy::iterator, bool> Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
+
+namespace {
+ // Sequence - A sequence of states that a pointer may go through in which an
+ // objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x)
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
+ S_Use, ///< any use of x
+ S_Stop, ///< like S_Release, but code motion is stopped
+ S_Release, ///< objc_release(x)
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release
+ };
+}
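+
+// In top-down order, a pointer typically walks this sequence: an objc_retain
+// puts it in S_Retain, a call that could decrement its reference count moves
+// it to S_CanRelease, a subsequent use moves it to S_Use, and a matching
+// objc_release (S_Release, or S_MovableRelease when tagged with
+// clang.imprecise_release) completes the retain-decrement-use-release
+// sequence described below.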
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ // Note that we can't merge S_CanRelease and S_Use.
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
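+
+// For example, merging S_Retain with S_CanRelease in the top-down direction
+// yields S_CanRelease (the state further along in the sequence), while merging
+// S_Release with S_MovableRelease bottom-up yields the more conservative
+// S_Release; any merge involving S_None collapses to S_None.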
+
+namespace {
+ /// RRInfo - Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// KnownIncremented - After an objc_retain, the reference count of the
+ /// referenced object is known to be positive. Similarly, before an
+ /// objc_release, the reference count of the referenced object is known to
+ /// be positive. If there are retain-release pairs in code regions where the
+ /// retain count is known to be positive, they can be eliminated, regardless
+ /// of any side effects between them.
+ bool KnownIncremented;
+
+ /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+ /// opposed to objc_retain calls).
+ bool IsRetainBlock;
+
+ /// IsTailCallRelease - True if the objc_release calls are all marked
+ /// with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// ReleaseMetadata - If the Calls are objc_release calls and they all have
+ /// a clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// Calls - For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// ReverseInsertPts - The set of optimal insert positions for
+ /// moving calls in the opposite sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownIncremented = false;
+ IsRetainBlock = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// PtrState - This class summarizes several per-pointer runtime properties
+ /// which are propagated through the flow graph.
+ class PtrState {
+ /// RefCount - The known minimum number of reference count increments.
+ unsigned RefCount;
+
+ /// Seq - The current position in the sequence.
+ Sequence Seq;
+
+ public:
+ /// RRI - Unidirectional information about the current sequence.
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : RefCount(0), Seq(S_None) {}
+
+ void IncrementRefCount() {
+ if (RefCount != UINT_MAX) ++RefCount;
+ }
+
+ void DecrementRefCount() {
+ if (RefCount != 0) --RefCount;
+ }
+
+ void ClearRefCount() {
+ RefCount = 0;
+ }
+
+ bool IsKnownIncremented() const {
+ return RefCount > 0;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ void SetSeqToRelease(MDNode *M) {
+ if (Seq == S_None || Seq == S_Use) {
+ Seq = M ? S_MovableRelease : S_Release;
+ RRI.ReleaseMetadata = M;
+ } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
+ Seq = S_Release;
+ RRI.ReleaseMetadata = 0;
+ }
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ Seq = S_None;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ RefCount = std::min(RefCount, Other.RefCount);
+
+ // We can't merge a plain objc_retain with an objc_retainBlock.
+ if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+ Seq = S_None;
+
+ if (Seq == S_None) {
+ RRI.clear();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+ RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
+ Other.RRI.ReverseInsertPts.end());
+ }
+}
+
+namespace {
+ /// BBState - Per-BasicBlock state.
+ class BBState {
+ /// TopDownPathCount - The number of unique control paths from the entry
+ /// which can reach this block.
+ unsigned TopDownPathCount;
+
+ /// BottomUpPathCount - The number of unique control paths to exits
+ /// from this block.
+ unsigned BottomUpPathCount;
+
+ /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector<const Value *, PtrState> MapTy;
+
+ /// PerPtrTopDown - The top-down traversal uses this to record information
+ /// known about a pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// PerPtrBottomUp - The bottom-up traversal uses this to record information
+ /// known about a pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ public:
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+ typedef MapTy::iterator ptr_iterator;
+ typedef MapTy::const_iterator ptr_const_iterator;
+
+ ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ ptr_const_iterator top_down_ptr_begin() const {
+ return PerPtrTopDown.begin();
+ }
+ ptr_const_iterator top_down_ptr_end() const {
+ return PerPtrTopDown.end();
+ }
+
+ ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+ ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ ptr_const_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ ptr_const_iterator bottom_up_ptr_end() const {
+ return PerPtrBottomUp.end();
+ }
+
+ /// SetAsEntry - Mark this block as being an entry block, which has one
+ /// path from the entry by definition.
+ void SetAsEntry() { TopDownPathCount = 1; }
+
+ /// SetAsExit - Mark this block as being an exit block, which has one
+ /// path to an exit by definition.
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) {
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// GetAllPathCount - Return the number of possible unique paths from an
+ /// entry to an exit which pass through this block. This is only valid
+ /// after both the top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ return TopDownPathCount * BottomUpPathCount;
+ }
+ };
+}
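+
+// For example, in a simple if/else diamond each arm block has
+// TopDownPathCount == 1 and BottomUpPathCount == 1, while the join block has
+// TopDownPathCount == 2, so GetAllPathCount() for the join block is 2.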
+
+void BBState::InitFromPred(const BBState &Other) {
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// MergePred - The top-down traversal uses this to merge information about
+/// predecessors to form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// MergeSucc - The bottom-up traversal uses this to merge information about
+/// successors to form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+ /// ObjCARCOpt - The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// RetainFunc, ReleaseFunc, etc. - Declarations for objc_retain,
+ /// objc_retainBlock, objc_retainAutoreleasedReturnValue, and objc_release.
+ Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+ /// RetainRVCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+ *RetainCallee, *AutoreleaseCallee;
+
+ /// UsedInThisFunction - Flags which determine whether each of the
+ /// interesting runtime functions is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+ /// metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+ return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attributes);
+ }
+ return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+ if (!AutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ AutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+ Attributes);
+ }
+ return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+ if (!ReleaseCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ ReleaseCallee =
+ M->getOrInsertFunction(
+ "objc_release",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+ if (!RetainCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainCallee =
+ M->getOrInsertFunction(
+ "objc_retain",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+ if (!AutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ AutoreleaseCallee =
+ M->getOrInsertFunction(
+ "objc_autorelease",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return AutoreleaseCallee;
+}
+
+/// CanAlterRefCount - Test whether the given instruction can result in a
+/// reference count modification (positive or negative) for the pointer's
+/// object.
+static bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class) {
+ switch (Class) {
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_User:
+ // These operations never directly modify a reference count.
+ return false;
+ default: break;
+ }
+
+ ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ assert(CS && "Only calls can alter reference counts!");
+
+ // See if AliasAnalysis can help us with the call.
+ AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return false;
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ const Value *Op = *I;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ }
+
+ // Assume the worst.
+ return true;
+}
+
+/// CanUse - Test whether the given instruction can "use" the given pointer's
+/// object in a way that requires the reference count to be positive.
+static bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ InstructionClass Class) {
+ // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+ if (Class == IC_Call)
+ return false;
+
+ // Consider various instructions which may have pointer arguments which are
+ // not "uses".
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
+ // Comparing a pointer with null, or any other constant, isn't really a use,
+ // because we don't care what the pointer points to, or about the values
+ // of any other dynamic reference-counted pointers.
+ if (!IsPotentialUse(ICI->getOperand(1)))
+ return false;
+ } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ // For calls, just check the arguments (and not the callee operand).
+ for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
+ OE = CS.arg_end(); OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Special-case stores, because we don't care about the stored value, just
+ // the store address.
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ // If we can't tell what the underlying object was, assume there is a
+ // dependence.
+ return IsPotentialUse(Op) && PA.related(Op, Ptr);
+ }
+
+ // Check each operand for a match.
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
+ OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+}
+
+/// CanInterruptRV - Test whether the given instruction can autorelease
+/// any pointer or cause an autoreleasepool pop.
+static bool
+CanInterruptRV(InstructionClass Class) {
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_CallOrUser:
+ case IC_Call:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+namespace {
+ /// DependenceKind - There are several kinds of dependence-like concepts in
+ /// use here.
+ enum DependenceKind {
+ NeedsPositiveRetainCount,
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+ };
+}
+
+/// Depends - Test if there can be dependencies on Inst through Arg. This
+/// function only tests dependencies relevant for removing pairs of calls.
+static bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+ ProvenanceAnalysis &PA) {
+ // If we've reached the definition of Arg, stop.
+ if (Inst == Arg)
+ return true;
+
+ switch (Flavor) {
+ case NeedsPositiveRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanUse(Inst, Arg, PA, Class);
+ }
+ }
+
+ case CanChangeRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ // Conservatively assume this can decrement any count.
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+ break;
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ break;
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+ return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInstructions.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInstructions.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
+
+static bool isNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
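+
+// For example (illustrative IR), "bitcast %struct.Foo* %x to i8*" and
+// "getelementptr inbounds i8* %x, i64 0" are both no-op instructions here:
+// neither changes the pointer value, they only exist to satisfy the type
+// system.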
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ CallSite CS(GetObjCArg(Retain));
+ Instruction *Call = CS.getInstruction();
+ if (!Call) return;
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain.
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it into an objc_retainAutoreleasedReturnValue call.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
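+
+// For example (illustrative IR, using a hypothetical callee @returnsRetainable):
+//   %call = call i8* @returnsRetainable()
+//   %0 = call i8* @objc_retain(i8* %call)
+// becomes
+//   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call)
+// since the retain's operand is produced by the immediately preceding call.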
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value. Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call.
+ Value *Arg = GetObjCArg(RetainRV);
+ CallSite CS(Arg);
+ if (Instruction *Call = CS.getInstruction())
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && isNoopInstruction(I));
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true;
+ }
+ }
+
+ // Turn it into a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+ return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ }
+
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(AutoreleaseRV)->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ // Reset all the flags in preparation for recomputing them.
+ UsedInThisFunction = 0;
+
+ // Visit all objc_* calls in F.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ switch (Class) {
+ default: break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+ // There are gray areas here, as the ability to cast reference-counted
+ // pointers to raw void* and back allows code to break ARC assumptions,
+ // however these are currently considered to be unimportant.
+ case IC_NoopCast:
+ Changed = true;
+ ++NumNoops;
+ EraseInstruction(Inst);
+ continue;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
+ case IC_StoreWeak:
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained:
+ case IC_InitWeak:
+ case IC_DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0))) {
+ const Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_CopyWeak:
+ case IC_MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0)) ||
+ isNullOrUndef(CI->getArgOperand(1))) {
+ const Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_Retain:
+ OptimizeRetainCall(F, Inst);
+ break;
+ case IC_RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ continue;
+ break;
+ case IC_AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst);
+ break;
+ }
+
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
+ Changed = true;
+ ++NumAutoreleases;
+
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
+ CallInst *NewCall =
+ CallInst::Create(getReleaseCallee(F.getParent()),
+ Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(ImpreciseReleaseMDKind,
+ MDNode::get(C, ArrayRef<Value *>()));
+ EraseInstruction(Call);
+ Inst = NewCall;
+ Class = IC_Release;
+ }
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class)) {
+ Changed = true;
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
+
+ if (!IsNoopOnNull(Class)) {
+ UsedInThisFunction |= 1 << Class;
+ continue;
+ }
+
+ const Value *Arg = GetObjCArg(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (isNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ EraseInstruction(Inst);
+ continue;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << Class;
+
+ // If Arg is a PHI, and one or more incoming values to the
+ // PHI are null, and the call is control-equivalent to the PHI, and there
+ // are no relevant side effects between the PHI and the call, the call
+ // could be pushed up to just those paths with non-null incoming values.
+ // For now, don't bother splitting critical edges for this.
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
+
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN) continue;
+
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (isNullOrUndef(Incoming))
+ HasNull = true;
+ else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+ .getNumSuccessors() != 1) {
+ HasCriticalEdges = true;
+ break;
+ }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (!HasCriticalEdges && HasNull) {
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() == 1 &&
+ *DependingInstructions.begin() == PN) {
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ const Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (!isNullOrUndef(Incoming)) {
+ CallInst *Clone = cast<CallInst>(CInst->clone());
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+ Worklist.push_back(std::make_pair(Clone, Incoming));
+ }
+ }
+ // Erase the original call.
+ EraseInstruction(CInst);
+ continue;
+ }
+ }
+ } while (!Worklist.empty());
+ }
+}
+
+/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
+/// control flow, or other CFG structures where moving code across the edge
+/// would result in it being executed more often than it originally was.
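+///
+/// A hedged sketch of the kind of hazard this guards against (hypothetical
+/// CFG, for exposition only):
+///
+///   latch:                      ; top-down state for %p here is S_Use
+///     ...
+///     br i1 %c, label %header, label %exit
+///
+/// If the bottom-up state for %p in %header (reached via the backedge) is
+/// earlier in the sequence, e.g. S_None, moving the paired release across
+/// that edge could make it execute once per iteration instead of once, so
+/// the sequence progress for %p is cleared.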
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const {
+ // If any top-down local-use or possible-dec has a succ which is earlier in
+ // the sequence, forget it.
+ for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ case S_CanRelease:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block.
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ succ_const_iterator SI(TI), SE(TI, false);
+ if (SI == SE)
+ MyStates.SetAsExit();
+ else
+ do {
+ const BasicBlock *Succ = *SI++;
+ if (Succ == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromSucc(I->second);
+ while (SI != SE) {
+ Succ = *SI++;
+ if (Succ != BB) {
+ I = BBStates.find(Succ);
+ if (I != BBStates.end())
+ MyStates.MergeSucc(I->second);
+ }
+ }
+ break;
+ } while (SI != SE);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // Check for two releases in a row on the same pointer. If so, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+ S.RRI.clear();
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible retains and releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a retain (we're going bottom-up here).
+ S.DecrementRefCount();
+
+ // Check for a release.
+ if (!IsRetain(Class) && Class != IC_RetainBlock)
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each predecessor to compute the initial state
+ // for the current block.
+ const_pred_iterator PI(BB), PE(BB, false);
+ if (PI == PE)
+ MyStates.SetAsEntry();
+ else
+ do {
+ const BasicBlock *Pred = *PI++;
+ if (Pred == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromPred(I->second);
+ while (PI != PE) {
+ Pred = *PI++;
+ if (Pred != BB) {
+ I = BBStates.find(Pred);
+ if (I != BBStates.end())
+ MyStates.MergePred(I->second);
+ }
+ }
+ break;
+ } while (PI != PE);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // Check for two retains in a row on the same pointer. If so, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ S.SetSeq(S_Retain);
+ S.RRI.clear();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (!IsRetain(Class) && Class != IC_RetainBlock &&
+ CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a release (we're going top-down here).
+ S.DecrementRefCount();
+
+ // Check how a possible release affects the current sequence.
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Use:
+ case S_Retain:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+ }
+
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+ // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+ // magically know that loops will be well behaved, i.e. they won't repeatedly
+ // call retain on a single pointer without doing a release.
+ bool BottomUpNestingDetected = false;
+ SmallVector<BasicBlock *, 8> PostOrder;
+ for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+ BasicBlock *BB = *I;
+ PostOrder.push_back(BB);
+
+ BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+ }
+
+ // Iterate through the post-order in reverse order, achieving a
+ // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
+ // class here because it works by computing its own full postorder iteration,
+ // recording the sequence, and playing it back in reverse. Since we're already
+ // doing a full iteration above, we can just record the sequence manually and
+ // avoid the cost of having ReversePostOrderTraversal compute it.
+ bool TopDownNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
+ RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
+ TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+
+ return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ const Type *ArgTy = Arg->getType();
+ const Type *ParamTy =
+ (RetainRVFunc ? RetainRVFunc :
+ RetainFunc ? RetainFunc :
+ RetainBlockFunc)->arg_begin()->getType();
+
+ // Insert the new retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = ReleasesToMove.ReverseInsertPts.begin(),
+ PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call =
+ CallInst::Create(RetainsToMove.IsRetainBlock ?
+ RetainBlockFunc : RetainFunc,
+ MyArg, "", InsertPt);
+ Call->setDoesNotThrow();
+ if (!RetainsToMove.IsRetainBlock)
+ Call->setTailCall();
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = RetainsToMove.ReverseInsertPts.begin(),
+ PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *LastUse = *PI;
+ Instruction *InsertPts[] = { 0, 0, 0 };
+ if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
+ // We can't insert code immediately after an invoke instruction, so
+ // insert code at the beginning of both successor blocks instead.
+ // The invoke's return value isn't available in the unwind block,
+ // but our releases will never depend on it, because they must be
+ // paired with retains from before the invoke.
+ InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
+ InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ } else {
+ // Insert code immediately after the last use.
+ InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
+ }
+
+ for (Instruction **I = InsertPts; *I; ++I) {
+ Instruction *InsertPt = *I;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
+ }
+ }
+
+ // Delete the original retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = RetainsToMove.Calls.begin(),
+ AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRetain = *AI;
+ Retains.blot(OrigRetain);
+ DeadInsts.push_back(OrigRetain);
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = ReleasesToMove.Calls.begin(),
+ AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRelease = *AI;
+ Releases.erase(OrigRelease);
+ DeadInsts.push_back(OrigRelease);
+ }
+}
+
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool AnyPairsCompletelyEliminated = false;
+ RRInfo RetainsToMove;
+ RRInfo ReleasesToMove;
+ SmallVector<Instruction *, 4> NewRetains;
+ SmallVector<Instruction *, 4> NewReleases;
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end(); I != E; ) {
+ Value *V = (I++)->first;
+ if (!V) continue; // blotted
+
+ Instruction *Retain = cast<Instruction>(V);
+ Value *Arg = GetObjCArg(Retain);
+
+ // If the object being released is in static or stack storage, we know it's
+ // not being managed by ObjC reference counting, so we can delete pairs
+ // regardless of what possible decrements or uses lie between them.
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+ // If a pair happens in a region where it is known that the reference count
+ // is already incremented, we can similarly ignore possible decrements.
+ bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+
+ // Connect the dots between the bottom-up-collected RetainsToMove and
+ // top-down-collected ReleasesToMove to form sets of related calls.
+ // This is an iterative process so that we connect multiple releases
+ // to multiple retains if needed.
+ unsigned OldDelta = 0;
+ unsigned NewDelta = 0;
+ unsigned OldCount = 0;
+ unsigned NewCount = 0;
+ bool FirstRelease = true;
+ bool FirstRetain = true;
+ NewRetains.push_back(Retain);
+ for (;;) {
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+ Instruction *NewRetain = *NI;
+ MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ assert(It != Retains.end());
+ const RRInfo &NewRetainRRI = It->second;
+ KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewRetainRRI.Calls.begin(),
+ LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewRetainRelease = *LI;
+ DenseMap<Value *, RRInfo>::const_iterator Jt =
+ Releases.find(NewRetainRelease);
+ if (Jt == Releases.end())
+ goto next_retain;
+ const RRInfo &NewRetainReleaseRRI = Jt->second;
+ assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+ if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ OldDelta -=
+ BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // Merge the ReleaseMetadata and IsTailCallRelease values.
+ if (FirstRelease) {
+ ReleasesToMove.ReleaseMetadata =
+ NewRetainReleaseRRI.ReleaseMetadata;
+ ReleasesToMove.IsTailCallRelease =
+ NewRetainReleaseRRI.IsTailCallRelease;
+ FirstRelease = false;
+ } else {
+ if (ReleasesToMove.ReleaseMetadata !=
+ NewRetainReleaseRRI.ReleaseMetadata)
+ ReleasesToMove.ReleaseMetadata = 0;
+ if (ReleasesToMove.IsTailCallRelease !=
+ NewRetainReleaseRRI.IsTailCallRelease)
+ ReleasesToMove.IsTailCallRelease = false;
+ }
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+ RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+ NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ }
+ NewReleases.push_back(NewRetainRelease);
+ }
+ }
+ }
+ NewRetains.clear();
+ if (NewReleases.empty()) break;
+
+ // Back the other way.
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+ Instruction *NewRelease = *NI;
+ DenseMap<Value *, RRInfo>::const_iterator It =
+ Releases.find(NewRelease);
+ assert(It != Releases.end());
+ const RRInfo &NewReleaseRRI = It->second;
+ KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewReleaseRRI.Calls.begin(),
+ LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewReleaseRetain = *LI;
+ MapVector<Value *, RRInfo>::const_iterator Jt =
+ Retains.find(NewReleaseRetain);
+ if (Jt == Retains.end())
+ goto next_retain;
+ const RRInfo &NewReleaseRetainRRI = Jt->second;
+ assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ unsigned PathCount =
+ BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ OldDelta += PathCount;
+ OldCount += PathCount;
+
+ // Merge the IsRetainBlock values.
+ if (FirstRetain) {
+ RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+ FirstRetain = false;
+ } else if (RetainsToMove.IsRetainBlock !=
+ NewReleaseRetainRRI.IsRetainBlock)
+ // It's not possible to merge the sequences if one uses
+ // objc_retain and the other uses objc_retainBlock.
+ goto next_retain;
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+ RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+ PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ NewDelta += PathCount;
+ NewCount += PathCount;
+ }
+ }
+ NewRetains.push_back(NewReleaseRetain);
+ }
+ }
+ }
+ NewReleases.clear();
+ if (NewRetains.empty()) break;
+ }
+
+ // If the pointer is known incremented, we can safely delete the pair
+ // regardless of what's between them.
+ if (KnownIncrementedTD || KnownIncrementedBU) {
+ RetainsToMove.ReverseInsertPts.clear();
+ ReleasesToMove.ReverseInsertPts.clear();
+ NewCount = 0;
+ }
+
+ // Determine whether the original call points are balanced in the retain and
+ // release calls through the program. If not, conservatively don't touch
+ // them.
+ // TODO: It's theoretically possible to do code motion in this case, as
+ // long as the existing imbalances are maintained.
+ if (OldDelta != 0)
+ goto next_retain;
+
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ goto next_retain;
+
+ // Ok, everything checks out and we're all set. Let's move some code!
+ Changed = true;
+ AnyPairsCompletelyEliminated = NewCount == 0;
+ NumRRs += OldCount - NewCount;
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+
+ next_retain:
+ NewReleases.clear();
+ NewRetains.clear();
+ RetainsToMove.clear();
+ ReleasesToMove.clear();
+ }
+
+ // Now that we're done moving everything, we can delete the newly dead
+ // instructions, as we no longer need them as insert points.
+ while (!DeadInsts.empty())
+ EraseInstruction(DeadInsts.pop_back_val());
+
+ return AnyPairsCompletelyEliminated;
+}
+
+/// OptimizeWeakCalls - Weak pointer optimizations.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ // First, do memdep-style RLE (redundant load elimination) and S2L
+ // (store-to-load forwarding) optimizations. We can't use memdep itself
+ // because it uses AliasAnalysis and we need to do provenance queries
+ // instead.
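+ // As an illustrative example (hypothetical IR):
+ //
+ //   %x = call i8* @objc_loadWeak(i8** %p)
+ //   ; ... nothing here can modify the weak slot ...
+ //   %y = call i8* @objc_loadWeak(i8** %p)
+ //
+ // The second load is redundant and can reuse %x. Similarly, a load that
+ // follows a call to objc_storeWeak(i8** %p, i8* %v) with no clobber in
+ // between can be replaced by %v.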
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ continue;
+
+ // Delete objc_loadWeak calls with no users.
+ if (Class == IC_LoadWeak && Inst->use_empty()) {
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // TODO: For now, just look for an earlier available version of this value
+ // within the same block. Theoretically, we could do memdep-style non-local
+ // analysis too, but that would want caching. A better approach would be to
+ // use the technique that EarlyCSE uses.
+ inst_iterator Current = llvm::prior(I);
+ BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ for (BasicBlock::iterator B = CurrentBB->begin(),
+ J = Current.getInstructionIterator();
+ J != B; --J) {
+ Instruction *EarlierInst = &*llvm::prior(J);
+ InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ switch (EarlierClass) {
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained: {
+ // If this is loading from the same pointer, replace this load's value
+ // with that one.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TODO: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
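+ // As an illustrative case (hypothetical IR), a weak slot whose only users
+ // are the weak entry points can be removed outright:
+ //
+ //   %w = alloca i8*
+ //   call i8* @objc_initWeak(i8** %w, i8* %a)
+ //   call void @objc_destroyWeak(i8** %w)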
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ if (!UserInst->use_empty())
+ UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+}
+
+/// OptimizeSequences - Identify program paths which execute sequences of
+/// retains and releases which can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+ /// Releases, Retains - These are used to store the results of the main flow
+ /// analysis. These use Value* as the key instead of Instruction* so that the
+ /// map stays valid when we get around to rewriting code and calls get
+ /// replaced by arguments.
+ DenseMap<Value *, RRInfo> Releases;
+ MapVector<Value *, RRInfo> Retains;
+
+ /// BBStates - This is used during the traversal of the function to track
+ /// the states for each identified object at each block.
+ DenseMap<const BasicBlock *, BBState> BBStates;
+
+ // Analyze the CFG of the function, and all instructions.
+ bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+ // Transform.
+ return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+///
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// convert the autorelease to autoreleaseRV.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+ if (!F.getReturnType()->isPointerTy())
+ return;
+
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = FI;
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ if (!Ret) continue;
+
+ const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ if (!Autorelease)
+ goto next_block;
+ InstructionClass AutoreleaseClass =
+ GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ goto next_block;
+ if (GetObjCArg(Autorelease) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Check that there is nothing that can affect the reference
+ // count between the autorelease and the retain.
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Convert the autorelease to an autoreleaseRV, since it's
+ // returning the value.
+ if (AutoreleaseClass == IC_Autorelease) {
+ Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+ AutoreleaseClass = IC_AutoreleaseRV;
+ }
+
+ // Check that there is nothing that can affect the reference
+ // count between the retain and the call.
+ FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ goto next_block;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ goto next_block;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
+ }
+ }
+ }
+
+ next_block:
+ DependingInstructions.clear();
+ Visited.clear();
+ }
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // Identify the imprecise release metadata kind.
+ ImpreciseReleaseMDKind =
+ M.getContext().getMDKindID("clang.imprecise_release");
+
+ // Identify the declarations for objc_retain and friends.
+ RetainFunc = M.getFunction("objc_retain");
+ RetainBlockFunc = M.getFunction("objc_retainBlock");
+ RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+ ReleaseFunc = M.getFunction("objc_release");
+
+ // Intuitively, objc_retain and others are nocapture; however, in practice
+ // they are not, because they return their argument value. And objc_release
+ // calls finalizers.
+
+ // These are initialized lazily.
+ RetainRVCallee = 0;
+ AutoreleaseRVCallee = 0;
+ ReleaseCallee = 0;
+ RetainCallee = 0;
+ AutoreleaseCallee = 0;
+
+ return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // This pass performs several distinct transformations. As a compile-time aid
+ // when compiling code that isn't ObjC, skip these if the relevant ObjC
+ // library functions aren't declared.
+
+ // Preliminary optimizations. This also computes UsedInThisFunction.
+ OptimizeIndividualCalls(F);
+
+ // Optimizations for weak pointers.
+ if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+ (1 << IC_LoadWeakRetained) |
+ (1 << IC_StoreWeak) |
+ (1 << IC_InitWeak) |
+ (1 << IC_CopyWeak) |
+ (1 << IC_MoveWeak) |
+ (1 << IC_DestroyWeak)))
+ OptimizeWeakCalls(F);
+
+ // Optimizations for retain+release pairs.
+ if (UsedInThisFunction & ((1 << IC_Retain) |
+ (1 << IC_RetainRV) |
+ (1 << IC_RetainBlock)))
+ if (UsedInThisFunction & (1 << IC_Release))
+ // Run OptimizeSequences until it either stops making changes or
+ // no retain+release pair nesting is detected.
+ while (OptimizeSequences(F)) {}
+
+ // Optimizations if objc_autorelease is used.
+ if (UsedInThisFunction &
+ ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+ OptimizeReturns(F);
+
+ return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
+
+namespace {
+ /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
+ /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// StoreStrongCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *StoreStrongCallee,
+ *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
+
+ /// RetainRVMarker - The inline asm string to insert between calls and
+ /// RetainRV calls to make the optimization work on targets which need it.
+ const MDString *RetainRVMarker;
+
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract();
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+ if (!StoreStrongCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ std::vector<Type *> Params;
+ Params.push_back(I8XX);
+ Params.push_back(I8X);
+
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Attributes.addAttr(1, Attribute::NoCapture);
+
+ StoreStrongCallee =
+ M->getOrInsertFunction(
+ "objc_storeStrong",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return StoreStrongCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+ if (!RetainAutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainAutoreleaseCallee =
+ M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
+ }
+ return RetainAutoreleaseCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+ if (!RetainAutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainAutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+ Attributes);
+ }
+ return RetainAutoreleaseRVCallee;
+}
+
+/// ContractAutorelease - Merge an autorelease with a retain into a fused
+/// call.
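+///
+/// As an illustrative sketch (hypothetical IR):
+///
+///   %1 = call i8* @objc_retain(i8* %x)
+///   %2 = call i8* @objc_autorelease(i8* %1)
+///
+/// becomes, when nothing in between can change the reference count:
+///
+///   %1 = call i8* @objc_retainAutorelease(i8* %x)
+///
+/// (or objc_retainAutoreleaseReturnValue for the AutoreleaseRV flavor).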
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited) {
+ const Value *Arg = GetObjCArg(Autorelease);
+
+ // Check that there are no instructions between the retain and the autorelease
+ // (such as an autorelease_pop) which may change the count.
+ CallInst *Retain = 0;
+ if (Class == IC_AutoreleaseRV)
+ FindDependencies(RetainAutoreleaseRVDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+ else
+ FindDependencies(RetainAutoreleaseDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+
+ Visited.clear();
+ if (DependingInstructions.size() != 1) {
+ DependingInstructions.clear();
+ return false;
+ }
+
+ Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ DependingInstructions.clear();
+
+ if (!Retain ||
+ GetBasicInstructionClass(Retain) != IC_Retain ||
+ GetObjCArg(Retain) != Arg)
+ return false;
+
+ Changed = true;
+ ++NumPeeps;
+
+ if (Class == IC_AutoreleaseRV)
+ Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
+ else
+ Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+
+ EraseInstruction(Autorelease);
+ return true;
+}
+
+/// ContractRelease - Attempt to merge an objc_release with a store, load, and
+/// objc_retain to form an objc_storeStrong. This can be a little tricky because
+/// the instructions don't always appear in order, and there may be unrelated
+/// intervening instructions.
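+///
+/// The shape being matched looks roughly like this (illustrative IR; the
+/// instructions need not be adjacent):
+///
+///   %old = load i8** %ptr
+///   %new = call i8* @objc_retain(i8* %x)
+///   store i8* %new, i8** %ptr
+///   call void @objc_release(i8* %old)
+///
+/// which is contracted into a single call:
+///
+///   call void @objc_storeStrong(i8** %ptr, i8* %x)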
+void ObjCARCContract::ContractRelease(Instruction *Release,
+ inst_iterator &Iter) {
+ LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+ if (!Load || Load->isVolatile()) return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB) return;
+
+ // Walk down to find the store.
+ BasicBlock::iterator I = Load, End = BB->end();
+ ++I;
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
+ while (I != End &&
+ (&*I == Release ||
+ IsRetain(GetBasicInstructionClass(I)) ||
+ !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
+ ++I;
+ StoreInst *Store = dyn_cast<StoreInst>(I);
+ if (!Store || Store->isVolatile()) return;
+ if (Store->getPointerOperand() != Loc.Ptr) return;
+
+ Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+ // Walk up to find the retain.
+ I = Store;
+ BasicBlock::iterator Begin = BB->begin();
+ while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+ --I;
+ Instruction *Retain = I;
+ if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+ if (GetObjCArg(Retain) != New) return;
+
+ Changed = true;
+ ++NumStoreStrongs;
+
+ LLVMContext &C = Release->getContext();
+ const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ const Type *I8XX = PointerType::getUnqual(I8X);
+
+ Value *Args[] = { Load->getPointerOperand(), New };
+ if (Args[0]->getType() != I8XX)
+ Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+ if (Args[1]->getType() != I8X)
+ Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+ CallInst *StoreStrong =
+ CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+ Args, "", Store);
+ StoreStrong->setDoesNotThrow();
+ StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+ if (&*Iter == Store) ++Iter;
+ Store->eraseFromParent();
+ Release->eraseFromParent();
+ EraseInstruction(Retain);
+ if (Load->use_empty())
+ Load->eraseFromParent();
+}
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // These are initialized lazily.
+ StoreStrongCallee = 0;
+ RetainAutoreleaseCallee = 0;
+ RetainAutoreleaseRVCallee = 0;
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = 0;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ break;
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+ continue;
+ break;
+ case IC_RetainRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the retainAutoreleasedReturnValue optimization,
+ // insert it now.
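+ // For example, some ARM targets use a marker string along the lines of
+ // "mov r7, r7" (illustrative; the actual string is whatever the frontend
+ // records in the clang.arc.retainAutoreleasedReturnValueMarker metadata).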
+ if (!RetainRVMarker)
+ break;
+ BasicBlock::iterator BBI = Inst;
+ --BBI;
+ while (isNoopInstruction(BBI)) --BBI;
+ if (&*BBI == GetObjCArg(Inst)) {
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RetainRVMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+ CallInst::Create(IA, "", Inst);
+ }
+ break;
+ }
+ case IC_InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null =
+ ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
+ }
+ continue;
+ }
+ case IC_Release:
+ ContractRelease(Inst, I);
+ continue;
+ default:
+ continue;
+ }
+
+ // Don't use GetObjCArg because we don't want to look through bitcasts
+ // and such; to do the replacement, the argument must have type i8*.
+ const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ for (;;) {
+ // If we're compiling bugpointed code, don't get in trouble.
+ if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+ break;
+ // Look through the uses of the pointer.
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ) {
+ Use &U = UI.getUse();
+ unsigned OperandNo = UI.getOperandNo();
+ ++UI; // Increment UI now, because we may unlink its element.
+ if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
+ if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ const Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo =
+ PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB =
+ PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ for (unsigned i = 0, e = PHI->getNumIncomingValues();
+ i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+ U.set(Replacement);
+ }
+ }
+ }
+
+ // If Arg is a no-op casted pointer, strip one level of casts and
+ // iterate.
+ if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+ Arg = BI->getOperand(0);
+ else if (isa<GEPOperator>(Arg) &&
+ cast<GEPOperator>(Arg)->hasAllZeroIndices())
+ Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+ else if (isa<GlobalAlias>(Arg) &&
+ !cast<GlobalAlias>(Arg)->mayBeOverridden())
+ Arg = cast<GlobalAlias>(Arg)->getAliasee();
+ else
+ break;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
new file mode 100644
index 000000000000..e6341ae3071f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -0,0 +1,1123 @@
+//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates commutative expressions in an order that is designed
+// to promote better constant propagation, GCSE, LICM, PRE, etc.
+//
+// For example: 4 + (x + 5) -> x + (4 + 5)
+//
+// In the implementation of this algorithm, constants are assigned rank = 0,
+// function arguments are rank = 1, and other values are assigned ranks
+// corresponding to the reverse post order traversal of current function
+// (starting at 2), which effectively gives values in deep loops higher rank
+// than values not in loops.
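+//
+// For instance (an illustrative sketch), if %a and %b are function arguments
+// and %t is computed inside a loop, "(%t + %a) + %b" is regrouped so that the
+// low-rank operands are combined first, e.g. "(%a + %b) + %t", which lets
+// LICM hoist the loop-invariant subexpression %a + %b.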
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reassociate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumLinear , "Number of insts linearized");
+STATISTIC(NumChanged, "Number of insts reassociated");
+STATISTIC(NumAnnihil, "Number of expr trees annihilated");
+STATISTIC(NumFactor , "Number of multiplies factored");
+
+namespace {
+ struct ValueEntry {
+ unsigned Rank;
+ Value *Op;
+ ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
+ };
+ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
+ return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
+ }
+}
+
+#ifndef NDEBUG
+/// PrintOps - Print out the expression identified in the Ops list.
+///
+static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
+ Module *M = I->getParent()->getParent()->getParent();
+ dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
+ << *Ops[0].Op->getType() << '\t';
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ dbgs() << "[ ";
+ WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ dbgs() << ", #" << Ops[i].Rank << "] ";
+ }
+}
+#endif
+
+namespace {
+ class Reassociate : public FunctionPass {
+ DenseMap<BasicBlock*, unsigned> RankMap;
+ DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ SmallVector<WeakVH, 8> RedoInsts;
+ SmallVector<WeakVH, 8> DeadInsts;
+ bool MadeChange;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ Reassociate() : FunctionPass(ID) {
+ initializeReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ private:
+ void BuildRankMap(Function &F);
+ unsigned getRank(Value *V);
+ Value *ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
+ unsigned Idx = 0);
+ Value *OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExpr(BinaryOperator *I);
+ Value *RemoveFactorFromExpression(Value *V, Value *Factor);
+ void ReassociateInst(BasicBlock::iterator &BBI);
+
+ void RemoveDeadBinaryOp(Value *V);
+ };
+}
+
+char Reassociate::ID = 0;
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false)
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
+
+void Reassociate::RemoveDeadBinaryOp(Value *V) {
+ Instruction *Op = dyn_cast<Instruction>(V);
+ if (!Op || !isa<BinaryOperator>(Op))
+ return;
+
+ Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
+
+ ValueRankMap.erase(Op);
+ DeadInsts.push_back(Op);
+ RemoveDeadBinaryOp(LHS);
+ RemoveDeadBinaryOp(RHS);
+}
+
+
+static bool isUnmovableInstruction(Instruction *I) {
+ if (I->getOpcode() == Instruction::PHI ||
+ I->getOpcode() == Instruction::Alloca ||
+ I->getOpcode() == Instruction::Load ||
+ I->getOpcode() == Instruction::Invoke ||
+ (I->getOpcode() == Instruction::Call &&
+ !isa<DbgInfoIntrinsic>(I)) ||
+ I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv ||
+ I->getOpcode() == Instruction::FDiv ||
+ I->getOpcode() == Instruction::URem ||
+ I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::FRem)
+ return true;
+ return false;
+}
+
+void Reassociate::BuildRankMap(Function &F) {
+ unsigned i = 2;
+
+ // Assign distinct ranks to function arguments
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ ValueRankMap[&*I] = ++i;
+
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ unsigned BBRank = RankMap[BB] = ++i << 16;
+
+ // Walk the basic block, adding precomputed ranks for any instructions that
+ // we cannot move. This ensures that the ranks for these instructions are
+ // all different in the block.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isUnmovableInstruction(I))
+ ValueRankMap[&*I] = ++BBRank;
+ }
+}
+
+unsigned Reassociate::getRank(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) {
+ if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
+ return 0; // Otherwise it's a global or constant, rank 0.
+ }
+
+ if (unsigned Rank = ValueRankMap[I])
+ return Rank; // Rank already known?
+
+ // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
+ // we can reassociate expressions for code motion! Since we do not recurse
+ // for PHI nodes, we cannot have infinite recursion here, because there
+ // cannot be loops in the value graph that do not go through PHI nodes.
+ unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
+ for (unsigned i = 0, e = I->getNumOperands();
+ i != e && Rank != MaxRank; ++i)
+ Rank = std::max(Rank, getRank(I->getOperand(i)));
+
+ // If this is a not or neg instruction, do not count it for rank. This
+ // assures us that X and ~X will have the same rank.
+ if (!I->getType()->isIntegerTy() ||
+ (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
+ ++Rank;
+
+ //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
+ // << Rank << "\n");
+
+ return ValueRankMap[I] = Rank;
+}
+
+/// isReassociableOp - Return true if V is an instruction of the specified
+/// opcode and if it only has one use.
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
+ if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
+ cast<Instruction>(V)->getOpcode() == Opcode)
+ return cast<BinaryOperator>(V);
+ return 0;
+}
+
+/// LowerNegateToMultiply - Replace 0-X with X*-1.
+///
+static Instruction *LowerNegateToMultiply(Instruction *Neg,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ Constant *Cst = Constant::getAllOnesValue(Neg->getType());
+
+ Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
+ ValueRankMap.erase(Neg);
+ Res->takeName(Neg);
+ Neg->replaceAllUsesWith(Res);
+ Res->setDebugLoc(Neg->getDebugLoc());
+ Neg->eraseFromParent();
+ return Res;
+}
+
+// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'.
+// Note that if D is also part of the expression tree, we recurse to
+// linearize it as well. Aside from that case, this does not recurse into A, B,
+// or C.
+void Reassociate::LinearizeExpr(BinaryOperator *I) {
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
+ assert(isReassociableOp(LHS, I->getOpcode()) &&
+ isReassociableOp(RHS, I->getOpcode()) &&
+ "Not an expression that needs linearization?");
+
+ DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
+
+ // Move the RHS instruction to live immediately before I, avoiding breaking
+ // dominator properties.
+ RHS->moveBefore(I);
+
+ // Move operands around to do the linearization.
+ I->setOperand(1, RHS->getOperand(0));
+ RHS->setOperand(0, LHS);
+ I->setOperand(0, RHS);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+ LHS->clearSubclassOptionalData();
+ RHS->clearSubclassOptionalData();
+
+ ++NumLinear;
+ MadeChange = true;
+ DEBUG(dbgs() << "Linearized: " << *I << '\n');
+
+ // If D is part of this expression tree, tail recurse.
+ if (isReassociableOp(I->getOperand(1), I->getOpcode()))
+ LinearizeExpr(I);
+}
+
+
+/// LinearizeExprTree - Given an associative binary expression tree, traverse
+/// all of the uses putting it into canonical form. This forces a left-linear
+/// form of the expression (((a+b)+c)+d), and collects information about the
+/// rank of the non-tree operands.
+///
+/// NOTE: This intentionally destroys the expression tree operands (turning
+/// them into undef values) to reduce #uses of the values. This means that the
+/// caller MUST use something like RewriteExprTree to put the values back in.
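+/// For example (hypothetical values), linearizing "((a+b)+c)+d" pushes the
+/// leaves a, b, c and d into Ops (each tagged with its rank) and replaces the
+/// leaf operands in the tree with undef until RewriteExprTree fills them back
+/// in.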
+///
+void Reassociate::LinearizeExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ unsigned Opcode = I->getOpcode();
+
+ // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
+ BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
+ BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);
+
+ // If this is a multiply expression tree and it contains internal negations,
+ // transform them into multiplies by -1 so they can be reassociated.
+ if (I->getOpcode() == Instruction::Mul) {
+ if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
+ LHSBO = isReassociableOp(LHS, Opcode);
+ }
+ if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
+ RHSBO = isReassociableOp(RHS, Opcode);
+ }
+ }
+
+ if (!LHSBO) {
+ if (!RHSBO) {
+ // Neither the LHS nor the RHS is part of the tree; thus this is a leaf. As
+ // such, just remember these operands and their ranks.
+ Ops.push_back(ValueEntry(getRank(LHS), LHS));
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the leaves out.
+ I->setOperand(0, UndefValue::get(I->getType()));
+ I->setOperand(1, UndefValue::get(I->getType()));
+ return;
+ }
+
+ // Turn X+(Y+Z) -> (Y+Z)+X
+ std::swap(LHSBO, RHSBO);
+ std::swap(LHS, RHS);
+ bool Success = !I->swapOperands();
+ assert(Success && "swapOperands failed");
+ Success = false;
+ MadeChange = true;
+ } else if (RHSBO) {
+ // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
+ // part of the expression tree.
+ LinearizeExpr(I);
+ LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
+ RHS = I->getOperand(1);
+ RHSBO = 0;
+ }
+
+ // Okay, now we know that the LHS is a nested expression and that the RHS is
+ // not. Perform reassociation.
+ assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");
+
+ // Move LHS right before I to make sure that the tree expression dominates all
+ // values.
+ LHSBO->moveBefore(I);
+
+ // Linearize the expression tree on the LHS.
+ LinearizeExprTree(LHSBO, Ops);
+
+ // Remember the RHS operand and its rank.
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the RHS leaf out.
+ I->setOperand(1, UndefValue::get(I->getType()));
+}
+
+// RewriteExprTree - Now that the operands for this expression tree are
+// linearized and optimized, emit them in-order. This function is written to be
+// tail recursive.
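+// For example (hypothetical values), given the left-linear tree (x+y)+z and
+// the sorted list Ops = [a, b, c] (highest rank first), the root's RHS becomes
+// Ops[0] and the recursion writes Ops[1] and Ops[2] into the inner node,
+// yielding (b+c)+a.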
+void Reassociate::RewriteExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops,
+ unsigned i) {
+ if (i+2 == Ops.size()) {
+ if (I->getOperand(0) != Ops[i].Op ||
+ I->getOperand(1) != Ops[i+1].Op) {
+ Value *OldLHS = I->getOperand(0);
+ DEBUG(dbgs() << "RA: " << *I << '\n');
+ I->setOperand(0, Ops[i].Op);
+ I->setOperand(1, Ops[i+1].Op);
+
+ // Clear all the optional flags, which may not hold after the
+ // reassociation if the expression involved more than just this operation.
+ if (Ops.size() != 2)
+ I->clearSubclassOptionalData();
+
+ DEBUG(dbgs() << "TO: " << *I << '\n');
+ MadeChange = true;
+ ++NumChanged;
+
+ // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)),
+ // delete the extra, now-dead, nodes.
+ RemoveDeadBinaryOp(OldLHS);
+ }
+ return;
+ }
+ assert(i+2 < Ops.size() && "Ops index out of range!");
+
+ if (I->getOperand(1) != Ops[i].Op) {
+ DEBUG(dbgs() << "RA: " << *I << '\n');
+ I->setOperand(1, Ops[i].Op);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+
+ DEBUG(dbgs() << "TO: " << *I << '\n');
+ MadeChange = true;
+ ++NumChanged;
+ }
+
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ assert(LHS->getOpcode() == I->getOpcode() &&
+ "Improper expression tree!");
+
+ // Compactify the tree instructions together with each other to guarantee
+ // that the expression tree is dominated by all of Ops.
+ LHS->moveBefore(I);
+ RewriteExprTree(LHS, Ops, i+1);
+}
+
+
+
+// NegateValue - Insert instructions before the instruction pointed to by BI
+// that compute the negated version of the specified value. The negated value
+// is returned, and BI is left pointing at the instruction that should be
+// processed next by the reassociation pass.
+//
+static Value *NegateValue(Value *V, Instruction *BI) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getNeg(C);
+
+ // We are trying to expose opportunity for reassociation. One of the things
+ // that we want to do to achieve this is to push a negation as deep into an
+ // expression chain as possible, to expose the add instructions. In practice,
+ // this means that we turn this:
+ // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
+ // so that a later expression such as Y = 12+X can be reassociated with the
+ // -12 to eliminate the constants. We assume that instcombine will clean up
+ // the mess later if we introduce tons of unnecessary negation instructions.
+ //
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
+ // Push the negates through the add.
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
+
+ // We must move the add instruction here, because the neg instructions do
+ // not dominate the old add instruction in general. By moving it, we are
+ // assured that the neg instructions we just inserted dominate the
+ // instruction we are about to insert after them.
+ //
+ I->moveBefore(BI);
+ I->setName(I->getName()+".neg");
+ return I;
+ }
+
+ // Okay, we need to materialize a negated version of V with an instruction.
+ // Scan the use lists of V to see if we have one already.
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (!BinaryOperator::isNeg(U)) continue;
+
+ // We found one! Now we have to make sure that the definition dominates
+ // this use. We do this by moving it to the entry block (if it is a
+ // non-instruction value) or right after the definition. These negates will
+ // be zapped by reassociate later, so we don't need much finesse here.
+ BinaryOperator *TheNeg = cast<BinaryOperator>(U);
+
+ // Verify that the negate is in this function, V might be a constant expr.
+ if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
+ continue;
+
+ BasicBlock::iterator InsertPt;
+ if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
+ InsertPt = II->getNormalDest()->begin();
+ } else {
+ InsertPt = InstInput;
+ ++InsertPt;
+ }
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ } else {
+ InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
+ }
+ TheNeg->moveBefore(InsertPt);
+ return TheNeg;
+ }
+
+ // Insert a 'neg' instruction that subtracts the value from zero to get the
+ // negation.
+ return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
+}
+
+/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
+/// X-Y into (X + -Y).
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
+ // If this is a negation, we can't split it up!
+ if (BinaryOperator::isNeg(Sub))
+ return false;
+
+ // Don't bother to break this up unless either operand is a reassociable add
+ // or subtract, or the subtract's only user is one.
+ if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(0), Instruction::Sub))
+ return true;
+ if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(1), Instruction::Sub))
+ return true;
+ if (Sub->hasOneUse() &&
+ (isReassociableOp(Sub->use_back(), Instruction::Add) ||
+ isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ return true;
+
+ return false;
+}
+
+/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
+/// only used by an add, transform this into (X+(0-Y)) to promote better
+/// reassociation.
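+/// For example (hypothetical values):
+///   %s = sub i32 %x, %y
+/// becomes
+///   %y.neg = sub i32 0, %y
+///   %s     = add i32 %x, %y.neg
+/// so the resulting add can be reassociated with neighbouring adds.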
+static Instruction *BreakUpSubtract(Instruction *Sub,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // Convert a subtract into an add and a neg instruction. This allows sub
+ // instructions to be commuted with other add instructions.
+ //
+ // Calculate the negative value of Operand 1 of the sub instruction,
+ // and set it as the RHS of the add instruction we just made.
+ //
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Instruction *New =
+ BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ New->takeName(Sub);
+
+ // Everyone now refers to the add instruction.
+ ValueRankMap.erase(Sub);
+ Sub->replaceAllUsesWith(New);
+ New->setDebugLoc(Sub->getDebugLoc());
+ Sub->eraseFromParent();
+
+ DEBUG(dbgs() << "Negated: " << *New << '\n');
+ return New;
+}
+
+/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
+/// by one, change this into a multiply by a constant to assist with further
+/// reassociation.
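+/// For example (hypothetical values), "%t = shl i32 %x, 3" becomes
+/// "%t = mul i32 %x, 8" (1 shifted left by 3), so the multiply can take part
+/// in reassociation with neighbouring multiplies.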
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // If an operand of this shift is a reassociable multiply, or if the shift
+ // is used by a reassociable multiply or add, turn it into a multiply.
+ if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
+ (Shl->hasOneUse() &&
+ (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
+ isReassociableOp(Shl->use_back(), Instruction::Add)))) {
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+
+ Instruction *Mul =
+ BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
+ ValueRankMap.erase(Shl);
+ Mul->takeName(Shl);
+ Shl->replaceAllUsesWith(Mul);
+ Mul->setDebugLoc(Shl->getDebugLoc());
+ Shl->eraseFromParent();
+ return Mul;
+ }
+ return 0;
+}
+
+// Scan backwards and forwards among values with the same rank as element i to
+// see if X exists. If X does not exist, return i. This is useful when
+// scanning for 'x' when we see '-x' because they both get the same rank.
+static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
+ Value *X) {
+ unsigned XRank = Ops[i].Rank;
+ unsigned e = Ops.size();
+ for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
+ if (Ops[j].Op == X)
+ return j;
+ // Scan backwards.
+ for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
+ if (Ops[j].Op == X)
+ return j;
+ return i;
+}
+
+/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// and returning the result. Insert the tree before I.
+static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
+ if (Ops.size() == 1) return Ops.back();
+
+ Value *V1 = Ops.back();
+ Ops.pop_back();
+ Value *V2 = EmitAddTreeOfValues(I, Ops);
+ return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
+}
+
+/// RemoveFactorFromExpression - If V is an expression tree that is a
+/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// remove Factor from the tree and return the new tree.
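+/// For example (hypothetical values), if V computes a*b*c and Factor is b, the
+/// tree is rewritten to compute a*c and that value is returned; if Factor is a
+/// constant and the tree contains its negation instead, that entry is removed
+/// and the rebuilt tree is negated.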
+Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) return 0;
+
+ SmallVector<ValueEntry, 8> Factors;
+ LinearizeExprTree(BO, Factors);
+
+ bool FoundFactor = false;
+ bool NeedsNegate = false;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ if (Factors[i].Op == Factor) {
+ FoundFactor = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+
+ // If this is a negative version of this factor, remove it.
+ if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
+ if (FC1->getValue() == -FC2->getValue()) {
+ FoundFactor = NeedsNegate = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+ }
+
+ if (!FoundFactor) {
+ // Make sure to restore the operands to the expression tree.
+ RewriteExprTree(BO, Factors);
+ return 0;
+ }
+
+ BasicBlock::iterator InsertPt = BO; ++InsertPt;
+
+ // If this was just a single multiply, remove the multiply and return the only
+ // remaining operand.
+ if (Factors.size() == 1) {
+ ValueRankMap.erase(BO);
+ DeadInsts.push_back(BO);
+ V = Factors[0].Op;
+ } else {
+ RewriteExprTree(BO, Factors);
+ V = BO;
+ }
+
+ if (NeedsNegate)
+ V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
+
+ return V;
+}
+
+/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
+/// add its operands as factors, otherwise add V to the list of factors.
+///
+/// Ops is the top-level list of add operands we're trying to factor.
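+/// For example (hypothetical values), if V is (a*b)*c and each multiply has a
+/// single use, the collected factors are c, b and a (right operands first);
+/// any shared or non-multiply value is added as a single factor instead.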
+static void FindSingleUseMultiplyFactors(Value *V,
+ SmallVectorImpl<Value*> &Factors,
+ const SmallVectorImpl<ValueEntry> &Ops,
+ bool IsRoot) {
+ BinaryOperator *BO;
+ if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
+ !(BO = dyn_cast<BinaryOperator>(V)) ||
+ BO->getOpcode() != Instruction::Mul) {
+ Factors.push_back(V);
+ return;
+ }
+
+ // If this value only appears to have a single use because it is another input
+ // to the add tree we're reassociating (and we dropped that use), it really has
+ // two uses and we can't factor it.
+ if (!IsRoot) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Ops[i].Op == V) {
+ Factors.push_back(V);
+ return;
+ }
+ }
+
+ // Otherwise, add the LHS and RHS to the list of factors.
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
+}
+
+/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
+/// instruction. This optimizes based on identities. If it can be reduced to
+/// a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+static Value *OptimizeAndOrXor(unsigned Opcode,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+ // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // First, check for X and ~X in the operand list.
+ assert(i < Ops.size());
+ if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ if (Opcode == Instruction::And) // ...&X&~X = 0
+ return Constant::getNullValue(X->getType());
+
+ if (Opcode == Instruction::Or) // ...|X|~X = -1
+ return Constant::getAllOnesValue(X->getType());
+ }
+ }
+
+ // Next, check for duplicate pairs of values, which we assume are next to
+ // each other, due to our sorting criteria.
+ assert(i < Ops.size());
+ if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
+ if (Opcode == Instruction::And || Opcode == Instruction::Or) {
+ // Drop duplicate values for And and Or.
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ ++NumAnnihil;
+ continue;
+ }
+
+ // Drop pairs of values for Xor.
+ assert(Opcode == Instruction::Xor);
+ if (e == 2)
+ return Constant::getNullValue(Ops[0].Op->getType());
+
+ // Y ^ X^X -> Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ i -= 1; e -= 2;
+ ++NumAnnihil;
+ }
+ }
+ return 0;
+}
+
+/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// optimizes based on identities. If it can be reduced to a single Value, it
+/// is returned, otherwise the Ops list is mutated as necessary.
+Value *Reassociate::OptimizeAdd(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and -X pairs. If we find any, we
+ // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
+ //
+ // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
+ //
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *TheOp = Ops[i].Op;
+ // Check to see if we've seen this operand before. If so, we factor all
+ // instances of the operand together. Due to our sorting criteria, we know
+ // that these need to be next to each other in the vector.
+ if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
+ // Rescan the list, remove all instances of this operand from the expr.
+ unsigned NumFound = 0;
+ do {
+ Ops.erase(Ops.begin()+i);
+ ++NumFound;
+ } while (i != Ops.size() && Ops[i].Op == TheOp);
+
+ DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
+ ++NumFactor;
+
+ // Insert a new multiply.
+ Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
+ Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
+
+ // Now that we have inserted a multiply, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
+ RedoInsts.push_back(Mul);
+
+ // If every add operand was a duplicate, return the multiply.
+ if (Ops.empty())
+ return Mul;
+
+ // Otherwise, we had some input that didn't have the dupe, such as
+ // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
+
+ --i;
+ e = Ops.size();
+ continue;
+ }
+
+ // Check for X and -X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp))
+ continue;
+
+ Value *X = BinaryOperator::getNegArgument(TheOp);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX == i)
+ continue;
+
+ // Remove X and -X from the operand list.
+ if (Ops.size() == 2)
+ return Constant::getNullValue(X->getType());
+
+ Ops.erase(Ops.begin()+i);
+ if (i < FoundX)
+ --FoundX;
+ else
+ --i; // Need to back up an extra one.
+ Ops.erase(Ops.begin()+FoundX);
+ ++NumAnnihil;
+ --i; // Revisit element.
+ e -= 2; // Removed two elements.
+ }
+
+ // Scan the operand list, checking to see if there are any common factors
+ // between operands. Consider something like A*A+A*B*C+D. We would like to
+ // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
+ // To efficiently find this, we count the number of times a factor occurs
+ // for any ADD operands that are MULs.
+ DenseMap<Value*, unsigned> FactorOccurrences;
+
+ // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
+ // where they are actually the same multiply.
+ unsigned MaxOcc = 0;
+ Value *MaxOccVal = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ // Compute all of the factors of this added value.
+ SmallVector<Value*, 8> Factors;
+ FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
+ assert(Factors.size() > 1 && "Bad linearize!");
+
+ // Add one to FactorOccurrences for each unique factor in this op.
+ SmallPtrSet<Value*, 8> Duplicates;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ Value *Factor = Factors[i];
+ if (!Duplicates.insert(Factor)) continue;
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+
+ // If Factor is a negative constant, add the negated value as a factor
+ // because we can percolate the negate out. Watch out for INT_MIN, which
+ // cannot be made positive without overflowing.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
+ if (CI->isNegative() && !CI->isMinValue(true)) {
+ Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
+ assert(!Duplicates.count(Factor) &&
+ "Shouldn't have two constant factors, missed a canonicalize");
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ }
+ }
+ }
+
+ // If any factor occurred more than one time, we can pull it out.
+ if (MaxOcc > 1) {
+ DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
+ ++NumFactor;
+
+ // Create a new instruction that uses the MaxOccVal twice. Without this,
+ // removing a factor from an expression could drop a use of MaxOccVal, which
+ // can cause RemoveFactorFromExpression on successive values to behave
+ // differently.
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ SmallVector<Value*, 4> NewMulOps;
+ for (unsigned i = 0; i != Ops.size(); ++i) {
+ // Only try to remove factors from expressions we're allowed to.
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+ // The factorized operand may occur several times. Convert them all in
+ // one fell swoop.
+ for (unsigned j = Ops.size(); j != i;) {
+ --j;
+ if (Ops[j].Op == Ops[i].Op) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+j);
+ }
+ }
+ --i;
+ }
+ }
+
+ // No need for extra uses anymore.
+ delete DummyInst;
+
+ unsigned NumAddedValues = NewMulOps.size();
+ Value *V = EmitAddTreeOfValues(I, NewMulOps);
+
+ // Now that we have inserted the add tree, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
+ assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+ (void)NumAddedValues;
+ V = ReassociateExpression(cast<BinaryOperator>(V));
+
+ // Create the multiply.
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+ // Rerun associate on the multiply in case the inner expression turned into
+ // a multiply. We want to make sure that we keep things in canonical form.
+ V2 = ReassociateExpression(cast<BinaryOperator>(V2));
+
+ // If every add operand included the factor (e.g. "A*B + A*C"), then the
+ // entire result expression is just the multiply "A*(B+C)".
+ if (Ops.empty())
+ return V2;
+
+ // Otherwise, we had some input that didn't have the factor, such as
+ // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+ }
+
+ return 0;
+}
+
+Value *Reassociate::OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Now that we have the linearized expression tree, try to optimize it.
+ // Start by folding any constants that we found.
+ bool IterateOptimization = false;
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
+ if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
+ Ops.pop_back();
+ Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
+ return OptimizeExpression(I, Ops);
+ }
+
+ // Check for destructive annihilation due to a constant being used.
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ if (CstVal->isZero()) // X & 0 -> 0
+ return CstVal;
+ if (CstVal->isAllOnesValue()) // X & -1 -> X
+ Ops.pop_back();
+ break;
+ case Instruction::Mul:
+ if (CstVal->isZero()) { // X * 0 -> 0
+ ++NumAnnihil;
+ return CstVal;
+ }
+
+ if (cast<ConstantInt>(CstVal)->isOne())
+ Ops.pop_back(); // X * 1 -> X
+ break;
+ case Instruction::Or:
+ if (CstVal->isAllOnesValue()) // X | -1 -> -1
+ return CstVal;
+ // FALLTHROUGH!
+ case Instruction::Add:
+ case Instruction::Xor:
+ if (CstVal->isZero()) // X [|^+] 0 -> X
+ Ops.pop_back();
+ break;
+ }
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ // Handle destructive annihilation due to identities between elements in the
+ // argument list here.
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ break;
+ }
+
+ case Instruction::Add: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAdd(I, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ break;
+ }
+ //case Instruction::Mul:
+ }
+
+ if (IterateOptimization)
+ return OptimizeExpression(I, Ops);
+ return 0;
+}
+
+
+/// ReassociateInst - Inspect and reassociate the instruction at the
+/// given position, post-incrementing the position.
+void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
+ Instruction *BI = BBI++;
+ if (BI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BI->getOperand(1)))
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ MadeChange = true;
+ BI = NI;
+ }
+
+ // Reject cases where it is pointless to do this.
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
+ BI->getType()->isVectorTy())
+ return; // Floating point and vector ops are not reassociated.
+
+ // Do not reassociate boolean (i1) expressions. We want to preserve the
+ // original order of evaluation for short-circuited comparisons that
+ // SimplifyCFG has folded to AND/OR expressions. If the expression
+ // is not further optimized, it is likely to be transformed back to a
+ // short-circuited form for code gen, and the source order may have been
+ // optimized for the most likely conditions.
+ if (BI->getType()->isIntegerTy(1))
+ return;
+
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (BI->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
+ // Reset the BBI iterator in case BreakUpSubtract changed the
+ // instruction it points to.
+ BBI = BI;
+ ++BBI;
+ MadeChange = true;
+ } else if (BinaryOperator::isNeg(BI)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+ (!BI->hasOneUse() ||
+ !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
+ MadeChange = true;
+ }
+ }
+ }
+
+ // If this instruction is an associative binary operator, process it.
+ if (!BI->isAssociative()) return;
+ BinaryOperator *I = cast<BinaryOperator>(BI);
+
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ return;
+
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ return;
+
+ ReassociateExpression(I);
+}
+
+Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
+
+ // First, walk the expression tree, linearizing the tree, collecting the
+ // operand information.
+ SmallVector<ValueEntry, 8> Ops;
+ LinearizeExprTree(I, Ops);
+
+ DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ // Now that we have linearized the tree to a list and have gathered all of
+ // the operands and their ranks, sort the operands by their rank. Use a
+ // stable_sort so that values with equal ranks will have their relative
+ // positions maintained (and so the compiler is deterministic). Note that
+ // this sorts so that the highest ranking values end up at the beginning of
+ // the vector.
+ std::stable_sort(Ops.begin(), Ops.end());
+
+ // OptimizeExpression - Now that we have the expression tree in a convenient
+ // sorted form, optimize it globally if possible.
+ if (Value *V = OptimizeExpression(I, Ops)) {
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
+ I->replaceAllUsesWith(V);
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ VI->setDebugLoc(I->getDebugLoc());
+ RemoveDeadBinaryOp(I);
+ ++NumAnnihil;
+ return V;
+ }
+
+ // We want to sink immediates as deeply as possible except in the case where
+ // this is a multiply tree used only by an add, and the immediate is a -1.
+ // In this case we reassociate to put the negation on the outside so that we
+ // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
+ if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Ops.back().Op) &&
+ cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
+ }
+
+ DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ if (Ops.size() == 1) {
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ I->replaceAllUsesWith(Ops[0].Op);
+ if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
+ OI->setDebugLoc(I->getDebugLoc());
+ RemoveDeadBinaryOp(I);
+ return Ops[0].Op;
+ }
+
+ // Now that we ordered and optimized the expressions, splat them back into
+ // the expression tree, removing any unneeded nodes.
+ RewriteExprTree(I, Ops);
+ return I;
+}
+
+
+bool Reassociate::runOnFunction(Function &F) {
+ // Recalculate the rank map for F
+ BuildRankMap(F);
+
+ MadeChange = false;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
+ ReassociateInst(BBI);
+
+ // Now that we're done, revisit any instructions which are likely to
+ // have secondary reassociation opportunities.
+ while (!RedoInsts.empty())
+ if (Value *V = RedoInsts.pop_back_val()) {
+ BasicBlock::iterator BBI = cast<Instruction>(V);
+ ReassociateInst(BBI);
+ }
+
+ // Now that we're done, delete any instructions which are no longer used.
+ while (!DeadInsts.empty())
+ if (Value *V = DeadInsts.pop_back_val())
+ RecursivelyDeleteTriviallyDeadInstructions(V);
+
+ // We are done with the rank map.
+ RankMap.clear();
+ ValueRankMap.clear();
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
new file mode 100644
index 000000000000..47afc770bb0c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,134 @@
+//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file demotes all registers to memory references. It is intended to be
+// the inverse of PromoteMemoryToRegister. By converting to loads, the only
+// values live across basic blocks are allocas and loads before phi nodes.
+// It is intended that this should make CFG hacking much easier.
+// To make later hacking easier, the entry block is split into two, such that
+// all introduced allocas and nothing else are in the entry block.
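+// For example (hypothetical IR), a value such as "%v = add i32 %a, %b" that is
+// used in another basic block gets a dedicated alloca: %v is stored to it right
+// after the definition and reloaded immediately before each use, which is
+// roughly what DemoteRegToStack below arranges.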
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg2mem"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumRegsDemoted, "Number of registers demoted");
+STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
+
+namespace {
+ struct RegToMem : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMem() : FunctionPass(ID) {
+ initializeRegToMemPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
+ UI != E; ++UI) {
+ const Instruction *I = cast<Instruction>(*UI);
+ if (I->getParent() != BB || isa<PHINode>(I))
+ return true;
+ }
+ return false;
+ }
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char RegToMem::ID = 0;
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+
+bool RegToMem::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ // Insert all new allocas into entry block.
+ BasicBlock *BBEntry = &F.getEntryBlock();
+ assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+ "Entry block to function must not have predecessors!");
+
+ // Find the first non-alloca instruction and create an insertion point. This
+ // is safe if the block is well-formed: it always has a terminator, otherwise
+ // we'll get an assertion.
+ BasicBlock::iterator I = BBEntry->begin();
+ while (isa<AllocaInst>(I)) ++I;
+
+ CastInst *AllocaInsertionPoint =
+ new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()),
+ "reg2mem alloca point", I);
+
+ // Find the escaped instructions. But don't create stack slots for
+ // allocas in the entry block.
+ std::list<Instruction*> WorkList;
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib) {
+ if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+ valueEscapes(iib)) {
+ WorkList.push_front(&*iib);
+ }
+ }
+
+ // Demote escaped instructions
+ NumRegsDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+ WorkList.clear();
+
+ // Find all phi's
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib)
+ if (isa<PHINode>(iib))
+ WorkList.push_front(&*iib);
+
+ // Demote phi nodes
+ NumPhisDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+ return true;
+}
+
+
+// createDemoteRegisterToMemory - Provide an entry point to create this pass.
+//
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
+FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
+ return new RegToMem();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
new file mode 100644
index 000000000000..083412ed942d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -0,0 +1,2010 @@
+//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements sparse conditional constant propagation and merging:
+//
+// Specifically, this:
+// * Assumes values are constant unless proven otherwise
+// * Assumes BasicBlocks are dead unless proven otherwise
+// * Proves values to be constant, and replaces them with constants
+// * Proves conditional branches to be unconditional
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccp"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumInstRemoved, "Number of instructions removed");
+STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
+
+STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
+STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
+STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
+
+namespace {
+/// LatticeVal class - This class represents the different lattice values that
+/// an LLVM value may occupy. It is a simple class with value semantics.
+///
+class LatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// forcedconstant - This LLVM Value was thought to be undef until
+ /// ResolvedUndefsIn. This is treated just like 'constant', but if merged
+ /// with another (different) constant, it goes to overdefined, instead of
+ /// asserting.
+ forcedconstant,
+
+ /// overdefined - This instruction is not known to be constant, and we know
+ /// it has a value.
+ overdefined
+ };
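+ // Illustrative transitions (hypothetical): a value starts out 'undefined',
+ // moves to 'constant' once a single constant is proven (or to
+ // 'forcedconstant' when ResolvedUndefsIn picks a value for an undef), and
+ // collapses to 'overdefined' as soon as two different constants would merge.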
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'forcedconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+ LatticeValueTy getLatticeValue() const {
+ return Val.getInt();
+ }
+
+public:
+ LatticeVal() : Val(0, undefined) {}
+
+ bool isUndefined() const { return getLatticeValue() == undefined; }
+ bool isConstant() const {
+ return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
+ }
+ bool isOverdefined() const { return getLatticeValue() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+
+ Val.setInt(overdefined);
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (getLatticeValue() == constant) { // Constant but not forcedconstant.
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ if (isUndefined()) {
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ } else {
+ assert(getLatticeValue() == forcedconstant &&
+ "Cannot move from overdefined to constant!");
+ // Stay at forcedconstant if the constant is the same.
+ if (V == getConstant()) return false;
+
+ // Otherwise, we go to overdefined. Assumptions made based on the
+ // forced value are possibly wrong. Assuming this is another constant
+ // could expose a contradiction.
+ Val.setInt(overdefined);
+ }
+ return true;
+ }
+
+ /// getConstantInt - If this is a constant with a ConstantInt value, return it
+ /// otherwise return null.
+ ConstantInt *getConstantInt() const {
+ if (isConstant())
+ return dyn_cast<ConstantInt>(getConstant());
+ return 0;
+ }
+
+ void markForcedConstant(Constant *V) {
+ assert(isUndefined() && "Can't force a defined value!");
+ Val.setInt(forcedconstant);
+ Val.setPointer(V);
+ }
+};
+} // end anonymous namespace.
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+//
+/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
+/// Constant Propagation.
+///
+class SCCPSolver : public InstVisitor<SCCPSolver> {
+ const TargetData *TD;
+ SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
+
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ ///
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
+
+ /// TrackedGlobals - If we are tracking any values for the contents of a global
+ /// variable, we keep a mapping from the global variable to its currently known
+ /// value. If the value becomes overdefined, its entry is simply removed from
+ /// this map.
+ DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ DenseMap<Function*, LatticeVal> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function*, 16> MRVFunctionsTracked;
+
+ /// TrackingIncomingArguments - This is the set of functions whose arguments we
+ /// make optimistic assumptions about and try to prove to be constants.
+ SmallPtrSet<Function*, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
+ SmallVector<Value*, 64> OverdefinedInstWorkList;
+ SmallVector<Value*, 64> InstWorkList;
+
+
+ SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
+
+ /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not
+ /// overdefined, despite the fact that the PHI node is overdefined.
+ std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+ DenseSet<Edge> KnownFeasibleEdges;
+public:
+ SCCPSolver(const TargetData *td) : TD(td) {}
+
+ /// MarkBlockExecutable - This method can be used by clients to mark all of
+ /// the blocks that are known to be intrinsically live in the processed unit.
+ ///
+ /// This returns true if the block was not considered live before.
+ bool MarkBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB)) return false;
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
+ }
+
+ /// TrackValueOfGlobalVariable - Clients can use this method to
+ /// inform the SCCPSolver that it should track loads and stores to the
+ /// specified global variable if it can. This is only legal to call if
+ /// performing Interprocedural SCCP.
+ void TrackValueOfGlobalVariable(GlobalVariable *GV) {
+ // We only track the contents of scalar globals.
+ if (GV->getType()->getElementType()->isSingleValueType()) {
+ LatticeVal &IV = TrackedGlobals[GV];
+ if (!isa<UndefValue>(GV->getInitializer()))
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ /// AddTrackedFunction - If the SCCP solver is supposed to track calls into
+ /// and out of the specified function (which cannot have its address taken),
+ /// this method must be called.
+ void AddTrackedFunction(Function *F) {
+ // Add an entry, F -> undef.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+ LatticeVal()));
+ } else
+ TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
+ }
+
+ void AddArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
+ /// Solve - Solve for constants and executable blocks.
+ ///
+ void Solve();
+
+ /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+ /// that branches on undef values cannot reach any of their successors.
+ /// However, this is not a safe assumption. After we solve dataflow, this
+ /// method should be used to handle this. If this returns true, the solver
+ /// should be rerun.
+ bool ResolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ LatticeVal getLatticeValueFor(Value *V) const {
+ DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
+ assert(I != ValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }
+
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
+ StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }*/
+
+ /// getTrackedRetVals - Get the inferred return value map.
+ ///
+ const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ /// getTrackedGlobals - Get and return the set of inferred initializers for
+ /// global variables.
+ const DenseMap<GlobalVariable*, LatticeVal> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ void markOverdefined(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markOverdefined(ValueState[V], V);
+ }
+
+ /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// works with both scalars and structs.
+ void markAnythingOverdefined(Value *V) {
+ if (const StructType *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(V);
+ }
+
+private:
+ // markConstant - Make a value be marked as "constant". If the value
+ // is not already a constant, add it to the instruction work list so that
+ // the users of the instruction are updated later.
+ //
+ void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+ if (!IV.markConstant(C)) return;
+ DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+ void markConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markConstant(ValueState[V], V, C);
+ }
+
+ void markForcedConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ LatticeVal &IV = ValueState[V];
+ IV.markForcedConstant(C);
+ DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+
+ // markOverdefined - Make a value be marked as "overdefined". If the
+ // value is not already overdefined, add it to the overdefined instruction
+ // work list so that the users of the instruction are updated later.
+ void markOverdefined(LatticeVal &IV, Value *V) {
+ if (!IV.markOverdefined()) return;
+
+ DEBUG(dbgs() << "markOverdefined: ";
+ if (Function *F = dyn_cast<Function>(V))
+ dbgs() << "Function '" << F->getName() << "'\n";
+ else
+ dbgs() << *V << '\n');
+ // Only instructions go on the work list
+ OverdefinedInstWorkList.push_back(V);
+ }
+
+ void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
+ if (IV.isOverdefined() || MergeWithV.isUndefined())
+ return; // Noop.
+ if (MergeWithV.isOverdefined())
+ markOverdefined(IV, V);
+ else if (IV.isUndefined())
+ markConstant(IV, V, MergeWithV.getConstant());
+ else if (IV.getConstant() != MergeWithV.getConstant())
+ markOverdefined(IV, V);
+ }
+
+ void mergeInValue(Value *V, LatticeVal MergeWithV) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ mergeInValue(ValueState[V], V, MergeWithV);
+ }
+
+
+ /// getValueState - Return the LatticeVal object that corresponds to the
+ /// value. This function handles the case when the value hasn't been seen yet
+ /// by properly seeding constants etc.
+ LatticeVal &getValueState(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
+
+ std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
+ ValueState.insert(std::make_pair(V, LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Undef values remain undefined.
+ if (!isa<UndefValue>(V))
+ LV.markConstant(C); // Constants are constant
+ }
+
+ // All others are undefined by default.
+ return LV;
+ }
+
+ /// getStructValueState - Return the LatticeVal object that corresponds to the
+ /// value/field pair. This function handles the case when the value hasn't
+ /// been seen yet by properly seeding constants etc.
+ LatticeVal &getStructValueState(Value *V, unsigned i) {
+ assert(V->getType()->isStructTy() && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ std::pair<DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator,
+ bool> I = StructValueState.insert(
+ std::make_pair(std::make_pair(V, i), LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<UndefValue>(C))
+ ; // Undef values remain undefined.
+ else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+ LV.markConstant(CS->getOperand(i)); // Constants are constant.
+ else if (isa<ConstantAggregateZero>(C)) {
+ const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
+ LV.markConstant(Constant::getNullValue(FieldTy));
+ } else
+ LV.markOverdefined(); // Unknown sort of constant.
+ }
+
+ // All others are undefined by default.
+ return LV;
+ }
+
+
+ /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ /// work list if it is not already executable.
+ void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ if (!MarkBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ visitPHINode(*PN);
+ }
+ }
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ //
+ void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+
+ // isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+ // block to the 'To' basic block is currently feasible.
+ //
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+
+ // OperandChangedState - This method is invoked on all of the users of an
+ // instruction that was just changed state somehow. Based on this
+ // information, we need to update the specified user of this instruction.
+ //
+ void OperandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+ /// RemoveFromOverdefinedPHIs - If I has any entries in the
+ /// UsersOfOverdefinedPHIs map for PN, remove them now.
+ void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ if (UsersOfOverdefinedPHIs.empty()) return;
+ std::multimap<PHINode*, Instruction*>::iterator It, E;
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ while (It != E) {
+ if (It->second == I)
+ UsersOfOverdefinedPHIs.erase(It++);
+ else
+ ++It;
+ }
+ }
+
+ /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS
+ /// map for I and PN, but if one is there already, do not create another.
+ /// (Duplicate entries do not break anything directly, but can lead to
+ /// exponential growth of the table in rare cases.)
+ void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ std::multimap<PHINode*, Instruction*>::iterator J, E;
+ tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (; J != E; ++J)
+ if (J->second == I)
+ return;
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
+ }
+
+private:
+ friend class InstVisitor<SCCPSolver>;
+
+ // visit implementations - Something changed in this instruction. Either an
+ // operand made a transition, or the instruction is newly executable. Change
+ // the value type of I to reflect these changes if appropriate.
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminatorInst(TerminatorInst &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ // Instructions that cannot be folded away.
+ void visitStoreInst (StoreInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitCallInst (CallInst &I) {
+ visitCallSite(&I);
+ }
+ void visitInvokeInst (InvokeInst &II) {
+ visitCallSite(&II);
+ visitTerminatorInst(II);
+ }
+ void visitCallSite (CallSite CS);
+ void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
+ void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
+
+ void visitInstruction(Instruction &I) {
+ // If a new instruction is added to LLVM that we don't handle.
+ dbgs() << "SCCP: Don't know how to handle: " << I;
+ markAnythingOverdefined(&I); // Just in case
+ }
+};
+
+} // end anonymous namespace
+
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+//
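+// For example (hypothetical): for "br i1 %c, label %T, label %F", if %c is
+// known to be the constant false only Succs[1] (the %F edge) is set, if %c is
+// overdefined both edges are marked feasible, and if %c is still undefined
+// neither edge is.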
+void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVector<bool, 16> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUndefined())
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way.
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions' successors are always executable.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0) { // Overdefined or undefined condition?
+ // All destinations are executable!
+ if (!SCValue.isUndefined())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI->findCaseValue(CI)] = true;
+ return;
+ }
+
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI)) {
+ // Just mark all destinations executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << TI << '\n';
+#endif
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+}
+
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible.
+//
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
+ assert(BBExecutable.count(To) && "Dest should always be alive!");
+
+ // Make sure the source basic block is executable!!
+ if (!BBExecutable.count(From)) return false;
+
+ // Check to make sure this edge itself is actually feasible now.
+ TerminatorInst *TI = From->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return true;
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+
+ // Overdefined condition variables mean the branch could go either way,
+ // undef conditions mean that neither edge is feasible yet.
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0)
+ return !BCValue.isUndefined();
+
+ // Constant condition variables mean the branch can only go a single way.
+ return BI->getSuccessor(CI->isZero()) == To;
+ }
+
+ // The successors of invoke instructions are always executable.
+ if (isa<InvokeInst>(TI))
+ return true;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0)
+ return !SCValue.isUndefined();
+
+ // Make sure to skip the default destination (successor #0), which has no case value.
+ for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
+ if (SI->getSuccessorValue(i) == CI) // Found the taken branch.
+ return SI->getSuccessor(i) == To;
+
+ // If the constant value is not equal to any of the branches, we must
+ // execute default branch.
+ return SI->getDefaultDest() == To;
+ }
+
+ // Just mark all destinations executable!
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(TI))
+ return true;
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+ llvm_unreachable(0);
+}
+
+// visit Implementations - Something changed in this instruction, either an
+// operand made a transition, or the instruction is newly executable. Change
+// the value type of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and the constants coming from
+// different branches conflict, or if the PHI node merges in an overdefined
+// value, then the PHI node becomes overdefined.
+// 2. If a phi node merges in only constants, and they all agree on a value,
+// the PHI node becomes a constant equal to that value.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+//
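+// Illustrative example (not from the original source): given
+//   %x = phi i32 [ 7, %bb1 ], [ 7, %bb2 ]
+//   %y = phi i32 [ 7, %bb1 ], [ 9, %bb2 ]
+// rule 2 marks %x as the constant 7 because all feasible incoming values
+// agree, while rule 1 drives %y to overdefined because its incoming
+// constants conflict.
+//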
+void SCCPSolver::visitPHINode(PHINode &PN) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (PN.getType()->isStructTy())
+ return markAnythingOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined()) {
+ // There may be instructions using this PHI node that are not overdefined
+ // themselves. If so, make sure that they know that the PHI node operand
+ // changed.
+ std::multimap<PHINode*, Instruction*>::iterator I, E;
+ tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
+ if (I == E)
+ return;
+
+ SmallVector<Instruction*, 16> Users;
+ for (; I != E; ++I)
+ Users.push_back(I->second);
+ while (!Users.empty())
+ visit(Users.pop_back_val());
+ return; // Quick exit
+ }
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64)
+ return markOverdefined(&PN);
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is overdefined.
+ // If there are no executable operands, the PHI remains undefined.
+ //
+ Constant *OperandVal = 0;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LatticeVal IV = getValueState(PN.getIncomingValue(i));
+ if (IV.isUndefined()) continue; // Doesn't influence PHI node.
+
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ if (IV.isOverdefined()) // PHI node becomes overdefined!
+ return markOverdefined(&PN);
+
+ if (OperandVal == 0) { // Grab the first value.
+ OperandVal = IV.getConstant();
+ continue;
+ }
+
+ // There is already a reachable operand. If we conflict with it,
+ // then the PHI node becomes overdefined. If we agree with it, we
+ // can continue on.
+
+ // Check to see if there are two different constants merging; if so, the PHI
+ // node is overdefined.
+ if (IV.getConstant() != OperandVal)
+ return markOverdefined(&PN);
+ }
+
+ // If we exited the loop, this means that the PHI node only has constant
+ // arguments that agree with each other (and OperandVal is the constant) or
+ // OperandVal is null because there are no defined incoming arguments. If
+ // this is the case, the PHI remains undefined.
+ //
+ if (OperandVal)
+ markConstant(&PN, OperandVal); // Acquire operand value
+}
+
+
+
+
+void SCCPSolver::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0) return; // ret void
+
+ Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
+ // If we are tracking the return value of this function, merge it in.
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty()) {
+ if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+
+ }
+}
+
+void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable.
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPSolver::visitCastInst(CastInst &I) {
+ LatticeVal OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
+ markOverdefined(&I);
+ else if (OpSt.isConstant()) // Propagate constant value
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ OpSt.getConstant(), I.getType()));
+}
+
+
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+ // If this returns a struct, mark all elements overdefined; we don't track
+ // structs within structs.
+ if (EVI.getType()->isStructTy())
+ return markAnythingOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ if (AggVal->getType()->isStructTy()) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
+}
+
+void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ const StructType *STy = dyn_cast<StructType>(IVI.getType());
+ if (STy == 0)
+ return markOverdefined(&IVI);
+
+ // If this has more than one index, we can't handle it; drive all results to
+ // overdefined.
+ if (IVI.getNumIndices() != 1)
+ return markAnythingOverdefined(&IVI);
+
+ Value *Aggr = IVI.getAggregateOperand();
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ LatticeVal EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
+ }
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (Val->getType()->isStructTy())
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ LatticeVal InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
+ }
+ }
+}
+
+void SCCPSolver::visitSelectInst(SelectInst &I) {
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal CondValue = getValueState(I.getCondition());
+ if (CondValue.isUndefined())
+ return;
+
+ if (ConstantInt *CondCB = CondValue.getConstantInt()) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ LatticeVal TVal = getValueState(I.getTrueValue());
+ LatticeVal FVal = getValueState(I.getFalseValue());
+
+ // select ?, C, C -> C.
+ if (TVal.isConstant() && FVal.isConstant() &&
+ TVal.getConstant() == FVal.getConstant())
+ return markConstant(&I, FVal.getConstant());
+
+ if (TVal.isUndefined()) // select ?, undef, X -> X.
+ return mergeInValue(&I, FVal);
+ if (FVal.isUndefined()) // select ?, X, undef -> X.
+ return mergeInValue(&I, TVal);
+ markOverdefined(&I);
+}
+
+// Handle Binary Operators.
+void SCCPSolver::visitBinaryOperator(Instruction &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I,
+ ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If something is undef, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // Otherwise, one of our operands is overdefined. Try to produce something
+ // better than overdefined with some tricks.
+
+ // If this is an AND or OR with 0 or -1, it doesn't matter that the other
+ // operand is overdefined.
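+ // Illustrative example (not from the original source):
+ //   %r = and i32 %overdefined, 0
+ // is known to be 0 regardless of what %overdefined turns out to be, and
+ //   %r = or i32 %overdefined, -1
+ // is known to be -1.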
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ LatticeVal *NonOverdefVal = 0;
+ if (!V1State.isOverdefined())
+ NonOverdefVal = &V1State;
+ else if (!V2State.isOverdefined())
+ NonOverdefVal = &V2State;
+
+ if (NonOverdefVal) {
+ if (NonOverdefVal->isUndefined()) {
+ // Could annihilate value.
+ if (I.getOpcode() == Instruction::And)
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ markConstant(IV, &I, Constant::getAllOnesValue(PT));
+ else
+ markConstant(IV, &I,
+ Constant::getAllOnesValue(I.getType()));
+ return;
+ }
+
+ if (I.getOpcode() == Instruction::And) {
+ // X and 0 = 0
+ if (NonOverdefVal->getConstant()->isNullValue())
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ } else {
+ if (ConstantInt *CI = NonOverdefVal->getConstantInt())
+ if (CI->isAllOnesValue()) // X or -1 = -1
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ }
+ }
+ }
+
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
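+ // Illustrative example (not from the original source): with
+ //   %p = phi i32 [ 1, %a ], [ 2, %b ]
+ //   %q = phi i32 [ 9, %a ], [ 8, %b ]
+ // an 'add i32 %p, %q' evaluates to 10 along both predecessors, so it can be
+ // marked as the constant 10 even though %p and %q are each overdefined.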
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(IV, &I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
+ return;
+ }
+
+ if (Result.isUndefined())
+ return;
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
+}
+
+// Handle compare instructions (icmp and fcmp).
+void SCCPSolver::visitCmpInst(CmpInst &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If the operands are still undefined, wait for them to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // If something is overdefined, use some tricks to avoid ending up
+ // overdefined if we can.
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::getCompare(I.getPredicate(),
+ In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(&I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
+ return;
+ }
+
+ if (Result.isUndefined())
+ return;
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
+}
+
+void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
+ // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &IdxState = getValueState(I.getOperand(1));
+
+ if (ValState.isOverdefined() || IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
+ // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &EltState = getValueState(I.getOperand(1));
+ LatticeVal &IdxState = getValueState(I.getOperand(2));
+
+ if (ValState.isOverdefined() || EltState.isOverdefined() ||
+ IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+ else if (ValState.isUndefined() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+ LatticeVal &MaskState = getValueState(I.getOperand(2));
+
+ if (MaskState.isUndefined() ||
+ (V1State.isUndefined() && V2State.isUndefined()))
+ return; // Undefined output if mask or both inputs undefined.
+
+ if (V1State.isOverdefined() || V2State.isOverdefined() ||
+ MaskState.isOverdefined()) {
+ markOverdefined(&I);
+ } else {
+ // A mix of constant/undef inputs.
+ Constant *V1 = V1State.isConstant() ?
+ V1State.getConstant() : UndefValue::get(I.getType());
+ Constant *V2 = V2State.isConstant() ?
+ V2State.getConstant() : UndefValue::get(I.getType());
+ Constant *Mask = MaskState.isConstant() ?
+ MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
+ markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
+ }
+#endif
+}
+
+// Handle getelementptr instructions. If all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+//
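+// Illustrative example (not from the original source): if the pointer
+// operand resolves to the global @G and the indices resolve to the constants
+// 0 and 1, then
+//   %p = getelementptr [4 x i32]* @G, i32 0, i32 1
+// is marked as the ConstantExpr getelementptr(@G, 0, 1).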
+void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (ValueState[&I].isOverdefined()) return;
+
+ SmallVector<Constant*, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ LatticeVal State = getValueState(I.getOperand(i));
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+
+ if (State.isOverdefined())
+ return markOverdefined(&I);
+
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ Constant *Ptr = Operands[0];
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
+ Operands.size()-1));
+}
+
+void SCCPSolver::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (SI.getOperand(0)->getType()->isStructTy())
+ return;
+
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
+
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)));
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
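+// Illustrative example (not from the original source): given
+//   @G = internal constant i32 42
+//   %v = load i32* @G
+// the load is marked as the constant 42; similarly, a load of a null pointer
+// in address space 0 is marked as the null value of the loaded type.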
+void SCCPSolver::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct, just mark the result overdefined.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (!PtrVal.isConstant() || I.isVolatile())
+ return markOverdefined(IV, &I);
+
+ Constant *Ptr = PtrVal.getConstant();
+
+ // load null -> null
+ if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
+ return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+
+ // Transform load (constant global) into the value loaded.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ DenseMap<GlobalVariable*, LatticeVal>::iterator It =
+ TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second);
+ return;
+ }
+ }
+ }
+
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ return markConstant(IV, &I, C);
+
+ // Otherwise we cannot say for certain what value this load will produce.
+ // Bail out.
+ markOverdefined(IV, &I);
+}
+
+void SCCPSolver::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ Instruction *I = CS.getInstruction();
+
+ // The common case is that we aren't tracking the callee, either because we
+ // are not doing interprocedural analysis, the callee is indirect, or the
+ // callee is external. Handle these cases first.
+ if (F == 0 || F->isDeclaration()) {
+CallOverdefined:
+ // Void return and not tracking callee, just bail.
+ if (I->getType()->isVoidTy()) return;
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
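+ // Illustrative example (not from the original source): a call such as
+ //   %r = call double @floor(double 2.5)
+ // has a foldable callee and all-constant operands, so the result can be
+ // marked as the constant 2.0 even though the callee is not tracked.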
+ if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
+ canConstantFoldCallTo(F)) {
+
+ SmallVector<Constant*, 8> Operands;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+ AI != E; ++AI) {
+ LatticeVal State = getValueState(*AI);
+
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+ if (State.isOverdefined())
+ return markOverdefined(I);
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size()))
+ return markConstant(I, C);
+ }
+
+ // Otherwise, we don't know anything about this call, mark it overdefined.
+ return markAnythingOverdefined(I);
+ }
+
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge the actual arguments at the call site
+ // into the formal arguments of the function.
+ if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(AI);
+ continue;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal CallArg = getStructValueState(*CAI, i);
+ mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ }
+ } else {
+ mergeInValue(AI, getValueState(*CAI));
+ }
+ }
+ }
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(I, i), I,
+ TrackedMultipleRetVals[std::make_pair(F, i)]);
+ } else {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
+ }
+}
+
+void SCCPSolver::Solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+ // Process the overdefined instruction work list first, which drives other
+ // things to overdefined more quickly.
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined
+ // Update all of the users of this instruction's value.
+ //
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
+
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be used to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking it one
+/// of the edges from the block as being feasible, even though the condition
+/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
+/// CFG and only slightly pessimizes the analysis results (by marking one,
+/// potentially infeasible, edge feasible). This cannot usefully modify the
+/// constraints on the condition of the branch, as that would impact other users
+/// of the value.
+///
+/// This scan also checks for values that use undefs, whose results are actually
+/// defined. For example, 'zext i8 undef to i32' should produce all zeros
+/// conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return zero,
+/// even if X isn't defined.
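+///
+/// Illustrative example (not from the original source):
+///   %z = or i32 undef, %x   is forced to the constant -1 (%x could be -1),
+///   %z = and i32 undef, %x  is forced to the constant 0  (%x could be 0),
+/// and a 'br i1 undef' has its condition forced to false so that one of its
+/// successor edges becomes executable.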
+bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for instructions which produce undef values.
+ if (I->getType()->isVoidTy()) continue;
+
+ if (const StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few things that can be structs matter for undef. Just send
+ // all their results to overdefined. We could be more precise than this
+ // but it isn't worth bothering.
+ if (isa<CallInst>(I) || isa<SelectInst>(I)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
+ }
+ }
+ continue;
+ }
+
+ LatticeVal &LV = getValueState(I);
+ if (!LV.isUndefined()) continue;
+
+ // No instructions using structs need disambiguation.
+ if (I->getOperand(0)->getType()->isStructTy())
+ continue;
+
+ // Get the lattice values of the first two operands for use below.
+ LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op1LV;
+ if (I->getNumOperands() == 2) {
+ // No instructions using structs need disambiguation.
+ if (I->getOperand(1)->getType()->isStructTy())
+ continue;
+
+ // If this is a two-operand instruction, and if both operands are
+ // undefs, the result stays undef.
+ Op1LV = getValueState(I->getOperand(1));
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ continue;
+ }
+
+ // If this is an instruction whose result is defined even if the input is
+ // not fully defined, propagate the information.
+ const Type *ITy = I->getType();
+ switch (I->getOpcode()) {
+ default: break; // Leave the instruction as an undef.
+ case Instruction::ZExt:
+ // After a zero extend, we know the top part is zero. SExt doesn't have
+ // to be handled here, because we don't know whether the top part is 1's
+ // or 0's.
+ case Instruction::SIToFP: // some FP values are not possible, just use 0.
+ case Instruction::UIToFP: // some FP values are not possible, just use 0.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Mul:
+ case Instruction::And:
+ // undef * X -> 0. X could be zero.
+ // undef & X -> 0. X could be zero.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::Or:
+ // undef | X -> -1. X could be -1.
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ return true;
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // X / undef -> undef. No change.
+ // X % undef -> undef. No change.
+ if (Op1LV.isUndefined()) break;
+
+ // undef / X -> 0. X could be maxint.
+ // undef % X -> 0. X could be 1.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::AShr:
+ // undef >>s X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >>s undef -> X. X could be 0, X could have the high-bit known set.
+ if (Op0LV.isConstant())
+ markForcedConstant(I, Op0LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // undef >> X -> undef. No change.
+ // undef << X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >> undef -> 0. X could be 0.
+ // X << undef -> 0. X could be 0.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Select:
+ // undef ? X : Y -> X or Y. There could be commonality between X/Y.
+ if (Op0LV.isUndefined()) {
+ if (!Op1LV.isConstant()) // Pick the constant one if there is any.
+ Op1LV = getValueState(I->getOperand(2));
+ } else if (Op1LV.isUndefined()) {
+ // c ? undef : undef -> undef. No change.
+ Op1LV = getValueState(I->getOperand(2));
+ if (Op1LV.isUndefined())
+ break;
+ // Otherwise, c ? undef : x -> x.
+ } else {
+ // Leave Op1LV as Operand(1)'s LatticeValue.
+ }
+
+ if (Op1LV.isConstant())
+ markForcedConstant(I, Op1LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::Call:
+ // If a call has an undef result, it is because it is constant foldable
+ // but one of the inputs was undef. Just force the result to
+ // overdefined.
+ markOverdefined(I);
+ return true;
+ }
+ }
+
+ // Check to see if we have a branch or switch on an undefined value. If so
+ // we force the branch to go one way or the other to make the successor
+ // values live. It doesn't really matter which way we force it.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional()) continue;
+ if (!getValueState(BI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually a branch on undef, fix the undef to
+ // false.
+ if (isa<UndefValue>(BI->getCondition())) {
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to false.
+ markForcedConstant(BI->getCondition(),
+ ConstantInt::getFalse(TI->getContext()));
+ return true;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumSuccessors() < 2) // no cases
+ continue;
+ if (!getValueState(SI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually a switch on undef, fix the undef to
+ // the first case constant.
+ if (isa<UndefValue>(SI->getCondition())) {
+ SI->setCondition(SI->getCaseValue(1));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// SCCP Class - This class uses the SCCPSolver to implement a per-function
+ /// Sparse Conditional Constant Propagator.
+ ///
+ struct SCCP : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ SCCP() : FunctionPass(ID) {
+ initializeSCCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - Run the Sparse Conditional Constant Propagation
+ // algorithm, and return true if the function was modified.
+ //
+ bool runOnFunction(Function &F);
+ };
+} // end anonymous namespace
+
+char SCCP::ID = 0;
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false)
+
+// createSCCPPass - This is the public interface to this file.
+FunctionPass *llvm::createSCCPPass() {
+ return new SCCP();
+}
+
+static void DeleteInstructionInBlock(BasicBlock *BB) {
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
+ ++NumDeadBlocks;
+
+ // Delete the instructions backwards; this reduces the number of def-use and
+ // use-def chains that have to be updated.
+ while (!isa<TerminatorInst>(BB->begin())) {
+ Instruction *I = --BasicBlock::iterator(BB->getTerminator());
+
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ BB->getInstList().erase(I);
+ ++NumInstRemoved;
+ }
+}
+
+// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
+// and return true if the function was modified.
+//
+bool SCCP::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+
+ // Mark the first block of the function as being executable.
+ Solver.MarkBlockExecutable(F.begin());
+
+ // Mark all arguments to the function as being overdefined.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
+ Solver.markAnythingOverdefined(AI);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+ DEBUG(dbgs() << "RESOLVING UNDEFs\n");
+ ResolvedUndefs = Solver.ResolvedUndefsIn(F);
+ }
+
+ bool MadeChanges = false;
+
+ // If we decided that there are basic blocks that are dead in this function,
+ // delete their contents now. Note that we cannot actually delete the blocks,
+ // as we cannot modify the CFG of the function.
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+ continue;
+ }
+
+ // Iterate over all of the instructions in the function, replacing them with
+ // constants if we have found them to have constant values.
+ //
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ continue;
+
+ // TODO: Reconstruct structs from their elements.
+ if (Inst->getType()->isStructTy())
+ continue;
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replace all of the uses of the instruction with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
+ }
+
+ return MadeChanges;
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// IPSCCP Class - This class implements interprocedural Sparse Conditional
+ /// Constant Propagation.
+ ///
+ struct IPSCCP : public ModulePass {
+ static char ID;
+ IPSCCP() : ModulePass(ID) {
+ initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char IPSCCP::ID = 0;
+INITIALIZE_PASS(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+
+// createIPSCCPPass - This is the public interface to this file.
+ModulePass *llvm::createIPSCCPPass() {
+ return new IPSCCP();
+}
+
+
+static bool AddressIsTaken(const GlobalValue *GV) {
+ // Delete any dead constantexpr klingons.
+ GV->removeDeadConstantUsers();
+
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == GV || SI->isVolatile())
+ return true; // Storing addr of GV.
+ } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
+ // Make sure we are calling the function, not passing the address.
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return true;
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return true;
+ } else if (isa<BlockAddress>(U)) {
+ // A blockaddress doesn't take the address of the function; it takes the
+ // address of a label.
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IPSCCP::runOnModule(Module &M) {
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
+ // Loop over all functions, marking arguments to those with their addresses
+ // taken or that are external as overdefined.
+ //
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration())
+ continue;
+
+ // If this is a strong or ODR definition of this function, then we can
+ // propagate information about its result into callsites of it.
+ if (!F->mayBeOverridden())
+ Solver.AddTrackedFunction(F);
+
+ // If this function only has direct calls that we can see, we can track its
+ // arguments and return value aggressively, and can assume it is not called
+ // unless we see evidence to the contrary.
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
+ }
+
+ // Assume the function is called.
+ Solver.MarkBlockExecutable(F->begin());
+
+ // Assume nothing about the incoming arguments.
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ Solver.markAnythingOverdefined(AI);
+ }
+
+ // Loop over global variables. We inform the solver about any internal global
+ // variables that do not have their 'addresses taken'. If they don't have
+ // their addresses taken, we can propagate constants through them.
+ for (Module::global_iterator G = M.global_begin(), E = M.global_end();
+ G != E; ++G)
+ if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
+ Solver.TrackValueOfGlobalVariable(G);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+
+ DEBUG(dbgs() << "RESOLVING UNDEFS\n");
+ ResolvedUndefs = false;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
+ }
+
+ bool MadeChanges = false;
+
+ // Iterate over all of the instructions in the module, replacing them with
+ // constants if we have found them to have constant values.
+ //
+ SmallVector<BasicBlock*, 512> BlocksToErase;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (Solver.isBlockExecutable(F->begin())) {
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ if (AI->use_empty() || AI->getType()->isStructTy()) continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(AI);
+ if (IV.isOverdefined()) continue;
+
+ Constant *CST = IV.isConstant() ?
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
+
+ // Replace all of the uses of the argument with uses of the
+ // constant.
+ AI->replaceAllUsesWith(CST);
+ ++IPNumArgsElimed;
+ }
+ }
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ if (!Succ->empty() && isa<PHINode>(Succ->begin()))
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
+
+ if (&*BB != &F->front())
+ BlocksToErase.push_back(BB);
+ else
+ new UnreachableInst(M.getContext(), BB);
+ continue;
+ }
+
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
+ continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replace all of the uses of the instruction with uses of the
+ // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
+ }
+
+ // Now that all instructions in the function are constant folded, erase dead
+ // blocks, because we can now use ConstantFoldTerminator to get rid of
+ // in-edges.
+ for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // If there are any PHI nodes in this successor, drop entries for BB now.
+ BasicBlock *DeadBB = BlocksToErase[i];
+ for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
+ UI != UE; ) {
+ // Grab the user and then increment the iterator early, as the user
+ // will be deleted. Step past all adjacent uses from the same user.
+ Instruction *I = dyn_cast<Instruction>(*UI);
+ do { ++UI; } while (UI != UE && *UI == I);
+
+ // Ignore blockaddress users; BasicBlock's dtor will handle them.
+ if (!I) continue;
+
+ bool Folded = ConstantFoldTerminator(I->getParent());
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ llvm_unreachable("Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
+ }
+
+ // Finally, delete the basic block.
+ F->getBasicBlockList().erase(DeadBB);
+ }
+ BlocksToErase.clear();
+ }
+
+ // If we inferred constant or undef return values for a function, we replaced
+ // all call uses with the inferred value. This means we don't need to bother
+ // actually returning anything from the function. Replace all return
+ // instructions with return undef.
+ //
+ // Do this in two stages: first identify the functions we should process, then
+ // actually zap their returns. This is important because we can only do this
+ // if the address of the function isn't taken. In cases where a return is the
+ // last use of a function, the order of processing functions would affect
+ // whether other functions are optimizable.
+ SmallVector<ReturnInst*, 8> ReturnsToZap;
+
+ // TODO: Process multiple value ret instructions also.
+ const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
+ E = RV.end(); I != E; ++I) {
+ Function *F = I->first;
+ if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
+ continue;
+
+ // We can only do this if we know that nothing else can call the function.
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
+ continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ ReturnsToZap.push_back(RI);
+ }
+
+ // Zap all of the returns that we've identified as safe to change.
+ for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
+ Function *F = ReturnsToZap[i]->getParent()->getParent();
+ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
+ }
+
+ // If we inferred constant or undef values for global variables, we can
+ // delete the global and any stores that remain to it.
+ const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
+ for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
+ E = TG.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ assert(!I->second.isOverdefined() &&
+ "Overdefined values should have been taken out of the map!");
+ DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
+ while (!GV->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ SI->eraseFromParent();
+ }
+ M.getGlobalList().erase(GV);
+ ++IPNumGlobalConst;
+ }
+
+ return MadeChanges;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
new file mode 100644
index 000000000000..302c287d3cbd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,189 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+ initializeADCEPass(Registry);
+ initializeBlockPlacementPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeConstantPropagationPass(Registry);
+ initializeCorrelatedValuePropagationPass(Registry);
+ initializeDCEPass(Registry);
+ initializeDeadInstEliminationPass(Registry);
+ initializeDSEPass(Registry);
+ initializeGVNPass(Registry);
+ initializeEarlyCSEPass(Registry);
+ initializeIndVarSimplifyPass(Registry);
+ initializeJumpThreadingPass(Registry);
+ initializeLICMPass(Registry);
+ initializeLoopDeletionPass(Registry);
+ initializeLoopInstSimplifyPass(Registry);
+ initializeLoopRotatePass(Registry);
+ initializeLoopStrengthReducePass(Registry);
+ initializeLoopUnrollPass(Registry);
+ initializeLoopUnswitchPass(Registry);
+ initializeLoopIdiomRecognizePass(Registry);
+ initializeLowerAtomicPass(Registry);
+ initializeLowerExpectIntrinsicPass(Registry);
+ initializeMemCpyOptPass(Registry);
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
+ initializeReassociatePass(Registry);
+ initializeRegToMemPass(Registry);
+ initializeSCCPPass(Registry);
+ initializeIPSCCPPass(Registry);
+ initializeSROA_DTPass(Registry);
+ initializeSROA_SSAUpPass(Registry);
+ initializeCFGSimplifyPassPass(Registry);
+ initializeSimplifyLibCallsPass(Registry);
+ initializeSinkingPass(Registry);
+ initializeTailDupPass(Registry);
+ initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+ initializeScalarOpts(*unwrap(R));
+}
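+
+// Illustrative usage sketch (not part of the original source): a C client
+// might combine these bindings roughly as follows (error handling omitted):
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddSCCPPass(PM);
+//   LLVMAddCFGSimplificationPass(PM);
+//   LLVMRunPassManager(PM, Mod);   /* Mod is an existing LLVMModuleRef */
+//   LLVMDisposePassManager(PM);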
+
+void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAggressiveDCEPass());
+}
+
+void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCFGSimplificationPass());
+}
+
+void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadStoreEliminationPass());
+}
+
+void LLVMAddGVNPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGVNPass());
+}
+
+void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIndVarSimplifyPass());
+}
+
+void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstructionCombiningPass());
+}
+
+void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createJumpThreadingPass());
+}
+
+void LLVMAddLICMPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLICMPass());
+}
+
+void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopDeletionPass());
+}
+
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIdiomPass());
+}
+
+void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRotatePass());
+}
+
+void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnrollPass());
+}
+
+void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnswitchPass());
+}
+
+void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMemCpyOptPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
+void LLVMAddReassociatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createReassociatePass());
+}
+
+void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSCCPPass());
+}
+
+void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass());
+}
+
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+}
+
+void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
+ int Threshold) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+}
+
+void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSimplifyLibCallsPass());
+}
+
+void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTailCallEliminationPass());
+}
+
+void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantPropagationPass());
+}
+
+void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDemoteRegisterToMemoryPass());
+}
+
+void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createVerifierPass());
+}
+
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCorrelatedValuePropagationPass());
+}
+
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createEarlyCSEPass());
+}
+
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+}
+
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBasicAliasAnalysisPass());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
new file mode 100644
index 000000000000..7d6349cf4e77
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -0,0 +1,2674 @@
+//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation implements the well known scalar replacement of
+// aggregates transformation. This xform breaks up alloca instructions of
+// aggregate type (structure or array) into individual alloca instructions for
+// each member (if possible). Then, if possible, it transforms the individual
+// alloca instructions into nice clean scalar SSA form.
+//
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// the two often interact, especially for C++ programs. As such, iterating
+// between SRoA and Mem2Reg until we run out of things to promote works well.
+//
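+// Illustrative example (not from the original source): an alloca such as
+//   %pair = alloca { i32, i32 }
+// whose fields are only accessed through constant-index GEPs is split into
+//   %pair.0 = alloca i32
+//   %pair.1 = alloca i32
+// and each piece is then promoted to an SSA value by the Mem2Reg machinery.
+//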
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalarrepl"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumReplaced, "Number of allocas broken up");
+STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
+STATISTIC(NumConverted, "Number of aggregates converted to scalar");
+STATISTIC(NumGlobals, "Number of allocas copied from constant global");
+
+namespace {
+ struct SROA : public FunctionPass {
+ SROA(int T, bool hasDT, char &ID)
+ : FunctionPass(ID), HasDomTree(hasDT) {
+ if (T == -1)
+ SRThreshold = 128;
+ else
+ SRThreshold = T;
+ }
+
+ bool runOnFunction(Function &F);
+
+ bool performScalarRepl(Function &F);
+ bool performPromotion(Function &F);
+
+ private:
+ bool HasDomTree;
+ TargetData *TD;
+
+ /// DeadInsts - Keep track of instructions we have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<Value*, 32> DeadInsts;
+
+ /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
+ /// information about the uses. All these fields are initialized to false
+ /// and set to true when something is learned.
+ struct AllocaInfo {
+ /// The alloca to promote.
+ AllocaInst *AI;
+
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
+ /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
+ bool isUnsafe : 1;
+
+ /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
+ bool isMemCpySrc : 1;
+
+ /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
+ bool isMemCpyDst : 1;
+
+ /// hasSubelementAccess - This is true if a subelement of the alloca is
+ /// ever accessed, or false if the alloca is only accessed with mem
+ /// intrinsics or loads/stores that only access the entire alloca at once.
+ bool hasSubelementAccess : 1;
+
+ /// hasALoadOrStore - This is true if there are any loads or stores to it.
+ /// The alloca may just be accessed with memcpy, for example, which would
+ /// not set this.
+ bool hasALoadOrStore : 1;
+
+ explicit AllocaInfo(AllocaInst *ai)
+ : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+ hasSubelementAccess(false), hasALoadOrStore(false) {}
+ };
+
+ unsigned SRThreshold;
+
+ void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+ I.isUnsafe = true;
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ }
+
+ bool isSafeAllocaToScalarRepl(AllocaInst *AI);
+
+ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+ void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Instruction *TheAccess, bool AllowWholeAccess);
+ bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy);
+
+ void DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList);
+ void DeleteDeadInstructions();
+
+ void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+
+ static MemTransferInst *isOnlyCopiedFromConstantGlobal(
+ AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
+ };
+
+ // SROA_DT - SROA that uses DominatorTree.
+ struct SROA_DT : public SROA {
+ static char ID;
+ public:
+ SROA_DT(int T = -1) : SROA(T, true, ID) {
+ initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass requires DominatorTree, and we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ // SROA_SSAUp - SROA that uses SSAUpdater.
+ struct SROA_SSAUp : public SROA {
+ static char ID;
+ public:
+ SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+
+}
+
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+
+// Public interface to the ScalarReplAggregates pass
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+ bool UseDomTree) {
+ if (UseDomTree)
+ return new SROA_DT(Threshold);
+ return new SROA_SSAUp(Threshold);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Convert To Scalar Optimization.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
+/// optimization, which scans the uses of an alloca and determines if it can
+/// rewrite it in terms of a single new alloca that can be mem2reg'd.
+class ConvertToScalarInfo {
+ /// AllocaSize - The size of the alloca being considered in bytes.
+ unsigned AllocaSize;
+ const TargetData &TD;
+
+ /// IsNotTrivial - This is set to true if there is some access to the object
+ /// which means that mem2reg can't promote it.
+ bool IsNotTrivial;
+
+ /// ScalarKind - Tracks the kind of alloca being considered for promotion,
+ /// computed based on the uses of the alloca rather than the LLVM type system.
+ enum {
+ Unknown,
+
+ // Accesses via GEPs that are consistent with element access of a vector
+ // type. This will not be converted into a vector unless there is a later
+ // access using an actual vector type.
+ ImplicitVector,
+
+ // Accesses via vector operations and GEPs that are consistent with the
+ // layout of a vector type.
+ Vector,
+
+ // An integer bag-of-bits with bitwise operations for insertion and
+ // extraction. Any combination of types can be converted into this kind
+ // of scalar.
+ Integer
+ } ScalarKind;
+
+ /// VectorTy - This tracks the vector type that we should promote the alloca
+ /// to if it is possible to turn it into a vector. This starts out null, and
+ /// if it isn't possible to turn into a vector type, it gets set to VoidTy.
+ const VectorType *VectorTy;
+
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
+public:
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
+ VectorTy(0), HadNonMemTransferAccess(false) { }
+
+ AllocaInst *TryConvert(AllocaInst *AI);
+
+private:
+ bool CanConvertToScalar(Value *V, uint64_t Offset);
+ void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
+ bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+
+ Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder);
+ Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+ uint64_t Offset, IRBuilder<> &Builder);
+};
+} // end anonymous namespace.
+
+
+/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
+/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
+/// alloca if possible or null if not.
+AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
+ // If we can't convert this scalar, or if mem2reg can trivially do it, bail
+ // out.
+ if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ return 0;
+
+ // If an alloca has only memset / memcpy uses, it may still have an Unknown
+ // ScalarKind. Treat it as an Integer below.
+ if (ScalarKind == Unknown)
+ ScalarKind = Integer;
+
+ // FIXME: It should be possible to promote the vector type up to the alloca's
+ // size.
+ if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+ ScalarKind = Integer;
+
+ // If we were able to find a vector type that can handle this with
+ // insert/extract elements, and if there was at least one use that had
+ // a vector type, promote this to a vector. We don't want to promote
+ // random stuff that doesn't use vectors (e.g. <9 x double>) because then
+ // we just get a lot of insert/extracts. If at least one vector is
+ // involved, then we probably really do have a union of vector/array.
+ const Type *NewTy;
+ if (ScalarKind == Vector) {
+ assert(VectorTy && "Missing type for vector scalar.");
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
+ NewTy = VectorTy; // Use the vector type.
+ } else {
+ unsigned BitWidth = AllocaSize * 8;
+ if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
+ !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+ // Create and insert the integer alloca.
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
+ }
+ AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0);
+ return NewAI;
+}
+
+/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
+/// (VectorTy) so far at the offset specified by Offset (which is specified in
+/// bytes).
+///
+/// There are three cases we handle here:
+/// 1) A union of vector types of the same size and potentially its elements.
+/// Here we turn element accesses into insert/extract element operations.
+/// This promotes a <4 x float> with a store of float to the third element
+/// into a <4 x float> that uses insert element.
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
+/// large) integer type with extract and insert operations where the loads
+/// and stores would mutate the memory. We mark this by setting VectorTy
+/// to VoidTy.
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+ uint64_t Offset) {
+ // If we already decided to turn this into a blob of integer memory, there is
+ // nothing to be done.
+ if (ScalarKind == Integer)
+ return;
+
+ // If this could be contributing to a vector, analyze it.
+
+ // If the In type is a vector that is the same size as the alloca, see if it
+ // matches the existing VectorTy.
+ if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (MergeInVectorType(VInTy, Offset))
+ return;
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
+ isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (EltSize == AllocaSize)
+ return;
+
+ // If we're accessing something that could be an element of a vector, see
+ // if the implied vector agrees with what we already have and if Offset is
+ // compatible with it.
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
+ (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
+ if (!VectorTy) {
+ ScalarKind = ImplicitVector;
+ VectorTy = VectorType::get(In, AllocaSize/EltSize);
+ return;
+ }
+
+ unsigned CurrentEltSize = VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8;
+ if (EltSize == CurrentEltSize)
+ return;
+
+ if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
+ return;
+ }
+ }
+
+ // Otherwise, we have a case that we can't handle with an optimized vector
+ // form. We can still turn this into a large integer.
+ ScalarKind = Integer;
+}
+
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+ uint64_t Offset) {
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+ return false;
+
+ // If this is the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ ScalarKind = Vector;
+ VectorTy = VInTy;
+ return true;
+ }
+
+ unsigned BitWidth = VectorTy->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+ ScalarKind = Vector;
+ return true;
+ }
+
+ const Type *ElementTy = VectorTy->getElementType();
+ const Type *InElementTy = VInTy->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+ unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (BitWidth < 8 || InBitWidth < 8 ||
+ BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ ScalarKind = Vector;
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
+}
+
+/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
+/// its accesses to a single vector type, return true and set VectorTy to
+/// the new type. If we could only convert the alloca into a single promotable
+/// integer, return true as well; ScalarKind records which case applies.
+/// Further, if the use is not a completely trivial use that mem2reg could
+/// promote, set IsNotTrivial. Offset is the current offset from the base of
+/// the alloca being analyzed.
+///
+/// If we see at least one access to the value with an actual vector type, the
+/// kind is tracked as Vector rather than ImplicitVector.
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // Don't break volatile loads.
+ if (LI->isVolatile())
+ return false;
+ // Don't touch MMX operations.
+ if (LI->getType()->isX86_MMXTy())
+ return false;
+ HadNonMemTransferAccess = true;
+ MergeInTypeForLoadOrStore(LI->getType(), Offset);
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Storing the pointer, not into the value?
+ if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ // Don't touch MMX operations.
+ if (SI->getOperand(0)->getType()->isX86_MMXTy())
+ return false;
+ HadNonMemTransferAccess = true;
+ MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!CanConvertToScalar(BCI, Offset))
+ return false;
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a GEP with variable indices, we can't handle it.
+ if (!GEP->hasAllConstantIndices())
+ return false;
+
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ // See if all uses can be converted.
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // handle it.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store of constant value.
+ if (!isa<ConstantInt>(MSI->getValue()))
+ return false;
+
+ // Store of constant size.
+ ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+ if (!Len)
+ return false;
+
+ // If the size differs from the alloca, we can only convert the alloca to
+ // an integer bag-of-bits.
+ // FIXME: This should handle all of the cases that are currently accepted
+ // as vector element insertions.
+ if (Len->getZExtValue() != AllocaSize || Offset != 0)
+ ScalarKind = Integer;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
+ if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ return false;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // Otherwise, we cannot handle this!
+ return false;
+ }
+
+ return true;
+}
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly. This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right. By the end of this, there should be no uses of Ptr.
+void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
+ uint64_t Offset) {
+ while (!Ptr->use_empty()) {
+ Instruction *User = cast<Instruction>(Ptr->use_back());
+
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
+ ConvertUsesToScalar(CI, NewAI, Offset);
+ CI->eraseFromParent();
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ GEP->eraseFromParent();
+ continue;
+ }
+
+ IRBuilder<> Builder(User);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // The load is a bit extract from NewAI shifted right by Offset bits.
+ Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *NewLoadVal
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ LI->replaceAllUsesWith(NewLoadVal);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ assert(SI->getOperand(0) != Ptr && "Consistency error!");
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
+ Builder);
+ Builder.CreateStore(New, NewAI);
+ SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // transform it into a store of the expanded constant value.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (NumBytes != 0) {
+ unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
+
+ // Compute the value replicated the right number of times.
+ APInt APVal(NumBytes*8, Val);
+
+ // Splat the value if non-zero.
+ if (Val)
+ for (unsigned i = 1; i != NumBytes; ++i)
+ APVal |= APVal << 8;
+
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(
+ ConstantInt::get(User->getContext(), APVal),
+ Old, Offset, Builder);
+ Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ }
+ MSI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+
+ // If the source and destination both refer to the same alloca, then this
+ // is a noop copy-to-self; just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ SPTy->getAddressSpace());
+ }
+ SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ DPTy->getAddressSpace());
+ }
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+ MTI->eraseFromParent();
+ continue;
+ }
+
+ llvm_unreachable("Unsupported operation!");
+ }
+}
+
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input types must be integer or floating-point
+/// scalar or vector types, and the resulting type is an integer, float or
+/// double.
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
+ unsigned NewBitWidth) {
+ bool IsFP1 = Ty1->isFloatingPointTy() ||
+ (Ty1->isVectorTy() &&
+ cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
+ bool IsFP2 = Ty2->isFloatingPointTy() ||
+ (Ty2->isVectorTy() &&
+ cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
+
+ LLVMContext &Context = Ty1->getContext();
+
+ // Prefer floating-point types over integer types, as integer types may have
+ // been created by earlier scalar replacement.
+ if (IsFP1 || IsFP2) {
+ if (NewBitWidth == 32)
+ return Type::getFloatTy(Context);
+ if (NewBitWidth == 64)
+ return Type::getDoubleTy(Context);
+ }
+
+ return Type::getIntNTy(Context, NewBitWidth);
+}
+
+/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
+/// to another vector of the same element type which has the same allocation
+/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
+static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
+ IRBuilder<> &Builder) {
+ const Type *FromType = FromVal->getType();
+ const VectorType *FromVTy = cast<VectorType>(FromType);
+ const VectorType *ToVTy = cast<VectorType>(ToType);
+ assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
+ "Vectors must have the same element type");
+ Value *UnV = UndefValue::get(FromType);
+ unsigned numEltsFrom = FromVTy->getNumElements();
+ unsigned numEltsTo = ToVTy->getNumElements();
+
+ SmallVector<Constant*, 3> Args;
+ const Type* Int32Ty = Builder.getInt32Ty();
+ unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+ unsigned i;
+ for (i=0; i != minNumElts; ++i)
+ Args.push_back(ConstantInt::get(Int32Ty, i));
+
+ if (i < numEltsTo) {
+ Constant* UnC = UndefValue::get(Int32Ty);
+ for (; i != numEltsTo; ++i)
+ Args.push_back(UnC);
+ }
+ Constant *Mask = ConstantVector::get(Args);
+ return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+}
+
+/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
+/// or vector value FromVal, extracting the bits from the offset specified by
+/// Offset. This returns the value, which is of type ToType.
+///
+/// This happens when we are converting an "integer union" to a single
+/// integer scalar, or when we are converting a "vector union" to a vector with
+/// insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // If the load is of the whole new alloca, no conversion is needed.
+ const Type *FromType = FromVal->getType();
+ if (FromType == ToType && Offset == 0)
+ return FromVal;
+
+ // If the result alloca is a vector type, this is either an element
+ // access or a bitcast to another vector type of the same size.
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (FromTypeSize == ToTypeSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, they are two vectors with the same element type that
+ // have the same allocation size but a different number of elements,
+ // so use a shuffle vector.
+ if (FromType->getPrimitiveSizeInBits() ==
+ ToType->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ else
+ return CreateShuffleVectorCast(FromVal, ToType, Builder);
+ }
+
+ if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
+ assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
+ "of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(FromType, ToType,
+ ToTypeSize * 8);
+ unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
+
+ // Otherwise it must be an element access.
+ unsigned Elt = 0;
+ if (Offset) {
+ unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ }
+ // Return the element extracted out of it.
+ Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
+ Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ if (V->getType() != ToType)
+ V = Builder.CreateBitCast(V, ToType, "tmp");
+ return V;
+ }
+
+ // If ToType is a first-class aggregate, extract out each of the pieces and
+ // use insertvalue instructions to form the FCA.
+ if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ Value *Res = UndefValue::get(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ Value *Res = UndefValue::get(AT);
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
+ Offset+i*EltSize, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ // Otherwise, this must be a union that was converted to an integer value.
+ const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+
+ // If this is a big-endian system and the load is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+ TD.getTypeStoreSizeInBits(ToType) - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with shl), which are
+ // not well defined. We do this to support (e.g.) loads off the end of a
+ // structure where only some bits are used.
+ if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateLShr(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ ShAmt), "tmp");
+ else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateShl(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ -ShAmt), "tmp");
+
+ // Finally, unconditionally truncate the integer to the right width.
+ unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ if (LIBitWidth < NTy->getBitWidth())
+ FromVal =
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+ else if (LIBitWidth > NTy->getBitWidth())
+ FromVal =
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+
+ // If the result is an integer, this is a trunc or bitcast.
+ if (ToType->isIntegerTy()) {
+ // Should be done.
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
+ // Just do a bitcast, we know the sizes match up.
+ FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ } else {
+ // Otherwise must be a pointer.
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ }
+ assert(FromVal->getType() == ToType && "Didn't convert right?");
+ return FromVal;
+}
+
+/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
+/// or vector value "Old" at the offset specified by Offset.
+///
+/// This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_InsertValue(Value *SV, Value *Old,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // Convert the stored type to the actual type, shift it left to insert
+ // then 'or' into place.
+ const Type *AllocaType = Old->getType();
+ LLVMContext &Context = Old->getContext();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+
+ // Changing the whole vector with memset or with an access of a different
+ // vector type?
+ if (ValSize == VecSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, they are two vectors with the same element type that
+ // have the same allocation size but a different number of elements,
+ // so use a shuffle vector.
+ if (VTy->getPrimitiveSizeInBits() ==
+ SV->getType()->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ else
+ return CreateShuffleVectorCast(SV, VTy, Builder);
+ }
+
+ if (isPowerOf2_64(VecSize / ValSize)) {
+ assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
+ "value of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
+ ValSize);
+ unsigned NumCastVectorElements = VecSize / ValSize;
+
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
+
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
+ // Must be an element insertion.
+ assert(SV->getType() == VTy->getElementType());
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned Elt = Offset/EltSize;
+ return Builder.CreateInsertElement(Old, SV,
+ ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
+ "tmp");
+ }
+
+ // If SV is a first-class aggregate value, insert each value recursively.
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old,
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ }
+ return Old;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ }
+ return Old;
+ }
+
+ // If SV is a float, convert it to the appropriate integer type.
+ // If it is a pointer, do the same.
+ unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
+ SV = Builder.CreateBitCast(SV,
+ IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ else if (SV->getType()->isPointerTy())
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+
+ // Zero extend or truncate the value if needed.
+ if (SV->getType() != AllocaType) {
+ if (SV->getType()->getPrimitiveSizeInBits() <
+ AllocaType->getPrimitiveSizeInBits())
+ SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ else {
+ // Truncation may be needed if storing more than the alloca can hold
+ // (undefined behavior).
+ SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SrcWidth = DestWidth;
+ SrcStoreWidth = DestStoreWidth;
+ }
+ }
+
+ // If this is a big-endian system and the store is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with shr), which are
+ // not well defined. We do this to support (e.g.) stores off the end of a
+ // structure where only some bits in the structure are set.
+ APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+ if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
+ ShAmt), "tmp");
+ Mask <<= ShAmt;
+ } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
+ -ShAmt), "tmp");
+ Mask = Mask.lshr(-ShAmt);
+ }
+
+ // Mask out the bits we are about to insert from the old value, and or
+ // in the new bits.
+ if (SrcWidth != DestWidth) {
+ assert(DestWidth > SrcWidth);
+ Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
+ SV = Builder.CreateOr(Old, SV, "ins");
+ }
+ return SV;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SRoA Driver
+//===----------------------------------------------------------------------===//
+
+
+bool SROA::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ bool Changed = performPromotion(F);
+
+ // FIXME: ScalarRepl currently depends on TargetData more than it
+ // theoretically needs to. It should be refactored in order to support
+ // target-independent IR. Until this is done, just skip the actual
+ // scalar-replacement portion of this pass.
+ if (!TD) return Changed;
+
+ while (1) {
+ bool LocalChange = performScalarRepl(F);
+ if (!LocalChange) break; // No need to repromote if no scalarrepl
+ Changed = true;
+ LocalChange = performPromotion(F);
+ if (!LocalChange) break; // No need to re-scalarrepl if no promotion
+ }
+
+ return Changed;
+}
+
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst *AI;
+ DIBuilder *DIB;
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ DIBuilder *DB)
+ : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+
+ void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ this->AI = AI;
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+
+ LoadAndStorePromoter::run(Insts);
+ AI->eraseFromParent();
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ DDI->eraseFromParent();
+ }
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ DVI->eraseFromParent();
+ }
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == AI;
+ return cast<StoreInst>(I)->getPointerOperand() == AI;
+ }
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Instruction *DbgVal = NULL;
+ // If an argument is zero or sign extended then use the argument directly.
+ // The ZExt/SExt may be zapped by an optimization pass in the future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ else
+ DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()), LI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+ }
+};
+} // end anon namespace
+
+/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers and then
+/// select between the results, allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operands
+/// to the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // Both operands to the select need to be dereferencable, either absolutely
+ // (e.g. allocas) or at this point because we can see other accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a PHI if its only uses are loads and if the incoming
+/// pointers can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+ // For now, we can only do this promotion if the load is in the same block as
+ // the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN->getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is a
+ // common case that happens when instcombine merges two loads through a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ }
+
+ // Okay, we know that we have one or more loads in the same block as the PHI.
+ // We can transform this if it is safe to push the loads into the predecessor
+ // blocks. The only thing to watch out for is that we can't put a possibly
+ // trapping load in the predecessor if it is a critical edge.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+
+ // If the predecessor has a single successor, then the edge isn't critical.
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the InVal is an invoke in the pred, we can't put a load on the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == Pred)
+ return false;
+
+ // If this pointer is always safe to load, or if we can prove that there is
+ // already a load in the block, then we can move the load to the pred block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it. If the alloca is close but
+/// not quite there, this will transform the code to allow promotion. As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+ SetVector<Instruction*, SmallVector<Instruction*, 4>,
+ SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI || SI->isVolatile())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ // If the condition being selected on is a constant, fold the select; yes,
+ // this does (rarely) happen early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ Value *Result = SI->getOperand(1+CI->isZero());
+ SI->replaceAllUsesWith(Result);
+ SI->eraseFromParent();
+
+ // This is very rare, and we just scrambled the use list of AI, so start
+ // over completely.
+ return tryToMakeAllocaBePromotable(AI, TD);
+ }
+
+ // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+ // loads, then we can transform this by rewriting the select.
+ if (!isSafeSelectToSpeculate(SI, TD))
+ return false;
+
+ InstsToRewrite.insert(SI);
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ if (PN->use_empty()) { // Dead PHIs can be stripped.
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+ // in the pred blocks, then we can transform this by rewriting the PHI.
+ if (!isSafePHIToSpeculate(PN, TD))
+ return false;
+
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ return false;
+ }
+
+ // If there are no instructions to rewrite, then all uses are load/stores and
+ // we're done!
+ if (InstsToRewrite.empty())
+ return true;
+
+ // If we have instructions that need to be rewritten for this to be
+ // promotable, take care of that now.
+ for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+ // Selects in InstsToRewrite only have load uses. Rewrite each as two
+ // loads with a new select.
+ while (!SI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+ IRBuilder<> Builder(LI);
+ LoadInst *TrueLoad =
+ Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+ LoadInst *FalseLoad =
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
+
+ // Transfer alignment and TBAA info if present.
+ TrueLoad->setAlignment(LI->getAlignment());
+ FalseLoad->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+ V->takeName(LI);
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ }
+
+ // Now that all the loads are gone, the select is gone too.
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // Otherwise, we have a PHI node which allows us to push the loads into the
+ // predecessors.
+ PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+ if (PN->use_empty()) {
+ PN->eraseFromParent();
+ continue;
+ }
+
+ const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+ PN->getName()+".ld", PN);
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which load we pick; if they differ, any choice is acceptable.
+ LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks. Keep track of which blocks we
+ // insert them into in case we have multiple edges from the same block.
+ DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ LoadInst *&Load = InsertedLoads[Pred];
+ if (Load == 0) {
+ Load = new LoadInst(PN->getIncomingValue(i),
+ PN->getName() + "." + Pred->getName(),
+ Pred->getTerminator());
+ Load->setAlignment(Align);
+ if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ PN->eraseFromParent();
+ }
+
+ ++NumAdjusted;
+ return true;
+}
+
+bool SROA::performPromotion(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree *DT = 0;
+ if (HasDomTree)
+ DT = &getAnalysis<DominatorTree>();
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ DIBuilder DIB(*F.getParent());
+ bool Changed = false;
+ SmallVector<Instruction*, 64> Insts;
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (tryToMakeAllocaBePromotable(AI, TD))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ if (HasDomTree)
+ PromoteMemToReg(Allocas, *DT);
+ else {
+ SSAUpdater SSA;
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ AllocaInst *AI = Allocas[i];
+
+ // Build list of instructions to promote.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ Insts.push_back(cast<Instruction>(*UI));
+ AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
+ Insts.clear();
+ }
+ }
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
+/// SROA. It must be a struct or array type with a small number of elements.
+static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+ const Type *T = AI->getAllocatedType();
+ // Do not promote any struct into more than 32 separate vars.
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ return ST->getNumElements() <= 32;
+ // Arrays are much less likely to be safe for SROA; only consider
+ // them if they are very small.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements() <= 8;
+ return false;
+}
+
+
+// performScalarRepl - This algorithm is a simple worklist driven algorithm,
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
+//
+bool SROA::performScalarRepl(Function &F) {
+ std::vector<AllocaInst*> WorkList;
+
+ // Scan the entry basic block, adding allocas to the worklist.
+ BasicBlock &BB = F.getEntryBlock();
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ if (AllocaInst *A = dyn_cast<AllocaInst>(I))
+ WorkList.push_back(A);
+
+ // Process the worklist
+ bool Changed = false;
+ while (!WorkList.empty()) {
+ AllocaInst *AI = WorkList.back();
+ WorkList.pop_back();
+
+ // Handle dead allocas trivially. These can be formed by SROA'ing arrays
+ // with unused elements.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If this alloca is impossible for us to promote, reject it early.
+ if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
+ continue;
+
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ ToDelete[i]->eraseFromParent();
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+ Copy->eraseFromParent(); // Don't mutate the global.
+ AI->eraseFromParent();
+ ++NumGlobals;
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can perform the core SROA transformation. We cannot
+ // transform the allocation instruction if it is an array allocation
+ // (allocations OF arrays are ok though), and an allocation of a scalar
+ // value cannot be decomposed at all.
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+
+ // Do not promote [0 x %struct].
+ if (AllocaSize == 0) continue;
+
+ // Do not promote any struct whose size is too big.
+ if (AllocaSize > SRThreshold) continue;
+
+ // If the alloca looks like a good candidate for scalar replacement, and if
+ // all its users can be transformed, then split up the aggregate into its
+ // separate elements.
+ if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+
+ // See if we can turn this aggregate value (potentially with casts) into a
+ // simple scalar value that can be mem2reg'd into a register value.
+ // IsNotTrivial tracks whether this is something that mem2reg could have
+ // promoted itself. If so, we don't want to transform it needlessly. Note
+ // that we can't just check based on the type: the alloca may be of an i32
+ // but that has pointer arithmetic to set byte 3 of it or something.
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
+ NewAI->takeName(AI);
+ AI->eraseFromParent();
+ ++NumConverted;
+ Changed = true;
+ continue;
+ }
+
+ // Otherwise, couldn't process this alloca.
+ }
+
+ return Changed;
+}
+
+/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
+/// predicate; perform SROA on it now.
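+///
+/// For instance, a hypothetical "%a = alloca [2 x i32], align 4" is replaced
+/// by "%a.0 = alloca i32, align 4" and "%a.1 = alloca i32, align 4", both of
+/// which are pushed back onto the worklist for further processing.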
+void SROA::DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList) {
+ DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
+ SmallVector<AllocaInst*, 32> ElementAllocas;
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ ElementAllocas.reserve(ST->getNumContainedTypes());
+ for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ } else {
+ const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ElementAllocas.reserve(AT->getNumElements());
+ const Type *ElTy = AT->getElementType();
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ }
+
+ // Now that we have created the new alloca instructions, rewrite all the
+ // uses of the old alloca.
+ RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
+
+ // Now erase any instructions that were made dead while rewriting the alloca.
+ DeleteDeadInstructions();
+ AI->eraseFromParent();
+
+ ++NumReplaced;
+}
+
+/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
+/// recursively including all their operands that become trivially dead.
+void SROA::DeleteDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ // (But, don't add allocas to the dead instruction list -- they are
+ // already on the worklist and will be deleted separately.)
+ *OI = 0;
+ if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ }
+}
+
+/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
+/// performing scalar replacement of alloca AI. The results are flagged in
+/// the Info parameter. Offset indicates the position within AI that is
+/// referenced by this instruction.
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafeForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ uint64_t GEPOffset = Offset;
+ isSafeGEP(GEPI, GEPOffset, Info);
+ if (!Info.isUnsafe)
+ isSafeForScalarRepl(GEPI, GEPOffset, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (Length == 0)
+ return MarkUnsafe(Info, User);
+ isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer
+/// derived from the alloca, we can often still split the alloca into elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single loads and stores
+/// that are PHI'd together, which can happen due to code sinking.
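+///
+/// An illustrative sketch of the pattern this allows (hypothetical names):
+///   bb1:  %p1 = getelementptr { i32, i32 }* %A, i32 0, i32 0
+///   bb2:  %p2 = getelementptr { i32, i32 }* %A, i32 0, i32 0
+///   join: %p  = phi i32* [ %p1, %bb1 ], [ %p2, %bb2 ]
+///         %v  = load i32* %p
+/// Every use reachable through the PHI stays within element 0 of %A, so the
+/// other element can still be split off and promoted.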
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+/// isSafeGEP - Check if a GEP instruction can be handled for scalar
+/// replacement. It is safe when all the indices are constant, in-bounds
+/// references, and when the resulting offset corresponds to an element within
+/// the alloca type. The results are flagged in the Info parameter. Upon
+/// return, Offset is adjusted as specified by the GEP indices.
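+///
+/// For example (hypothetical IR, alloca type { i32, [4 x i32] }):
+///   getelementptr { i32, [4 x i32] }* %A, i32 0, i32 1, i32 2   ; safe
+///   getelementptr { i32, [4 x i32] }* %A, i32 0, i32 1, i32 %n  ; unsafe
+/// The second form is rejected because the array index is not a constant.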
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
+ uint64_t &Offset, AllocaInfo &Info) {
+ gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ if (GEPIt == E)
+ return;
+
+ // Walk through the GEP type indices, checking the types that this indexes
+ // into.
+ for (; GEPIt != E; ++GEPIt) {
+ // Ignore struct elements, no extra checking needed for these.
+ if ((*GEPIt)->isStructTy())
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal)
+ return MarkUnsafe(Info, GEPI);
+ }
+
+ // Compute the offset due to this GEP and check if the alloca has a
+ // component element at that offset.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays). If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
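+/// For example, [3 x i32] and { i32, i32, i32 } are both homogeneous with
+/// NumElts == 3 and EltTy == i32, whereas { i32, float } is not.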
+static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
+ const Type *&EltTy) {
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ NumElts = AT->getNumElements();
+ EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ return true;
+ }
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ NumElts = ST->getNumContainedTypes();
+ EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ for (unsigned n = 1; n < NumElts; ++n) {
+ if (ST->getContainedType(n) != EltTy)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+ if (T1 == T2)
+ return true;
+
+ unsigned NumElts1, NumElts2;
+ const Type *EltTy1, *EltTy2;
+ if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+ isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+ NumElts1 == NumElts2 &&
+ EltTy1 == EltTy2)
+ return true;
+
+ return false;
+}
+
+/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
+/// alloca or has an offset and size that corresponds to a component element
+/// within it. The offset checked here may have been formed from a GEP with a
+/// pointer bitcasted to a different type.
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
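+///
+/// For example, a load of an i64 through a bitcast of a { i32, i32 } alloca
+/// (both 8 bytes on typical targets) is a whole-alloca integer access and is
+/// accepted here when AllowWholeAccess is true; it is handled later by the
+/// whole-alloca rewriting paths.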
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore,
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
+ // Check if this is a load/store of the entire alloca.
+ if (Offset == 0 && AllowWholeAccess &&
+ MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+ // loads/stores (which are essentially the same as the MemIntrinsics with
+ // regard to copying padding between elements). But, if an alloca is
+ // flagged as both a source and destination of such operations, we'll need
+ // to check later for padding between elements.
+ if (!MemOpType || MemOpType->isIntegerTy()) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ return;
+ }
+ // This is also safe for references using a type that is compatible with
+ // the type of the alloca, so that loads/stores can be rewritten using
+ // insertvalue/extractvalue.
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+ Info.hasSubelementAccess = true;
+ return;
+ }
+ }
+ // Check if the offset/size correspond to a component within the alloca type.
+ const Type *T = Info.AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize)) {
+ Info.hasSubelementAccess = true;
+ return;
+ }
+
+ return MarkUnsafe(Info, TheAccess);
+}
+
+/// TypeHasComponent - Return true if T has a component type with the
+/// specified offset and size. If Size is zero, do not check the size.
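+///
+/// For example, for a hypothetical T = { i32, [4 x i8] } on a target where i32
+/// is 4 bytes, (Offset=4, Size=1) lands on the first i8 of the array and
+/// returns true, while (Offset=2, Size=4) straddles the boundary between the
+/// i32 and the array and returns false.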
+bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
+ const Type *EltTy;
+ uint64_t EltSize;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ unsigned EltIdx = Layout->getElementContainingOffset(Offset);
+ EltTy = ST->getContainedType(EltIdx);
+ EltSize = TD->getTypeAllocSize(EltTy);
+ Offset -= Layout->getElementOffset(EltIdx);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ EltTy = AT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= AT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else {
+ return false;
+ }
+ if (Offset == 0 && (Size == 0 || EltSize == Size))
+ return true;
+ // Check if the component spans multiple elements.
+ if (Offset + Size > EltSize)
+ return false;
+ return TypeHasComponent(EltTy, Offset, Size);
+}
+
+/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
+/// the instruction I, which references it, to use the separate elements.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction.
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCast(BC, AI, Offset, NewElts);
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ RewriteGEP(GEPI, AI, Offset, NewElts);
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ uint64_t MemSize = Length->getZExtValue();
+ if (Offset == 0 &&
+ MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
+ // Otherwise the intrinsic can only touch a single element and the
+ // address operand will be updated, so nothing else needs to be done.
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ const Type *LIType = LI->getType();
+
+ if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+        //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ Value *Insert = UndefValue::get(LIType);
+ IRBuilder<> Builder(LI);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Load = Builder.CreateLoad(NewElts[i], "load");
+ Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
+ }
+ LI->replaceAllUsesWith(Insert);
+ DeadInsts.push_back(LI);
+ } else if (LIType->isIntegerTy() &&
+ TD->getTypeAllocSize(LIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ const Type *SIType = Val->getType();
+ if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ IRBuilder<> Builder(SI);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
+ Builder.CreateStore(Extract, NewElts[i]);
+ }
+ DeadInsts.push_back(SI);
+ } else if (SIType->isIntegerTy() &&
+ TD->getTypeAllocSize(SIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+ }
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
+ }
+ }
+}
+
+/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
+/// and recursively continue updating all of its uses.
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ RewriteForScalarRepl(BC, AI, Offset, NewElts);
+ if (BC->getOperand(0) != AI)
+ return;
+
+ // The bitcast references the original alloca. Replace its uses with
+ // references to the first new element alloca.
+ Instruction *Val = NewElts[0];
+ if (Val->getType() != BC->getDestTy()) {
+ Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
+ Val->takeName(BC);
+ }
+ BC->replaceAllUsesWith(Val);
+ DeadInsts.push_back(BC);
+}
+
+/// FindElementAndOffset - Return the index of the element containing Offset
+/// within the specified type, which must be either a struct or an array.
+/// Sets T to the type of the element and Offset to the offset within that
+/// element. IdxTy is set to the type of the index result to be used in a
+/// GEP instruction.
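+///
+/// For example, for a hypothetical T = [4 x i64] (8-byte elements) and
+/// Offset = 20, this returns Idx = 2 and leaves T = i64, Offset = 4, and
+/// IdxTy = i64 (array indices are emitted as 64-bit integers here).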
+uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy) {
+ uint64_t Idx = 0;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ Idx = Layout->getElementContainingOffset(Offset);
+ T = ST->getContainedType(Idx);
+ Offset -= Layout->getElementOffset(Idx);
+ IdxTy = Type::getInt32Ty(T->getContext());
+ return Idx;
+ }
+ const ArrayType *AT = cast<ArrayType>(T);
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+}
+
+/// RewriteGEP - Check if this GEP instruction moves the pointer across
+/// elements of the alloca that are being split apart, and if so, rewrite
+/// the GEP to be relative to the new element.
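+///
+/// For example, a hypothetical "getelementptr { i32, i32 }* %A, i32 0, i32 1"
+/// on an alloca split into %A.0 and %A.1 is replaced by a direct reference to
+/// %A.1; once the offset within the new element is zero, no GEP is needed.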
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ uint64_t OldOffset = Offset;
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+
+ RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
+
+ const Type *T = AI->getAllocatedType();
+ const Type *IdxTy;
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ if (GEPI->getOperand(0) == AI)
+ OldIdx = ~0ULL; // Force the GEP to be rewritten.
+
+ T = AI->getAllocatedType();
+ uint64_t EltOffset = Offset;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+
+ // If this GEP does not move the pointer across elements of the alloca
+  // being split, then it does not need to be rewritten.
+ if (Idx == OldIdx)
+ return;
+
+ const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ while (EltOffset != 0) {
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
+ }
+ Instruction *Val = NewElts[Idx];
+ if (NewArgs.size() > 1) {
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
+ NewArgs.end(), "", GEPI);
+ Val->takeName(GEPI);
+ }
+ if (Val->getType() != GEPI->getType())
+ Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
+ GEPI->replaceAllUsesWith(Val);
+ DeadInsts.push_back(GEPI);
+}
+
+/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
+/// Rewrite it to copy or set the elements of the scalarized memory.
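+///
+/// For example (an illustrative sketch, hypothetical names), a memcpy covering
+/// an entire { i32, i32 } alloca from another { i32, i32 }* %other becomes two
+/// per-element copies along the lines of:
+///   %other.0 = getelementptr inbounds { i32, i32 }* %other, i32 0, i32 0
+///   %tmp = load i32* %other.0
+///   store i32 %tmp, i32* %alloc.0
+/// (and likewise for element 1), after which the original memcpy is deleted.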
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // If this is a memcpy/memmove, construct the other pointer as the
+ // appropriate type. The "Other" pointer is the pointer that goes to memory
+ // that doesn't have anything to do with the alloca that we are promoting. For
+ // memset, this Value* stays null.
+ Value *OtherPtr = 0;
+ unsigned MemAlignment = MI->getAlignment();
+  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
+ if (Inst == MTI->getRawDest())
+ OtherPtr = MTI->getRawSource();
+ else {
+ assert(Inst == MTI->getRawSource());
+ OtherPtr = MTI->getRawDest();
+ }
+ }
+
+  // If there is another pointer, we want to convert it to the same pointer
+ // type as AI has, so we can GEP through it safely.
+ if (OtherPtr) {
+ unsigned AddrSpace =
+ cast<PointerType>(OtherPtr->getType())->getAddressSpace();
+
+ // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
+ // optimization, but it's also required to detect the corner case where
+ // both pointer operands are referencing the same memory, and where
+    // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
+ // function is only called for mem intrinsics that access the whole
+ // aggregate, so non-zero GEPs are not an issue here.)
+ OtherPtr = OtherPtr->stripPointerCasts();
+
+ // Copying the alloca to itself is a no-op: just delete it.
+ if (OtherPtr == AI || OtherPtr == NewElts[0]) {
+ // This code will run twice for a no-op memcpy -- once for each operand.
+ // Put only one reference to MI on the DeadInsts list.
+ for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
+ E = DeadInsts.end(); I != E; ++I)
+ if (*I == MI) return;
+ DeadInsts.push_back(MI);
+ return;
+ }
+
+ // If the pointer is not the right type, insert a bitcast to the right
+ // type.
+ const Type *NewTy =
+ PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+ if (OtherPtr->getType() != NewTy)
+ OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
+ }
+
+ // Process each element of the aggregate.
+ bool SROADest = MI->getRawDest() == Inst;
+
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // If this is a memcpy/memmove, emit a GEP of the other element address.
+ Value *OtherElt = 0;
+ unsigned OtherEltAlign = MemAlignment;
+
+ if (OtherPtr) {
+ Value *Idx[2] = { Zero,
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherPtr->getName()+"."+Twine(i),
+ MI);
+ uint64_t EltOffset;
+ const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ const Type *OtherTy = OtherPtrTy->getElementType();
+ if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ } else {
+ const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ }
+
+ // The alignment of the other pointer is the guaranteed alignment of the
+ // element, which is affected by both the known alignment of the whole
+      // mem intrinsic and the alignment of the element. If, for example, the
+      // alignment of the memcpy is 32 but the element is at a 4-byte offset,
+      // then the known alignment is just 4 bytes.
+ OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
+ }
+
+ Value *EltPtr = NewElts[i];
+ const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+
+ // If we got down to a scalar, insert a load or store as appropriate.
+ if (EltTy->isSingleValueType()) {
+ if (isa<MemTransferInst>(MI)) {
+ if (SROADest) {
+ // From Other to Alloca.
+ Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+ new StoreInst(Elt, EltPtr, MI);
+ } else {
+ // From Alloca to Other.
+ Value *Elt = new LoadInst(EltPtr, "tmp", MI);
+ new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+ }
+ continue;
+ }
+ assert(isa<MemSetInst>(MI));
+
+ // If the stored element is zero (common case), just store a null
+ // constant.
+ Constant *StoreVal;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
+ if (CI->isZero()) {
+ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
+ } else {
+ // If EltTy is a vector type, get the element type.
+ const Type *ValTy = EltTy->getScalarType();
+
+ // Construct an integer with the right value.
+ unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ APInt OneVal(EltSize, CI->getZExtValue());
+ APInt TotalVal(OneVal);
+ // Set each byte.
+ for (unsigned i = 0; 8*i < EltSize; ++i) {
+ TotalVal = TotalVal.shl(8);
+ TotalVal |= OneVal;
+ }
+
+ // Convert the integer value to the appropriate type.
+ StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
+ if (ValTy->isPointerTy())
+ StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
+ else if (ValTy->isFloatingPointTy())
+ StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
+ assert(StoreVal->getType() == ValTy && "Type mismatch!");
+
+ // If the requested value was a vector constant, create it.
+ if (EltTy != ValTy) {
+ unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
+ StoreVal = ConstantVector::get(Elts);
+ }
+ }
+ new StoreInst(StoreVal, EltPtr, MI);
+ continue;
+ }
+ // Otherwise, if we're storing a byte variable, use a memset call for
+ // this element.
+ }
+
+ unsigned EltSize = TD->getTypeAllocSize(EltTy);
+
+ IRBuilder<> Builder(MI);
+
+ // Finally, insert the meminst for this element.
+ if (isa<MemSetInst>(MI)) {
+ Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+ MI->isVolatile());
+ } else {
+ assert(isa<MemTransferInst>(MI));
+ Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+ Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+ if (isa<MemCpyInst>(MI))
+ Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+ else
+ Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
+ }
+ }
+ DeadInsts.push_back(MI);
+}
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation. Extract out the pieces of the stored
+/// integer and store them individually.
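+///
+/// For example, a hypothetical store of an i64 covering a { i32, i32 } alloca
+/// on a little-endian target becomes roughly:
+///   %lo = trunc i64 %val to i32
+///   store i32 %lo, i32* %alloc.0
+///   %hi.shift = lshr i64 %val, 32
+///   %hi = trunc i64 %hi.shift to i32
+///   store i32 %hi, i32* %alloc.1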
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts){
+ // Extract each element out of the integer according to its structure offset
+ // and store the element value to the individual alloca.
+ Value *SrcVal = SI->getOperand(0);
+ const Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ IRBuilder<> Builder(SI);
+
+ // Handle tail padding by extending the operand
+ if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ SrcVal = Builder.CreateZExt(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits));
+
+ DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ const StructLayout *Layout = TD->getStructLayout(EltSTy);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Get the number of bits to shift SrcVal to get the value.
+ const Type *FieldTy = EltSTy->getElementType(i);
+ uint64_t Shift = Layout->getElementOffsetInBits(i);
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
+ }
+
+ // Truncate down to an integer of the right size.
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ if (FieldSizeBits != AllocaSizeBits)
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits));
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == FieldTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = Builder.CreateBitCast(EltVal, FieldTy);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
+ }
+ new StoreInst(EltVal, DestField, SI);
+ }
+
+ } else {
+ const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ const Type *ArrayEltTy = ATy->getElementType();
+ uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+ uint64_t Shift;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-ElementOffset;
+ else
+ Shift = 0;
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (ElementSizeBits == 0) continue;
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
+ }
+
+ // Truncate down to an integer of the right size.
+ if (ElementSizeBits != AllocaSizeBits)
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits));
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == ArrayEltTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
+ }
+ new StoreInst(EltVal, DestField, SI);
+
+ if (TD->isBigEndian())
+ Shift -= ElementOffset;
+ else
+ Shift += ElementOffset;
+ }
+ }
+
+ DeadInsts.push_back(SI);
+}
+
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
+/// an integer. Load the individual pieces to form the aggregate value.
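+///
+/// This is the inverse of the whole-alloca store case above. For a hypothetical
+/// i64 load covering a { i32, i32 } alloca on a little-endian target, the
+/// result is rebuilt roughly as:
+///   %e0 = load i32* %alloc.0
+///   %z0 = zext i32 %e0 to i64
+///   %e1 = load i32* %alloc.1
+///   %z1 = zext i32 %e1 to i64
+///   %s1 = shl i64 %z1, 32
+///   %r  = or i64 %s1, %z0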
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // Extract each element out of the NewElts according to its structure offset
+ // and form the result value.
+ const Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ const StructLayout *Layout = 0;
+ uint64_t ArrayEltBitOffset = 0;
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ Layout = TD->getStructLayout(EltSTy);
+ } else {
+ const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ }
+
+ Value *ResultVal =
+ Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Load the value from the alloca. If the NewElt is an aggregate, cast
+ // the pointer to an integer of the same size before doing the load.
+ Value *SrcField = NewElts[i];
+ const Type *FieldTy =
+ cast<PointerType>(SrcField->getType())->getElementType();
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ FieldSizeBits);
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
+ SrcField = new BitCastInst(SrcField,
+ PointerType::getUnqual(FieldIntTy),
+ "", LI);
+ SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
+
+ // If SrcField is a fp or vector of the right size but that isn't an
+ // integer type, bitcast to an integer so we can shift it.
+ if (SrcField->getType() != FieldIntTy)
+ SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
+
+ // Zero extend the field to be the same size as the final alloca so that
+ // we can shift and insert it.
+ if (SrcField->getType() != ResultVal->getType())
+ SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
+
+ // Determine the number of bits to shift SrcField.
+ uint64_t Shift;
+ if (Layout) // Struct case.
+ Shift = Layout->getElementOffsetInBits(i);
+ else // Array case.
+ Shift = i*ArrayEltBitOffset;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
+
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
+ SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
+ }
+
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
+ }
+
+ // Handle tail padding by truncating the result
+ if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
+
+ LI->replaceAllUsesWith(ResultVal);
+ DeadInsts.push_back(LI);
+}
+
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
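+///
+/// For example, on a target where i32 requires 4-byte alignment, { i8, i32 }
+/// has three bytes of padding between its fields and { i32, i8 } typically has
+/// tail padding, so both return true; { i32, i32 } has no padding and returns
+/// false.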
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ }
+
+ // SROA currently handles only Arrays and Structs.
+ const StructType *STy = cast<StructType>(Ty);
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
+ PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ if (PrevFieldEnd < FieldBitOffset)
+ return true;
+ }
+ PrevFieldBitOffset = FieldBitOffset;
+ }
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ return false;
+}
+
+/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
+/// aggregate can be broken down into elements. Return true if it is safe to
+/// do so, false otherwise.
+bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
+ // Loop over the use list of the alloca. We can only transform it if all of
+ // the users are safe to transform.
+ AllocaInfo Info(AI);
+
+ isSafeForScalarRepl(AI, 0, Info);
+ if (Info.isUnsafe) {
+ DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
+ return false;
+ }
+
+ // Okay, we know all the users are promotable. If the aggregate is a memcpy
+ // source and destination, we have to be careful. In particular, the memcpy
+ // could be moving around elements that live in structure padding of the LLVM
+ // types, but may actually be used. In these cases, we refuse to promote the
+ // struct.
+ if (Info.isMemCpySrc && Info.isMemCpyDst &&
+ HasPadding(AI->getAllocatedType(), *TD))
+ return false;
+
+ // If the alloca never has an access to just *part* of it, but is accessed
+ // via loads and stores, then we should use ConvertToScalarInfo to promote
+ // the alloca instead of promoting each piece at a time and inserting fission
+ // and fusion code.
+ if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+ // If the struct/array just has one element, use basic SRoA.
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (ST->getNumElements() > 1) return false;
+ } else {
+ if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool PointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return PointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer and return false if
+/// we see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with isOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
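+///
+/// The pattern being matched looks roughly like this (hypothetical names):
+///   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+///   ...
+///   %A = alloca [4 x i32]
+///   %dst = bitcast [4 x i32]* %A to i8*
+///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst,
+///          i8* bitcast ([4 x i32]* @G to i8*), i64 16, i32 4, i1 false)
+/// with every other use of %A being a read.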
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ bool isOffset,
+ SmallVector<Instruction *, 4> &LifetimeMarkers) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (LI->isVolatile()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+ LifetimeMarkers))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
+ isOffset || !GEP->hasAllZeroIndices(),
+ LifetimeMarkers))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() ||
+ CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ LifetimeMarkers.push_back(II);
+ continue;
+ }
+ }
+
+    // If this isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (isOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!PointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVector<Instruction*, 4> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
+ return TheCopy;
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 000000000000..a66b3e38258f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,331 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations. For example:
+//
+// * Removes basic blocks with no predecessors.
+// * Merges a basic block into its predecessor if there is only one and the
+// predecessor only has one successor.
+// * Eliminates PHI nodes for basic blocks with a single predecessor.
+// * Eliminates a basic block that only contains an unconditional branch.
+// * Changes invoke instructions to nounwind functions to be calls.
+//   * Changes things like "if (x) if (y)" into "if (x&y)".
+// * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Attributes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+ struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
+ "Simplify the CFG", false, false)
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+ return new CFGSimplifyPass();
+}
+
+/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// ChangeToCall - Convert the specified invoke into a normal call.
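+///
+/// For example, a hypothetical
+///   invoke void @f() to label %normal unwind label %lpad
+/// becomes
+///   call void @f()
+///   br label %normal
+/// and %lpad loses this block as a predecessor.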
+static void ChangeToCall(InvokeInst *II) {
+ BasicBlock *BB = II->getParent();
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(BB);
+ BB->getInstList().erase(II);
+}
+
+static bool MarkAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ if (!Reachable.insert(BB))
+ continue;
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ ChangeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ ChangeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ ChangeToCall(II);
+ Changed = true;
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+static bool RemoveUnreachableBlocksFromFn(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
+ if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumSimpl += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
+ continue;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
+
+/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// node) return blocks, merge them together to promote recursive block merging.
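+///
+/// An illustrative sketch (hypothetical names): if %bb1 ends in "ret i32 %x"
+/// and %bb2 ends in "ret i32 %y", %bb2 is rewritten to "br label %bb1" and
+/// %bb1 gains a PHI ("%merge") selecting %x from its original predecessors and
+/// %y from %bb2, feeding a single "ret i32 %merge".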
+static bool MergeEmptyReturnBlocks(Function &F) {
+ bool Changed = false;
+
+ BasicBlock *RetBlock = 0;
+
+ // Scan all the blocks in the function, looking for empty return blocks.
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+ BasicBlock &BB = *BBI++;
+
+ // Only look at return blocks.
+ ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (Ret == 0) continue;
+
+ // Only look at the block if it is empty or the only other thing in it is a
+ // single PHI node that is the operand to the return.
+ if (Ret != &BB.front()) {
+ // Check for something else in the block.
+ BasicBlock::iterator I = Ret;
+ --I;
+ // Skip over debug info.
+ while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+ --I;
+ if (!isa<DbgInfoIntrinsic>(I) &&
+ (!isa<PHINode>(I) || I != BB.begin() ||
+ Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != I))
+ continue;
+ }
+
+ // If this is the first returning block, remember it and keep going.
+ if (RetBlock == 0) {
+ RetBlock = &BB;
+ continue;
+ }
+
+ // Otherwise, we found a duplicate return block. Merge the two.
+ Changed = true;
+
+    // The case when there is no input to the return, or when the returned
+    // values agree, is trivial. Note that they can't agree if there are phis
+    // in the blocks.
+ if (Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) ==
+ cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+ BB.replaceAllUsesWith(RetBlock);
+ BB.eraseFromParent();
+ continue;
+ }
+
+ // If the canonical return block has no PHI node, create one now.
+ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+ if (RetBlockPHI == 0) {
+ Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
+ pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+ std::distance(PB, PE), "merge",
+ &RetBlock->front());
+
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ RetBlockPHI->addIncoming(InVal, *PI);
+ RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+ }
+
+ // Turn BB into a block that just unconditionally branches to the return
+ // block. This handles the case when the two return blocks have a common
+    // predecessor but return different things.
+ RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
+ BB.getTerminator()->eraseFromParent();
+ BranchInst::Create(RetBlock, &BB);
+ }
+
+ return Changed;
+}
+
+/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Loop over all of the basic blocks and remove them if they are unneeded...
+ //
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
+ if (SimplifyCFG(BBIt++, TD)) {
+ LocalChange = true;
+ ++NumSimpl;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+// We may require multiple passes over the code to fully simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ bool EverChanged = RemoveUnreachableBlocksFromFn(F);
+ EverChanged |= MergeEmptyReturnBlocks(F);
+ EverChanged |= IterativeSimplifyCFG(F, TD);
+
+ // If neither pass changed anything, we're done.
+ if (!EverChanged) return false;
+
+ // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
+ // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterate between the two optimizations. We structure the code like this to
+  // avoid rerunning IterativeSimplifyCFG if the second pass of
+ // RemoveUnreachableBlocksFromFn doesn't do anything.
+ if (!RemoveUnreachableBlocksFromFn(F))
+ return true;
+
+ do {
+ EverChanged = IterativeSimplifyCFG(F, TD);
+ EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ } while (EverChanged);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
new file mode 100644
index 000000000000..7c415e5150dc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -0,0 +1,2391 @@
+//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies a variety of small
+// optimizations for calls to specific well-known functions (e.g. runtime
+// library functions). Any optimization that takes the very simple form
+// "replace call to library function with simpler code that provides the same
+// result" belongs in this file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+  /// CallOptimizer - This pure virtual method is implemented by subclasses to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return CallOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool CallHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
+
+/// IsOnlyUsedInEqualityComparison - Return true if V is only used in equality
+/// comparisons with With.
+static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
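+
+// For illustration only: rough examples of the patterns these helpers match:
+//   if (strcmp(s, "key") == 0) ...        // result only compared ==/!= 0
+//   if (strstr(hay, needle) == hay) ...   // result only compared against hay
+// In both cases the exact return value is never needed, only the comparison.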
+
+//===----------------------------------------------------------------------===//
+// String and Memory LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'strcat' Optimizations
+namespace {
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitStrLenMemCpy(Src, Dst, Len, B);
+ return Dst;
+ }
+
+ void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD);
+
+    // Now that we have the destination's length, we must index into the
+    // destination's pointer to get the actual memcpy destination (the end of
+    // the string we're concatenating onto).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+    // We have enough information to now generate the memcpy call to do the
+    // concatenation for us.  Copy Len + 1 bytes so the nul terminator comes
+    // along, with align = 1.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ }
+};
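+
+// For illustration only: a rough sketch of the strcat rewrite above.  Given
+//   strcat(dst, "ab");
+// GetStringLength returns 3, Len is unbiased to 2, and (with TargetData) the
+// call becomes approximately
+//   end = dst + strlen(dst);
+//   memcpy(end, "ab", 3);   // Len + 1 bytes, so the nul is copied too
+// with the original dst returned as the strcat result.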
+
+//===---------------------------------------===//
+// 'strncat' Optimizations
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+    // We don't optimize the case where the bound is smaller than the source
+    // length.
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ EmitStrLenMemCpy(Src, Dst, SrcLen, B);
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strchr' Optimizations
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+      if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // strchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.find(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+    // strchr(s+n,c) -> gep(s+n, i), where i is the offset of c in the string.
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
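+
+// For illustration only: rough examples of the strchr rewrites above:
+//   strchr("hello", 'l') -> gep("hello", 2)    // constant fold
+//   strchr("hello", 'z') -> null               // char not found
+//   strchr(s, c)         -> memchr(s, c, Len)  // s of known length (incl. nul),
+//                                              // c not constant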
+
+//===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD);
+ return 0;
+ }
+
+ // strrchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.rfind(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+    // strrchr(s+n,c) -> gep(s+n, i), where i is the last offset of c in the
+    // string.
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strcmp' Optimizations
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strcmp(Str1.c_str(),Str2.c_str()));
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD);
+ }
+
+ return 0;
+ }
+};
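+
+// For illustration only: rough examples of the strcmp rewrites above:
+//   strcmp(x, x)       -> 0
+//   strcmp("", x)      -> *x  (zero extended)
+//   strcmp("ab", "ac") -> a negative constant (the host strcmp result)
+//   strcmp(p, "x")     -> memcmp(p, "x", 2)   when both lengths are known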
+
+//===---------------------------------------===//
+// 'strncmp' Optimizations
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    // strncmp(x, y, n) -> cnst  (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strncmp(Str1.c_str(), Str2.c_str(), Length));
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'strcpy' Optimizations
+
+struct StrCpyOpt : public LibCallOptimization {
+ bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
+
+ StrCpyOpt(bool c) : OptChkCall(c) {}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ unsigned NumParams = OptChkCall ? 3 : 2;
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != NumParams ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Copy Len bytes, which includes the nul terminator, with
+    // align = 1.
+ if (OptChkCall)
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD);
+ else
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ return Dst;
+ }
+};
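+
+// For illustration only: a rough sketch of the strcpy rewrite above, assuming
+// TargetData is available:
+//   strcpy(dst, "hi")          -> memcpy(dst, "hi", 3)   // 3 includes the nul
+//   __strcpy_chk(dst, "hi", n) -> a checked memcpy via EmitMemCpyChk
+// and strcpy(x, x) simply folds to x.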
+
+//===---------------------------------------===//
+// 'strncpy' Optimizations
+
+struct StrNCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen+1) return 0;
+
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strlen' Optimizations
+
+struct StrLenOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (IsOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return 0;
+ }
+};
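+
+// For illustration only: rough examples of the strlen rewrites above:
+//   strlen("xyz")           -> 3
+//   if (strlen(s) == 0) ... -> if (*s == 0) ...   // only ==/!= 0 uses exist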
+
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
+
+struct StrToOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
+ return 0;
+
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+      // With a null EndPtr, this function won't capture its first (string)
+      // argument.  It would be readonly too, except that it may still write
+      // to errno.
+ CI->addAttribute(1, Attribute::NoCapture);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strstr' Optimizations
+
+struct StrStrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD);
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ Old->replaceAllUsesWith(Cmp);
+ Old->eraseFromParent();
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ std::string SearchStr, ToFindStr;
+ bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ std::string::size_type Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == std::string::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1)
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
+ ToFindStr[0], B, TD), CI->getType());
+ return 0;
+ }
+};
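+
+// For illustration only: rough examples of the strstr rewrites above:
+//   strstr(x, x)         -> x
+//   strstr(x, "")        -> x
+//   strstr("abcd", "bc") -> gep("abcd", 1)
+//   strstr(x, "y")       -> strchr(x, 'y')
+//   strstr(a, b) == a    -> strncmp(a, b, strlen(b)) == 0   (with TargetData)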
+
+
+//===---------------------------------------===//
+// 'memcmp' Optimizations
+
+struct MemCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
+ return 0;
+
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
+ return 0;
+ }
+};
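+
+// For illustration only: rough examples of the memcmp rewrites above:
+//   memcmp(s, s, n)         -> 0
+//   memcmp(a, b, 0)         -> 0
+//   memcmp(a, b, 1)         -> *(unsigned char*)a - *(unsigned char*)b
+//   memcmp("foo", "for", 3) -> a constant (the host memcmp result)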
+
+//===---------------------------------------===//
+// 'memcpy' Optimizations
+
+struct MemCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===---------------------------------------===//
+// 'memmove' Optimizations
+
+struct MemMoveOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===---------------------------------------===//
+// 'memset' Optimizations
+
+struct MemSetOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
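+
+// For illustration only: with TargetData available, the three rewrites above
+// replace the libc calls with the corresponding intrinsics, roughly:
+//   memcpy(d, s, n)  -> llvm.memcpy(d, s, n, align 1),     result d
+//   memmove(d, s, n) -> llvm.memmove(d, s, n, align 1),    result d
+//   memset(p, v, n)  -> llvm.memset(p, (i8)v, n, align 1), result p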
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'pow*' Optimizations
+
+struct PowOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return 0;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+      // This is faster than calling pow, and still handles negative zero
+      // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+ Op1, "powrecip");
+ return 0;
+ }
+};
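+
+// For illustration only: a summary of the pow rewrites above:
+//   pow(1.0, x) -> 1.0          pow(x, 0.0)  -> 1.0
+//   pow(2.0, x) -> exp2(x)      pow(x, 1.0)  -> x
+//   pow(x, 2.0) -> x * x        pow(x, -1.0) -> 1.0 / x
+//   pow(x, 0.5) -> (x == -inf ? +inf : fabs(sqrt(x)))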
+
+//===---------------------------------------===//
+// 'exp2' Optimizations
+
+struct Exp2Opt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType()->isFloatTy())
+ Name = "ldexpf";
+ else if (Op->getType()->isDoubleTy())
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ return 0;
+ }
+};
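+
+// For illustration only: a rough sketch of the exp2 rewrite above, shown for
+// 'double exp2(double)':
+//   exp2((double)(int)n)            -> ldexp(1.0, n)       // sext, <= 32 bits
+//   exp2((double)(unsigned short)n) -> ldexp(1.0, zext n)  // zext, < 32 bits
+// i.e. the power of two is formed by scaling 1.0, avoiding the libm call.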
+
+//===---------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
+ Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
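+
+// For illustration only: a rough example of the shrinking rewrite above:
+//   floor((double)f) -> (double)floorf(f)      // f has type float
+// and similarly for the other unary libm functions registered below (ceil,
+// round, rint, nearbyint) when the float variant exists on the host.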
+
+//===----------------------------------------------------------------------===//
+// Integer Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'ffs*' Optimizations
+
+struct FFSOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has one integer argument and an i32 result.
+ if (FT->getNumParams() != 1 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->getValue() == 0) // ffs(0) -> 0.
+ return Constant::getNullValue(CI->getType());
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall(F, Op, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+ V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+ }
+};
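+
+// For illustration only: rough examples of the ffs rewrites above:
+//   ffs(0)  -> 0
+//   ffs(12) -> 3                          // 12 = 0b1100, lowest set bit is
+//                                         // bit 3 (1-based)
+//   ffs(x)  -> x != 0 ? cttz(x) + 1 : 0   // via the llvm.cttz intrinsic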
+
+//===---------------------------------------===//
+// 'isdigit' Optimizations
+
+struct IsDigitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'isascii' Optimizations
+
+struct IsAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'abs', 'labs', 'llabs' Optimizations
+
+struct AbsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
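+
+// For illustration only: the abs rewrite above in C terms:
+//   abs(x) -> (x > -1) ? x : -x
+// abs(INT_MIN) is undefined behavior in C, so the wrapping negation produced
+// here for that input is acceptable.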
+
+
+//===---------------------------------------===//
+// 'toascii' Optimizations
+
+struct ToAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+    // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'printf' Optimizations
+
+struct PrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return 0;
+
+    // Empty format string -> no-op.
+    if (FormatStr.empty()) // Tolerate printf declared with a void result.
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
+    // Do not do any of the following transformations if the printf return
+    // value is used; in general, the printf return value is not compatible
+    // with that of either putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr.erase(FormatStr.end()-1);
+ Constant *C = ConstantArray::get(*Context, FormatStr, true);
+ C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
+ GlobalVariable::InternalLinkage, C, "str");
+ EmitPutS(C, B, TD);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
+ EmitPutS(CI->getArgOperand(1), B, TD);
+ return CI;
+ }
+ return 0;
+ }
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
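+
+// For illustration only: rough examples of the printf rewrites above (the
+// result of the call must be unused, except for the iprintf case):
+//   printf("x")       -> putchar('x')
+//   printf("hello\n") -> puts("hello")
+//   printf("%c", c)   -> putchar(c)
+//   printf("%s\n", s) -> puts(s)
+//   printf(fmt, ...)  -> iprintf(fmt, ...)  // TLI has iprintf, no FP args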
+
+//===---------------------------------------===//
+// 'sprintf' Optimizations
+
+struct SPrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+    // If we just have a format string (nothing else crazy), transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *IncLen = B.CreateAdd(Len,
+ ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
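+
+// For illustration only: rough examples of the sprintf rewrites above:
+//   sprintf(s, "hello")   -> memcpy(s, "hello", 6), result 5   (needs TD)
+//   sprintf(s, "%c", c)   -> store c and a trailing nul, result 1
+//   sprintf(s, "%s", str) -> memcpy(s, str, strlen(str)+1),
+//                            result strlen(str)                (needs TD)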
+
+//===---------------------------------------===//
+// 'fwrite' Optimizations
+
+struct FWriteOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require a pointer, an integer, an integer, a pointer, returning integer.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ EmitFPutC(Char, CI->getArgOperand(3), B, TD);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'fputs' Optimizations
+
+struct FPutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+    // Require two pointers.  Also, we can't optimize if the return value is
+    // used.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len) return 0;
+ EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD);
+ return CI; // Known to have no uses (see above).
+ }
+};
+
+//===---------------------------------------===//
+// 'fprintf' Optimizations
+
+struct FPrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // All the optimizations depend on the format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ return 0;
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ return CI;
+ }
+ return 0;
+ }
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer parameters and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ std::string Str;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
+};
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// This pass optimizes well-known library functions from libc and libm.
+ ///
+ class SimplifyLibCalls : public FunctionPass {
+ TargetLibraryInfo *TLI;
+
+ StringMap<LibCallOptimization*> Optimizations;
+ // String and Memory LibCall Optimizations
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+ StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
+ MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
+ // Math Library Optimizations
+ PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ // Integer Optimizations
+ FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+ ToAsciiOpt ToAscii;
+ // Formatting and IO Optimizations
+ SPrintFOpt SPrintF; PrintFOpt PrintF;
+ FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+ PutsOpt Puts;
+
+ bool Modified; // This is only used by doInitialization.
+ public:
+ static char ID; // Pass identification
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+ initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+ }
+ void InitOptimizations();
+ bool runOnFunction(Function &F);
+
+ void setDoesNotAccessMemory(Function &F);
+ void setOnlyReadsMemory(Function &F);
+ void setDoesNotThrow(Function &F);
+ void setDoesNotCapture(Function &F, unsigned n);
+ void setDoesNotAlias(Function &F, unsigned n);
+ bool doInitialization(Module &M);
+
+ void inferPrototypeAttributes(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+} // end anonymous namespace.
+
+char SimplifyLibCalls::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+ return new SimplifyLibCalls();
+}
+
+/// Optimizations - Populate the Optimizations map with all the optimizations
+/// we know.
+void SimplifyLibCalls::InitOptimizations() {
+ // String and Memory LibCall Optimizations
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strncat"] = &StrNCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
+ Optimizations["strncpy"] = &StrNCpy;
+ Optimizations["strlen"] = &StrLen;
+ Optimizations["strpbrk"] = &StrPBrk;
+ Optimizations["strtol"] = &StrTo;
+ Optimizations["strtod"] = &StrTo;
+ Optimizations["strtof"] = &StrTo;
+ Optimizations["strtoul"] = &StrTo;
+ Optimizations["strtoll"] = &StrTo;
+ Optimizations["strtold"] = &StrTo;
+ Optimizations["strtoull"] = &StrTo;
+ Optimizations["strspn"] = &StrSpn;
+ Optimizations["strcspn"] = &StrCSpn;
+ Optimizations["strstr"] = &StrStr;
+ Optimizations["memcmp"] = &MemCmp;
+ if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy;
+ Optimizations["memmove"] = &MemMove;
+ if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet;
+
+ // _chk variants of String and Memory LibCall Optimizations.
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+
+ // Math Library Optimizations
+ Optimizations["powf"] = &Pow;
+ Optimizations["pow"] = &Pow;
+ Optimizations["powl"] = &Pow;
+ Optimizations["llvm.pow.f32"] = &Pow;
+ Optimizations["llvm.pow.f64"] = &Pow;
+ Optimizations["llvm.pow.f80"] = &Pow;
+ Optimizations["llvm.pow.f128"] = &Pow;
+ Optimizations["llvm.pow.ppcf128"] = &Pow;
+ Optimizations["exp2l"] = &Exp2;
+ Optimizations["exp2"] = &Exp2;
+ Optimizations["exp2f"] = &Exp2;
+ Optimizations["llvm.exp2.ppcf128"] = &Exp2;
+ Optimizations["llvm.exp2.f128"] = &Exp2;
+ Optimizations["llvm.exp2.f80"] = &Exp2;
+ Optimizations["llvm.exp2.f64"] = &Exp2;
+ Optimizations["llvm.exp2.f32"] = &Exp2;
+
+#ifdef HAVE_FLOORF
+ Optimizations["floor"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_CEILF
+ Optimizations["ceil"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_ROUNDF
+ Optimizations["round"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_RINTF
+ Optimizations["rint"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_NEARBYINTF
+ Optimizations["nearbyint"] = &UnaryDoubleFP;
+#endif
+
+ // Integer Optimizations
+ Optimizations["ffs"] = &FFS;
+ Optimizations["ffsl"] = &FFS;
+ Optimizations["ffsll"] = &FFS;
+ Optimizations["abs"] = &Abs;
+ Optimizations["labs"] = &Abs;
+ Optimizations["llabs"] = &Abs;
+ Optimizations["isdigit"] = &IsDigit;
+ Optimizations["isascii"] = &IsAscii;
+ Optimizations["toascii"] = &ToAscii;
+
+ // Formatting and IO Optimizations
+ Optimizations["sprintf"] = &SPrintF;
+ Optimizations["printf"] = &PrintF;
+ Optimizations["fwrite"] = &FWrite;
+ Optimizations["fputs"] = &FPuts;
+ Optimizations["fprintf"] = &FPrintF;
+ Optimizations["puts"] = &Puts;
+}
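+
+// For illustration only: lookup is purely by callee name, so wiring up a new
+// simplification means defining another LibCallOptimization subclass above,
+// giving the pass a member instance, and registering it here, e.g. the
+// hypothetical
+//   Optimizations["strndup"] = &StrNDup;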
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ if (Optimizations.empty())
+ InitOptimizations();
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ IRBuilder<> Builder(F.getContext());
+
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI) continue;
+
+ // Ignore indirect calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() ||
+ !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+ continue;
+
+ // Ignore unknown calls.
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (!LCO) continue;
+
+ // Set the builder to the instruction after the call.
+ Builder.SetInsertPoint(BB, I);
+
+ // Use debug location of CI for all new instructions.
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Try to optimize this call.
+ Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
+ if (Result == 0) continue;
+
+ DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
+ dbgs() << " into: " << *Result << "\n");
+
+ // Something changed!
+ Changed = true;
+ ++NumSimplified;
+
+ // Inspect the instruction after the call (which was potentially just
+ // added) next.
+ I = CI; ++I;
+
+ if (CI != Result && !CI->use_empty()) {
+ CI->replaceAllUsesWith(Result);
+ if (!Result->hasName())
+ Result->takeName(CI);
+ }
+ CI->eraseFromParent();
+ }
+ }
+ return Changed;
+}
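+
+// For illustration only: this pass is normally scheduled as part of the
+// standard optimization pipeline, but it can also be run directly, e.g.
+//   opt -simplify-libcalls input.bc -o output.bc
+// with createSimplifyLibCallsPass() as the programmatic entry point.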
+
+// Utility methods for doInitialization.
+
+void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
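+
+// For illustration only: these setters are how doInitialization() annotates
+// known prototypes.  For example, for
+//   size_t strlen(const char *s);
+// inferPrototypeAttributes() below marks the function readonly and nounwind
+// and its first parameter nocapture, bumping NumAnnotated once per attribute
+// actually added.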
+
+
+void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
+ const FunctionType *FTy = F.getFunctionType();
+
+ StringRef Name = F.getName();
+ switch (Name[0]) {
+ case 's':
+ if (Name == "strlen") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strchr" ||
+ Name == "strrchr") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "strcpy" ||
+ Name == "stpcpy" ||
+ Name == "strcat" ||
+ Name == "strtol" ||
+ Name == "strtod" ||
+ Name == "strtof" ||
+ Name == "strtoul" ||
+ Name == "strtoll" ||
+ Name == "strtold" ||
+ Name == "strncat" ||
+ Name == "strncpy" ||
+ Name == "strtoull") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strxfrm") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strcmp" ||
+ Name == "strspn" ||
+ Name == "strncmp" ||
+ Name == "strcspn" ||
+ Name == "strcoll" ||
+ Name == "strcasecmp" ||
+ Name == "strncasecmp") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strstr" ||
+ Name == "strpbrk") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strtok" ||
+ Name == "strtok_r") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "scanf" ||
+ Name == "setbuf" ||
+ Name == "setvbuf") {
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strdup" ||
+ Name == "strndup") {
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "stat" ||
+ Name == "sscanf" ||
+ Name == "sprintf" ||
+ Name == "statvfs") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "snprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "setitimer") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "system") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'm':
+ if (Name == "malloc") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "memcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memchr" ||
+ Name == "memrchr") {
+ if (FTy->getNumParams() != 3)
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "modf" ||
+ Name == "modff" ||
+ Name == "modfl" ||
+ Name == "memcpy" ||
+ Name == "memccpy" ||
+ Name == "memmove") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memalign") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotAlias(F, 0);
+ } else if (Name == "mkdir" ||
+ Name == "mktime") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'r':
+ if (Name == "realloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "read") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "rmdir" ||
+ Name == "rewind" ||
+ Name == "remove" ||
+ Name == "realpath") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "rename" ||
+ Name == "readlink") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'w':
+ if (Name == "write") {
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'b':
+ if (Name == "bcopy") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bzero") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'c':
+ if (Name == "calloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "chmod" ||
+ Name == "chown" ||
+ Name == "ctermid" ||
+ Name == "clearerr" ||
+ Name == "closedir") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'a':
+ if (Name == "atoi" ||
+ Name == "atol" ||
+ Name == "atof" ||
+ Name == "atoll") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "access") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'f':
+ if (Name == "fopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fdopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "feof" ||
+ Name == "free" ||
+ Name == "fseek" ||
+ Name == "ftell" ||
+ Name == "fgetc" ||
+ Name == "fseeko" ||
+ Name == "ftello" ||
+ Name == "fileno" ||
+ Name == "fflush" ||
+ Name == "fclose" ||
+ Name == "fsetpos" ||
+ Name == "flockfile" ||
+ Name == "funlockfile" ||
+ Name == "ftrylockfile") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "ferror") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ } else if (Name == "fputc" ||
+ Name == "fstat" ||
+ Name == "frexp" ||
+ Name == "frexpf" ||
+ Name == "frexpl" ||
+ Name == "fstatvfs") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fgets") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "fread" ||
+ Name == "fwrite") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ } else if (Name == "fputs" ||
+ Name == "fscanf" ||
+ Name == "fprintf" ||
+ Name == "fgetpos") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'g':
+ if (Name == "getc" ||
+ Name == "getlogin_r" ||
+ Name == "getc_unlocked") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "getenv") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "gets" ||
+ Name == "getchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "getitimer") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "getpwnam") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'u':
+ if (Name == "ungetc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "uname" ||
+ Name == "unlink" ||
+ Name == "unsetenv") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "utime" ||
+ Name == "utimes") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'p':
+ if (Name == "putc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "puts" ||
+ Name == "printf" ||
+ Name == "perror") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "pread" ||
+ Name == "pwrite") {
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "putchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "popen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "pclose") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'v':
+ if (Name == "vscanf") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vsscanf" ||
+ Name == "vfscanf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "valloc") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "vprintf") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vfprintf" ||
+ Name == "vsprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "vsnprintf") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ }
+ break;
+ case 'o':
+ if (Name == "open") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ } else if (Name == "opendir") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 't':
+ if (Name == "tmpfile") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "times") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'h':
+ if (Name == "htonl" ||
+ Name == "htons") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'n':
+ if (Name == "ntohl" ||
+ Name == "ntohs") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'l':
+ if (Name == "lstat") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "lchown") {
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'q':
+ if (Name == "qsort") {
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return;
+ // May throw; qsort calls back through the user-supplied comparison function.
+ setDoesNotCapture(F, 4);
+ }
+ break;
+ case '_':
+ if (Name == "__strdup" ||
+ Name == "__strndup") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "__strtok_r") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "_IO_getc") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "_IO_putc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 1:
+ if (Name == "\1__isoc99_scanf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1stat64" ||
+ Name == "\1lstat64" ||
+ Name == "\1statvfs64" ||
+ Name == "\1__isoc99_sscanf") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fopen64") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fseeko64" ||
+ Name == "\1ftello64") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1tmpfile64") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "\1fstat64" ||
+ Name == "\1fstatvfs64") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1open64") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ }
+}
+
+/// doInitialization - Add attributes to well-known functions.
+///
+bool SimplifyLibCalls::doInitialization(Module &M) {
+ Modified = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (F.isDeclaration() && F.hasName())
+ inferPrototypeAttributes(F);
+ }
+ return Modified;
+}
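+
+// As a rough illustration (using fopen from the cases above purely as an
+// example): given a module that only declares
+//   FILE *fopen(const char *path, const char *mode);
+// this initialization marks the declaration as non-throwing, marks its return
+// value as not aliasing any other pointer, and marks both pointer arguments
+// as not captured, so later passes can reason about calls to it without ever
+// seeing a body.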
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(x)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+// * cos(-x) -> cos(x)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+// * stpcpy(str, "literal") ->
+// llvm.memcpy(str,"literal",strlen("literal")+1,1)
+//
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
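+// As a sketch of what one of these rewrites would look like, the pow entries
+// above would turn a call such as
+//   pow(pow(x, y), z)
+// into pow(x, y * z); like most of the identities listed here, this is
+// generally only valid under relaxed floating-point semantics, since rounding
+// and special-value behavior can differ between the two forms.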
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
new file mode 100644
index 000000000000..705f44204900
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,274 @@
+//===-- Sink.cpp - Code Sinking -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
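+
+// As a small illustration (variable names are made up), given code like
+//   int x = a + b;
+//   if (cond)
+//     use(x);      // x is unused on the other path
+// the addition can be sunk into the block guarded by 'cond', so the work is
+// only performed on the path that actually needs the result.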
+
+#define DEBUG_TYPE "sink"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of instructions sunk");
+
+namespace {
+ class Sinking : public FunctionPass {
+ DominatorTree *DT;
+ LoopInfo *LI;
+ AliasAnalysis *AA;
+
+ public:
+ static char ID; // Pass identification
+ Sinking() : FunctionPass(ID) {
+ initializeSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ }
+ private:
+ bool ProcessBlock(BasicBlock &BB);
+ bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+ bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+ };
+} // end anonymous namespace
+
+char Sinking::ID = 0;
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
+
+FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified value
+/// occur in blocks dominated by the specified block.
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+ BasicBlock *BB) const {
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the value. The DWARF generator handles this, although
+ // the user might not get the right debug info at runtime.
+ for (Value::use_iterator I = Inst->use_begin(),
+ E = Inst->use_end(); I != E; ++I) {
+ // Determine the block of the use.
+ Instruction *UseInst = cast<Instruction>(*I);
+ BasicBlock *UseBlock = UseInst->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ // Check that it dominates.
+ if (!DT->dominates(BB, UseBlock))
+ return false;
+ }
+ return true;
+}
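+
+// For illustration (value and block names are made up): if a use is the
+// incoming value of a PHI node such as
+//   %p = phi i32 [ %val, %A ], [ 0, %B ]
+// then the use of %val is treated as occurring at the end of %A, so it is %A,
+// not the PHI's own block, that must be dominated by BB.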
+
+bool Sinking::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (Function::iterator I = F.begin(), E = F.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool Sinking::ProcessBlock(BasicBlock &BB) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an unreachable
+ // loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&BB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ BasicBlock::iterator I = BB.end();
+ --I;
+ bool ProcessedBegin = false;
+ SmallPtrSet<Instruction *, 8> Stores;
+ do {
+ Instruction *Inst = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == BB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ if (SinkInstruction(Inst, Stores))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+ if (L->isVolatile()) return false;
+
+ AliasAnalysis::Location Loc = AA->getLocation(L);
+ for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+ E = Stores.end(); I != E; ++I)
+ if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
+ return false;
+ }
+
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ return false;
+
+ return true;
+}
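+
+// For example (names are made up), when the caller walks a block bottom-up:
+//   v = *p;    // candidate load
+//   *q = 0;    // store already recorded in Stores
+// the load may only be sunk past the store if alias analysis shows the store
+// cannot modify *p; any instruction that may write memory is itself added to
+// Stores and is never sunk.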
+
+/// SinkInstruction - Determine whether it is safe to sink the specified
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ // Check if it's safe to move the instruction.
+ if (!isSafeToMove(Inst, AA, Stores))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ BasicBlock *ParentBlock = Inst->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ BasicBlock *SuccToSinkTo = 0;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Instructions can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ // Look at all the successors and decide which one
+ // we should sink to.
+ for (succ_iterator SI = succ_begin(ParentBlock),
+ E = succ_end(ParentBlock); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Inst, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(dbgs() << "Sink instr " << *Inst);
+ DEBUG(dbgs() << "to block ";
+ WriteAsOperand(dbgs(), SuccToSinkTo, false));
+
+ // If the block has multiple predecessors, sinking would introduce computation
+ // on a path where it does not already exist. We could split the critical edge,
+ // but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ if (!Inst->isSafeToSpeculativelyExecute()) {
+ DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
+ return false;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Don't sink instructions into a loop.
+ if (LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+ return false;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
+ ++InsertPos;
+
+ // Move the instruction.
+ Inst->moveBefore(InsertPos);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
new file mode 100644
index 000000000000..9dd83c04fa61
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
@@ -0,0 +1,373 @@
+//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a limited form of tail duplication, intended to simplify
+// CFGs by removing some unconditional branches. This pass is necessary to
+// straighten out loops created by the C front-end, but also is capable of
+// making other code nicer. After this pass is run, the CFG simplify pass
+// should be run to clean up the mess.
+//
+// This pass could be enhanced in the future to use profile information to be
+// more aggressive.
+//
+//===----------------------------------------------------------------------===//
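+
+// As a rough sketch of the transformation (illustrative CFG): if bb1 and bb2
+// both end in an unconditional branch to a small block 'tail', that block can
+// be cloned into its predecessors, removing the unconditional branches and
+// letting each copy be simplified using the values known in its own
+// predecessor.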
+
+#define DEBUG_TYPE "tailduplicate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constant.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
+
+static cl::opt<unsigned>
+TailDupThreshold("taildup-threshold",
+ cl::desc("Max block size to tail duplicate"),
+ cl::init(1), cl::Hidden);
+
+namespace {
+ class TailDup : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TailDup() : FunctionPass(ID) {
+ initializeTailDupPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
+ inline void eliminateUnconditionalBranch(BranchInst *BI);
+ SmallPtrSet<BasicBlock*, 4> CycleDetector;
+ };
+}
+
+char TailDup::ID = 0;
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
+
+// Public interface to the Tail Duplication pass
+FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
+
+/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
+/// the function, eliminating it if it looks attractive enough. CycleDetector
+/// prevents infinite loops by checking that we aren't redirecting a branch to
+/// a place it already pointed to earlier; see PR 2323.
+bool TailDup::runOnFunction(Function &F) {
+ bool Changed = false;
+ CycleDetector.clear();
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ if (shouldEliminateUnconditionalBranch(I->getTerminator(),
+ TailDupThreshold)) {
+ eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
+ Changed = true;
+ } else {
+ ++I;
+ CycleDetector.clear();
+ }
+ }
+ return Changed;
+}
+
+/// shouldEliminateUnconditionalBranch - Return true if this branch looks
+/// attractive to eliminate. We eliminate the branch if the destination basic
+/// block has at most Threshold instructions in it, not counting PHI nodes or
+/// debug intrinsics. In practice, since one of these is a terminator
+/// instruction, this means that we will add at most Threshold-1 instructions
+/// to the new block.
+///
+/// We don't count PHI nodes in the count since they will be removed when the
+/// contents of the block are copied over.
+///
+bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
+ unsigned Threshold) {
+ BranchInst *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (Dest == BI->getParent()) return false; // Do not loop infinitely!
+
+ // Do not inline a block if we will just get another branch to the same block!
+ TerminatorInst *DTI = Dest->getTerminator();
+ if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
+ if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
+ return false; // Do not loop infinitely!
+
+ // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
+ // because doing so would require breaking critical edges. This should be
+ // fixed eventually.
+ if (!DTI->use_empty())
+ return false;
+
+ // Do not bother with blocks with only a single predecessor: SimplifyCFG
+ // will fold these two blocks together!
+ pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
+ ++PI;
+ if (PI == PE) return false; // Exactly one predecessor!
+
+ BasicBlock::iterator I = Dest->getFirstNonPHI();
+
+ for (unsigned Size = 0; I != Dest->end(); ++I) {
+ if (Size == Threshold) return false; // The block is too large.
+
+ // Don't tail duplicate call instructions. They are very large compared to
+ // other instructions.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
+
+ // Also alloca and malloc.
+ if (isa<AllocaInst>(I)) return false;
+
+ // Some vector instructions can expand into a number of instructions.
+ if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<InsertElementInst>(I)) return false;
+
+ // Only count instructions that are not debugger intrinsics.
+ if (!isa<DbgInfoIntrinsic>(I)) ++Size;
+ }
+
+ // Do not tail duplicate a block that has thousands of successors into a block
+ // with a single successor if the block has many other predecessors. This can
+ // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
+ // cases that have a large number of indirect gotos.
+ unsigned NumSuccs = DTI->getNumSuccessors();
+ if (NumSuccs > 8) {
+ unsigned TooMany = 128;
+ if (NumSuccs >= TooMany) return false;
+ TooMany = TooMany/NumSuccs;
+ for (; PI != PE; ++PI)
+ if (TooMany-- == 0) return false;
+ }
+
+ // If this unconditional branch is a fall-through, be careful about
+ // tail duplicating it. In particular, we don't want to taildup it if the
+ // original block will still be there after taildup is completed: doing so
+ // would eliminate the fall-through, requiring unconditional branches.
+ Function::iterator DestI = Dest;
+ if (&*--DestI == BI->getParent()) {
+ // The uncond branch is a fall-through. Tail duplicating the block will
+ // eliminate the fall-through-ness and end up cloning the terminator
+ // at the end of the Dest block. Since the original Dest block will
+ // continue to exist, this means that one or the other will not be able to
+ // fall through. One typical example that this helps with is code like:
+ // if (a)
+ // foo();
+ // if (b)
+ // foo();
+ // Cloning the 'if b' block into the end of the first foo block is messy.
+
+ // The messy case is when the fall-through block falls through to other
+ // blocks. This is what we would be preventing if we cloned the block.
+ DestI = Dest;
+ if (++DestI != Dest->getParent()->end()) {
+ BasicBlock *DestSucc = DestI;
+ // If any of Dest's successors are fall-throughs, don't do this xform.
+ for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
+ SI != SE; ++SI)
+ if (*SI == DestSucc)
+ return false;
+ }
+ }
+
+ // Finally, check that we haven't redirected to this target block earlier;
+ // there are cases where we loop forever if we don't check this (PR 2323).
+ if (!CycleDetector.insert(Dest))
+ return false;
+
+ return true;
+}
+
+/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
+/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
+/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
+/// DstBlock, return it.
+static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
+ BasicBlock *DstBlock) {
+ // SrcBlock must have a single predecessor.
+ pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
+ if (PI == PE || ++PI != PE) return 0;
+
+ BasicBlock *SrcPred = *pred_begin(SrcBlock);
+
+ // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
+ // there is only one other pred, get it, otherwise we can't handle it.
+ PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
+ BasicBlock *DstOtherPred = 0;
+ BasicBlock *P = *PI;
+ if (P == SrcBlock) {
+ if (++PI == PE) return 0;
+ DstOtherPred = *PI;
+ if (++PI != PE) return 0;
+ } else {
+ DstOtherPred = P;
+ if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
+ }
+
+ // We can handle two situations here: "if then" and "if then else" blocks. An
+ // 'if then' situation is just where DstOtherPred == SrcPred.
+ if (DstOtherPred == SrcPred)
+ return SrcPred;
+
+ // Check to see if we have an "if then else" situation, which means that
+ // DstOtherPred will have a single predecessor and it will be SrcPred.
+ PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
+ if (PI != PE && *PI == SrcPred) {
+ if (++PI != PE) return 0; // Not a single pred.
+ return SrcPred; // Otherwise, it's an "if then" situation. Return the if.
+ }
+
+ // Otherwise, this is something we can't handle.
+ return 0;
+}
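+
+// For illustration: in the "if then" shape, SrcPred branches to both SrcBlock
+// and DstBlock, and SrcBlock branches to DstBlock. In the "if then else"
+// shape, SrcPred branches to SrcBlock and DstOtherPred, and both of those
+// branch to DstBlock. In either shape SrcPred dominates SrcBlock and DstBlock,
+// so it is the block returned here.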
+
+
+/// eliminateUnconditionalBranch - Clone the instructions from the destination
+/// block into the source block, eliminating the specified unconditional branch.
+/// If the destination block defines values used by successors of the dest
+/// block, we may need to insert PHI nodes.
+///
+void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
+ BasicBlock *SourceBlock = Branch->getParent();
+ BasicBlock *DestBlock = Branch->getSuccessor(0);
+ assert(SourceBlock != DestBlock && "Our predicate is broken!");
+
+ DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+ << "]: Eliminating branch: " << *Branch);
+
+ // See if we can avoid duplicating code by moving it up to a dominator of both
+ // blocks.
+ if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
+ DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
+
+ // If there are non-phi instructions in DestBlock that have no operands
+ // defined in DestBlock, and if the instruction has no side effects, we can
+ // move the instruction to DomBlock instead of duplicating it.
+ BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
+ while (!isa<TerminatorInst>(BBI)) {
+ Instruction *I = BBI++;
+
+ bool CanHoist = I->isSafeToSpeculativelyExecute() &&
+ !I->mayReadFromMemory();
+ if (CanHoist) {
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
+ if (OpI->getParent() == DestBlock ||
+ (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
+ CanHoist = false;
+ break;
+ }
+ if (CanHoist) {
+ // Remove from DestBlock, move right before the term in DomBlock.
+ DestBlock->getInstList().remove(I);
+ DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
+ DEBUG(dbgs() << "Hoisted: " << *I);
+ }
+ }
+ }
+ }
+
+ // Tail duplication cannot update SSA properties correctly if the values
+ // defined in the duplicated tail are used outside of the tail itself. For
+ // this reason, we spill all values that are used outside of the tail to the
+ // stack.
+ for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
+ if (I->isUsedOutsideOfBlock(DestBlock)) {
+ // We found a use outside of the tail. Create a new stack slot to
+ // break this inter-block usage pattern.
+ DemoteRegToStack(*I);
+ }
+
+ // We are going to have to map operands from the original block B to the new
+ // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
+ // nodes also define part of this mapping. Loop over these PHI nodes, adding
+ // them to our mapping.
+ //
+ std::map<Value*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = DestBlock->begin();
+ bool HadPHINodes = isa<PHINode>(BI);
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
+
+ // Clone the non-phi instructions of the dest block into the source block,
+ // keeping track of the mapping...
+ //
+ for (; BI != DestBlock->end(); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ SourceBlock->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+ }
+
+ // Now that we have built the mapping information and cloned all of the
+ // instructions (giving us a new terminator, among other things), walk the new
+ // instructions, rewriting references of old instructions to use new
+ // instructions.
+ //
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ for (; BI != SourceBlock->end(); ++BI)
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+ std::map<Value*, Value*>::const_iterator I =
+ ValueMapping.find(BI->getOperand(i));
+ if (I != ValueMapping.end())
+ BI->setOperand(i, I->second);
+ }
+
+ // Next we check to see if any of the successors of DestBlock had PHI nodes.
+ // If so, we need to add entries to the PHI nodes for SourceBlock now.
+ for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
+ SI != SE; ++SI) {
+ BasicBlock *Succ = *SI;
+ for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
+ PHINode *PN = cast<PHINode>(PNI);
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(DestBlock);
+
+ // Remap the value if necessary...
+ std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
+ if (I != ValueMapping.end())
+ IV = I->second;
+ PN->addIncoming(IV, SourceBlock);
+ }
+ }
+
+ // Next, remove the old branch instruction, and any PHI node entries that we
+ // had.
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
+ SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
+
+ // Final step: now that we have finished everything up, walk the cloned
+ // instructions one last time, constant propagating and DCE'ing them, because
+ // they may not be needed anymore.
+ //
+ if (HadPHINodes) {
+ while (BI != SourceBlock->end()) {
+ Instruction *Inst = BI++;
+ if (isInstructionTriviallyDead(Inst))
+ Inst->eraseFromParent();
+ else if (Value *V = SimplifyInstruction(Inst)) {
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ }
+ }
+ }
+
+ ++NumEliminated; // We just killed a branch!
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
new file mode 100644
index 000000000000..e21eb9dca270
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,634 @@
+//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop. This pass also implements the following extensions to the basic
+// algorithm:
+//
+// 1. Trivial instructions between the call and return do not prevent the
+// transformation from taking place, though currently the analysis cannot
+// support moving any really useful instructions (only dead ones).
+// 2. This pass transforms functions that are prevented from being tail
+// recursive by an associative and commutative expression to use an
+// accumulator variable, thus compiling the typical naive factorial or
+// 'fib' implementation into efficient code.
+// 3. TRE is performed if the function returns void, if the return
+// returns the result returned by the call, or if the function returns a
+// run-time constant on all exits from the function. It is possible, though
+// unlikely, that the return returns something else (like constant 0), and
+// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
+// the function return the exact same value.
+// 4. If it can prove that callees do not access their caller stack frame,
+// they are marked as eligible for tail call elimination (by the code
+// generator).
+//
+// There are several improvements that could be made:
+//
+// 1. If the function has any alloca instructions, these instructions will be
+// moved out of the entry block of the function, causing them to be
+// evaluated each time through the tail recursion. Safely keeping allocas
+// in the entry block requires analysis to proves that the tail-called
+// function does not read or write the stack object.
+// 2. Tail recursion is only performed if the call immediately precedes the
+// return instruction. It's possible that there could be a jump between
+// the call and the return.
+// 3. There can be intervening operations between the call and the return that
+// prevent the TRE from occurring. For example, there could be GEP's and
+// stores to memory that will not be read or written by the call. This
+// requires some substantial analysis (such as with DSA) to prove safe to
+// move ahead of the call, but doing so could allow many more TREs to be
+// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+// 4. The algorithm we use to detect if callees access their caller stack
+// frames is very primitive.
+//
+//===----------------------------------------------------------------------===//
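+
+// As a rough before/after sketch of extension #2 (C-level, illustrative):
+//   int fact(int n) { return n <= 1 ? 1 : n * fact(n - 1); }
+// is not literally tail recursive because of the multiply after the call, but
+// introducing an accumulator (initialized from the base-case return value)
+// lets the multiply become a loop-carried update, turning the recursion into
+// a simple loop.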
+
+#define DEBUG_TYPE "tailcallelim"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of return duplicated");
+STATISTIC(NumAccumAdded, "Number of accumulators introduced");
+
+namespace {
+ struct TailCallElim : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ CallInst *FindTRECandidate(Instruction *I,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool CanMoveAboveCall(Instruction *I, CallInst *CI);
+ Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
+ };
+}
+
+char TailCallElim::ID = 0;
+INITIALIZE_PASS(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+ return new TailCallElim();
+}
+
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function. We only do very simple analysis right now; this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+ // FIXME: do simple 'address taken' analysis.
+ return true;
+}
+
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions. If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+ bool &CannotTCETailMarkedCall) {
+ bool RetVal = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ RetVal |= AllocaMightEscapeToCalls(AI);
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ if (BB != &BB->getParent()->getEntryBlock() ||
+ !isa<ConstantInt>(AI->getArraySize()))
+ CannotTCETailMarkedCall = true;
+ }
+ return RetVal;
+}
+
+bool TailCallElim::runOnFunction(Function &F) {
+ // If this function is a varargs function, we won't be able to PHI the args
+ // right, so don't even try to convert it...
+ if (F.getFunctionType()->isVarArg()) return false;
+
+ BasicBlock *OldEntry = 0;
+ bool TailCallsAreMarkedTail = false;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
+ bool MadeChange = false;
+ bool FunctionContainsEscapingAllocas = false;
+
+ // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+ // marked with the 'tail' attribute, because doing so would cause the stack
+ // size to increase (real TCE would deallocate variable sized allocas, TCE
+ // doesn't).
+ bool CannotTCETailMarkedCall = false;
+
+ // Loop over the function, looking for any returning blocks, and keeping track
+ // of whether this function has any non-trivially used allocas.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+ break;
+
+ FunctionContainsEscapingAllocas |=
+ CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ }
+
+ /// FIXME: The code generator produces really bad code when an 'escaping
+ /// alloca' is changed from being a static alloca to being a dynamic alloca.
+ /// Until this is resolved, disable this transformation if that would ever
+ /// happen. This bug is PR962.
+ if (FunctionContainsEscapingAllocas)
+ return false;
+
+ // Second pass, change any tail calls to loops.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,CannotTCETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTCETailMarkedCall);
+ MadeChange |= Change;
+ }
+ }
+
+ // If we eliminated any tail recursions, it's possible that we inserted some
+ // silly PHI nodes which just merge an initial value (the incoming operand)
+ // with themselves. Check to see if we did and clean up our mess if so. This
+ // occurs when a function passes an argument straight through to its tail
+ // call.
+ if (!ArgumentPHIs.empty()) {
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
+
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // Finally, if this function contains no escaping allocas and does not call
+ // setjmp, mark all calls in the function as eligible for tail calls
+ // (there is no stack memory for them to access).
+ if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// instruction from after the call to before the call, assuming that all
+/// instructions between the call and this instruction are movable.
+///
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+ // FIXME: We can move load/store/call/free instructions above the call if the
+ // call does not mod/ref the memory location being processed.
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
+ return false;
+
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Loads can always be moved above a call that has no side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
+ L->getAlignment()))
+ return false;
+ }
+ }
+
+ // Otherwise, if this is a side-effect free instruction, check to make sure
+ // that it does not use the return value of the call. If it doesn't use the
+ // return value of the call, it must only use things that are defined before
+ // the call, or movable instructions between the call and the instruction
+ // itself.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i) == CI)
+ return false;
+ return true;
+}
+
+// isDynamicConstant - Return true if the specified value will be the same when
+// the return executes as it was when the initial invocation of the recursive
+// function began.
+//
+// We currently handle static constants and arguments that are not modified as
+// part of the recursion.
+//
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
+ if (isa<Constant>(V)) return true; // Static constants are always dyn consts
+
+ // Check to see if this is an immutable argument; if so, the value
+ // will be available to initialize the accumulator.
+ if (Argument *Arg = dyn_cast<Argument>(V)) {
+ // Figure out which argument number this is...
+ unsigned ArgNo = 0;
+ Function *F = CI->getParent()->getParent();
+ for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
+ ++ArgNo;
+
+ // If we are passing this argument into the call as the corresponding
+ // argument operand, then the argument is dynamically constant.
+ // Otherwise, we cannot transform this function safely.
+ if (CI->getArgOperand(ArgNo) == Arg)
+ return true;
+ }
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
+ // Not a constant or immutable argument, we can't safely transform.
+ return false;
+}
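+
+// For example (an illustrative function, not from the sources above):
+//   int f(int n, int acc) { if (n == 0) return acc; return f(n - 1, acc); }
+// 'acc' is forwarded unchanged as the same argument operand of the recursive
+// call, so it is dynamically constant even though it is not a compile-time
+// constant.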
+
+// getCommonReturnValue - Check to see if the function containing the specified
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
+//
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
+ Value *ReturnedValue = 0;
+
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
+ return ReturnedValue;
+}
+
+/// CanTransformAccumulatorRecursion - If the specified instruction can be
+/// transformed using accumulator recursion elimination, return the constant
+/// which is the start of the accumulator value. Otherwise return null.
+///
+Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
+ CallInst *CI) {
+ if (!I->isAssociative() || !I->isCommutative()) return 0;
+ assert(I->getNumOperands() == 2 &&
+ "Associative/commutative operations should have 2 args!");
+
+ // Exactly one operand should be the result of the call instruction.
+ if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+ (I->getOperand(0) != CI && I->getOperand(1) != CI))
+ return 0;
+
+ // The only user of this instruction we allow is a single return instruction.
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+ return 0;
+
+ // Ok, now we have to check all of the other return instructions in this
+ // function. If they return non-constants or differing values, then we cannot
+ // transform the function safely.
+ return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+}
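+
+// For example, in the 'n * fact(n - 1)' return expression of a naive
+// factorial, the multiply is associative and commutative, has the recursive
+// call as exactly one operand, and is used only by the return, so it becomes
+// the accumulating operation; the constant returned by the base case becomes
+// the accumulator's initial value.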
+
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = TI->getParent();
+ Function *F = BB->getParent();
+
+ if (&BB->front() == TI) // Make sure there is something before the terminator.
+ return 0;
+
+ // Scan backwards from the return, checking to see if there is a tail call in
+ // this block. If so, set CI to it.
+ CallInst *CI = 0;
+ BasicBlock::iterator BBI = TI;
+ while (true) {
+ CI = dyn_cast<CallInst>(BBI);
+ if (CI && CI->getCalledFunction() == F)
+ break;
+
+ if (BBI == BB->begin())
+ return 0; // Didn't find a potential tail call.
+ --BBI;
+ }
+
+ // If this call is marked as a tail call, and if there are dynamic allocas in
+ // the function, we cannot perform this optimization.
+ if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
+ return 0;
+
+ // As a special case, detect code like this:
+ // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+ // and disable this xform in this case, because the code generator will
+ // lower the call to fabs into inline code.
+ if (BB == &F->getEntryBlock() &&
+ FirstNonDbg(BB->front()) == CI &&
+ FirstNonDbg(llvm::next(BB->begin())) == TI &&
+ callIsSmall(F)) {
+ // A single-block function with just a call and a return. Check that
+ // the arguments match.
+ CallSite::arg_iterator I = CallSite(CI).arg_begin(),
+ E = CallSite(CI).arg_end();
+ Function::arg_iterator FI = F->arg_begin(),
+ FE = F->arg_end();
+ for (; I != E && FI != FE; ++I, ++FI)
+ if (*I != &*FI) break;
+ if (I == E && FI == FE)
+ return 0;
+ }
+
+ return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ // If we are introducing accumulator recursion to eliminate operations after
+ // the call instruction that are both associative and commutative, the initial
+ // value for the accumulator is placed in this variable. If this value is set
+ // then we actually perform accumulator recursion elimination instead of
+ // simple tail recursion elimination. If the operation is an LLVM instruction
+ // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then
+ // we are handling the case when the return instruction returns a constant C
+ // which is different to the constant returned by other return instructions
+ // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
+ // special case of accumulator recursion, the operation being "return C".
+ Value *AccumulatorRecursionEliminationInitVal = 0;
+ Instruction *AccumulatorRecursionInstr = 0;
+
+ // Ok, we found a potential tail call. We can currently only transform the
+ // tail call if all of the instructions between the call and the return are
+ // movable to above the call itself, leaving the call next to the return.
+ // Check that this is the case now.
+ BasicBlock::iterator BBI = CI;
+ for (++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
+ }
+ }
+
+ // We can only transform call/return pairs that either ignore the return value
+ // of the call and return void, ignore the value of the call and return a
+ // constant, return the value returned by the tail call, or that are handled
+ // by accumulator recursion elimination.
+ if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
+ !isa<UndefValue>(Ret->getReturnValue()) &&
+ AccumulatorRecursionEliminationInitVal == 0 &&
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
+
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
+ // OK! We can transform this tail call. If this is the first one found,
+ // create the new entry block, allowing us to branch back to the old entry.
+ if (OldEntry == 0) {
+ OldEntry = &F->getEntryBlock();
+ BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
+ NewEntry->takeName(OldEntry);
+ OldEntry->setName("tailrecurse");
+ BranchInst::Create(OldEntry, NewEntry);
+
+ // If this tail call is marked 'tail' and if there are any allocas in the
+ // entry block, move them up to the new entry block.
+ TailCallsAreMarkedTail = CI->isTailCall();
+ if (TailCallsAreMarkedTail)
+ // Move all fixed sized allocas from OldEntry to NewEntry.
+ for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
+ NEBI = NewEntry->begin(); OEBI != E; )
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ AI->moveBefore(NEBI);
+
+ // Now that we have created a new block, which jumps to the entry
+ // block, insert a PHI node for each argument of the function.
+ // For now, we initialize each PHI to only have the real arguments
+ // which are passed in.
+ Instruction *InsertPos = OldEntry->begin();
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ PHINode *PN = PHINode::Create(I->getType(), 2,
+ I->getName() + ".tr", InsertPos);
+ I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+ PN->addIncoming(I, NewEntry);
+ ArgumentPHIs.push_back(PN);
+ }
+ }
+
+ // If this function has self recursive calls in the tail position where some
+ // are marked tail and some are not, only transform one flavor or another. We
+ // have to choose whether we move allocas in the entry block to the new entry
+ // block or not, so we can't make a good choice for both. NOTE: We could do
+ // slightly better here in the case that the function has no entry block
+ // allocas.
+ if (TailCallsAreMarkedTail && !CI->isTailCall())
+ return false;
+
+ // Ok, now that we know we have a pseudo-entry block WITH all of the
+ // required PHI nodes, add entries into the PHI node for the actual
+ // parameters passed into the tail-recursive call.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
+
+ // If we are introducing an accumulator variable to eliminate the recursion,
+ // do so now. Note that we _know_ that no subsequent tail recursion
+ // eliminations will happen on this function because of the way the
+ // accumulator recursion predicate is set up.
+ //
+ if (AccumulatorRecursionEliminationInitVal) {
+ Instruction *AccRecInstr = AccumulatorRecursionInstr;
+ // Start by inserting a new PHI node for the accumulator.
+ pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1,
+ "accumulator.tr", OldEntry->begin());
+
+ // Loop over all of the predecessors of the tail recursion block. For the
+ // real entry into the function we seed the PHI with the initial value,
+ // computed earlier. For any other existing branches to this block (due to
+ // other tail recursions eliminated) the accumulator is not modified.
+ // Because we haven't added the branch in the current block to OldEntry yet,
+ // it will not show up as a predecessor.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
+ else
+ AccPN->addIncoming(AccPN, P);
+ }
+
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
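+ // The operand index of AccRecInstr that is the call is (getOperand(0) != CI):
+ // if operand 0 is not the call, the call must be operand 1.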
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
+
+ // Finally, rewrite any return instructions in the program to return the PHI
+ // node instead of the "initval" that they do currently. This loop will
+ // actually rewrite the return value we are destroying, but that's ok.
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ RI->setOperand(0, AccPN);
+ ++NumAccumAdded;
+ }
+
+ // Now that all of the PHI nodes are in place, remove the call and
+ // ret instructions, replacing them with an unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+ NewBI->setDebugLoc(CI->getDebugLoc());
+
+ BB->getInstList().erase(Ret); // Remove return.
+ BB->getInstList().erase(CI); // Remove call.
+ ++NumEliminated;
+ return true;
+}
+
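+/// FoldReturnAndProcessPred - If the block BB ending in Ret is reached from
+/// predecessors that end in unconditional branches, duplicate the return into
+/// those predecessors and try to eliminate any tail-recursive calls exposed
+/// there. Returns true if anything changed.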
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ bool Change = false;
+
+ // If the return block contains nothing but the return and PHI's,
+ // there might be an opportunity to duplicate the return in its
+ // predecessors and perform TRC there. Look for predecessors that end
+ // in unconditional branch and recursive call(s).
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PTI = Pred->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
+ }
+ }
+
+ return Change;
+}
+
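+/// ProcessReturningBlock - Look for a tail-recursive call feeding the return
+/// instruction Ret and, if one is found, eliminate it. Returns true if the
+/// block was transformed.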
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+ if (!CI)
+ return false;
+
+ return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
new file mode 100644
index 000000000000..be7bed1cecdf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -0,0 +1,582 @@
+//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target addressing mode matcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs)
+ OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ OS << ']';
+}
+
+void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = 0; Value *AddLHS = 0;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr:
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+ TLI.getPointerTy())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS) return false;
+ int64_t Scale = RHS->getSExtValue();
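+ // A left shift by C scales the index by 1 << C.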
+ if (Opcode == Instruction::Shl)
+ Scale = 1LL << Scale;
+
+ return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+ // Scan the GEP. We check whether it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ const TargetData *TD = TLI.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ // Check to see if we can fold the base pointer in too.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ unsigned opNo = UI.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
+ TLI))
+ return true;
+ }
+
+ return false;
+}
+
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant sized alloca in the entry block, it is live; this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ BasicBlock *MemBB = MemoryInst->getParent();
+ for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
+ UI != E; ++UI)
+ // We know that uses of arguments and instructions have to be instructions.
+ if (cast<Instruction>(*UI)->getParent() == MemBB)
+ return true;
+
+ return false;
+}
+
+
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!Address->getType()->isPointerTy())
+ return false;
+ const Type *AddressAccessTy =
+ cast<PointerType>(Address->getType())->getElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 000000000000..b4f74f97e978
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,550 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constant.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+
+/// DeleteDeadBlock - Delete the specified block, which must have no
+/// predecessors.
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+ assert((pred_begin(BB) == pred_end(BB) ||
+ // Can delete self loop.
+ BB->getSinglePredecessor() == BB) && "Block is not dead!");
+ TerminatorInst *BBTerm = BB->getTerminator();
+
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
+ BBTerm->getSuccessor(i)->removePredecessor(BB);
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, all uses will eventually
+ // be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+
+ // Zap the block!
+ BB->eraseFromParent();
+}
+
+/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
+/// any single-entry PHI nodes in it, fold them away. This handles the case
+/// when all entries to the PHI nodes in a block are guaranteed equal, such as
+/// when the block has exactly one predecessor.
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+ if (!isa<PHINode>(BB->begin())) return;
+
+ AliasAnalysis *AA = 0;
+ MemoryDependenceAnalysis *MemDep = 0;
+ if (P) {
+ AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+ MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+ else if (AA && isa<PointerType>(PN->getType()))
+ AA->deleteValue(PN);
+
+ PN->eraseFromParent();
+ }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ bool Changed = false;
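+ // Earlier deletions may have nulled out or RAUW'd some of the collected
+ // handles, so re-check each one before using it.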
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ Changed |= RecursivelyDeleteDeadPHINode(PN);
+
+ return Changed;
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible. The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+ // Don't merge away blocks who have their address taken.
+ if (BB->hasAddressTaken()) return false;
+
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
+ if (!PredBB) return false;
+
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+ // Don't break invokes.
+ if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+ succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+ BasicBlock *OnlySucc = BB;
+ for (; SI != SE; ++SI)
+ if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+
+ // Can't merge if there are multiple successors.
+ if (!OnlySucc) return false;
+
+ // Can't merge if there is a PHI loop.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN)
+ return false;
+ } else
+ break;
+ }
+
+ // Begin by getting rid of unneeded PHIs.
+ if (isa<PHINode>(BB->front()))
+ FoldSingleEntryPHINodes(BB, P);
+
+ // Delete the unconditional branch from the predecessor...
+ PredBB->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+ // If the predecessor block has no name, inherit BB's name.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ // Finally, erase the old block and update dominator info.
+ if (P) {
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ DE = Children.end(); DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ LI->removeBlock(BB);
+
+ if (MemoryDependenceAnalysis *MD =
+ P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+ MD->invalidateCachedPredecessors();
+ }
+ }
+
+ BB->eraseFromParent();
+ return true;
+}
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I. The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Instruction *I) {
+ assert(I->getParent() == 0 &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = BIL.insert(BI, I);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BIL, BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+ unsigned e = Term->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (Term->getSuccessor(i) == Succ)
+ return i;
+ }
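+ // Not reached when the precondition holds; the loop above always returns.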
+ return 0;
+}
+
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
+
+ // If this is a critical edge, let SplitCriticalEdge do it.
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ return LatchTerm->getSuccessor(SuccNum);
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ BasicBlock::iterator SplitPoint;
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = NULL;
+ return SplitBlock(Succ, Succ->begin(), P);
+ }
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
+}
+
+/// SplitBlock - Split the specified block at the specified instruction -
+/// everything before SplitPt stays in Old and everything starting with SplitPt
+/// moves to a new block. The two blocks are joined by an unconditional branch
+/// and the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+ BasicBlock::iterator SplitIt = SplitPt;
+ while (isa<PHINode>(SplitIt))
+ ++SplitIt;
+ BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ DomTreeNode *OldNode = DT->getNode(Old);
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+ for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+
+ return New;
+}
+
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds, const char *Suffix,
+ Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
+ Loop *L = LI ? LI->getLoopFor(BB) : 0;
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ // While here, if we need to preserve loop analyses, collect
+ // some information about how this split will affect loops.
+ bool HasLoopExit = false;
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+
+ if (LI) {
+ // If we need to preserve LCSSA, determine if any of
+ // the preds is a loop exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Preds[i]))
+ if (!PL->contains(BB))
+ HasLoopExit = true;
+ // If we need to preserve LoopInfo, note whether any of the
+ // preds crosses an interesting loop boundary.
+ if (L) {
+ if (L->contains(Preds[i]))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+ }
+ }
+
+ // Update dominator tree if available.
+ DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (NumPreds == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+ if (L) {
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an
+ // adjacent loop). To find this, examine each of the predecessors and
+ // determine which loops enclose them, and select the most-nested loop
+ // which contains the loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (unsigned i = 0; i != NumPreds; ++i)
+ if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
+ // Seek a loop which actually contains the block being split (to
+ // avoid adjacent loops).
+ while (PredLoop && !PredLoop->contains(BB))
+ PredLoop = PredLoop->getParentLoop();
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop &&
+ PredLoop->contains(BB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+ }
+
+ // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = 0;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1; i != NumPreds; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ for (unsigned i = 0; i != NumPreds; ++i)
+ PN->removeIncomingValue(Preds[i], false);
+ } else {
+ // If the values coming into the block are not the same, we need a PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI);
+ if (AA) AA->copyValue(PN, NewPHI);
+
+ // Move all of the PHI values for 'Preds' to the new PHI.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ Value *V = PN->removeIncomingValue(Preds[i], false);
+ NewPHI->addIncoming(V, Preds[i]);
+ }
+ InVal = NewPHI;
+ }
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ return NewBB;
+}
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is an unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i)
+ if (PHINode *PN = dyn_cast<PHINode>(*i))
+ if (PN->getParent() == BB)
+ *i = PN->getIncomingValueForBlock(Pred);
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+ return cast<ReturnInst>(NewRet);
+}
+
+/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a
+/// given basic block.
+DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) {
+ if (const Instruction *I = BB->getFirstNonPHI())
+ return I->getDebugLoc();
+ // Scanning the entire block may be too expensive if the first instruction
+ // does not have valid location info.
+ return DebugLoc();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp
new file mode 100644
index 000000000000..23a30cc58507
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp
@@ -0,0 +1,182 @@
+//===- BasicInliner.cpp - Basic function level inliner --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple function based inliner that does not use
+// call graph information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basicinliner"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/BasicInliner.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
+ cl::desc("Control the amount of basic inlining to perform (default = 200)"));
+
+namespace llvm {
+
+ /// BasicInlinerImpl - BasicInliner implementation class. This hides
+ /// container info, used by basic inliner, from public interface.
+ struct BasicInlinerImpl {
+
+ BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+ void operator=(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+ public:
+ BasicInlinerImpl(TargetData *T) : TD(T) {}
+
+ /// addFunction - Add function into the list of functions to process.
+ /// All functions must be inserted using this interface before invoking
+ /// inlineFunctions().
+ void addFunction(Function *F) {
+ Functions.push_back(F);
+ }
+
+ /// neverInlineFunction - Sometimes a function is never to be inlined
+ /// because of one reason or another.
+ void neverInlineFunction(Function *F) {
+ NeverInline.insert(F);
+ }
+
+ /// inlineFunctions - Walk all call sites in all functions supplied by
+ /// client. Inline as many call sites as possible. Delete completely
+ /// inlined functions.
+ void inlineFunctions();
+
+ private:
+ TargetData *TD;
+ std::vector<Function *> Functions;
+ SmallPtrSet<const Function *, 16> NeverInline;
+ SmallPtrSet<Function *, 8> DeadFunctions;
+ InlineCostAnalyzer CA;
+ };
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInlinerImpl::inlineFunctions() {
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ std::vector<CallSite> CallSites;
+
+ for (std::vector<Function *>::iterator FI = Functions.begin(),
+ FE = Functions.end(); FI != FE; ++FI) {
+ Function *F = *FI;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()
+ && !CS.getCalledFunction()->isDeclaration())
+ CallSites.push_back(CS);
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // Inline call sites.
+ bool Changed = false;
+ do {
+ Changed = false;
+ for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
+ ++index) {
+ CallSite CS = CallSites[index];
+ if (Function *Callee = CS.getCalledFunction()) {
+
+ // Eliminate calls that are never inlinable.
+ if (Callee->isDeclaration() ||
+ CS.getInstruction()->getParent()->getParent() == Callee) {
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ continue;
+ }
+ InlineCost IC = CA.getInlineCost(CS, NeverInline);
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ <<", call: " << *CS.getInstruction());
+ } else if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ <<", call: " << *CS.getInstruction());
+ continue;
+ } else {
+ int Cost = IC.getValue();
+
+ if (Cost >= (int) BasicInlineThreshold) {
+ DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
+ continue;
+ } else {
+ DEBUG(dbgs() << " Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
+ }
+ }
+
+ // Inline
+ InlineFunctionInfo IFI(0, TD);
+ if (InlineFunction(CS, IFI)) {
+ if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+ Callee->hasAvailableExternallyLinkage()))
+ DeadFunctions.insert(Callee);
+ Changed = true;
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ }
+ }
+ }
+ } while (Changed);
+
+ // Remove completely inlined functions from module.
+ for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
+ E = DeadFunctions.end(); I != E; ++I) {
+ Function *D = *I;
+ Module *M = D->getParent();
+ M->getFunctionList().remove(D);
+ }
+}
+
+BasicInliner::BasicInliner(TargetData *TD) {
+ Impl = new BasicInlinerImpl(TD);
+}
+
+BasicInliner::~BasicInliner() {
+ delete Impl;
+}
+
+/// addFunction - Add function into the list of functions to process.
+/// All functions must be inserted using this interface before invoking
+/// inlineFunctions().
+void BasicInliner::addFunction(Function *F) {
+ Impl->addFunction(F);
+}
+
+/// neverInlineFunction - Sometimes a function is never to be inlined because
+/// of one reason or another.
+void BasicInliner::neverInlineFunction(Function *F) {
+ Impl->neverInlineFunction(F);
+}
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInliner::inlineFunctions() {
+ Impl->inlineFunctions();
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 000000000000..92ce50030a5d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,386 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ProfileInfo>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
+
+// runOnFunction - Loop over all of the edges in the CFG, breaking critical
+// edges as they are found.
+//
+bool BreakCriticalEdges::runOnFunction(Function &F) {
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, this)) {
+ ++NumBroken;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+// isCriticalEdge - Return true if the specified edge is a critical edge.
+// Critical edges are edges from a block with multiple successors to a block
+// with multiple predecessors.
+//
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ while (I != E) {
+ const BasicBlock *P = *I;
+ if (P != FirstPred)
+ return true;
+    // Note: leave this as-is until nobody compiles with gcc 4.0.1 or Xcode 2
+    // any more; it seems to work around the pred_iterator assert in PR 2207.
+ E = pred_end(P);
+ ++I;
+ }
+ return false;
+}
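+
+// Illustrative example: in the CFG sketched below, %a has two successors and
+// %c has two predecessors, so the edge %a -> %c is critical, while %a -> %b
+// and %b -> %c are not.
+//
+//      %a
+//     /  \
+//   %b    |
+//     \   |
+//      \  |
+//       %c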
+
+/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+ BasicBlock *SplitBB,
+ BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
+ "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block...
+ for (BasicBlock::iterator I = DestBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+ Value *V = PN->getIncomingValue(Idx);
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split",
+ SplitBB->getTerminator());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ NewPN->addIncoming(V, Preds[i]);
+ // Update the original PHI.
+ PN->setIncomingValue(Idx, NewPN);
+ }
+}
+
+/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+/// split the critical edge. This updates any DominatorTree, LoopInfo, and
+/// ProfileInfo held by the pass P, so calling this function does not
+/// invalidate them. It returns the new block if the edge was split, null
+/// otherwise.
+///
+/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
+/// have many edges to any one destination. This ensures that all edges to that
+/// dest go to one block instead of each going to a different block, but isn't
+/// the standard definition of a "critical edge".
+///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ Pass *P, bool MergeIdenticalEdges) {
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+ TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ // Create our unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
+ NewBI->setDebugLoc(TI->getDebugLoc());
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB;
+ F.getBasicBlockList().insert(++FBBI, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+
+
+ // If we don't have a pass object, we can't update anything...
+ if (P == 0) return NewBB;
+
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+
+ // If we have nothing to update, just return.
+ if (DT == 0 && LI == 0 && PI == 0)
+ return NewBB;
+
+  // Now update analysis information. Since the only predecessor of NewBB is
+  // TIBB, TIBB clearly dominates NewBB. NewBB usually doesn't dominate
+  // anything else, as DestBB typically has other predecessors. However, if
+  // all other predecessors of DestBB are already dominated by DestBB (e.g.
+  // DestBB is a loop header) then NewBB dominates DestBB.
+ SmallVector<BasicBlock*, 8> OtherPreds;
+
+ // If there is a PHI in the block, loop over predecessors with it, which is
+ // faster than iterating pred_begin/end.
+ if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) != NewBB)
+ OtherPreds.push_back(PN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (P != NewBB)
+ OtherPreds.push_back(P);
+ }
+ }
+
+ bool NewBBDominatesDestBB = true;
+
+ // Should we update DominatorTree information?
+ if (DT) {
+ DomTreeNode *TINode = DT->getNode(TIBB);
+
+ // The new block is not the immediate dominator for any other nodes, but
+ // TINode is the immediate dominator for the new node.
+ //
+ if (TINode) { // Don't break unreachable code!
+ DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+ DomTreeNode *DestBBNode = 0;
+
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+ if (!OtherPreds.empty()) {
+ DestBBNode = DT->getNode(DestBB);
+ while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+ if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+ NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+ OtherPreds.pop_back();
+ }
+ OtherPreds.clear();
+ }
+
+ // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+ // doesn't dominate anything.
+ if (NewBBDominatesDestBB) {
+ if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+ DT->changeImmediateDominator(DestBBNode, NewBBNode);
+ }
+ }
+ }
+
+ // Update LoopInfo if it is around.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+      // If either block was not in a loop, the new block is not in a loop
+      // either, and thus LI doesn't need to be updated.
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+          // Both blocks are in the same loop; NewBB joins that loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+ }
+ // If TIBB is in a loop and DestBB is outside of that loop, split the
+ // other exit blocks of the loop that also have predecessors outside
+ // the loop, to maintain a LoopSimplify guarantee.
+ if (!TIL->contains(DestBB) &&
+ P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (P->mustPreserveAnalysisID(LCSSAID)) {
+ SmallVector<BasicBlock *, 1> OrigPred;
+ OrigPred.push_back(TIBB);
+ CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+ }
+
+ // For each unique exit block...
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ TIL->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ // Collect all the preds that are inside the loop, and note
+ // whether there are any preds outside the loop.
+ SmallVector<BasicBlock *, 4> Preds;
+ bool HasPredOutsideOfLoop = false;
+ BasicBlock *Exit = ExitBlocks[i];
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (TIL->contains(P))
+ Preds.push_back(P);
+ else
+ HasPredOutsideOfLoop = true;
+ }
+ // If there are any preds not in the loop, we'll need to split
+ // the edges. The Preds.empty() check is needed because a block
+ // may appear multiple times in the list. We can't use
+ // getUniqueExitBlocks above because that depends on LoopSimplify
+ // form, which we're in the process of restoring!
+ if (!Preds.empty() && HasPredOutsideOfLoop) {
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+ "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+ }
+ }
+ }
+ // LCSSA form was updated above for the case where LoopSimplify is
+ // available, which means that all predecessors of loop exit blocks
+ // are within the loop. Without LoopSimplify form, it would be
+ // necessary to insert a new phi.
+ assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+ P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+ "SplitCriticalEdge doesn't know how to update LCCSA form "
+ "without LoopSimplify!");
+ }
+ }
+
+ // Update ProfileInfo if it is around.
+ if (PI)
+ PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
+
+ return NewBB;
+}
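+
+// Minimal usage sketch (illustrative; TI, SuccNum, and P stand for a
+// terminator, a successor index, and the client Pass, all supplied by the
+// caller):
+//
+//   if (BasicBlock *NewBB = SplitCriticalEdge(TI, SuccNum, P)) {
+//     // NewBB contains only an unconditional branch to the old successor,
+//     // and any DominatorTree/LoopInfo/ProfileInfo held by P has been kept
+//     // up to date.
+//   }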
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 000000000000..14bb17fbda3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,479 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
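+
+// Usage sketch (illustrative; CI names a CallInst being simplified and TD a
+// TargetData known to the caller):
+//
+//   IRBuilder<> B(CI);
+//   Value *Len = EmitStrLen(CI->getArgOperand(0), B, TD);
+//   // Len has intptr_t width and holds the result of the emitted strlen call.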
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI =
+ AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+ const Type *I8Ptr = B.getInt8PtrTy();
+ const Type *I32Ty = B.getInt32Ty();
+ Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
+ I8Ptr, I8Ptr, I32Ty, NULL);
+ CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+ ConstantInt::get(I32Ty, C), "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
+ CastToCStr(Ptr2, B), Len, "strncmp");
+
+ if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetData *TD, StringRef Name) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr, NULL);
+ CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Name);
+ if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
+ IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr,
+ Len->getType(), NULL);
+ CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Len, "strncpy");
+ if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
+/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
+/// are pointers.
+Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+ AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context), NULL);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt32Ty(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). The function is known to take a single argument of type matching
+/// 'Op' and to return one value of the same type. If 'Op' is a long double,
+/// an 'l' suffix is added to the name; if 'Op' is a float, an 'f' suffix is
+/// added.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder<> &B, const AttrListPtr &Attrs) {
+ char NameBuffer[20];
+ if (!Op->getType()->isDoubleTy()) {
+ // If we need to add a suffix, copy into NameBuffer.
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType()->isFloatTy())
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
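+
+// Illustrative behaviour (Attrs is whatever attribute list the caller wants
+// on the emitted call):
+//
+//   Value *R = EmitUnaryFloatFnCall(Op, "floor", B, Attrs);
+//
+// This emits a call to floor when Op has double type, to floorf when it has
+// float type, and to floorl otherwise.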
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt32Ty(), File->getType(),
+ NULL);
+ else
+ F = M->getOrInsertFunction("fputc",
+ B.getInt32Ty(),
+ B.getInt32Ty(),
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
+
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+ // We really need TargetData for later.
+ if (!TD) return false;
+
+ this->CI = CI;
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ const FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+ IRBuilder<> B(CI);
+
+ if (Name == "__memcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+  // __mempcpy_chk is not handled yet; it should be folded much like
+  // __memcpy_chk above.
+ if (Name == "__mempcpy_chk") {
+ return false;
+ }
+
+ if (Name == "__memmove_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__memset_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+      return false;
+
+
+    // If a) we don't have any length information, or b) we know this will
+    // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+    // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
+    // TODO: It might be nice to compute a maximum length from the possible
+    // string lengths when they vary.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+ Name.substr(2, 6));
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcat_chk") {
+ return false;
+ }
+
+ if (Name == "__strncat_chk") {
+ return false;
+ }
+
+ return false;
+}
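+
+// Illustrative fold performed above for __memcpy_chk when isFoldable() proves
+// the copy fits in the destination object (the IR shown is approximate):
+//
+//   %r = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 16, i64 64)
+//
+// becomes a plain memcpy intrinsic, with uses of %r replaced by %dst:
+//
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16,
+//                                        i32 1, i1 false)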
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 000000000000..6ea831f5345b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,538 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+ ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo) {
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator());
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->getEntryBlock();
+ }
+ return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ assert(VMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+ // Clone any attributes.
+ if (NewFunc->arg_size() == OldFunc->arg_size())
+ NewFunc->copyAttributesFrom(OldFunc);
+ else {
+    // Some arguments were deleted with the VMap. Copy the attributes over one
+    // argument at a time.
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
+ Anew->addAttr( OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(0, OldFunc->getAttributes()
+ .getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(~0, OldFunc->getAttributes()
+ .getFnAttributes()));
+
+ }
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ //
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+ VMap[&BB] = CBB; // Add basic block mapping.
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ // Loop over all of the instructions in the function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
+ BE = NewFunc->end(); BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+ RemapInstruction(II, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module. Also, any references specified
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
+/// updated to include mappings from all of the instructions and basicblocks in
+/// the function from their old to new values.
+///
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<Type*> ArgTypes;
+
+ // The user might be deleting arguments to the function by specifying them in
+ // the VMap. If so, we need to not add the arguments to the arg ty vector
+ //
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I->getType());
+
+ // Create a new function type...
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+ ArgTypes, F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) { // Is this argument preserved?
+ DestI->setName(I->getName()); // Copy the name over...
+ VMap[I] = DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
+ return NewF;
+}
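+
+// Usage sketch (illustrative; F is any function the client wants duplicated):
+//
+//   ValueToValueMapTy VMap;
+//   Function *NewF = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false);
+//   // NewF is not inserted into any module; a common follow-up is:
+//   F->getParent()->getFunctionList().push_back(NewF);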
+
+
+
+namespace {
+ /// PruningFunctionCloner - This class is a private class used to implement
+ /// the CloneAndPruneFunctionInto method.
+ struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ SmallVectorImpl<ReturnInst*> &Returns;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ const TargetData *TD;
+ public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &returns,
+ const char *nameSuffix,
+ ClonedCodeInfo *codeInfo,
+ const TargetData *td)
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ }
+
+ /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone);
+
+ public:
+ /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+ /// mapping its operands through VMap if they are available.
+ Constant *ConstantFoldMappedInstruction(const Instruction *I);
+ };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone){
+ TrackingVH<Value> &BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ II != IE; ++II) {
+ // If this instruction constant folds, don't bother cloning the instruction,
+ // instead, just add the constant to the value map.
+ if (Constant *C = ConstantFoldMappedInstruction(II)) {
+ VMap[II] = C;
+ continue;
+ }
+
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+ if (Cond) { // Constant fold to uncond branch!
+ BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI);
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
+ Returns.push_back(RI);
+}
+
+/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+/// mapping its operands through VMap if they are available.
+Constant *PruningFunctionCloner::
+ConstantFoldMappedInstruction(const Instruction *I) {
+ SmallVector<Constant*, 8> Ops;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
+ Ops.push_back(Op);
+ else
+ return 0; // All operands not constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr)
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(),
+ CE);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
+ Ops.size(), TD);
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const TargetData *TD,
+ Instruction *TheCall) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ Returns, NameSuffix, CodeInfo, TD);
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock*> CloneWorklist;
+ CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode*, 16> PHIToResolve;
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
+ if (NewBB == 0) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Loop over all of the instructions in the block, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ //
+ BasicBlock::iterator I = NewBB->begin();
+
+ DebugLoc TheCallDL;
+ if (TheCall)
+ TheCallDL = TheCall->getDebugLoc();
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ // Skip over all PHI nodes, remembering them for later.
+ BasicBlock::const_iterator OldI = BI->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+ PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
+
+ // Otherwise, remap the rest of the instructions normally.
+ for (; I != NewBB->end(); ++I)
+ RemapInstruction(I, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred, --e; // Revisit the next entry.
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ // NOTE: We cannot eliminate single entry phi nodes here, because of
+ // VMap. Single entry phi nodes can have multiple VMap entries
+ // pointing at them. Thus, deleting one would require scanning the VMap
+ // to update any entries in it that would require that. This would be
+ // really slow.
+ }
+
+ // Now that the inlined function body has been fully constructed, go through
+  // and zap unconditional fall-through branches. This happens all the time
+  // when specializing code: code specialization turns conditional branches
+  // into uncond branches, and this code folds them.
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ while (I != NewFunc->end()) {
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ // Note that we can't eliminate uncond branches if the destination has
+ // single-entry PHI nodes. Eliminating the single-entry phi nodes would
+ // require scanning the VMap to update any entries that point to the phi
+ // node.
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ ++I; continue;
+ }
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+}
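+
+// Illustrative effect of the pruning (the IR is approximate): if the callee is
+//
+//   define i32 @f(i1 %flag) {
+//   entry:
+//     br i1 %flag, label %a, label %b
+//   a:  ret i32 1
+//   b:  ret i32 0
+//   }
+//
+// and it is inlined at a call site where %flag maps to the constant true, the
+// conditional branch is folded, block %b is never cloned, and the remaining
+// unconditional branch from the cloned entry block is merged away by the loop
+// directly above.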
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 000000000000..a08fa35065cc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,127 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constant.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module. This is not as
+/// easy as it might seem because we have to worry about making copies of global
+/// variables and functions, and making their (initializers and references,
+/// respectively) refer to the right globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
+}
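+
+// Usage sketch (illustrative):
+//
+//   Module *Copy = CloneModule(M);  // Deep copy; the caller owns it.
+//   ...
+//   delete Copy;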
+
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+ // First off, we need to create the new module.
+ Module *New = new Module(M->getModuleIdentifier(), M->getContext());
+ New->setDataLayout(M->getDataLayout());
+ New->setTargetTriple(M->getTargetTriple());
+ New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+ // Copy all of the dependent libraries over.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ New->addLibrary(*I);
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the VMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = new GlobalVariable(*New,
+ I->getType()->getElementType(),
+ false,
+ GlobalValue::ExternalLinkage, 0,
+ I->getName());
+ GV->setAlignment(I->getAlignment());
+ VMap[I] = GV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ GlobalValue::ExternalLinkage, I->getName(), New);
+ NF->copyAttributesFrom(I);
+ VMap[I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+ I->getName(), NULL, New);
+
+  // Now that all of the things that a global variable initializer can refer
+  // to have been created, loop through and copy the global variable
+  // initializers over... We also set the attributes on the globals now.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
+ if (I->hasInitializer())
+ GV->setInitializer(MapValue(I->getInitializer(), VMap));
+ GV->setLinkage(I->getLinkage());
+ GV->setThreadLocal(I->isThreadLocal());
+ GV->setConstant(I->isConstant());
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *F = cast<Function>(VMap[I]);
+ if (!I->isDeclaration()) {
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[J] = DestI++;
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
+ }
+
+ F->setLinkage(I->getLinkage());
+ }
+
+ // And aliases
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
+ GA->setLinkage(I->getLinkage());
+ if (const Constant *C = I->getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
+ }
+
+ // And named metadata....
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
+ }
+
+ return New;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 000000000000..081352358b95
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,795 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
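+
+// For illustration (types are hypothetical): with aggregation enabled, an
+// extracted function that would otherwise be called as f(i32 %a, i32* %b.out)
+// instead receives a single { i32, i32 }* %structArg, and the output value is
+// written back into the second field before each return.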
+
+namespace {
+ class CodeExtractor {
+ typedef SetVector<Value*> Values;
+ SetVector<BasicBlock*> BlocksToExtract;
+ DominatorTree* DT;
+ bool AggregateArgs;
+ unsigned NumExitBlocks;
+ const Type *RetTy;
+ public:
+ CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
+ : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
+
+ Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+
+ bool isEligible(const std::vector<BasicBlock*> &code);
+
+ private:
+ /// definedInRegion - Return true if the specified value is defined in the
+ /// extracted region.
+ bool definedInRegion(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ /// definedInCaller - Return true if the specified value is defined in the
+ /// function being code extracted, but not in the region being extracted.
+ /// These values must be passed in as live-ins to the function.
+ bool definedInCaller(Value *V) const {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ void severSplitPHINodes(BasicBlock *&Header);
+ void splitReturnBlocks();
+ void findInputsOutputs(Values &inputs, Values &outputs);
+
+ Function *constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode, BasicBlock *newHeader,
+ Function *oldFunction, Module *M);
+
+ void moveCodeToFunction(Function *newFunction);
+
+ void emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *newHeader,
+ Values &inputs,
+ Values &outputs);
+
+ };
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
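+///
+/// For example (names are illustrative): if the region header %H has PHIs fed
+/// by two outside blocks %A and %B, %H is split so that the original block
+/// stays outside the region holding those PHIs, while the new block %H.ce
+/// becomes the region's header.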
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ unsigned NumPredsFromRegion = 0;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+ ++NumPredsFromRegion;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+ Header->getName()+".ce");
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ BlocksToExtract.remove(OldPred);
+ BlocksToExtract.insert(NewBB);
+ Header = NewBB;
+
+ // Okay, update dominator sets. The blocks that dominate the new one are the
+ // blocks that dominate the old header plus the new block itself.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (NumPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+ // Okay, everything within the region is now branching to the right block; we
+ // just have to update the PHI nodes, inserting new PHI nodes into NewBB.
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new region, which has an incoming value
+ // from OldPred of PN.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName()+".ce", NewBB->begin());
+ NewPN->addIncoming(PN, OldPred);
+
+ // Loop over all of the incoming values in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+ E = BlocksToExtract.end(); I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+ BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ if (DT) {
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
+ DomTreeNode *OldNode = DT->getNode(*I);
+ SmallVector<DomTreeNode*, 8> Children;
+ for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+ DI != DE; ++DI)
+ Children.push_back(*DI);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+ for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+ }
+}
+
+// findInputsOutputs - Find inputs to, outputs from the code region.
+//
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+ std::set<BasicBlock*> ExitBlocks;
+ for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+ ce = BlocksToExtract.end(); ci != ce; ++ci) {
+ BasicBlock *BB = *ci;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+ if (definedInCaller(*O))
+ inputs.insert(*O);
+
+ // Consider uses of this instruction (outputs).
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (!definedInRegion(*UI)) {
+ outputs.insert(I);
+ break;
+ }
+ } // for: insts
+
+ // Keep track of the exit blocks from the region.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i)))
+ ExitBlocks.insert(TI->getSuccessor(i));
+ } // for: basic blocks
+
+ NumExitBlocks = ExitBlocks.size();
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+ // This function returns an unsigned value; outputs go back by reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list
+ for (Values::const_iterator i = inputs.begin(),
+ e = inputs.end(); i != e; ++i) {
+ const Value *value = *i;
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+ if (AggregateArgs)
+ paramTy.push_back((*I)->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ }
+
+ DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
+ for (std::vector<Type*>::iterator i = paramTy.begin(),
+ e = paramTy.end(); i != e; ++i)
+ DEBUG(dbgs() << **i << ", ");
+ DEBUG(dbgs() << ")\n");
+
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ PointerType *StructPtr =
+ PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ paramTy.clear();
+ paramTy.push_back(StructPtr);
+ }
+ const FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow(true);
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator to name all of the arguments we inserted.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(AI, Idx, Idx+2,
+ "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ } else
+ RewriteVal = AI++;
+
+ std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+ use != useE; ++use)
+ if (Instruction* inst = dyn_cast<Instruction>(*use))
+ if (BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+ // within the new function. This must be done before we lose track of which
+ // blocks were originally in the code region.
+ std::vector<User*> Users(header->use_begin(), header->use_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // If the BasicBlock containing the branch is not in the region,
+ // modify the branch target to point at the new header block.
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!BlocksToExtract.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+ for (Value::use_iterator UI = Used->use_begin(),
+ UE = Used->use_end(); UI != UE; ++UI) {
+ PHINode *P = dyn_cast<PHINode>(*UI);
+ if (P && P->getParent() == BB)
+ return P->getIncomingBlock(UI);
+ }
+
+ return 0;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side: it emits
+/// the call to the new function, reloads the output values, and builds the
+/// switch that dispatches on the region's exit number.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ Values &inputs, Values &outputs) {
+ // Emit a call to the new function, passing in: a pointer to the struct (if
+ // aggregating parameters), or the plain inputs and allocas for the outputs.
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ LLVMContext &Context = newFunction->getContext();
+
+ // Add inputs as params, or to be filled into the struct
+ for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ if (AggregateArgs)
+ StructValues.push_back(*i);
+ else
+ params.push_back(*i);
+
+ // Create allocas for the outputs
+ for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ if (AggregateArgs) {
+ StructValues.push_back(*i);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ codeReplacer->getParent()->begin()->begin());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ AllocaInst *Struct = 0;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<Type*> ArgTypes;
+ for (Values::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function
+ Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct =
+ new AllocaInst(StructArgTy, 0, "structArg",
+ codeReplacer->getParent()->begin()->begin());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function
+ CallInst *call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size();
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = 0;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP
+ = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned value and switch on that number. This loop
+ // iterates over all of the blocks in the extracted region, updating any
+ // terminator instructions that branch to blocks outside of the region so
+ // that they target the corresponding exit stubs instead.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ TerminatorInst *TI = (*i)->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = 0;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+
+ // Restore values just before we exit
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+ DefBlock = Invoke->getNormalDest();
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+ ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+ if (DefBlock == I->second) {
+ DefBlock = I->first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT) {
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this is a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+ "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no successors of the block containing the switch, which means
+ // that previously this was the last part of the function, and hence this
+ // should be rewritten as a `ret'.
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have code extracted an unwind or something, just
+ // return whatever we want.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setOperand(0, call);
+ TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+ TheSwitch->removeCase(NumExitBlocks); // Remove redundant case
+ break;
+ }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+ Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ // Delete the basic block from the old function, and the list of blocks
+ oldBlocks.remove(*i);
+
+ // Insert this basic block into the new function
+ newBlocks.push_back(*i);
+ }
+}
+
+/// ExtractCodeRegion - Removes a code region from a function, replacing it
+/// with a call to a new function. Returns a pointer to the new function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+/// add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+ if (!isEligible(code))
+ return 0;
+
+ // 1) Find inputs, outputs
+ // 2) Construct new function
+ // * Add allocas for defs, pass as args by reference
+ // * Pass in uses as args
+ // 3) Move code region, add call instr to func
+ //
+ BlocksToExtract.insert(code.begin(), code.end());
+
+ Values inputs, outputs;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = code[0];
+
+ for (unsigned i = 1, e = code.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+ PI != E; ++PI)
+ assert(BlocksToExtract.count(*PI) &&
+ "No blocks in this region may have entries from outside the region"
+ " except for the first block!");
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+ // This block takes the place of the original code region.
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs);
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e;
+ }
+ }
+ }
+
+ //cerr << "NEW FUNCTION: " << *newFunction;
+ // verifyFunction(*newFunction);
+
+ // cerr << "OLD FUNCTION: " << *oldFunction;
+ // verifyFunction(*oldFunction);
+
+ DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
+ return newFunction;
+}
+
+bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+ // Deny code region if it contains allocas or vastarts.
+ for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+ BB != e; ++BB)
+ for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
+ I != Ie; ++I)
+ if (isa<AllocaInst>(*I))
+ return false;
+ else if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ return true;
+}
+
+
+/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
+/// function
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+ const std::vector<BasicBlock*> &code,
+ bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
+}
+
+/// ExtractLoop - slurp a natural loop into a brand new function
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - slurp a basic block into a brand new function
+///
+Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
+ std::vector<BasicBlock*> Blocks;
+ Blocks.push_back(BB);
+ return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+}
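+
+// Illustrative use of the entry points above (a sketch under the assumption
+// that the caller already has a DominatorTree DT and a Loop *L):
+//
+//   Function *Outlined = llvm::ExtractLoop(DT, L, /*AggregateArgs=*/false);
+//   if (Outlined) {
+//     // The loop body now lives in Outlined; the original function calls it.
+//   }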
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 000000000000..8cc26492c292
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,146 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the function DemoteRegToStack(). This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca. It returns the pointer to the
+// AllocaInst inserted. After this function is called on an instruction, we are
+// guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
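+/// For illustration (Inst is a hypothetical instruction the caller owns):
+///
+///   AllocaInst *Slot =
+///     DemoteRegToStack(*Inst, /*VolatileLoads=*/false, /*AllocaPoint=*/0);
+///   // Former uses of Inst now load from Slot, so the CFG can be rearranged
+///   // without invalidating SSA form.
+///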
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), 0,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = I.getParent()->getParent();
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Change all of the users of the instruction to read from the stack slot
+ // instead.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.use_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead, insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+ // node, we cannot insert multiple loads. The problem is that the resultant
+ // PHI node would have multiple values (one from each load) coming in from
+ // the same block, which is illegal SSA form. For this reason, we keep
+ // track of and reuse the loads we insert.
+ std::map<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (V == 0) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+
+ // Insert a store of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, though, because we can't insert
+ // the store AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = &I;
+ ++InsertPt;
+ } else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical.
+ InvokeInst &II = cast<InvokeInst>(I);
+ assert(II.getNormalDest()->getSinglePredecessor() &&
+ "Cannot demote invoke with a critical successor!");
+ InsertPt = II.getNormalDest()->begin();
+ }
+
+ for (; isa<PHINode>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before any PHI nodes.
+ new StoreInst(&I, Slot, InsertPt);
+
+ return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
+AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), 0,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Iterate over each operand, insert store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); (void)II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert load in place of the phi and replace all uses.
+ Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+ P->replaceAllUsesWith(V);
+
+ // Delete phi.
+ P->eraseFromParent();
+
+ return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 000000000000..d5b382e55e5c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,1182 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+// The code in this file for handling inlines through invoke
+// instructions preserves semantics only under some assumptions about
+// the behavior of unwinders which correspond to gcc-style libUnwind
+// exception personality functions. Eventually the IR will be
+// improved to make this unnecessary, but until then, this code is
+// marked [LIBUNWIND].
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Attributes.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(CI), IFI);
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(II), IFI);
+}
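+
+// A sketch of typical caller-side usage (the InlineFunctionInfo constructor
+// arguments shown here are assumptions; only the CG and TD members are used
+// in this file):
+//
+//   InlineFunctionInfo IFI(/*CG=*/0, /*TD=*/0);
+//   if (InlineFunction(CI, IFI))
+//     ;  // CI's callee has been spliced into the caller at the call site.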
+
+/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
+static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
+ for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
+ EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
+ if (exn) return exn;
+ }
+
+ return 0;
+}
+
+/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for
+/// the given llvm.eh.exception call.
+static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
+ BasicBlock *exnBlock = exn->getParent();
+
+ EHSelectorInst *outOfBlockSelector = 0;
+ for (Instruction::use_iterator
+ ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
+ EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
+ if (!sel) continue;
+
+ // Immediately accept an eh.selector in the same block as the
+ // exception call.
+ if (sel->getParent() == exnBlock) return sel;
+
+ // Otherwise, use the first selector we see.
+ if (!outOfBlockSelector) outOfBlockSelector = sel;
+ }
+
+ return outOfBlockSelector;
+}
+
+/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
+/// in the given landing pad. In principle, llvm.eh.exception is
+/// required to be in the landing pad; in practice, SplitCriticalEdge
+/// can break that invariant, and then inlining can break it further.
+/// There's a real need for a reliable solution here, but until one
+/// exists, we rely on some fragile workarounds.
+static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
+ // Look for an exception call in the actual landing pad.
+ EHExceptionInst *exn = findExceptionInBlock(lpad);
+ if (exn) return findSelectorForException(exn);
+
+ // Okay, if that failed, look for one in an obvious successor. If
+ // we find one, we'll fix the IR by moving things back to the
+ // landing pad.
+
+ bool dominates = true; // does the lpad dominate the exn call
+ BasicBlock *nonDominated = 0; // if not, the first non-dominated block
+ BasicBlock *lastDominated = 0; // and the block which branched to it
+
+ BasicBlock *exnBlock = lpad;
+
+ // We need to protect against lpads that lead into infinite loops.
+ SmallPtrSet<BasicBlock*,4> visited;
+ visited.insert(exnBlock);
+
+ do {
+ // We're not going to apply this hack to anything more complicated
+ // than a series of unconditional branches, so if the block
+ // doesn't terminate in an unconditional branch, just fail. More
+ // complicated cases can arise when, say, sinking a call into a
+ // split unwind edge and then inlining it; but that can do almost
+ // *anything* to the CFG, including leaving the selector
+ // completely unreachable. The only way to fix that properly is
+ // to (1) prohibit transforms which move the exception or selector
+ // values away from the landing pad, e.g. by producing them with
+ // instructions that are pinned to an edge like a phi, or
+ // producing them with not-really-instructions, and (2) making
+ // transforms which split edges deal with that.
+ BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
+ if (!branch || branch->isConditional()) return 0;
+
+ BasicBlock *successor = branch->getSuccessor(0);
+
+ // Fail if we found an infinite loop.
+ if (!visited.insert(successor)) return 0;
+
+ // If the successor isn't dominated by exnBlock:
+ if (!successor->getSinglePredecessor()) {
+ // We don't want to have to deal with threading the exception
+ // through multiple levels of phi, so give up if we've already
+ // followed a non-dominating edge.
+ if (!dominates) return 0;
+
+ // Otherwise, remember this as a non-dominating edge.
+ dominates = false;
+ nonDominated = successor;
+ lastDominated = exnBlock;
+ }
+
+ exnBlock = successor;
+
+ // Can we stop here?
+ exn = findExceptionInBlock(exnBlock);
+ } while (!exn);
+
+ // Look for a selector call for the exception we found.
+ EHSelectorInst *selector = findSelectorForException(exn);
+ if (!selector) return 0;
+
+ // The easy case is when the landing pad still dominates the
+ // exception call, in which case we can just move both calls back to
+ // the landing pad.
+ if (dominates) {
+ selector->moveBefore(lpad->getFirstNonPHI());
+ exn->moveBefore(selector);
+ return selector;
+ }
+
+ // Otherwise, we have to split at the first non-dominating block.
+ // The CFG looks basically like this:
+ // lpad:
+ // phis_0
+ // insnsAndBranches_1
+ // br label %nonDominated
+ // nonDominated:
+ // phis_2
+ // insns_3
+ // %exn = call i8* @llvm.eh.exception()
+ // insnsAndBranches_4
+ // %selector = call @llvm.eh.selector(i8* %exn, ...
+ // We need to turn this into:
+ // lpad:
+ // phis_0
+ // %exn0 = call i8* @llvm.eh.exception()
+ // %selector0 = call @llvm.eh.selector(i8* %exn0, ...
+ // insnsAndBranches_1
+ // br label %split // from lastDominated
+ // nonDominated:
+ // phis_2 (without edge from lastDominated)
+ // %exn1 = call i8* @llvm.eh.exception()
+ // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ...
+ // br label %split
+ // split:
+ // phis_2 (edge from lastDominated, edge from split)
+ // %exn = phi ...
+ // %selector = phi ...
+ // insns_3
+ // insnsAndBranches_4
+
+ assert(nonDominated);
+ assert(lastDominated);
+
+ // First, make clones of the intrinsics to go in lpad.
+ EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone());
+ EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone());
+ lpadSelector->setArgOperand(0, lpadExn);
+ lpadSelector->insertBefore(lpad->getFirstNonPHI());
+ lpadExn->insertBefore(lpadSelector);
+
+ // Split the non-dominated block.
+ BasicBlock *split =
+ nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(),
+ nonDominated->getName() + ".lpad-fix");
+
+ // Redirect the last dominated branch there.
+ cast<BranchInst>(lastDominated->back()).setSuccessor(0, split);
+
+ // Move the existing intrinsics to the end of the old block.
+ selector->moveBefore(&nonDominated->back());
+ exn->moveBefore(selector);
+
+ Instruction *splitIP = &split->front();
+
+ // For all the phis in nonDominated, make a new phi in split to join
+ // that phi with the edge from lastDominated.
+ for (BasicBlock::iterator
+ i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) {
+ PHINode *phi = dyn_cast<PHINode>(i);
+ if (!phi) break;
+
+ PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(),
+ splitIP);
+ phi->replaceAllUsesWith(splitPhi);
+ splitPhi->addIncoming(phi, nonDominated);
+ splitPhi->addIncoming(phi->removeIncomingValue(lastDominated),
+ lastDominated);
+ }
+
+ // Make new phis for the exception and selector.
+ PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP);
+ exn->replaceAllUsesWith(exnPhi);
+ selector->setArgOperand(0, exn); // except for this use
+ exnPhi->addIncoming(exn, nonDominated);
+ exnPhi->addIncoming(lpadExn, lastDominated);
+
+ PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP);
+ selector->replaceAllUsesWith(selectorPhi);
+ selectorPhi->addIncoming(selector, nonDominated);
+ selectorPhi->addIncoming(lpadSelector, lastDominated);
+
+ return lpadSelector;
+}
+
+namespace {
+ /// A class for recording information about inlining through an invoke.
+ class InvokeInliningInfo {
+ BasicBlock *OuterUnwindDest;
+ EHSelectorInst *OuterSelector;
+ BasicBlock *InnerUnwindDest;
+ PHINode *InnerExceptionPHI;
+ PHINode *InnerSelectorPHI;
+ SmallVector<Value*, 8> UnwindDestPHIValues;
+
+ public:
+ InvokeInliningInfo(InvokeInst *II) :
+ OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
+ InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) {
+
+ // If there are PHI nodes in the unwind destination block, we
+ // need to keep track of which values came into them from the
+ // invoke before removing the edge from this block.
+ llvm::BasicBlock *invokeBB = II->getParent();
+ for (BasicBlock::iterator I = OuterUnwindDest->begin();
+ isa<PHINode>(I); ++I) {
+ // Save the value to use for this edge.
+ PHINode *phi = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB));
+ }
+ }
+
+ /// The outer unwind destination is the target of unwind edges
+ /// introduced for calls within the inlined function.
+ BasicBlock *getOuterUnwindDest() const {
+ return OuterUnwindDest;
+ }
+
+ EHSelectorInst *getOuterSelector() {
+ if (!OuterSelector)
+ OuterSelector = findSelectorForLandingPad(OuterUnwindDest);
+ return OuterSelector;
+ }
+
+ BasicBlock *getInnerUnwindDest();
+
+ bool forwardEHResume(CallInst *call, BasicBlock *src);
+
+ /// Add incoming-PHI values to the unwind destination block for
+ /// the given basic block, using the values for the original
+ /// invoke's source block.
+ void addIncomingPHIValuesFor(BasicBlock *BB) const {
+ addIncomingPHIValuesForInto(BB, OuterUnwindDest);
+ }
+
+ void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
+ BasicBlock::iterator I = dest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *phi = cast<PHINode>(I);
+ phi->addIncoming(UnwindDestPHIValues[i], src);
+ }
+ }
+ };
+}
+
+/// Get or create a target for the branch out of rewritten calls to
+/// llvm.eh.resume.
+BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
+ if (InnerUnwindDest) return InnerUnwindDest;
+
+ // Find the llvm.eh.exception and llvm.eh.selector calls in the outer
+ // landing pad and hoist them so that they immediately follow the phis.
+ EHSelectorInst *selector = getOuterSelector();
+ if (!selector) return 0;
+
+ // The call to llvm.eh.exception *must* be in the landing pad.
+ Instruction *exn = cast<Instruction>(selector->getArgOperand(0));
+ assert(exn->getParent() == OuterUnwindDest);
+
+ // TODO: recognize when we've already done this, so that we don't
+ // get a linear number of these when inlining calls into lots of
+ // invokes with the same landing pad.
+
+ // Do the hoisting.
+ Instruction *splitPoint = exn->getParent()->getFirstNonPHI();
+ assert(splitPoint != selector && "selector-on-exception dominance broken!");
+ if (splitPoint == exn) {
+ selector->removeFromParent();
+ selector->insertAfter(exn);
+ splitPoint = selector->getNextNode();
+ } else {
+ exn->moveBefore(splitPoint);
+ selector->moveBefore(splitPoint);
+ }
+
+ // Split the landing pad.
+ InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint,
+ OuterUnwindDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned phiCapacity = 2;
+
+ // Create corresponding new phis for all the phis in the outer landing pad.
+ BasicBlock::iterator insertPoint = InnerUnwindDest->begin();
+ BasicBlock::iterator I = OuterUnwindDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *outerPhi = cast<PHINode>(I);
+ PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity,
+ outerPhi->getName() + ".lpad-body",
+ insertPoint);
+ outerPhi->replaceAllUsesWith(innerPhi);
+ innerPhi->addIncoming(outerPhi, OuterUnwindDest);
+ }
+
+ // Create a phi for the exception value...
+ InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity,
+ "exn.lpad-body", insertPoint);
+ exn->replaceAllUsesWith(InnerExceptionPHI);
+ selector->setArgOperand(0, exn); // restore this use
+ InnerExceptionPHI->addIncoming(exn, OuterUnwindDest);
+
+ // ...and the selector.
+ InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity,
+ "selector.lpad-body", insertPoint);
+ selector->replaceAllUsesWith(InnerSelectorPHI);
+ InnerSelectorPHI->addIncoming(selector, OuterUnwindDest);
+
+ // All done.
+ return InnerUnwindDest;
+}
+
+/// [LIBUNWIND] Try to forward the given call, which logically occurs
+/// at the end of the given block, as a branch to the inner unwind
+/// block. Returns true if the call was forwarded.
+bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
+ // First, check whether this is a call to the intrinsic.
+ Function *fn = dyn_cast<Function>(call->getCalledValue());
+ if (!fn || fn->getName() != "llvm.eh.resume")
+ return false;
+
+ // At this point, we need to return true on all paths, because
+ // otherwise we'll construct an invoke of the intrinsic, which is
+ // not well-formed.
+
+ // Try to find or make an inner unwind dest, which will fail if we
+ // can't find a selector call for the outer unwind dest.
+ BasicBlock *dest = getInnerUnwindDest();
+ bool hasSelector = (dest != 0);
+
+ // If we failed, just use the outer unwind dest, dropping the
+ // exception and selector on the floor.
+ if (!hasSelector)
+ dest = OuterUnwindDest;
+
+ // Make a branch.
+ BranchInst::Create(dest, src);
+
+ // Update the phis in the destination. They were inserted in an
+ // order which makes this work.
+ addIncomingPHIValuesForInto(src, dest);
+
+ if (hasSelector) {
+ InnerExceptionPHI->addIncoming(call->getArgOperand(0), src);
+ InnerSelectorPHI->addIncoming(call->getArgOperand(1), src);
+ }
+
+ return true;
+}
+
+/// [LIBUNWIND] Check whether this selector is "only cleanups":
+/// call i32 @llvm.eh.selector(blah, blah, i32 0)
+static bool isCleanupOnlySelector(EHSelectorInst *selector) {
+ if (selector->getNumArgOperands() != 3) return false;
+ ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
+ return (val && val->isZero());
+}
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyzes BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to the invoke's unwind destination
+/// and fills in the PHI nodes in that block with the values recorded in Invoke.
+///
+/// Returns true to indicate that the next block should be skipped.
+static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+ InvokeInliningInfo &Invoke) {
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI == 0) continue;
+
+ // LIBUNWIND: merge selector instructions.
+ if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) {
+ EHSelectorInst *Outer = Invoke.getOuterSelector();
+ if (!Outer) continue;
+
+ bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner);
+ bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer);
+
+ // If both selectors contain only cleanups, we don't need to do
+ // anything. TODO: this is really just a very specific instance
+ // of a much more general optimization.
+ if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue;
+
+ // Otherwise, we just append the outer selector to the inner selector.
+ SmallVector<Value*, 16> NewSelector;
+ for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i)
+ NewSelector.push_back(Inner->getArgOperand(i));
+ for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
+ NewSelector.push_back(Outer->getArgOperand(i));
+
+ CallInst *NewInner =
+ IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
+ // No need to copy attributes, calling convention, etc.
+ NewInner->takeName(Inner);
+ Inner->replaceAllUsesWith(NewInner);
+ Inner->eraseFromParent();
+ continue;
+ }
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ if (CI->doesNotThrow())
+ continue;
+
+ // Convert this function call into an invoke instruction.
+ // First, split the basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+
+ // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch
+ // directly to the new landing pad.
+ if (Invoke.forwardEHResume(CI, BB)) {
+ // TODO: 'Split' is now unreachable; clean it up.
+
+ // We want to leave the original call intact so that the call
+ // graph and other structures won't get misled. We also have to
+ // avoid processing the next block, or we'll iterate here forever.
+ return true;
+ }
+
+ // Otherwise, create the new invoke instruction.
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split,
+ Invoke.getOuterUnwindDest(),
+ InvokeArgs, CI->getName(), BB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakVH.
+ CI->replaceAllUsesWith(II);
+
+ Split->getInstList().pop_front(); // Delete the original call
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(BB);
+ return false;
+ }
+
+ return false;
+}
+
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes and turn unwind
+/// instructions into branches to the invoke unwind dest.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function; scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite. If the code doesn't have calls or unwinds, we know there is
+ // nothing to rewrite.
+ if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+ return;
+ }
+
+ InvokeInliningInfo Invoke(II);
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ if (InlinedCodeInfo.ContainsCalls)
+ if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
+ // Honor a request to skip the next block. We don't need to
+ // consider UnwindInsts in this case either.
+ ++BB;
+ continue;
+ }
+
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // An UnwindInst requires special handling when it gets inlined into an
+ // invoke site. Once this happens, we know that the unwind would cause
+ // a control transfer to the invoke exception destination, so we can
+ // transform it into a direct branch to the exception destination.
+ BranchInst::Create(InvokeDest, UI);
+
+ // Delete the unwind instruction!
+ UI->eraseFromParent();
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(BB);
+ }
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made. Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ ValueToValueMapTy &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Value *OrigCall = I->first;
+
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == 0)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (NewCall == 0) continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (I->second->getFunction() == 0)
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ }
+
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument requires an alignment greater than what is known
+    // about the passed-in pointer, then we either have to raise the pointer's
+    // known alignment or give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+ IFI.TD) >= ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ LLVMContext &Context = Arg->getContext();
+
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have TargetData, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD)
+ Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Function *Caller = TheCall->getParent()->getParent();
+
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ &*Caller->begin()->begin());
+ // Emit a memcpy.
+ Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
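+// A hypothetical illustration of the rewrite above (names invented): for an
+// argument passed as
+//   %struct.S* byval align 4 %p
+// the inlined body ends up reading from a fresh copy in the caller,
+//   %p.copy = alloca %struct.S, align 4              ; caller's entry block
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size,
+//                                        i32 1, i1 false)
+// where %dst/%src are i8* casts of %p.copy/%p and %size is sizeof(%struct.S).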
+
+// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
+// intrinsic.
+static bool isUsedByLifetimeMarker(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
+ ++UI) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// hasLifetimeMarkers - Check whether the given alloca already has
+// lifetime.start or lifetime.end intrinsics.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+ const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
+ if (AI->getType() == Int8PtrTy)
+ return isUsedByLifetimeMarker(AI);
+
+ // Do a scan to find all the casts to i8*.
+ for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
+ ++I) {
+ if (I->getType() != Int8PtrTy) continue;
+ if (I->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(*I))
+ return true;
+ }
+ return false;
+}
+
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
+/// update InlinedAtEntry of a DebugLoc.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+ const DebugLoc &InlinedAtDL,
+ LLVMContext &Ctx) {
+ if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+ DebugLoc NewInlinedAtDL
+ = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ NewInlinedAtDL.getAsMDNode(Ctx));
+ }
+
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ InlinedAtDL.getAsMDNode(Ctx));
+}
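+// A hypothetical illustration of the recursion above: if an instruction in
+// the callee already carries a location L1 whose inlinedAt is L2, and the
+// call being inlined has location L3, the result is L1 with inlinedAt set to
+// (L2 with inlinedAt L3), so the whole inline stack is preserved.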
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// encode the location at which these instructions were inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown())
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ }
+ }
+}
+
+// InlineFunction - This function inlines the called function into the basic
+// block of the caller. This returns false if it is not possible to inline this
+// call. The program is still in a well defined state if this occurs though.
+//
+// Note that this only does one level of inlining. For example, if the
+// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+// exists in the instruction stream. Similarly this will inline a recursive
+// function by one level.
+//
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
+ Instruction *TheCall = CS.getInstruction();
+ LLVMContext &Context = TheCall->getContext();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ //
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+      // When byval arguments are actually inlined, we need to make the copy implied
+ // by them explicit. However, we don't do this if the callee is readonly
+ // or readnone, because the copy would be unneeded: the callee doesn't
+ // modify the struct.
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
+ }
+
+ VMap[I] = ActualArg;
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, IFI.TD, TheCall);
+
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ //
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!isa<Constant>(AI->getArraySize()))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+
+  // Leave lifetime markers for the static allocas, scoping them to the
+ // function we just inlined.
+ if (!IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(FirstNewBlock->begin());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ builder.CreateLifetimeStart(AI);
+ for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
+ IRBuilder<> builder(Returns[ri]);
+ builder.CreateLifetimeEnd(AI);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ }
+
+ // Count the number of StackRestore calls we insert.
+ unsigned NumStackRestores = Returns.size();
+
+ // If we are inlining an invoke instruction, insert restores before each
+ // unwind. These unwinds will be rewritten into branches later.
+ if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
+ ++NumStackRestores;
+ }
+ }
+ }
+
+  // If we are inlining a tail call instruction through a call site that isn't
+ // marked 'tail', we must remove the tail marker for any calls in the inlined
+ // code. Also, calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
+ // instructions are unreachable.
+ if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (isa<UnwindInst>(Term)) {
+ new UnreachableInst(Context, Term);
+ BB->getInstList().erase(Term);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any inlined 'unwind' instructions into branches to the invoke exception
+ // destination, and call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+ // Otherwise, we have the normal case, of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+    // updated due to new incoming edges, and makes the invoke case more
+ // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ const Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = 0;
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+
+ // Add a branch to the merge points and remove return instructions.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst::Create(AfterCallBB, RI);
+ RI->eraseFromParent();
+ }
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+    // Delete the return instruction and the now-empty ReturnBB.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI)
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+
+ return true;
+}
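+// A minimal usage sketch (illustrative only): it assumes an existing
+// CallInst *CI whose callee is a defined, non-vararg function, plus optional
+// CallGraph *CG and TargetData *TD pointers, either of which may be null:
+//
+//   InlineFunctionInfo IFI(CG, TD);
+//   if (InlineFunction(CallSite(CI), IFI)) {
+//     // CI has been erased. IFI.StaticAllocas and IFI.InlinedCalls describe
+//     // the allocas and call sites that were introduced into the caller.
+//   }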
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 000000000000..45c15de9437f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,64 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names; this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->hasName() && !AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->hasName() && !I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+}
+
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
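+// Usage sketch (illustrative): since the pass is registered as "instnamer",
+// it can be run from opt, e.g.
+//   opt -instnamer input.ll -S -o named.ll
+// or added to a PassManager via createInstructionNamerPass().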
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 000000000000..b654111eba74
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,279 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the code on the left into the code on the right:
+//
+//   for (...)               for (...)
+//     if (c)                  if (c)
+//       X1 = ...                X1 = ...
+//     else                    else
+//       X2 = ...                X2 = ...
+//     X3 = phi(X1, X2)        X3 = phi(X1, X2)
+//     ... = X3 + 4            X4 = phi(X3)
+//                             ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+namespace {
+ struct LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ std::vector<BasicBlock*> LoopBlocks;
+ PredIteratorCache PredCache;
+ Loop *L;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+ private:
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
+ }
+
+ /// inLoop - returns true if the given block is within the current loop
+ bool inLoop(BasicBlock *B) const {
+ return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+ }
+ };
+}
+
+char LCSSA::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ DominatorTree *DT) {
+ DomTreeNode *DomNode = DT->getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+
+/// runOnLoop - Process a single loop, inserting LCSSA PHI nodes as needed.
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+ L = TheLoop;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ // Get the set of exiting blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Speed up queries by creating a sorted vector of blocks.
+ LoopBlocks.clear();
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ bool MadeChange = false;
+
+ for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+ BBI != E; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->use_back()->getParent() == BB &&
+ !isa<PHINode>(I->use_back())))
+ continue;
+
+ MadeChange |= ProcessInstruction(I, ExitBlocks);
+ }
+ }
+
+ assert(L->isLCSSAForm(*DT));
+ PredCache.clear();
+
+ return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] == BB)
+ return true;
+ return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop. If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ SmallVector<Use*, 16> UsesToRewrite;
+
+ BasicBlock *InstBB = Inst->getParent();
+
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ UserBB = PN->getIncomingBlock(UI);
+
+ if (InstBB != UserBB && !inLoop(UserBB))
+ UsesToRewrite.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty()) return false;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not available
+ // along their unwind edge. The code below tests to see whether DomBB dominates
+ // the value, so adjust DomBB to the normal destination block, which is
+ // effectively where the value is first usable.
+ BasicBlock *DomBB = Inst->getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ DomBB = Inv->getNormalDest();
+
+ DomTreeNode *DomNode = DT->getNode(DomBB);
+
+ SmallVector<PHINode*, 16> AddedPHIs;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
+
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *ExitBB = *BBI;
+ if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
+
+ PHINode *PN = PHINode::Create(Inst->getType(),
+ PredCache.GetNumPreds(ExitBB),
+ Inst->getName()+".lcssa",
+ ExitBB->begin());
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
+ PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses in
+ // the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+
+ if (isa<PHINode>(UserBB->begin()) &&
+ isExitBlock(UserBB, ExitBlocks)) {
+ UsesToRewrite[i]->set(UserBB->begin());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ }
+
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
+ if (AddedPHIs[i]->use_empty())
+ AddedPHIs[i]->eraseFromParent();
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..0f6d9ae99d66
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,875 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Operator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
+ TerminatorInst *T = BB->getTerminator();
+ IRBuilder<> Builder(T);
+
+ // Branch - See if we are conditional jumping on constant
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+      // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Dest1);
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *DefaultDest = TheOnlyDest;
+ assert(TheOnlyDest == SI->getDefaultDest() &&
+ "Default destination is not successor #0?");
+
+ // Figure out which case it goes to.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ if (SI->getSuccessorValue(i) == CI) {
+ TheOnlyDest = SI->getSuccessor(i);
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (SI->getSuccessor(i) == DefaultDest) {
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e; // Don't skip an entry...
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+      // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not any of the cases, go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ return true;
+ }
+
+ if (SI->getNumSuccessors() == 2) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ SI->getSuccessorValue(1), "cond");
+
+ // Insert the new branch.
+ Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0));
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Address);
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
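+// A hypothetical before/after for the conditional-branch case above:
+//   br i1 true, label %then, label %else
+// becomes
+//   br label %then
+// after %else has been told it lost this predecessor; the switch and
+// indirectbr cases are folded analogously when their operand is constant.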
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I) {
+ if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->getValue())
+ return false;
+ return true;
+ }
+
+ if (!I->mayHaveSideEffects()) return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ return false;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ do {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, 0);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ } while (!DeadInsts.empty());
+
+ return true;
+}
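+// A hypothetical example of the recursion above: given
+//   %m = mul i32 %x, %y
+//   %a = add i32 %m, 1          ; %a has no uses
+// deleting %a leaves %m without uses, so %m is queued and deleted on the next
+// iteration of the worklist loop.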
+
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::use_iterator UI = I->use_begin();
+ Value::use_iterator UE = I->use_end();
+ if (UI == UE)
+ return true;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+ SmallPtrSet<Instruction*, 4> Visited;
+ for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+ I = cast<Instruction>(*I->use_begin())) {
+ if (I->use_empty())
+ return RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ // If we find an instruction more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (!Visited.insert(I)) {
+ // Break the cycle and delete the instruction and its operands.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I);
+ return true;
+ }
+ }
+ return false;
+}
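+// A hypothetical example of the cycle case above: a loop counter whose only
+// user is its own increment,
+//   %i      = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+//   %i.next = add i32 %i, 1
+// forms a single-use def-use cycle with no other users; the second visit to
+// %i breaks the cycle with undef and both instructions are deleted.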
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code; note that it can delete
+/// instructions in other blocks as well as in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+ bool MadeChange = false;
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ MadeChange = true;
+ if (BIHandle != BI)
+ BI = BB->begin();
+ continue;
+ }
+
+ if (Inst->isTerminator())
+ break;
+
+ WeakVH BIHandle(BI);
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ if (BIHandle != BI)
+ BI = BB->begin();
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = SimplifyInstruction(PN, TD);
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "SimplifyInstruction broken!");
+
+ Value *OldPhiIt = PhiIt;
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+ // If BB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace self referencing PHI with undef, it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ if (P) {
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI) {
+ PI->replaceAllUses(PredBB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+ }
+ }
+ // Nuke BB.
+ PredBB->eraseFromParent();
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (BBPreds.count(P))
+ CommonPreds.insert(P);
+ }
+
+ // Shortcut, if there are no common predecessors, merging is always safe
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential side-effect free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+  // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
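
As a schematic illustration of the transformation performed above (not taken from the patch itself):

// Before:  P1, P2 --> BB --> Succ     (BB holds only PHIs and a 'br Succ')
// After:   P1, P2 --> Succ            (BB is deleted; the PHI nodes in Succ
//                                       now take their values directly from
//                                       P1 and P2)
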
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+ I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+ Hash >>= 1;
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Proceed to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
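
The hashing above mixes each operand and incoming block into the hash with an XOR followed by a 7-bit rotate. A minimal standalone sketch of that mixing step (the helper name is invented for illustration):

#include <climits>
#include <stdint.h>

// XOR the value into the running hash, then rotate left by 7 bits, exactly as
// done per-operand in EliminateDuplicatePHINodes above.
static uintptr_t mixPHIHash(uintptr_t Hash, uintptr_t V) {
  Hash ^= V;
  return (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
}
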
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+ unsigned PrefAlign) {
+ V = V->stripPointerCasts();
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (GV->isDeclaration()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section, so increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
+ }
+
+ return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+ const TargetData *TD) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = enforceKnownAlignment(V, Align, PrefAlign);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
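
For a rough sense of the arithmetic above (illustrative numbers only, ignoring the Value::MaximumAlignment clamp): four known trailing zero bits translate into a 16-byte alignment.

// Sketch: the alignment inferred from known trailing zero bits.
static unsigned exampleInferredAlignment() {
  unsigned TrailZ = 4;        // low 4 bits proven zero by ComputeMaskedBits
  return 1u << TrailZ;        // 16; a PrefAlign of 32 would then prompt
                              // enforceKnownAlignment to try to raise it
}
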
+
+///===---------------------------------------------------------------------===//
+/// Dbg Intrinsic utilities
+///
+
+/// Inserts an llvm.dbg.value intrinsic before a store to an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ StoreInst *SI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal = NULL;
+ // If an argument is zero extended then use argument directly. The ZExt
+ // may be zapped by an optimization pass in future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
+ else
+ DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
+
+ // Propagate any debug metadata from the store onto the dbg.value.
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// Inserts an llvm.dbg.value intrinsic before a load of an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ LoadInst *LI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal =
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVar, LI);
+
+ // Propagate any debug metadata from the load onto the dbg.value.
+ DebugLoc LIDL = LI->getDebugLoc();
+ if (!LIDL.isUnknown())
+ DbgVal->setDebugLoc(LIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) {
+ DIBuilder DIB(*F.getParent());
+ SmallVector<DbgDeclareInst *, 4> Dbgs;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ Dbgs.push_back(DDI);
+ }
+ if (Dbgs.empty())
+ return false;
+
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+ E = Dbgs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ bool RemoveDDI = true;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ else
+ RemoveDDI = false;
+ if (RemoveDDI)
+ DDI->eraseFromParent();
+ }
+ }
+ return true;
+}
+
+/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
+/// alloca 'V', if any.
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
+ if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ return DDI;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 000000000000..e79fb5ac21b4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,753 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
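
A minimal sketch of the guarantees described above, phrased as the Loop queries a client pass would make. It mirrors the Loop::isLoopSimplifyForm() check mentioned in verifyAnalysis() below; as noted, any of the three conditions can legitimately fail when indirectbr is involved, so clients should check rather than assert.

#include "llvm/Analysis/LoopInfo.h"

// Sketch only: what "simplified form" means to a client of this pass.
static bool isInSimplifiedForm(const llvm::Loop *L) {
  return L->getLoopPreheader() &&   // single, non-critical entry edge
         L->getLoopLatch()     &&   // exactly one backedge
         L->hasDedicatedExits();    // exit blocks have only in-loop preds
}
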
+
+#define DEBUG_TYPE "loop-simplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested , "Number of nested loops split out");
+
+namespace {
+ struct LoopSimplify : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ Loop *L;
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
+
+ private:
+ bool ProcessLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ BasicBlock *InsertPreheaderForLoop(Loop *L);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+ L = l;
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ Changed |= ProcessLoop(L, LPM);
+
+ return Changed;
+}
+
+/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
+/// all loops have preheaders.
+///
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (SeparateNestedLoop(L, LPM)) {
+ ++NumNested;
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't eliminate this tail recursion, so use a goto instead.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(P);
+ }
+ }
+
+ // Split out the loop pre-header.
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
+ ".preheader", this);
+
+ NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
+ << "\n");
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+ return NewBB;
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks. This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+ SmallVector<BasicBlock*, 8> LoopBlocks;
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+ BasicBlock *P = *I;
+ if (L->contains(P)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ LoopBlocks.push_back(P);
+ }
+ }
+
+ assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+ BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+ LoopBlocks.size(), ".loopexit",
+ this);
+
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewBB->getName() << "\n");
+ return NewBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there. Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ std::set<BasicBlock*> &Blocks) {
+ std::vector<BasicBlock *> WorkList;
+ WorkList.push_back(InputBB);
+ do {
+ BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+ // If BB has not already been processed and is not the stop block, add
+ // its predecessors to the work list.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *WBB = *I;
+ WorkList.push_back(WBB);
+ }
+ } while(!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AliasAnalysis *AA, LoopInfo *LI) {
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return 0;
+}
+
+// PlaceSplitBlockCarefully - If the new block isn't already well placed, move
+// it right after one of the 'outside' blocks. This prevents the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop. This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
+ if (PN == 0) return 0; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
+
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
+ OuterLoopPreds.size(),
+ ".outer", this);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ // Add the new loop to the pass manager queue.
+ LPM.insertLoopIntoQueue(NewOuter);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I);
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+ for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P))
+ AddBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L)
+ LI->changeLoopFor(BB, NewOuter);
+ --i;
+ }
+ }
+
+ return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it. If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header. This ensures that loops have exactly one backedge.
+///
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+ BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName()+".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
+ if (AA) AA->copyValue(PN, NewPN);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (UniqueValue == 0)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ if (AA) AA->deleteValue(NewPN);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to jump to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that an indirectbr is nearby whenever there is trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 000000000000..6772511b5d5a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,395 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// It works best when loops have been canonicalized by the -indvars pass,
+// allowing it to determine the trip counts of loops easily.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+static inline void RemapInstruction(Instruction *I,
+ ValueToValueMapTy &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
+ if (It != VMap.end())
+ I->setOperand(op, It->second);
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return 0;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ std::string OldName = BB->getName();
+
+ // Erase basic block from the function...
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+
+ // Inherit predecessor's name if it exists...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+bool llvm::UnrollLoop(Loop *L, unsigned Count,
+ LoopInfo *LI, LPPassManager *LPM) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ // Find trip multiple if count is not available
+ unsigned TripMultiple = 1;
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+
+ if (TripCount != 0)
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ if (TripMultiple != 1)
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+ // Effectively "DCE" unrolled iterations that are beyond the tripcount
+ // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+
+ // If we know the trip count, we know the multiple...
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0;
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
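  // Worked example (illustrative numbers): with TripCount == 10 and Count == 4,
  // BreakoutTrip becomes 10 % 4 == 2 and TripMultiple is cleared to 0. When the
  // latch branches are wired up below, only the latch copy whose next
  // destination is header copy 2 keeps its conditional branch; the other latch
  // copies become unconditional, because with exactly 10 trips the exit can
  // only be taken from that copy.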
+
+ if (CompletelyUnroll) {
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
+ } else {
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ } else if (TripMultiple != 1) {
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ }
+ DEBUG(dbgs() << "!\n");
+ }
+
+ std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ OrigPHINode.push_back(PN);
+ if (Instruction *I =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
+ if (L->contains(I))
+ LastValueMap[I] = I;
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ for (unsigned It = 1; It != Count; ++It) {
+ std::vector<BasicBlock*> NewBlocks;
+
+ for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
+ E = LoopBlocks.end(); BB != E; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ // Loop over all of the PHI nodes in the block, changing them to use the
+ // incoming values from the previous block.
+ if (*BB == Header)
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHINode[i]] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ // Add phi entries for newly created values to all exit blocks except
+ // the successor of the latch block. The successor of the exit block will
+ // be updated specially after unrolling all the way.
+ if (*BB != LatchBlock)
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+ ++SI)
+ if (!L->contains(*SI))
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ phi->addIncoming(Incoming, New);
+ }
+
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock) {
+ Latches.push_back(New);
+
+ // Also, clear out the new latch's back edge so that it doesn't look
+ // like a new loop, so that it's amenable to being merged with adjacent
+ // blocks later on.
+ TerminatorInst *Term = New->getTerminator();
+ assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
+ assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
+ Term->setSuccessor(!ContinueOnTrue, NULL);
+ }
+
+ NewBlocks.push_back(New);
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (unsigned i = 0; i < NewBlocks.size(); ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ ::RemapInstruction(I, LastValueMap);
+ }
+
+ // The latch block exits the loop. If there are any PHI nodes in the
+ // successor blocks, update them to use the appropriate values computed as the
+ // last iteration of the loop.
+ if (Count != 1) {
+ BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
+ for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
+ SI != SE; ++SI) {
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ PN->addIncoming(InVal, LastIterationBB);
+ }
+ }
+ }
+
+ // Now, if we're doing complete unrolling, loop over the PHI nodes in the
+ // original block, setting them to their incoming values.
+ if (CompletelyUnroll) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll && j == 0) {
+ Dest = LoopExit;
+ NeedConditional = false;
+ }
+
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest);
+ } else {
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+ }
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ }
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+ BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ if (isInstructionTriviallyDead(Inst))
+ (*BB)->getInstList().erase(Inst);
+ else if (Value *V = SimplifyInstruction(Inst))
+ if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ Inst->replaceAllUsesWith(V);
+ (*BB)->getInstList().erase(Inst);
+ }
+ }
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+ // Remove the loop from the LoopPassManager if it's completely removed.
+ if (CompletelyUnroll && LPM != NULL)
+ LPM->deleteLoopFromQueue(L);
+
+ return true;
+}
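
A hypothetical caller, to show the expected calling convention (the wrapper name and the unroll factor are invented for illustration; the header path is the one included at the top of this file):

#include "llvm/Transforms/Utils/UnrollLoop.h"

// Sketch: ask for a 4x unroll, keeping LoopInfo consistent; the loop is
// removed from the LoopPassManager if it ends up completely unrolled.
static bool tryUnrollByFour(llvm::Loop *L, llvm::LoopInfo *LI,
                            llvm::LPPassManager &LPM) {
  return llvm::UnrollLoop(L, /*Count=*/4, LI, &LPM);
}
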
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 000000000000..c1213fac7bc7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+ cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+ cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+ class LowerExpectIntrinsic : public FunctionPass {
+
+ bool HandleSwitchExpect(SwitchInst *SI);
+
+ bool HandleIfExpect(BranchInst *BI);
+
+ public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+ };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+
+ unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ std::vector<Value *> Vec;
+ unsigned n = SI->getNumCases();
+ Vec.resize(n + 1); // +1 for MDString
+
+ Vec[0] = MDString::get(Context, "branch_weights");
+ for (unsigned i = 0; i < n; ++i) {
+ Vec[i + 1] = ConstantInt::get(Int32Ty,
+ i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+ }
+
+ MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+ SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ SI->setCondition(ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+ if (BI->isUnconditional())
+ return false;
+
+ // Handle non-optimized IR code like:
+ // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %tobool = icmp ne i64 %expval, 0
+ // br i1 %tobool, label %if.then, label %if.end
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ bool Likely = ExpectedValue->isOne();
+
+ // If the expected value is 1, the true successor (branch 0) is more likely;
+ // otherwise the false successor (branch 1) is more likely.
+ Value *Ops[] = {
+ MDString::get(Context, "branch_weights"),
+ ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+ ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+ };
+
+ MDNode *WeightsNode = MDNode::get(Context, Ops);
+ BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ CmpI->setOperand(0, ArgValue);
+ return true;
+}
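
The net effect on the IR pattern shown in the comment above, with the default weights defined earlier in this file (a schematic illustration; the exact profile-metadata syntax varies between LLVM versions):

// The branch ends up testing %conv1 directly and carries, schematically,
//   !prof !{!"branch_weights", i32 64, i32 4}
// i.e. when the expected value is 1 the true successor is weighted 64 and the
// false successor 4 (LikelyBranchWeight : UnlikelyBranchWeight).
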
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I++;
+
+ // Create "block_weights" metadata.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (HandleIfExpect(BI))
+ IfHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (HandleSwitchExpect(SI))
+ IfHandled++;
+ }
+
+ // remove llvm.expect intrinsics.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ) {
+ CallInst *CI = dyn_cast<CallInst>(BI++);
+ if (!CI)
+ continue;
+
+ Function *Fn = CI->getCalledFunction();
+ if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ Value *Exp = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(Exp);
+ CI->eraseFromParent();
+ }
+ }
+ }
+
+ return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+ "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+ return new LowerExpectIntrinsic();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 000000000000..f77d19de900d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,603 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support allows any program that does not actually
+// "throw an exception" to execute correctly, by turning 'invoke' instructions
+// into calls and by turning 'unwind' instructions into calls to abort(). If
+// the program does dynamically execute an 'unwind' instruction, it simply
+// aborts.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs, the CFG is not entirely accurate (exceptional
+// control flow edges are no longer correct), so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general-purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
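+// As a rough illustration of the 'cheap' model only (not literal pass output),
+// an invoke/unwind pair such as
+//
+//   invoke void @foo() to label %cont unwind label %lpad
+//   ...
+//   unwind
+//
+// is rewritten into plain calls and branches:
+//
+//   call void @foo()
+//   br label %cont
+//   ...
+//   call void @abort()
+//   ret void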
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+ class LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Constant *AbortFn;
+
+ // Used for expensive EH support.
+ StructType *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport;
+
+ // We peek into TLI to grab the target's jmp_buf size and alignment.
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+ }
+
+ private:
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+}
+
+char LowerInvoke::ID = 0;
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.
+bool LowerInvoke::doInitialization(Module &M) {
+ const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ if (useExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200;
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
+
+ const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist");
+ }
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ }
+
+ // We need the 'abort' function for both models.
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+ (Type *)0);
+ return true;
+}
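+
+// Illustrative sketch of the module-level entities created by doInitialization
+// for the expensive model (the jump buffer size of 200 is the fallback used
+// when no TargetLowering is available):
+//
+//   %llvm.sjljeh.jmpbufty = type { [200 x i8*], %llvm.sjljeh.jmpbufty* }
+//   @llvm.sjljeh.jblist = linkonce global %llvm.sjljeh.jmpbufty* null
+//   declare void @abort()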
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(BB);
+
+ // Remove the invoke instruction now.
+ BB->getInstList().erase(II);
+
+ ++NumInvokes; Changed = true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "", UI)->setTailCall();
+
+ // Insert a return instruction. This really should be a "barrier", as it
+ // is unreachable.
+ ReturnInst::Create(F.getContext(),
+ F.getReturnType()->isVoidTy() ?
+ 0 : Constant::getNullValue(F.getReturnType()), UI);
+
+ // Remove the unwind instruction now.
+ BB->getInstList().erase(UI);
+
+ ++NumUnwinds; Changed = true;
+ }
+ return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum,
+ AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ // Insert a store of the stack ptr before the invoke, so we can restore it
+ // later in the exception case.
+ CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+ new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
+
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+ // nonvolatile.
+ new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+ InvokeNum, false, NI);
+
+ Instruction* StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstNonPHI());
+ CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+ // Insert a normal call instruction.
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+}
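+
+// Rough shape of a rewritten invoke in the expensive model (illustrative only;
+// %invokenum, %stackptr and %ssret follow the names used by this pass, @foo is
+// a placeholder callee):
+//
+//   store volatile i32 <InvokeNo>, i32* %invokenum
+//   %ssret = call i8* @llvm.stacksave()
+//   store volatile i8* %ssret, i8** %stackptr
+//   call void @foo()                    ; was: invoke void @foo() ...
+//   br label %normal.dest
+//
+// with the matching stack restore and the zeroing of %invokenum emitted at the
+// unwind and normal destinations respectively.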
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge. This process also splits all critical edges
+// coming out of invokes.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the argument.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the argument.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Scan all of the uses and see if the live range is live across an unwind
+ // edge. If we find a use live across an invoke edge, create an alloca
+ // and spill the value.
+ std::set<InvokeInst*> InvokesWithStoreInserted;
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
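+
+// A value spilled by DemoteRegToStack above ends up, roughly (illustrative
+// only; the ".reg2mem"/".reload" names are approximate), as a stack slot that
+// is stored once at the definition and reloaded before each use; the reloads
+// are volatile because VolatileLoads is passed as true:
+//
+//   entry:
+//     %t.reg2mem = alloca i32
+//   ...
+//     %t = add i32 %a, %b
+//     store i32 %t, i32* %t.reg2mem
+//   ...
+//     %t.reload = load i32* %t.reg2mem    ; volatile reload at each use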
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+
+ if (Unwinds.empty() && Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // TODO: This is not an optimal way to do this. In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+// code path on zero, and to a designated 'catch' block on nonzero.
+ Value *OldJmpBufPtr = 0;
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invokes.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
+
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
+ "OldBuf",
+ EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke, this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call).
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
+
+ Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+ SwitchInst *CatchSwitch =
+ SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+ // Now that things are set up, insert the setjmp call itself.
+
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "setjmp.cont");
+
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
+ "TheJmpBuf",
+ EntryBB->getTerminator());
+ JmpBufPtr = new BitCastInst(JmpBufPtr,
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", EntryBB->getTerminator());
+ Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
+ EntryBB->getTerminator());
+
+ // Compare the return value to zero.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
+
+ // At this point, we are all set up, rewrite each invoke instruction.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
+ }
+
+ // We know that there is at least one unwind.
+
+ // Create three new blocks, the block to load the jmpbuf ptr and compare
+ // against null, the block to do the longjmp, and the error block for the
+ // case where it is null. Add them at the end of the function because they
+ // are not hot.
+ BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+ "dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
+
+ // If this function contains an invoke, restore the old jumpbuf ptr.
+ Value *BufPtr;
+ if (OldJmpBufPtr) {
+ // Before the return, insert a copy from the saved value to the new value.
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+ new StoreInst(BufPtr, JBListHead, UnwindHandler);
+ } else {
+ BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
+ Constant::getNullValue(BufPtr->getType()),
+ "notnull");
+ BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+
+ // Create the block to do the longjmp.
+ // Get a pointer to the jmpbuf and longjmp.
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+ Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
+ UnwindBlock);
+ Idx[0] = new BitCastInst(Idx[0],
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", UnwindBlock);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+ CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
+ new UnreachableInst(F.getContext(), UnwindBlock);
+
+ // Set up the term block ("throw without a catch").
+ new UnreachableInst(F.getContext(), TermBlock);
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "",
+ TermBlock->getTerminator())->setTailCall();
+
+
+ // Replace all unwinds with a branch to the unwind handler.
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindHandler, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, restore the old jmpbuf pointer to its input value.
+ if (OldJmpBufPtr) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *R = Returns[i];
+
+ // Before the return, insert a copy from the saved value to the new value.
+ Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+ new StoreInst(OldBuf, JBListHead, true, R);
+ }
+ }
+
+ return true;
+}
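+
+// For orientation only, the entry block of a function containing invokes ends
+// up looking roughly like this after insertExpensiveEHSupport (names follow
+// the allocas and blocks created above; %buf stands for the bitcast jmpbuf
+// pointer, details elided):
+//
+//   entry:
+//     %jblink = alloca %llvm.sjljeh.jmpbufty
+//     ...                           ; save old @llvm.sjljeh.jblist, install %jblink
+//     %sjret = call i32 @llvm.setjmp(i8* %buf)
+//     %notunwind = icmp eq i32 %sjret, 0
+//     br i1 %notunwind, label %setjmp.cont, label %setjmp.catch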
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (useExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 000000000000..ed733d393a11
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,323 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
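+// For illustration only (block and value names are approximate), a two-case
+// switch such as
+//
+//   switch i32 %x, label %default [ i32 0, label %a
+//                                   i32 1, label %b ]
+//
+// is replaced by a small binary tree of compares and conditional branches:
+//
+//   NodeBlock:
+//     %Pivot = icmp slt i32 %x, 1
+//     br i1 %Pivot, label %LeafBlock, label %LeafBlock1
+//   LeafBlock:                                  ; handles case 0
+//     %SwitchLeaf = icmp eq i32 %x, 0
+//     br i1 %SwitchLeaf, label %a, label %NewDefault
+//   LeafBlock1:                                 ; handles case 1
+//     %SwitchLeaf1 = icmp eq i32 %x, 1
+//     br i1 %SwitchLeaf1, label %b, label %NewDefault
+//   NewDefault:
+//     br label %default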
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+ /// instructions.
+ class LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerInvokePassID);
+ }
+
+ struct CaseRange {
+ Constant* Low;
+ Constant* High;
+ BasicBlock* BB;
+
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ Low(low), High(high), BB(bb) { }
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ void processSwitchInst(SwitchInst *SI);
+
+ BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ };
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+}
+
+char LowerSwitch::ID = 0;
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitch::ID;
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+bool LowerSwitch::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ processSwitchInst(SI);
+ }
+ }
+
+ return Changed;
+}
+
+// operator<< - Used for debugging purposes.
+//
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) {
+ O << "[";
+
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+ E = C.end(); B != E; ) {
+ O << *B->Low << " -" << *B->High;
+ if (++B != E) O << ", ";
+ }
+
+ return O << "]";
+}
+
+// switchConvert - Convert the switch statement into a binary lookup of
+// the case values. The function recursively builds this tree.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+ Value* Val, BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ unsigned Size = End - Begin;
+
+ if (Size == 1)
+ return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ DEBUG(dbgs() << "RHS: " << RHS << "\n");
+
+ CaseRange& Pivot = *(Begin + Mid);
+ DEBUG(dbgs() << "Pivot ==> "
+ << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
+ << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+ BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
+ OrigBlock, Default);
+ BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
+ OrigBlock, Default);
+
+ // Create a new node that checks if the value is < pivot. Go to the
+ // left branch if it is and right branch if not.
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewNode);
+
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+ Val, Pivot.Low, "Pivot");
+ NewNode->getInstList().push_back(Comp);
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
+
+// newLeafBlock - Create a new leaf block for the binary lookup tree. It
+// checks if the switch's value == the case's value. If not, then it
+// jumps to the default branch. At this point in the tree, the value
+// can't be another valid case value, so the jump to the "default" branch
+// is warranted.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewLeaf);
+
+ // Emit comparison
+ ICmpInst* Comp = NULL;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+ Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
+ Val->getName()+".off",
+ NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock* Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+ cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
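+// For example (illustrative), with cases that share a destination:
+//
+//   switch i32 %x, label %default [ i32 1, label %dest
+//                                   i32 2, label %dest
+//                                   i32 3, label %dest ]
+//
+// Clusterify merges the three adjacent cases into the single CaseRange
+// [1, 3], which newLeafBlock later lowers to one unsigned range check
+// ("%x - 1 <=u 2") instead of three equality tests.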
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
+ Cases.push_back(CaseRange(SI->getSuccessorValue(i),
+ SI->getSuccessorValue(i),
+ SI->getSuccessor(i)));
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters.
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getOperand(0); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // If there is only the default destination, don't bother with the code below.
+ if (SI->getNumOperands() == 2) {
+ BranchInst::Create(SI->getDefaultDest(), CurBlock);
+ CurBlock->getInstList().erase(SI);
+ return;
+ }
+
+ // Create a new, empty default block so that the new hierarchy of
+ // if-then statements goes to this block and the PHI nodes are happy.
+ BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+ OrigBlock, NewDefault);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ CurBlock->getInstList().erase(SI);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 000000000000..f4ca81af6d87
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,90 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
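+// A minimal sketch of the effect (the heavy lifting is done by PromoteMemToReg;
+// see PromoteMemoryToRegister.cpp): a promotable alloca like
+//
+//   %x = alloca i32
+//   store i32 1, i32* %x
+//   %v = load i32* %x
+//
+// disappears entirely; every use of %v is rewritten to use the stored value
+// (here the constant 1) directly.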
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+ struct PromotePass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ }
+ };
+} // end of anonymous namespace
+
+char PromotePass::ID = 0;
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
+bool PromotePass::runOnFunction(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// createPromoteMemoryToRegisterPass - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromotePass();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 000000000000..e5a00f4e9774
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1134 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
+//
+//===----------------------------------------------------------------------===//
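+// Illustrative sketch (not literal pass output): an alloca stored on two paths
+// and loaded at their join point
+//
+//   entry:
+//     %x = alloca i32
+//     br i1 %c, label %t, label %f
+//   t:
+//     store i32 1, i32* %x
+//     br label %join
+//   f:
+//     store i32 2, i32* %x
+//     br label %join
+//   join:
+//     %v = load i32* %x
+//
+// is promoted by placing a PHI node at the join and deleting the alloca:
+//
+//   join:
+//     %v = phi i32 [ 1, %t ], [ 2, %f ]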
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+namespace llvm {
+template<>
+struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
+ typedef std::pair<BasicBlock*, unsigned> EltTy;
+ static inline EltTy getEmptyKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+ }
+ static inline EltTy getTombstoneKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+ }
+ static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+ return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+ }
+ static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
+ return LHS == RHS;
+ }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
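+
+// For example (illustrative; @takes_ptr is a made-up external function), the
+// following alloca is not promotable because its address escapes through a
+// call, which is neither a load nor a store:
+//
+//   %p = alloca i32
+//   call void @takes_ptr(i32* %p)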
+
+namespace {
+ struct AllocaInfo;
+
+ // Data package used by RenamePass()
+ class RenamePassData {
+ public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P,
+ const ValVector &V) : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+ };
+
+ /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+ /// load/store instructions in the block that directly load or store an alloca.
+ ///
+ /// This functionality is important because it avoids scanning large basic
+ /// blocks multiple times when promoting many allocas in the same block.
+ class LargeBlockInfo {
+ /// InstNumbers - For each instruction that we track, keep the index of the
+ /// instruction. The index starts out as the number of the instruction from
+ /// the start of the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+ public:
+
+ /// isInterestingInstruction - This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// getInstructionIndex - Get or calculate the index of the specified
+ /// instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end()) return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+ BBI != E; ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) {
+ InstNumbers.erase(I);
+ }
+
+ void clear() {
+ InstNumbers.clear();
+ }
+ };
+
+ struct PromoteMem2Reg {
+ /// Allocas - The alloca instructions being promoted.
+ ///
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT;
+ DIBuilder *DIB;
+
+ /// AST - An AliasSetTracker object to update. If null, don't update it.
+ ///
+ AliasSetTracker *AST;
+
+ /// AllocaLookup - Reverse mapping of Allocas.
+ ///
+ DenseMap<AllocaInst*, unsigned> AllocaLookup;
+
+ /// NewPhiNodes - The PhiNodes we're adding.
+ ///
+ DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+
+ /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+ /// it corresponds to.
+ DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+ /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+ /// each alloca that is of pointer type, we keep track of what to copyValue
+ /// to the inserted PHI nodes here.
+ ///
+ std::vector<Value*> PointerAllocaValues;
+
+ /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
+ /// intrinsic that describes it, if any, so that we can convert it to a
+ /// dbg.value intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
+
+ /// Visited - The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock*, 16> Visited;
+
+ /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+ /// non-deterministic behavior.
+ DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
+ /// BBNumPreds - Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+ public:
+ PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIB(0), AST(ast) {}
+ ~PromoteMem2Reg() {
+ delete DIB;
+ }
+
+ void run();
+
+ /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ ///
+ bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
+ return DT.dominates(BB1, BB2);
+ }
+
+ private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB))+1;
+ return NP-1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
+
+ void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+ void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+ };
+
+ struct AllocaInfo {
+ SmallVector<BasicBlock*, 32> DefiningBlocks;
+ SmallVector<BasicBlock*, 32> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ DbgDeclare = 0;
+ }
+
+ /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
+ /// ivars.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+
+ DbgDeclare = FindAllocaDbgDeclare(AI);
+ }
+ };
+
+ typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+ struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second < RHS.second;
+ }
+ };
+} // end of anonymous namespace
+
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
+
+void PromoteMem2Reg::run() {
+ Function &F = *DT.getRoot()->getParent();
+
+ if (AST) PointerAllocaValues.resize(Allocas.size());
+ AllocaDbgDeclares.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) &&
+ "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ removeLifetimeIntrinsicUsers(AI);
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ RewriteSingleStoreAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (Info.UsingBlocks.empty()) {
+ // Record debuginfo for the store and remove the declaration's debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
+ DDI->eraseFromParent();
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock) {
+ PromoteSingleBlockAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the stores are all that is
+ // left.
+ if (Info.UsingBlocks.empty()) {
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ DDI->eraseFromParent();
+
+ ++NumLocalPromoted;
+ continue;
+ }
+ }
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+
+ // Set the incoming values for the basic block to be null values for all of
+ // the alloca's. We do this in case there is a load of a value that has not
+ // been stored yet. In this case, it will get this null value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walk all basic blocks in the function, performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary.
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ do {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST) AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+ // Remove the allocas' dbg.declare intrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges a single value and/or undefs, get that value.
+ if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (AST && PN->getType()->isPointerTy())
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVector<BasicBlock*, 16>::iterator EntIt =
+ std::lower_bound(Preds.begin(), Preds.end(),
+ SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI node for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+
+/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
+/// are blocks which lead to uses. Knowing this allows us to avoid inserting
+/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
+/// would be dead).
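+///
+/// Illustrative sketch (hypothetical CFG, not taken from a real test case):
+/// for entry -> loop -> exit with a store to the alloca in 'entry' and a load
+/// in 'exit', the worklist starts as {exit}; 'loop' is added because it is not
+/// a defining block, while 'entry' is skipped because it defines the value,
+/// giving LiveInBlocks = {exit, loop}.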
+void PromoteMem2Reg::
+ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
+
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB)) continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI) continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i, --e;
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getOperand(0) != AI) continue;
+
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ break;
+ }
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB))
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it too. Add the preds to the worklist unless they are
+ // defining blocks.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// DetermineInsertionPoint - At this point, we're committed to promoting the
+/// alloca using IDF's, and the standard SSA construction algorithm. Determine
+/// which blocks need phi nodes and see if we can optimize out some work by
+/// avoiding insertion of dead phi nodes.
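+///
+/// Illustrative sketch (hypothetical diamond CFG entry -> {then, else} ->
+/// merge, with stores in 'then' and 'else' and a load after the merge): both
+/// defining blocks sit at dominator-tree level 1, and the only qualifying CFG
+/// edge out of their subtrees targets 'merge' (also level 1), so 'merge' is
+/// the single block queued for a PHI node.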
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info) {
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ DomTreeNodeCompare> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end(); I != E; ++I) {
+ if (DomTreeNode *Node = DT.getNode(*I))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
+ SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode*, 32> Visited;
+ SmallVector<DomTreeNode*, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
+}
+
+/// RewriteSingleStoreAlloca - If there is only a single store to this value,
+/// replace any loads of it that are directly dominated by the definition with
+/// the value stored.
+void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
+ AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+namespace {
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
+/// first element of a pair.
+struct StoreIndexSearchPredicate {
+ bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+ const std::pair<unsigned, StoreInst*> &RHS) {
+ return LHS.first < RHS.first;
+ }
+};
+
+}
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block. If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// the offending blocks are left in Info.UsingBlocks and the caller falls back
+/// to the general mem2reg machinery. This is necessary in cases where, due to
+/// control flow, the alloca is potentially undefined on some control flow
+/// paths. e.g. code like this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ // Walk the use list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // If there are no stores to the alloca, just replace any loads with undef.
+ if (StoresByIndex.empty()) {
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LBI.deleteValue(LI);
+ LI->eraseFromParent();
+ }
+ return;
+ }
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI) continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
+ StoreIndexSearchPredicate());
+
+ // If there is no store before this load, then we can't promote this load.
+ if (I == StoresByIndex.begin()) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+
+ // Otherwise, there was a store before this load, the load takes its value.
+ --I;
+ LI->replaceAllUsesWith(I->second->getOperand(0));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+// QueuePhiNode - Queue a phi-node to be added to a basic-block for a specific
+// Alloca. Returns true if there wasn't already a phi-node for that variable.
+//
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN) return false;
+
+ // Create a PhiNode using the alloca's allocated type and add the phi-node to
+ // the BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ BB->begin());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+
+ if (AST && PN->getType()->isPointerTy())
+ AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+ return true;
+}
+
+// RenamePass - Recursively traverse the CFG of the function, renaming loads and
+// stores to the allocas which we are promoting. IncomingVals indicates what
+// value each Alloca contains on exit from the predecessor block Pred.
+//
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = 0;
+ for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
+ if (*I == BB)
+ ++NumEdges;
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (APN == 0) break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB)) return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ Instruction *I = II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src) continue;
+
+ DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end()) continue;
+
+ Value *V = IncomingVals[AI->second];
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and record the stored value as the current value
+ // of the alloca.
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest) continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I))
+ Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+
+ goto NextIteration;
+}
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate. This function does
+/// not modify the CFG of the function at all. All allocas must be from the
+/// same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
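+/// A minimal usage sketch (hypothetical caller; the exact analysis plumbing is
+/// an assumption, the in-tree Mem2Reg pass does essentially this):
+///
+///   std::vector<AllocaInst*> Allocas;
+///   BasicBlock &Entry = F.getEntryBlock();
+///   for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
+///     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+///       if (isAllocaPromotable(AI))
+///         Allocas.push_back(AI);
+///   if (!Allocas.empty())
+///     PromoteMemToReg(Allocas, DT, 0 /*no AliasSetTracker*/);
+///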
+void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+ DominatorTree &DT, AliasSetTracker *AST) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty()) return;
+
+ PromoteMem2Reg(Allocas, DT, AST).run();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 000000000000..b47a7ccd80ba
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,520 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ssaupdater"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete &getAvailableVals(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+ ProtoType = Ty;
+ ProtoName = Name;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
+/// in ValueMapping for each predecessor block.
+static bool IsEquivalentPHI(PHINode *PHI,
+ DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+
+ // Scan the phi to see if it matches.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping[PHI->getIncomingBlock(i)] !=
+ PHI->getIncomingValue(i)) {
+ return false;
+ }
+
+ return true;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = 0;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ InsertedPHI->eraseFromParent();
+ return V;
+ }
+
+ // Set DebugLoc.
+ InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB));
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ U.set(V);
+}
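+
+// A minimal usage sketch of the SSAUpdater interface (hypothetical client
+// code; the value and block names are assumptions):
+//
+//   SmallVector<PHINode*, 8> NewPHIs;
+//   SSAUpdater SSA(&NewPHIs);
+//   SSA.Initialize(OldVal->getType(), OldVal->getName());
+//   SSA.AddAvailableValue(DefBB1, NewVal1);   // value live out of DefBB1
+//   SSA.AddAvailableValue(DefBB2, NewVal2);   // value live out of DefBB2
+//   while (!OldVal->use_empty())
+//     SSA.RewriteUse(OldVal->use_begin().getUse());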
+
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
+/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
+/// in the SSAUpdaterImpl template.
+namespace {
+ class PHIiter {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHIiter(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHIiter(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHIiter &operator++() { ++idx; return *this; }
+ bool operator==(const PHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const PHIiter& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+}
+
+/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
+/// specialized for SSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ typedef PHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds vector.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ ///
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ Value *SomeVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
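+
+// A minimal usage sketch (hypothetical subclass; the full set of virtual hooks
+// is declared in SSAUpdater.h and only isInstInList is overridden here):
+//
+//   struct AllocaPromoter : public LoadAndStorePromoter {
+//     AllocaInst *AI;
+//     AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+//                    AllocaInst *AI)
+//       : LoadAndStorePromoter(Insts, S, AI->getName()), AI(AI) {}
+//     virtual bool isInstInList(Instruction *I,
+//                               const SmallVectorImpl<Instruction*> &) const {
+//       if (LoadInst *LI = dyn_cast<LoadInst>(I))
+//         return LI->getOperand(0) == AI;
+//       return cast<StoreInst>(I)->getPointerOperand() == AI;
+//     }
+//   };
+//
+//   SmallVector<Instruction*, 16> Uses;   // the loads/stores of one alloca
+//   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+//        UI != E; ++UI)
+//     Uses.push_back(cast<Instruction>(*UI));
+//   SSAUpdater SSA;
+//   AllocaPromoter(Uses, SSA, AI).run(Uses);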
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in use-list order to be deterministic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ BasicBlock *BB = User->getParent();
+ std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ updateDebugInfo(SI);
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ } else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(SI, Insts)) continue;
+ updateDebugInfo(SI);
+
+ // Remember that this is the active value in the block.
+ StoredValue = SI->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 000000000000..9d9c324b8468
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,2779 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+#include <map>
+using namespace llvm;
+
+static cl::opt<unsigned>
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
+STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
+
+namespace {
+class SimplifyCFGOpt {
+ const TargetData *const TD;
+
+ Value *isValueEqualityComparison(TerminatorInst *TI);
+ BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder);
+ bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder);
+
+ bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+ bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
+ bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
+
+public:
+ explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I))
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block. The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Succ->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
+/// least one PHI node in it), check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
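+///
+/// Illustrative IR sketch (hypothetical block and value names):
+///
+///   Entry:  br i1 %cond, label %T, label %F
+///   T:      br label %BB
+///   F:      br label %BB
+///   BB:     %phi = phi i32 [ %a, %T ], [ %b, %F ]
+///
+/// Here the function returns %cond with IfTrue = T and IfFalse = F.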
+static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = cast<PHINode>(BB->begin());
+ assert(SomePHI->getNumIncomingValues() == 2 &&
+ "Function can only handle blocks with 2 predecessors!");
+ BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
+ BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
+ return 0;
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+}
+
+/// DominatesMergePoint - If we have a merge point of an "if condition" as
+/// accepted above, return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// are non-trapping. If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, CostRemaining is decreased by the cost of
+/// V plus its non-dominating operands. If that cost is greater than
+/// CostRemaining, false is returned and CostRemaining is undefined.
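+///
+/// For example (an illustrative sketch of how a caller may use the result):
+/// when considering "if (c) x = a + b; else x = 0;", the "a + b" add lives in
+/// the 'if region'; it is cheap and non-trapping, so it is added to
+/// AggressiveInsts and its cost is charged against CostRemaining, allowing the
+/// caller to speculate it above the branch and form a select.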
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+ SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+ unsigned &CostRemaining) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions all dominate instructions, but not all constantexprs
+ // can be executed unconditionally.
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+ if (C->canTrap())
+ return false;
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB) return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
+
+ // If we aren't allowing aggressive promotion anymore, then don't consider
+ // instructions in the 'if region'.
+ if (AggressiveInsts == 0) return false;
+
+ // If we have seen this instruction before, don't count it again.
+ if (AggressiveInsts->count(I)) return true;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+
+ unsigned Cost = 0;
+
+ switch (I->getOpcode()) {
+ default: return false; // Cannot hoist this out safely.
+ case Instruction::Load:
+ // We have to check to make sure there are no instructions before the
+ // load in its basic block, as we are going to hoist the load out to its
+ // predecessor.
+ if (PBB->getFirstNonPHIOrDbg() != I)
+ return false;
+ Cost = 1;
+ break;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
+ return false;
+ Cost = 1;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ Cost = 1;
+ break; // These are all cheap and non-trapping instructions.
+
+ case Instruction::Select:
+ Cost = 2;
+ break;
+ }
+
+ if (Cost > CostRemaining)
+ return false;
+
+ CostRemaining -= Cost;
+
+ // Okay, we can only really hoist these out if their operands do
+ // not take us over the cost threshold.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
+ return true;
+}
+
+/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
+/// and PointerNullValue. Return NULL if value is not a constant int.
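+///
+/// For example (illustrative): with TargetData available, the constant
+/// "inttoptr (i64 42 to i8*)" yields a pointer-sized ConstantInt 42 and a
+/// "null" i8* yields a pointer-sized 0; without TargetData both return null.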
+static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
+ // Normal constant int.
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+ if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ return CI;
+
+ // This is some kind of pointer constant. Turn it into a pointer-sized
+ // ConstantInt if possible.
+ const IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+
+ // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+ if (isa<ConstantPointerNull>(V))
+ return ConstantInt::get(PtrTy, 0);
+
+ // IntToPtr const int.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ // The constant is very likely to have the right type already.
+ if (CI->getType() == PtrTy)
+ return CI;
+ else
+ return cast<ConstantInt>
+ (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ }
+ return 0;
+}
+
+/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
+/// collection of icmp eq/ne instructions that compare a value against a
+/// constant, return the value being compared, and stick the constant into the
+/// Values vector.
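+///
+/// For example (illustrative): with isEQ=true, "x == 1 || x == 3" returns x
+/// and appends 1 and 3 to Vals; with isEQ=false, "x != 5 && x ugt 2" returns x
+/// and appends 5, 0, 1 and 2 (the last three from the inverted ugt range).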
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+ const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ // If this is an icmp against a constant, handle this as one of the cases.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ UsedICmps++;
+ Vals.push_back(C);
+ return I->getOperand(0);
+ }
+
+ // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+ // the set.
+ ConstantRange Span =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // If this is an and/!= check, then we want to optimize "x ugt 2" into
+ // x != 0 && x != 1 && x != 2.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
+ // We don't handle wrapped sets yet.
+ Span.isWrappedSet())
+ return 0;
+
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
+ return I->getOperand(0);
+ }
+ return 0;
+ }
+
+ // Otherwise, we can only handle an | or &, depending on isEQ.
+ if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+ return 0;
+
+ unsigned NumValsBeforeLHS = Vals.size();
+ unsigned UsedICmpsBeforeLHS = UsedICmps;
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ unsigned NumVals = Vals.size();
+ unsigned UsedICmpsBeforeRHS = UsedICmps;
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ if (LHS == RHS)
+ return LHS;
+ Vals.resize(NumVals);
+ UsedICmps = UsedICmpsBeforeRHS;
+ }
+
+ // The RHS of the or/and can't be folded in; if we haven't used "Extra" yet,
+ // stash the RHS there and return success.
+ if (Extra == 0 || Extra == I->getOperand(1)) {
+ Extra = I->getOperand(1);
+ return LHS;
+ }
+
+ Vals.resize(NumValsBeforeLHS);
+ UsedICmps = UsedICmpsBeforeLHS;
+ return 0;
+ }
+
+ // If the LHS can't be folded in, but Extra is available and RHS can, try to
+ // use LHS as Extra.
+ if (Extra == 0 || Extra == I->getOperand(0)) {
+ Value *OldExtra = Extra;
+ Extra = I->getOperand(0);
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps))
+ return RHS;
+ assert(Vals.size() == NumValsBeforeLHS);
+ Extra = OldExtra;
+ }
+
+ return 0;
+}
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+ Instruction* Cond = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
+ }
+
+ TI->eraseFromParent();
+ if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - If the specified terminator checks whether a
+/// value is equal to a constant integer value, return the value being
+/// compared; otherwise return null.
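+///
+/// For example (illustrative): "switch i32 %x, ..." yields %x, as does
+/// "br i1 (icmp eq i32 %x, 10), label %A, label %B" when the icmp has a
+/// single use.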
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+ Value *CV = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) <= 128)
+ CV = SI->getCondition();
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
+ ICI->getPredicate() == ICmpInst::ICMP_NE) &&
+ GetConstantInt(ICI->getOperand(1), TD))
+ CV = ICI->getOperand(0);
+
+ // Unwrap any lossless ptrtoint cast.
+ if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ CV = PTII->getOperand(0);
+ return CV;
+}
+
+/// GetValueEqualityComparisonCases - Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
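+///
+/// A hypothetical sketch of both forms:
+///   switch i32 %x, label %def [ i32 1, label %a i32 2, label %b ]
+/// fills Cases with (1, %a) and (2, %b) and returns %def, while
+///   %c = icmp ne i32 %x, 7
+///   br i1 %c, label %t, label %f
+/// fills Cases with (7, %f) and returns %t.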
+BasicBlock *SimplifyCFGOpt::
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*,
+ BasicBlock*> > &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD),
+ BI->getSuccessor(ICI->getPredicate() ==
+ ICmpInst::ICMP_NE)));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+
+/// EliminateBlockCases - Given a vector of value/block pairs, remove any
+/// entries in the list whose block matches the specified block.
+static void EliminateBlockCases(BasicBlock *BB,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) {
+ for (unsigned i = 0, e = Cases.size(); i != e; ++i)
+ if (Cases[i].second == BB) {
+ Cases.erase(Cases.begin()+i);
+ --i; --e;
+ }
+}
+
+/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
+/// well.
+static bool
+ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) {
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->size() == 0) return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].first;
+ for (unsigned i = 0, e = V2->size(); i != e; ++i)
+ if (TheVal == (*V2)[i].first)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].first == (*V2)[i2].first)
+ return true;
+ if ((*V1)[i1].first < (*V2)[i2].first)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+/// terminator instruction and its block is known to only have a single
+/// predecessor block, check to see if that predecessor is also a value
+/// comparison with the same value, and if that comparison determines the
+/// outcome of this comparison. If so, simplify TI. This does a very limited
+/// form of jump threading.
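+///
+/// A hypothetical example: if Pred ends in
+///   switch i32 %x, label %TIBlock [ i32 0, label %a ]
+/// and TI, the terminator of %TIBlock, is
+///   %c = icmp eq i32 %x, 0
+///   br i1 %c, label %t, label %f
+/// then %x cannot be 0 on the default edge, so the edge to %t is dead and TI
+/// can be replaced with an unconditional branch to %f.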
+bool SimplifyCFGOpt::
+SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal) return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal) return false; // Different predicates.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+ PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // unconditional branch.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(ThisDef);
+ (void) NI;
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].second->removePredecessor(TI->getParent());
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].first);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+ if (DeadCases.count(SI->getCaseValue(i))) {
+ SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == TIBB) {
+ if (TIV != 0)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].first;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].first == TIV) {
+ TheRealDest = ThisCases[i].second;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void) NI;
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+}
+
+namespace {
+ /// ConstantIntOrdering - This class implements a stable ordering of constant
+ /// integers that does not depend on their address. This is important for
+ /// applications that sort ConstantInt's to ensure uniqueness.
+ struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+ };
+}
+
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+ const ConstantInt *LHS = *(const ConstantInt**)P1;
+ const ConstantInt *RHS = *(const ConstantInt**)P2;
+ if (LHS->getValue().ult(RHS->getValue()))
+ return 1;
+ if (LHS->getValue() == RHS->getValue())
+ return 0;
+ return -1;
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
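+///
+/// As an illustrative sketch (hypothetical names), a predecessor ending in
+///   switch i32 %x, label %BB [ i32 1, label %a ]
+/// whose default edge reaches a block %BB ending in
+///   switch i32 %x, label %d [ i32 2, label %b ]
+/// can be folded into a single
+///   switch i32 %x, label %d [ i32 1, label %a i32 2, label %b ]
+/// in the predecessor, with PHI nodes in the affected successors updated.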
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second != BB)
+ PTIHandled.insert(PredCases[i].first);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].first) &&
+ BBCases[i].second != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ }
+
+ } else {
+ // If this is not the default destination from PTI, only the edges
+ // in TI that occur in PTI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == BB) {
+ PTIHandled.insert(PredCases[i].first);
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].first)) {
+ // If this is one we are capable of getting...
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ PTIHandled.erase(BBCases[i].first);// This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ PredCases.push_back(std::make_pair(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
+ PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].first, PredCases[i].second);
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of the new switch, then we
+ // must have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block. The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2.
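+///
+/// An illustrative sketch (hypothetical names): if BB1 begins with
+/// "%a1 = add i32 %x, 1" and BB2 begins with "%a2 = add i32 %x, 1", the add is
+/// moved up into the branch block, all uses of %a2 are replaced with %a1, and
+/// hoisting continues instruction by instruction until the blocks diverge.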
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
+
+ // If we get here, we can hoist at least one instruction.
+ BasicBlock *BIParent = BI->getParent();
+
+ do {
+ // If we are hoisting the terminator instruction, don't move it (that would
+ // make a broken BB); instead clone it and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ I1 = BB1_Itr++;
+ I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ } while (I1->isIdenticalToWhenDefined(I2));
+
+ return true;
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI, NT);
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ IRBuilder<true, NoFolder> Builder(NT);
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If the PHI entries for BB1 and BB2 do not agree, we insert a select
+ // instruction to compute the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName()));
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
+
+/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 and
+/// BB2, where BB2 is the only successor of BB1, hoist simple code (for now,
+/// restricted to a single side-effect-free instruction) from BB1 into the
+/// branch block so it can be speculatively executed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ Instruction *HInst = NULL;
+ Instruction *Term = BB1->getTerminator();
+ for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
+
+ if (HInst)
+ return false;
+ HInst = I;
+ }
+ if (!HInst)
+ return false;
+
+ // Be conservative for now. FP select instructions can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ // If BB1 is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (BB1 != BI->getSuccessor(0)) {
+ assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+
+ // Turn
+ // BB:
+ //     %t1 = icmp
+ //     br i1 %t1, label %BB1, label %BB2
+ // BB1:
+ //     %t3 = add %t2, c
+ //     br label %BB2
+ // BB2:
+ //     %p = phi [ %t3, %BB1 ], [ %t2, %BB ]
+ // =>
+ // BB:
+ //     %t1 = icmp
+ //     %t4 = add %t2, c
+ //     %p = select i1 %t1, %t4, %t2
+ switch (HInst->getOpcode()) {
+ default: return false; // Not safe / profitable to hoist.
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Not worth doing for vector ops.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Don't mess with vector operations.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // If the instruction is obviously dead, don't try to predicate it.
+ if (HInst->use_empty()) {
+ HInst->eraseFromParent();
+ return true;
+ }
+
+ // Can we speculatively execute the instruction? And what is the value
+ // if the condition is false? Consider the phi uses: if the incoming values
+ // from the "if" block are all the same V, then V is the value of the
+ // select if the condition is false.
+ BasicBlock *BIParent = BI->getParent();
+ SmallVector<PHINode*, 4> PHIUses;
+ Value *FalseV = NULL;
+
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
+ UI != E; ++UI) {
+ // Bail out on any user that is not a PHI node in BB2. Such users could only
+ // occur in unreachable blocks, because they would not be dominated by the
+ // instruction.
+ PHINode *PN = dyn_cast<PHINode>(*UI);
+ if (!PN || PN->getParent() != BB2)
+ return false;
+ PHIUses.push_back(PN);
+
+ Value *PHIV = PN->getIncomingValueForBlock(BIParent);
+ if (!FalseV)
+ FalseV = PHIV;
+ else if (FalseV != PHIV)
+ return false; // Inconsistent value when condition is false.
+ }
+
+ assert(FalseV && "Must have at least one user, and it must be a PHI");
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+
+ // If we get here, we can hoist the instruction. Try to place it
+ // before the icmp instruction preceding the conditional branch.
+ BasicBlock::iterator InsertPos = BI;
+ if (InsertPos != BIParent->begin())
+ --InsertPos;
+ // Skip debug info between condition and branch.
+ while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
+ --InsertPos;
+ if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
+ SmallPtrSet<Instruction *, 4> BB1Insns;
+ for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
+ BB1I != BB1E; ++BB1I)
+ BB1Insns.insert(BB1I);
+ for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
+ UI != UE; ++UI) {
+ Instruction *Use = cast<Instruction>(*UI);
+ if (!BB1Insns.count(Use)) continue;
+
+ // If some instruction in BB1 uses BrCond, place HInst just before the
+ // branch instruction instead.
+ InsertPos = BI;
+ break;
+ }
+ } else
+ InsertPos = BI;
+ BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ IRBuilder<true, NoFolder> Builder(BI);
+ SelectInst *SI;
+ if (Invert)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, FalseV, HInst,
+ FalseV->getName() + "." + HInst->getName()));
+ else
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, HInst, FalseV,
+ HInst->getName() + "." + FalseV->getName()));
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
+ PHINode *PN = PHIUses[i];
+ for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
+ if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
+ PN->setIncomingValue(j, SI);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned Size = 0;
+
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (Size > 10) return false; // Don't clone large BB's.
+ ++Size;
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
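+///
+/// A hypothetical sketch: given
+///   bb:
+///     %p = phi i1 [ true, %pred1 ], [ %c, %pred2 ]
+///     br i1 %p, label %t, label %f
+/// the edge from %pred1 is rerouted through a new block that branches directly
+/// to %t, since the branch outcome is known to be true along that edge.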
+static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+ // Skip if the predecessor's terminator is an indirect branch.
+ if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, TD) | true;
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
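+///
+/// As an illustrative sketch (hypothetical names), a diamond of the form
+///   entry:  br i1 %c, label %then, label %else
+///   then:   br label %merge
+///   else:   br label %merge
+///   merge:  %p = phi i32 [ %a, %then ], [ %b, %else ]
+/// can be flattened so that %p becomes "select i1 %c, i32 %a, i32 %b" in the
+/// dominating block, provided the side blocks are cheap enough to speculate.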
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1))
+ return false;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
+ // If all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+ Instruction *InsertPt = DomBlock->getTerminator();
+ IRBuilder<true, NoFolder> Builder(InsertPt);
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
+ IfBlock2->getTerminator());
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ SelectInst *NV =
+ cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+ Builder.CreateBr(BB);
+ OldTI->eraseFromParent();
+ return true;
+}
+
+/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
+/// to two returning blocks, try to merge them together into one return,
+/// introducing a select if the return values disagree.
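+///
+/// For illustration (hypothetical values): a branch to two blocks that return
+/// %a and %b respectively can be rewritten in the branch block as
+///   %retval = select i1 %cond, i32 %a, i32 %b
+///   ret i32 %retval
+/// with the select omitted when both sides return the same value or one side
+/// returns undef.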
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+ // Check to ensure both blocks are empty (just a return) or contain nothing
+ // but PHI nodes and the return. If there are other instructions, merging
+ // would cause extra computation on one path or the other.
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+
+ Builder.SetInsertPoint(BI);
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ Builder.CreateRetVoid();
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+ // Okay, we collected all the mapped values and checked them for sanity, and
+ // decided to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue = Builder.CreateSelect(BrCond, TrueValue,
+ FalseValue, "retval");
+ }
+ }
+
+ Value *RI = !TrueValue ?
+ Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+
+ (void) RI;
+
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
+
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
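+///
+/// An illustrative sketch (hypothetical names): with a predecessor ending in
+///   br i1 %pc, label %bb, label %common
+/// and this block being
+///   bb:
+///     %c = icmp eq i32 %x, 0
+///     br i1 %c, label %t, label %common
+/// both branches share %common on their false edges, so the predecessor can be
+/// rewritten to branch on "and i1 %pc, %c" with %c cloned into it.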
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->isSafeToSpeculativelyExecute()) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+ }
+
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+ if (&*CondIt != BI)
+ return false;
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond = false;
+
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier.
+ if (BonusInst) {
+ // Collect the values used by the bonus inst
+ SmallPtrSet<Value*, 4> UsedValues;
+ for (Instruction::op_iterator OI = BonusInst->op_begin(),
+ OE = BonusInst->op_end(); OI != OE; ++OI) {
+ Value* V = *OI;
+ if (!isa<Constant>(V))
+ UsedValues.insert(V);
+ }
+
+ SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+ Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+
+ // Walk up to four levels back up the use-def chain of the predecessor's
+ // terminator to see if all those values were used. The choice of four
+ // levels is arbitrary, to provide a compile-time-cost bound.
+ while (!Worklist.empty()) {
+ std::pair<Value*, unsigned> Pair = Worklist.back();
+ Worklist.pop_back();
+
+ if (Pair.second >= 4) continue;
+ UsedValues.erase(Pair.first);
+ if (UsedValues.empty()) break;
+
+ if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+ }
+ }
+
+ if (!UsedValues.empty()) return false;
+ }
+
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ IRBuilder<> Builder(PBI);
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond = Builder.CreateNot(NewCond,
+ PBI->getCondition()->getName()+".not");
+ }
+
+ PBI->setCondition(NewCond);
+ BasicBlock *OldTrue = PBI->getSuccessor(0);
+ BasicBlock *OldFalse = PBI->getSuccessor(1);
+ PBI->setSuccessor(0, OldFalse);
+ PBI->setSuccessor(1, OldTrue);
+ }
+
+ // If we have a bonus inst, clone it into the predecessor block.
+ Instruction *NewBonus = 0;
+ if (BonusInst) {
+ NewBonus = BonusInst->clone();
+ PredBlock->getInstList().insert(PBI, NewBonus);
+ NewBonus->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName()+".old");
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
+ PredBlock->getInstList().insert(PBI, New);
+ New->takeName(Cond);
+ Cond->setName(New->getName()+".old");
+
+ Instruction *NewCond =
+ cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
+ New, "or.cond"));
+ PBI->setCondition(NewCond);
+ if (PBI->getSuccessor(0) == BB) {
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isa<DbgInfoIntrinsic>(*I))
+ I->clone()->insertBefore(PBI);
+
+ return true;
+ }
+ return false;
+}
+
+/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
+/// predecessor of another block, this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
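+///
+/// A hypothetical sketch of the simplest case: if PBI is
+///   br i1 %c, label %bb, label %x
+/// and the branch BI in %bb is also conditional on %c, then %c is known to be
+/// true whenever %bb is entered from PBI, so BI's condition can be replaced
+/// with a constant (or with a PHI of constants when %bb has several
+/// predecessors).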
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue), P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessor is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<true, NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+
+ // Merge the conditions.
+ Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // OtherDest may have phi nodes. If so, add entries from PBI's
+ // block that are identical to the entries for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ PHINode *PN;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ Value *NV = cast<SelectInst>
+ (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB){
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+
+ // Then remove the rest.
+ for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = OldTerm->getSuccessor(I);
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = 0;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = 0;
+ else
+ Succ->removePredecessor(OldTerm->getParent());
+ }
+
+ IRBuilder<> Builder(OldTerm);
+ Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
+ // Insert an appropriate new terminator.
+ if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (TrueBB == FalseBB)
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ Builder.CreateBr(TrueBB);
+ else
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks was a successor, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (KeepEdge1 == 0)
+ // Only TrueBB was found.
+ Builder.CreateBr(TrueBB);
+ else
+ // Only FalseBB was found.
+ Builder.CreateBr(FalseBB);
+ }
+
+ EraseTerminatorInstAndDCECond(OldTerm);
+ return true;
+}
+
+// SimplifySwitchOnSelect - Replaces
+// (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
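+//
+// For example (hypothetical values):
+//   %v = select i1 %c, i32 1, i32 2
+//   switch i32 %v, label %def [ i32 1, label %a i32 2, label %b ]
+// becomes "br i1 %c, label %a, label %b".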
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+ // Check for constant integer values in the select.
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+ ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+ if (!TrueVal || !FalseVal)
+ return false;
+
+ // Find the relevant condition and destinations.
+ Value *Condition = Select->getCondition();
+ BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
+ BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+}
+
+// SimplifyIndirectBrOnSelect - Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
+
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+}
+
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but the
+/// switch's default destination is then a block with a seteq in it that ends
+/// in an uncond branch, and we get something like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ const TargetData *TD,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = ICI->getParent();
+
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, TD)) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != 0) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
+
+ // If the icmp is a SETEQ, then the default dest gets false and the new edge
+ // gets true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
+
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
+
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
+
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
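+///
+/// For example (illustrative IR, names invented):
+///   %a = icmp eq i32 %x, 0
+///   %b = icmp eq i32 %x, 1
+///   %c = or i1 %a, %b
+///   br i1 %c, label %T, label %F
+/// becomes roughly:
+///   switch i32 %x, label %F [ i32 0, label %T
+///                             i32 1, label %T ]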
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+ IRBuilder<> &Builder) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+ // If we didn't find a value compared against multiple constants, fail.
+ if (CompVal == 0) return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle; remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If ExtraCase was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a conditional branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CompVal = Builder.CreatePtrToInt(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty() && DupRet) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI, Builder))
+ return true;
+ }
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
+ // Check to see if the first instruction in this block is just an unwind.
+ // If so, replace any invoke instructions which use this as an exception
+ // destination with call instructions.
+ BasicBlock *BB = UI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ bool Changed = false;
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.back();
+ InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
+ if (II && II->getUnwindDest() == BB) {
+ // Insert a new branch instruction before the invoke, because this
+ // is now a fall through.
+ Builder.SetInsertPoint(II);
+ BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+ Pred->getInstList().remove(II); // Take out of symbol table
+
+ // Insert the call now.
+ SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
+ Builder.SetInsertPoint(BI);
+ CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+ Args, II->getName());
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the Call now does instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+
+ Preds.pop_back();
+ }
+
+ // If this block is now dead (and isn't the entry block), remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI != BB->begin()) {
+ BasicBlock::iterator BBI = UI;
+ --BBI;
+ // Do not delete instructions that can have side effects, like calls
+ // (which may never return) and volatile loads and stores.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (SI->isVolatile())
+ break;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ if (LI->isVolatile())
+ break;
+
+ // Delete this instruction (any uses are guaranteed to be dead)
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI) return Changed;
+
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+ IRBuilder<> Builder(TI);
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ Builder.CreateBr(BI->getSuccessor(1));
+ EraseTerminatorInstAndDCECond(BI);
+ } else if (BI->getSuccessor(1) == BB) {
+ Builder.CreateBr(BI->getSuccessor(0));
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+ // If the default value is unreachable, figure out the most popular
+ // destination and make it the default.
+ if (SI->getSuccessor(0) == BB) {
+ std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ std::pair<unsigned, unsigned>& entry =
+ Popularity[SI->getSuccessor(i)];
+ if (entry.first == 0) {
+ entry.first = 1;
+ entry.second = i;
+ } else {
+ entry.first++;
+ }
+ }
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ unsigned MaxIndex = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second.first > MaxPop ||
+ (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+ MaxPop = I->second.first;
+ MaxIndex = I->second.second;
+ MaxBlock = I->first;
+ }
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setSuccessor(0, MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == MaxBlock) {
+ SI->removeCase(i);
+ --i; --e;
+ }
+ }
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+ Builder.SetInsertPoint(BI);
+ CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+ Args, II->getName());
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch that contains only an
+/// integer range comparison into a sub, an icmp and a branch.
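+///
+/// For example, if the non-default cases 3, 4 and 5 all branch to %bb, the
+/// switch becomes roughly (names invented):
+///   %x.off = add i32 %x, -3
+///   %switch = icmp ult i32 %x.off, 3
+///   br i1 %switch, label %bb, label %default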
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+ // Make sure all cases point to the same destination and gather the values.
+ SmallVector<ConstantInt *, 16> Cases;
+ Cases.push_back(SI->getCaseValue(1));
+ for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+ if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+ return false;
+ Cases.push_back(SI->getCaseValue(I));
+ }
+ assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+ // Sort the case values, then check if they form a range we can transform.
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+ return false;
+ }
+
+ Constant *Offset = ConstantExpr::getNeg(Cases.back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+ Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
+
+ // Prune obsolete incoming values off the successor's PHI nodes.
+ for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ return true;
+}
+
+/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
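+///
+/// For example, if ComputeMaskedBits proves that the low bit of the condition
+/// is always zero, then an odd case value such as 'i32 5' can never be taken
+/// and its case is removed (a sketch of the intent, not an exhaustive rule).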
+static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+ APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
+ ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne);
+
+ // Gather dead cases.
+ SmallVector<ConstantInt*, 8> DeadCases;
+ for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
+ if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
+ (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
+ DeadCases.push_back(SI->getCaseValue(I));
+ DEBUG(dbgs() << "SimplifyCFG: switch case '"
+ << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+ }
+ }
+
+ // Remove dead cases from the switch.
+ for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
+ unsigned Case = SI->findCaseValue(DeadCases[I]);
+ // Prune unused values from PHI nodes.
+ SI->getSuccessor(Case)->removePredecessor(SI->getParent());
+ SI->removeCase(Case);
+ }
+
+ return !DeadCases.empty();
+}
+
+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB,
+ int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return NULL; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return NULL; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return NULL; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ BasicBlock::iterator I = Succ->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI->getIncomingValue(Idx);
+ if (InValue != CaseValue) continue;
+
+ *PhiIndex = Idx;
+ return PHI;
+ }
+
+ return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
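+///
+/// For example (illustrative IR), given
+///   switch i32 %x, label %dflt [ i32 1, label %a  i32 2, label %b ]
+/// where %a and %b are empty blocks that branch to %merge and
+///   %p = phi i32 [ 1, %a ], [ 2, %b ], ...
+/// the incoming values match the case values, so both can be replaced with %x,
+/// leaving %a and %b trivially foldable.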
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+ ForwardingNodesMap ForwardingNodes;
+
+ for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
+ ConstantInt *CaseValue = SI->getCaseValue(I);
+ BasicBlock *CaseDest = SI->getSuccessor(I);
+
+ int PhiIndex;
+ PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+ &PhiIndex);
+ if (!PHI) continue;
+
+ ForwardingNodes[PHI].push_back(PhiIndex);
+ }
+
+ bool Changed = false;
+
+ for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+ E = ForwardingNodes.end(); I != E; ++I) {
+ PHINode *Phi = I->first;
+ SmallVector<int,4> &Indexes = I->second;
+
+ if (Indexes.size() < 2) continue;
+
+ for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+ Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+ // If this switch is too complex to want to look at, ignore it.
+ if (!isValueEqualityComparison(SI))
+ return false;
+
+ BasicBlock *BB = SI->getParent();
+
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
+
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB) | true;
+
+ // Try to transform the switch into an icmp and a branch.
+ if (TurnSwitchRangeIntoICmp(SI, Builder))
+ return SimplifyCFG(BB) | true;
+
+ // Remove unreachable cases.
+ if (EliminateDeadSwitchCases(SI))
+ return SimplifyCFG(BB) | true;
+
+ if (ForwardSwitchConditionToPHI(SI))
+ return SimplifyCFG(BB) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB) | true;
+ }
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
+ BasicBlock *BB = BI->getParent();
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
+
+ // If the only instruction in the block is a seteq/setne comparison
+ // against a constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator()
+ && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
+ return true;
+ }
+
+ return false;
+}
+
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ BasicBlock *BB = BI->getParent();
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ // Ignore dbg intrinsics.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())){
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB) | true;
+ }
+ }
+
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, TD, Builder))
+ return true;
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (HoistThenElseCodeToIf(BI))
+ return SimplifyCFG(BB) | true;
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to successor #1.
+ TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to successor #0.
+ TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+ return SimplifyCFG(BB) | true;
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI, TD))
+ return SimplifyCFG(BB) | true;
+
+ // If this basic block is ONLY a setcc and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the setcc into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB, true);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ IRBuilder<> Builder(BB);
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TD);
+
+ Builder.SetInsertPoint(BB->getTerminator());
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ if (SimplifyUncondBranch(BI, Builder)) return true;
+ } else {
+ if (SimplifyCondBranch(BI, Builder)) return true;
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (SimplifyReturn(RI, Builder)) return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (SimplifySwitch(SI, Builder)) return true;
+ } else if (UnreachableInst *UI =
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI)) return true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ if (SimplifyUnwind(UI, Builder)) return true;
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI)) return true;
+ }
+
+ return Changed;
+}
+
+/// SimplifyCFG - This function is used to do simplification of a CFG. For
+/// example, it adjusts branches to branches to eliminate the extra hop, it
+/// eliminates unreachable basic blocks, and does other "peephole" optimization
+/// of the CFG. It returns true if a modification was made.
+///
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
+ return SimplifyCFGOpt(TD).run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 000000000000..ac005f95b33a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,94 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
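+//
+// The pass is registered under the name "instsimplify" (see INITIALIZE_PASS at
+// the bottom of this file), so it can typically be exercised on its own with
+// something like: opt -instsimplify -S input.ll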
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+ struct InstSimplifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstSimplifier() : FunctionPass(ID) {
+ initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+ /// runOnFunction - Remove instructions that simplify.
+ bool runOnFunction(Function &F) {
+ const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ bool Changed = false;
+
+ do {
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+ for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ Instruction *I = BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty())
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+ }
+ };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
+ false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+ return new InstSimplifier();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 000000000000..46d4adaaa154
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,142 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If there are no exit nodes in the CFG, the recorded
+// exit block will be a null pointer.
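+//
+// For example, a function with two returning blocks
+//   A: ret i32 %x
+//   B: ret i32 %y
+// is rewritten (roughly) so that both branch to a single block:
+//   UnifiedReturnBlock:
+//     %UnifiedRetVal = phi i32 [ %x, %A ], [ %y, %B ]
+//     ret i32 %UnifiedRetVal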
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodes::ID = 0;
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false)
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// runOnFunction - Unify all exit nodes of the CFG by creating a new
+// BasicBlock and converting all returns to unconditional branches to this
+// new basic block. The singular exit node is recorded in ReturnBlock.
+//
+// If there are no return stmts in the Function, ReturnBlock is set to null.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ std::vector<BasicBlock*> ReturningBlocks;
+ std::vector<BasicBlock*> UnwindingBlocks;
+ std::vector<BasicBlock*> UnreachableBlocks;
+ for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (isa<ReturnInst>(I->getTerminator()))
+ ReturningBlocks.push_back(I);
+ else if (isa<UnwindInst>(I->getTerminator()))
+ UnwindingBlocks.push_back(I);
+ else if (isa<UnreachableInst>(I->getTerminator()))
+ UnreachableBlocks.push_back(I);
+
+ // Handle unwinding blocks first.
+ if (UnwindingBlocks.empty()) {
+ UnwindBlock = 0;
+ } else if (UnwindingBlocks.size() == 1) {
+ UnwindBlock = UnwindingBlocks.front();
+ } else {
+ UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+ new UnwindInst(F.getContext(), UnwindBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+ E = UnwindingBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unwind insn
+ BranchInst::Create(UnwindBlock, BB);
+ }
+ }
+
+ // Then unreachable blocks.
+ if (UnreachableBlocks.empty()) {
+ UnreachableBlock = 0;
+ } else if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+ E = UnreachableBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty()) {
+ ReturnBlock = 0;
+ return false; // No blocks return
+ } else if (ReturningBlocks.size() == 1) {
+ ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return false;
+ }
+
+ // Otherwise, we need to insert a new basic block into the function, add a PHI
+ // node (if the function returns a value), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
+
+ PHINode *PN = 0;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+ E = ReturningBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+ ReturnBlock = NewRetBlock;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 000000000000..24e8c8ff5c5f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokePass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializePromotePassPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
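+
+// A minimal usage sketch from C client code (assuming the client wants to
+// populate the global pass registry from llvm-c/Core.h):
+//   LLVMInitializeTransformUtils(LLVMGetGlobalPassRegistry());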
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 000000000000..973b105a1cbb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,201 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Metadata.h"
+using namespace llvm;
+
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper) {
+ ValueToValueMapTy::iterator I = VM.find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != VM.end() && I->second) return I->second;
+
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
+ if (isa<GlobalValue>(V) || isa<MDString>(V))
+ return VM[V] = const_cast<Value*>(V);
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack());
+ }
+
+ return VM[V] = const_cast<Value*>(V);
+ }
+
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ // If this is module-level metadata and we know that nothing at the module
+ // level is changing, then use an identity mapping.
+ if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+ return VM[V] = const_cast<Value*>(V);
+
+ // Create a dummy node in case we have a metadata cycle.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ VM[V] = Dummy;
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
+
+ // Ok, at least one operand needs remapping.
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i) {
+ Value *Op = MD->getOperand(i);
+ Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
+ }
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts);
+ Dummy->replaceAllUsesWith(NewMD);
+ VM[V] = NewMD;
+ MDNode::deleteTemporary(Dummy);
+ return NewMD;
+ }
+
+ VM[V] = const_cast<Value*>(V);
+ MDNode::deleteTemporary(Dummy);
+
+ // No operands needed remapping. Use an identity mapping.
+ return const_cast<Value*>(V);
+ }
+
+ // Okay, this either must be a constant (which may or may not be mappable) or
+ // is something that is not in the mapping table.
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (C == 0)
+ return 0;
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F =
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+ Flags, TypeMapper));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+ }
+
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = 0;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ if (Mapped != C) break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return VM[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+ // all of the operands; set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+ // If one of the operands mismatch, push it and the other mapped operands.
+ if (OpNo != NumOperands) {
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++OpNo; OpNo != NumOperands; ++OpNo)
+ Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+ Flags, TypeMapper));
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops, NewTy);
+ if (isa<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return VM[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return VM[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return VM[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+///
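+/// A typical caller (a cloning utility, sketched here with invented names)
+/// first records old-to-new mappings while copying instructions and then
+/// rewrites the copies, e.g.:
+///   VMap[&OldInst] = NewInst;                 // while cloning
+///   RemapInstruction(NewInst, VMap, RF_None); // afterwards, fix up operands
+///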
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+ // Remap operands.
+ for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ *op = V;
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced value not in value map!");
+ }
+
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+ // Remap attached metadata. Don't bother remapping DebugLoc, it can never
+ // have mappings to do.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ MDNode *Old = MI->second;
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+ if (New != Old)
+ I->setMetadata(MI->first, New);
+ }
+
+ // If the instruction's type is being remapped, do so now.
+ if (TypeMapper)
+ I->mutateType(TypeMapper->remapType(I->getType()));
+}
diff --git a/contrib/llvm/lib/VMCore/AsmWriter.cpp b/contrib/llvm/lib/VMCore/AsmWriter.cpp
new file mode 100644
index 000000000000..94794c35fe0b
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/AsmWriter.cpp
@@ -0,0 +1,2038 @@
+//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Writer.h
+//
+// Note that these routines must be extremely tolerant of various errors in the
+// LLVM code, because they can be used for debugging transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Make virtual table appear in this compilation unit.
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static const Module *getModuleFromVal(const Value *V) {
+ if (const Argument *MA = dyn_cast<Argument>(V))
+ return MA->getParent() ? MA->getParent()->getParent() : 0;
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent() ? BB->getParent()->getParent() : 0;
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
+ return M ? M->getParent() : 0;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return GV->getParent();
+ return 0;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
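+// For example, a name containing a double quote, say  he"llo , is printed as
+// he\22llo  (0x22 is the ASCII code of '"').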
+static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+enum PrefixType {
+ GlobalPrefix,
+ LabelPrefix,
+ LocalPrefix,
+ NoPrefix
+};
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+ assert(!Name.empty() && "Cannot get empty name!");
+ switch (Prefix) {
+ default: llvm_unreachable("Bad prefix!");
+ case NoPrefix: break;
+ case GlobalPrefix: OS << '@'; break;
+ case LabelPrefix: break;
+ case LocalPrefix: OS << '%'; break;
+ }
+
+ // Scan the name to see if it needs quotes first.
+ bool NeedsQuotes = isdigit(Name[0]);
+ if (!NeedsQuotes) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ char C = Name[i];
+ if (!isalnum(C) && C != '-' && C != '.' && C != '_') {
+ NeedsQuotes = true;
+ break;
+ }
+ }
+ }
+
+ // If we didn't need any quotes, just write out the name in one blast.
+ if (!NeedsQuotes) {
+ OS << Name;
+ return;
+ }
+
+ // Okay, we need quotes. Output the quotes and escape any scary characters as
+ // needed.
+ OS << '"';
+ PrintEscapedString(Name, OS);
+ OS << '"';
+}
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, const Value *V) {
+ PrintLLVMName(OS, V->getName(),
+ isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
+}
+
+//===----------------------------------------------------------------------===//
+// TypePrinting Class: Type printing machinery
+//===----------------------------------------------------------------------===//
+
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &); // DO NOT IMPLEMENT
+ void operator=(const TypePrinting&); // DO NOT IMPLEMENT
+public:
+
+ /// NamedTypes - The named types that are used by the current module.
+ std::vector<StructType*> NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
+
+ TypePrinting() {}
+ ~TypePrinting() {}
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // end anonymous namespace.
+
+
+void TypePrinting::incorporateTypes(const Module &M) {
+ M.findUsedStructTypes(NamedTypes);
+
+ // The list of struct types we got back includes all the struct types; split
+ // the unnamed ones out to a numbering and remove the anonymous structs.
+ unsigned NextNumber = 0;
+
+ std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
+ for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
+ StructType *STy = *I;
+
+ // Ignore anonymous types.
+ if (STy->isAnonymous())
+ continue;
+
+ if (STy->getName().empty())
+ NumberedTypes[STy] = NextNumber++;
+ else
+ *NextToUse++ = STy;
+ }
+
+ NamedTypes.erase(NextToUse, NamedTypes.end());
+}
+
+
+/// print - Write the specified type to the specified raw_ostream, making use
+/// of type names to shorten the type name where possible.
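+///
+/// For example, a pointer to a function that takes an i8* and returns i32
+/// prints as  i32 (i8*)* , and a named struct prints by its name, e.g.
+/// %struct.foo .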
+void TypePrinting::print(Type *Ty, raw_ostream &OS) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: OS << "void"; break;
+ case Type::FloatTyID: OS << "float"; break;
+ case Type::DoubleTyID: OS << "double"; break;
+ case Type::X86_FP80TyID: OS << "x86_fp80"; break;
+ case Type::FP128TyID: OS << "fp128"; break;
+ case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
+ case Type::LabelTyID: OS << "label"; break;
+ case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::X86_MMXTyID: OS << "x86_mmx"; break;
+ case Type::IntegerTyID:
+ OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
+ return;
+
+ case Type::FunctionTyID: {
+ FunctionType *FTy = cast<FunctionType>(Ty);
+ print(FTy->getReturnType(), OS);
+ OS << " (";
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ if (I != FTy->param_begin())
+ OS << ", ";
+ print(*I, OS);
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams()) OS << ", ";
+ OS << "...";
+ }
+ OS << ')';
+ return;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+
+ if (STy->isAnonymous())
+ return printStructBody(STy, OS);
+
+ if (!STy->getName().empty())
+ return PrintLLVMName(OS, STy->getName(), LocalPrefix);
+
+ DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
+ if (I != NumberedTypes.end())
+ OS << '%' << I->second;
+ else // Not enumerated, print the hex address.
+ OS << "%\"type 0x" << STy << '\"';
+ return;
+ }
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(Ty);
+ print(PTy->getElementType(), OS);
+ if (unsigned AddressSpace = PTy->getAddressSpace())
+ OS << " addrspace(" << AddressSpace << ')';
+ OS << '*';
+ return;
+ }
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ OS << '[' << ATy->getNumElements() << " x ";
+ print(ATy->getElementType(), OS);
+ OS << ']';
+ return;
+ }
+ case Type::VectorTyID: {
+ VectorType *PTy = cast<VectorType>(Ty);
+ OS << "<" << PTy->getNumElements() << " x ";
+ print(PTy->getElementType(), OS);
+ OS << '>';
+ return;
+ }
+ default:
+ OS << "<unrecognized-type>";
+ return;
+ }
+}
+
+void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
+ if (STy->isOpaque()) {
+ OS << "opaque";
+ return;
+ }
+
+ if (STy->isPacked())
+ OS << '<';
+
+ if (STy->getNumElements() == 0) {
+ OS << "{}";
+ } else {
+ StructType::element_iterator I = STy->element_begin();
+ OS << "{ ";
+ print(*I++, OS);
+ for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
+ OS << ", ";
+ print(*I, OS);
+ }
+
+ OS << " }";
+ }
+ if (STy->isPacked())
+ OS << '>';
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SlotTracker Class: Enumerate slot numbers for unnamed values
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This class provides computation of slot numbers for LLVM Assembly writing.
+///
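+/// For example, unnamed local values print as %0, %1, ..., unnamed globals as
+/// @0, @1, ..., and metadata nodes as !0, !1, ...; the slot numbers computed
+/// here are exactly those numbers.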
+class SlotTracker {
+public:
+ /// ValueMap - A mapping of Values to slot numbers.
+ typedef DenseMap<const Value*, unsigned> ValueMap;
+
+private:
+ /// TheModule - The module for which we are holding slot numbers.
+ const Module* TheModule;
+
+ /// TheFunction - The function for which we are holding slot numbers.
+ const Function* TheFunction;
+ bool FunctionProcessed;
+
+ /// mMap - The slot map for the module level data.
+ ValueMap mMap;
+ unsigned mNext;
+
+ /// fMap - The slot map for the function level data.
+ ValueMap fMap;
+ unsigned fNext;
+
+ /// mdnMap - Map for MDNodes.
+ DenseMap<const MDNode*, unsigned> mdnMap;
+ unsigned mdnNext;
+public:
+ /// Construct from a module
+ explicit SlotTracker(const Module *M);
+ /// Construct from a function, starting out in incorp state.
+ explicit SlotTracker(const Function *F);
+
+ /// Return the slot number of the specified value in its type
+ /// plane. If something is not in the SlotTracker, return -1.
+ int getLocalSlot(const Value *V);
+ int getGlobalSlot(const GlobalValue *V);
+ int getMetadataSlot(const MDNode *N);
+
+ /// If you'd like to deal with a function instead of just a module, use
+ /// this method to get its data into the SlotTracker.
+ void incorporateFunction(const Function *F) {
+ TheFunction = F;
+ FunctionProcessed = false;
+ }
+
+ /// After calling incorporateFunction, use this method to remove the
+ /// most recently incorporated function from the SlotTracker. This
+ /// will reset the state of the machine back to just the module contents.
+ void purgeFunction();
+
+ /// MDNode map iterators.
+ typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
+ mdn_iterator mdn_begin() { return mdnMap.begin(); }
+ mdn_iterator mdn_end() { return mdnMap.end(); }
+ unsigned mdn_size() const { return mdnMap.size(); }
+ bool mdn_empty() const { return mdnMap.empty(); }
+
+ /// This function does the actual initialization.
+ inline void initialize();
+
+ // Implementation Details
+private:
+ /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+ void CreateModuleSlot(const GlobalValue *V);
+
+ /// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+ void CreateMetadataSlot(const MDNode *N);
+
+ /// CreateFunctionSlot - Insert the specified Value* into the slot table.
+ void CreateFunctionSlot(const Value *V);
+
+ /// Add all of the module level global variables (and their initializers)
+ /// and function declarations, but not the contents of those functions.
+ void processModule();
+
+  /// Add all of the function's arguments, basic blocks, and instructions.
+ void processFunction();
+
+ SlotTracker(const SlotTracker &); // DO NOT IMPLEMENT
+ void operator=(const SlotTracker &); // DO NOT IMPLEMENT
+};
+
+} // end anonymous namespace
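+
+// A minimal usage sketch (F and Inst are hypothetical values); this mirrors
+// how AssemblyWriter drives the tracker further down:
+//
+//   SlotTracker Machine(F->getParent());    // module-level slots only
+//   Machine.incorporateFunction(F);         // switch to function state
+//   int Slot = Machine.getLocalSlot(Inst);  // -1 if Inst is named/untracked
+//   Machine.purgeFunction();                // back to module-only state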
+
+
+static SlotTracker *createSlotTracker(const Value *V) {
+ if (const Argument *FA = dyn_cast<Argument>(V))
+ return new SlotTracker(FA->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return new SlotTracker(I->getParent()->getParent());
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return new SlotTracker(BB->getParent());
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return new SlotTracker(GV->getParent());
+
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return new SlotTracker(GA->getParent());
+
+ if (const Function *Func = dyn_cast<Function>(V))
+ return new SlotTracker(Func);
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (!MD->isFunctionLocal())
+ return new SlotTracker(MD->getFunction());
+
+ return new SlotTracker((Function *)0);
+ }
+
+ return 0;
+}
+
+#if 0
+#define ST_DEBUG(X) dbgs() << X
+#else
+#define ST_DEBUG(X)
+#endif
+
+// Module level constructor. Causes the contents of the Module (sans functions)
+// to be added to the slot table.
+SlotTracker::SlotTracker(const Module *M)
+ : TheModule(M), TheFunction(0), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0) {
+}
+
+// Function level constructor. Causes the contents of the Module and the one
+// function provided to be added to the slot table.
+SlotTracker::SlotTracker(const Function *F)
+ : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0) {
+}
+
+inline void SlotTracker::initialize() {
+ if (TheModule) {
+ processModule();
+ TheModule = 0; ///< Prevent re-processing next time we're called.
+ }
+
+ if (TheFunction && !FunctionProcessed)
+ processFunction();
+}
+
+// Iterate through all the global variables, functions, and global
+// variable initializers and create slots for them.
+void SlotTracker::processModule() {
+ ST_DEBUG("begin processModule!\n");
+
+ // Add all of the unnamed global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (!I->hasName())
+ CreateModuleSlot(I);
+ }
+
+ // Add metadata used by named metadata.
+ for (Module::const_named_metadata_iterator
+ I = TheModule->named_metadata_begin(),
+ E = TheModule->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD->getOperand(i));
+ }
+
+ // Add all the unnamed functions to the table.
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ if (!I->hasName())
+ CreateModuleSlot(I);
+
+ ST_DEBUG("end processModule!\n");
+}
+
+// Process the arguments, basic blocks, and instructions of a function.
+void SlotTracker::processFunction() {
+ ST_DEBUG("begin processFunction!\n");
+ fNext = 0;
+
+ // Add all the function arguments with no names.
+ for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
+ AE = TheFunction->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ CreateFunctionSlot(AI);
+
+ ST_DEBUG("Inserting Instructions:\n");
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Add all of the basic blocks and instructions with no names.
+ for (Function::const_iterator BB = TheFunction->begin(),
+ E = TheFunction->end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ CreateFunctionSlot(BB);
+
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ if (!I->getType()->isVoidTy() && !I->hasName())
+ CreateFunctionSlot(I);
+
+ // Intrinsics can directly use metadata. We allow direct calls to any
+ // llvm.foo function here, because the target may not be linked into the
+ // optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction())
+ if (F->getName().startswith("llvm."))
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+ CreateMetadataSlot(N);
+ }
+
+      // Process metadata attached to this instruction.
+ I->getAllMetadata(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ CreateMetadataSlot(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ FunctionProcessed = true;
+
+ ST_DEBUG("end processFunction!\n");
+}
+
+/// Clean up after incorporating a function. This is the only way to get out of
+/// the function incorporation state that affects get*Slot/Create*Slot. Function
+/// incorporation state is indicated by TheFunction != 0.
+void SlotTracker::purgeFunction() {
+ ST_DEBUG("begin purgeFunction!\n");
+ fMap.clear(); // Simply discard the function level map
+ TheFunction = 0;
+ FunctionProcessed = false;
+ ST_DEBUG("end purgeFunction!\n");
+}
+
+/// getGlobalSlot - Get the slot number of a global value.
+int SlotTracker::getGlobalSlot(const GlobalValue *V) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the value in the module map
+ ValueMap::iterator MI = mMap.find(V);
+ return MI == mMap.end() ? -1 : (int)MI->second;
+}
+
+/// getMetadataSlot - Get the slot number of a MDNode.
+int SlotTracker::getMetadataSlot(const MDNode *N) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the MDNode in the module map
+ mdn_iterator MI = mdnMap.find(N);
+ return MI == mdnMap.end() ? -1 : (int)MI->second;
+}
+
+
+/// getLocalSlot - Get the slot number for a value that is local to a function.
+int SlotTracker::getLocalSlot(const Value *V) {
+ assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
+
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ ValueMap::iterator FI = fMap.find(V);
+ return FI == fMap.end() ? -1 : (int)FI->second;
+}
+
+
+/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
+ assert(V && "Can't insert a null Value into SlotTracker!");
+ assert(!V->getType()->isVoidTy() && "Doesn't need a slot!");
+ assert(!V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = mNext++;
+ mMap[V] = DestSlot;
+
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [");
+ // G = Global, F = Function, A = Alias, o = other
+ ST_DEBUG((isa<GlobalVariable>(V) ? 'G' :
+ (isa<Function>(V) ? 'F' :
+ (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
+}
+
+/// CreateFunctionSlot - Create a new slot for the specified value if it has
+/// no name.
+void SlotTracker::CreateFunctionSlot(const Value *V) {
+ assert(!V->getType()->isVoidTy() && !V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = fNext++;
+ fMap[V] = DestSlot;
+
+ // G = Global, F = Function, o = other
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [o]\n");
+}
+
+/// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+void SlotTracker::CreateMetadataSlot(const MDNode *N) {
+ assert(N && "Can't insert a null Value into SlotTracker!");
+
+  // Don't insert if N is function-local metadata; such nodes are always
+  // printed inline.
+ if (!N->isFunctionLocal()) {
+ mdn_iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
+
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+ }
+
+ // Recursively add any MDNodes referenced by operands.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const MDNode *Op = dyn_cast_or_null<MDNode>(N->getOperand(i)))
+ CreateMetadataSlot(Op);
+}
+
+//===----------------------------------------------------------------------===//
+// AsmWriter Implementation
+//===----------------------------------------------------------------------===//
+
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context);
+
+
+
+static const char *getPredicateText(unsigned predicate) {
+ const char * pred = "unknown";
+ switch (predicate) {
+ case FCmpInst::FCMP_FALSE: pred = "false"; break;
+ case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
+ case FCmpInst::FCMP_OGT: pred = "ogt"; break;
+ case FCmpInst::FCMP_OGE: pred = "oge"; break;
+ case FCmpInst::FCMP_OLT: pred = "olt"; break;
+ case FCmpInst::FCMP_OLE: pred = "ole"; break;
+ case FCmpInst::FCMP_ONE: pred = "one"; break;
+ case FCmpInst::FCMP_ORD: pred = "ord"; break;
+ case FCmpInst::FCMP_UNO: pred = "uno"; break;
+ case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
+ case FCmpInst::FCMP_UGT: pred = "ugt"; break;
+ case FCmpInst::FCMP_UGE: pred = "uge"; break;
+ case FCmpInst::FCMP_ULT: pred = "ult"; break;
+ case FCmpInst::FCMP_ULE: pred = "ule"; break;
+ case FCmpInst::FCMP_UNE: pred = "une"; break;
+ case FCmpInst::FCMP_TRUE: pred = "true"; break;
+ case ICmpInst::ICMP_EQ: pred = "eq"; break;
+ case ICmpInst::ICMP_NE: pred = "ne"; break;
+ case ICmpInst::ICMP_SGT: pred = "sgt"; break;
+ case ICmpInst::ICMP_SGE: pred = "sge"; break;
+ case ICmpInst::ICMP_SLT: pred = "slt"; break;
+ case ICmpInst::ICMP_SLE: pred = "sle"; break;
+ case ICmpInst::ICMP_UGT: pred = "ugt"; break;
+ case ICmpInst::ICMP_UGE: pred = "uge"; break;
+ case ICmpInst::ICMP_ULT: pred = "ult"; break;
+ case ICmpInst::ICMP_ULE: pred = "ule"; break;
+ }
+ return pred;
+}
+
+
+static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(U)) {
+ if (OBO->hasNoUnsignedWrap())
+ Out << " nuw";
+ if (OBO->hasNoSignedWrap())
+ Out << " nsw";
+ } else if (const PossiblyExactOperator *Div =
+ dyn_cast<PossiblyExactOperator>(U)) {
+ if (Div->isExact())
+ Out << " exact";
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+ if (GEP->isInBounds())
+ Out << " inbounds";
+ }
+}
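+
+// For instance (illustrative only), an add with both wrap flags set ends up
+// printed as "add nuw nsw", an exact sdiv as "sdiv exact", and an inbounds
+// GEP as "getelementptr inbounds", since these keywords are appended right
+// after the opcode by the callers below.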
+
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (CI->getType()->isIntegerTy(1)) {
+ Out << (CI->getZExtValue() ? "true" : "false");
+ return;
+ }
+ Out << CI->getValue();
+ return;
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
+ // We would like to output the FP constant value in exponential notation,
+ // but we cannot do this if doing so will lose precision. Check here to
+ // make sure that we only output it in exponential format if we can parse
+ // the value back and get the same value.
+ //
+ bool ignored;
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ SmallString<128> StrVal;
+ raw_svector_ostream(StrVal) << Val;
+
+      // Check that the stringized number is not some string like "Inf" or
+      // "NaN" that atof will accept but the lexer will not, i.e. that the
+      // string matches the "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (atof(StrVal.c_str()) == Val) {
+ Out << StrVal.str();
+ return;
+ }
+ }
+ // Otherwise we could not reparse it to exactly the same value, so we must
+ // output the string in hexadecimal format! Note that loading and storing
+ // floating point types changes the bits of NaNs on some hosts, notably
+ // x86, so we must not use these types.
+ assert(sizeof(double) == sizeof(uint64_t) &&
+ "assuming that double is 64 bits!");
+ char Buffer[40];
+ APFloat apf = CFP->getValueAPF();
+ // Floats are represented in ASCII IR as double, convert.
+ if (!isDouble)
+ apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ Out << "0x" <<
+ utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
+ Buffer+40);
+ return;
+ }
+
+ // Some form of long double. These appear as a magic letter identifying
+ // the type, then a fixed number of hex digits.
+ Out << "0x";
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+ Out << 'K';
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = p[1];
+ int shiftcount=12;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *p;
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad)
+ Out << 'L';
+ else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble)
+ Out << 'M';
+ else
+ llvm_unreachable("Unsupported floating point type");
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = *p;
+ int shiftcount=60;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *(++p);
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ }
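+
+  // Rough examples of the forms produced above (values chosen purely for
+  // illustration): a double such as 1.5 that survives the decimal round-trip
+  // typically comes out as "1.500000e+00"; one that does not is printed as
+  // its raw IEEE bits, e.g. "0x3FF8000000000001"; an x86 long double is
+  // prefixed with 'K', an IEEE quad with 'L', and a PPC double-double
+  // with 'M'.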
+
+ if (isa<ConstantAggregateZero>(CV)) {
+ Out << "zeroinitializer";
+ return;
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "blockaddress(";
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+ Context);
+ Out << ", ";
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+ Context);
+ Out << ")";
+ return;
+ }
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ // As a special case, print the array as a string if it is an array of
+ // i8 with ConstantInt values.
+ //
+ Type *ETy = CA->getType()->getElementType();
+ if (CA->isString()) {
+ Out << "c\"";
+ PrintEscapedString(CA->getAsString(), Out);
+ Out << '"';
+ } else { // Cannot output in string format...
+ Out << '[';
+ if (CA->getNumOperands()) {
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ }
+ Out << ']';
+ }
+ return;
+ }
+
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ if (CS->getType()->isPacked())
+ Out << '<';
+ Out << '{';
+ unsigned N = CS->getNumOperands();
+ if (N) {
+ Out << ' ';
+ TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+ Context);
+
+ for (unsigned i = 1; i < N; i++) {
+ Out << ", ";
+ TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ' ';
+ }
+
+ Out << '}';
+ if (CS->getType()->isPacked())
+ Out << '>';
+ return;
+ }
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Type *ETy = CP->getType()->getElementType();
+ assert(CP->getNumOperands() > 0 &&
+ "Number of operands for a PackedConst must be > 0");
+ Out << '<';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(CV)) {
+ Out << "null";
+ return;
+ }
+
+ if (isa<UndefValue>(CV)) {
+ Out << "undef";
+ return;
+ }
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ Out << CE->getOpcodeName();
+ WriteOptimizationInfo(Out, CE);
+ if (CE->isCompare())
+ Out << ' ' << getPredicateText(CE->getPredicate());
+ Out << " (";
+
+ for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+ TypePrinter.print((*OI)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+ if (OI+1 != CE->op_end())
+ Out << ", ";
+ }
+
+ if (CE->hasIndices()) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ Out << ", " << Indices[i];
+ }
+
+ if (CE->isCast()) {
+ Out << " to ";
+ TypePrinter.print(CE->getType(), Out);
+ }
+
+ Out << ')';
+ return;
+ }
+
+ Out << "<placeholder or erroneous Constant>";
+}
+
+static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!{";
+ for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
+ const Value *V = Node->getOperand(mi);
+ if (V == 0)
+ Out << "null";
+ else {
+ TypePrinter->print(V->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, Node->getOperand(mi),
+ TypePrinter, Machine, Context);
+ }
+ if (mi + 1 != me)
+ Out << ", ";
+ }
+
+ Out << "}";
+}
+
+
+/// WriteAsOperandInternal - Write the name of the specified value out to the
+/// specified ostream.  This can be useful when you just want to print
+/// "i32 %reg126", not the whole instruction that generated it.
+///
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (V->hasName()) {
+ PrintLLVMName(Out, V);
+ return;
+ }
+
+ const Constant *CV = dyn_cast<Constant>(V);
+ if (CV && !isa<GlobalValue>(CV)) {
+ assert(TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Out << "asm ";
+ if (IA->hasSideEffects())
+ Out << "sideeffect ";
+ if (IA->isAlignStack())
+ Out << "alignstack ";
+ Out << '"';
+ PrintEscapedString(IA->getAsmString(), Out);
+ Out << "\", \"";
+ PrintEscapedString(IA->getConstraintString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ if (N->isFunctionLocal()) {
+ // Print metadata inline, not via slot reference number.
+ WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (!Machine) {
+ if (N->isFunctionLocal())
+ Machine = new SlotTracker(N->getFunction());
+ else
+ Machine = new SlotTracker(Context);
+ }
+ int Slot = Machine->getMetadataSlot(N);
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ return;
+ }
+
+ if (const MDString *MDS = dyn_cast<MDString>(V)) {
+ Out << "!\"";
+ PrintEscapedString(MDS->getString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (V->getValueID() == Value::PseudoSourceValueVal ||
+ V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
+ V->print(Out);
+ return;
+ }
+
+ char Prefix = '%';
+ int Slot;
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ } else {
+ Machine = createSlotTracker(V);
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ delete Machine;
+ } else {
+ Slot = -1;
+ }
+ }
+
+ if (Slot != -1)
+ Out << Prefix << Slot;
+ else
+ Out << "<badref>";
+}
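+
+// To summarize the forms above (names and slot numbers are hypothetical):
+// named values print as "%foo" or "@foo", unnamed values print by slot as
+// "%3" or "@0", non-function-local metadata prints as "!7", metadata strings
+// print as !"...", inline asm prints as asm "...", "...", and a value with
+// no slot prints as "<badref>".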
+
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
+ bool PrintType, const Module *Context) {
+
+ // Fast path: Don't construct and populate a TypePrinting object if we
+ // won't be needing any types printed.
+ if (!PrintType &&
+ ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+ V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0, Context);
+ return;
+ }
+
+ if (Context == 0) Context = getModuleFromVal(V);
+
+ TypePrinting TypePrinter;
+ if (Context)
+ TypePrinter.incorporateTypes(*Context);
+ if (PrintType) {
+ TypePrinter.print(V->getType(), Out);
+ Out << ' ';
+ }
+
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
+}
+
+namespace {
+
+class AssemblyWriter {
+ formatted_raw_ostream &Out;
+ SlotTracker &Machine;
+ const Module *TheModule;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+
+public:
+ inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M,
+ AssemblyAnnotationWriter *AAW)
+ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
+ if (M)
+ TypePrinter.incorporateTypes(*M);
+ }
+
+ void printMDNodeBody(const MDNode *MD);
+ void printNamedMDNode(const NamedMDNode *NMD);
+
+ void printModule(const Module *M);
+
+ void writeOperand(const Value *Op, bool PrintType);
+ void writeParamOperand(const Value *Operand, Attributes Attrs);
+
+ void writeAllMDNodes();
+
+ void printTypeIdentities();
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, Attributes Attrs);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstruction(const Instruction &I);
+
+private:
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+};
+} // end of anonymous namespace
+
+void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+ if (PrintType) {
+ TypePrinter.print(Operand->getType(), Out);
+ Out << ' ';
+ }
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::writeParamOperand(const Value *Operand,
+ Attributes Attrs) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+
+ // Print the type
+ TypePrinter.print(Operand->getType(), Out);
+ // Print parameter attributes list
+ if (Attrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs);
+ Out << ' ';
+ // Print the operand
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::printModule(const Module *M) {
+ if (!M->getModuleIdentifier().empty() &&
+ // Don't print the ID if it will start a new line (which would
+ // require a comment char before it).
+ M->getModuleIdentifier().find('\n') == std::string::npos)
+ Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+
+ if (!M->getDataLayout().empty())
+ Out << "target datalayout = \"" << M->getDataLayout() << "\"\n";
+ if (!M->getTargetTriple().empty())
+ Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
+
+ if (!M->getModuleInlineAsm().empty()) {
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M->getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ Out << '\n';
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "module asm \"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ std::string rest(Asm.begin()+CurPos, Asm.end());
+ if (!rest.empty()) {
+ Out << "module asm \"";
+ PrintEscapedString(rest, Out);
+ Out << "\"\n";
+ }
+ }
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = M->lib_begin();
+ Module::lib_iterator LE = M->lib_end();
+ if (LI != LE) {
+ Out << '\n';
+ Out << "deplibs = [ ";
+ while (LI != LE) {
+ Out << '"' << *LI << '"';
+ ++LI;
+ if (LI != LE)
+ Out << ", ";
+ }
+ Out << " ]";
+ }
+
+ printTypeIdentities();
+
+ // Output all globals.
+ if (!M->global_empty()) Out << '\n';
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ printGlobal(I);
+
+ // Output all aliases.
+ if (!M->alias_empty()) Out << "\n";
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ printAlias(I);
+
+ // Output all of the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ printFunction(I);
+
+ // Output named metadata.
+ if (!M->named_metadata_empty()) Out << '\n';
+
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ printNamedMDNode(I);
+
+ // Output metadata.
+ if (!Machine.mdn_empty()) {
+ Out << '\n';
+ writeAllMDNodes();
+ }
+}
+
+void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
+ Out << '!';
+ StringRef Name = NMD->getName();
+ if (Name.empty()) {
+ Out << "<empty name> ";
+ } else {
+ if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' ||
+ Name[0] == '.' || Name[0] == '_')
+ Out << Name[0];
+ else
+ Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+ for (unsigned i = 1, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+ }
+ Out << " = !{";
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ if (i) Out << ", ";
+ int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ }
+ Out << "}\n";
+}
+
+
+static void PrintLinkage(GlobalValue::LinkageTypes LT,
+ formatted_raw_ostream &Out) {
+ switch (LT) {
+ case GlobalValue::ExternalLinkage: break;
+ case GlobalValue::PrivateLinkage: Out << "private "; break;
+ case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "linker_private_weak ";
+ break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "linker_private_weak_def_auto ";
+ break;
+ case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
+ case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
+ case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
+ case GlobalValue::CommonLinkage: Out << "common "; break;
+ case GlobalValue::AppendingLinkage: Out << "appending "; break;
+ case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
+ case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
+ case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "available_externally ";
+ break;
+ }
+}
+
+
+static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
+ formatted_raw_ostream &Out) {
+ switch (Vis) {
+ case GlobalValue::DefaultVisibility: break;
+ case GlobalValue::HiddenVisibility: Out << "hidden "; break;
+ case GlobalValue::ProtectedVisibility: Out << "protected "; break;
+ }
+}
+
+void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
+ if (GV->isMaterializable())
+ Out << "; Materializable\n";
+
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
+ Out << " = ";
+
+ if (!GV->hasInitializer() && GV->hasExternalLinkage())
+ Out << "external ";
+
+ PrintLinkage(GV->getLinkage(), Out);
+ PrintVisibility(GV->getVisibility(), Out);
+
+ if (GV->isThreadLocal()) Out << "thread_local ";
+ if (unsigned AddressSpace = GV->getType()->getAddressSpace())
+ Out << "addrspace(" << AddressSpace << ") ";
+ if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
+ Out << (GV->isConstant() ? "constant " : "global ");
+ TypePrinter.print(GV->getType()->getElementType(), Out);
+
+ if (GV->hasInitializer()) {
+ Out << ' ';
+ writeOperand(GV->getInitializer(), false);
+ }
+
+ if (GV->hasSection()) {
+ Out << ", section \"";
+ PrintEscapedString(GV->getSection(), Out);
+ Out << '"';
+ }
+ if (GV->getAlignment())
+ Out << ", align " << GV->getAlignment();
+
+ printInfoComment(*GV);
+ Out << '\n';
+}
+
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ if (GA->isMaterializable())
+ Out << "; Materializable\n";
+
+ // Don't crash when dumping partially built GA
+ if (!GA->hasName())
+ Out << "<<nameless>> = ";
+ else {
+ PrintLLVMName(Out, GA);
+ Out << " = ";
+ }
+ PrintVisibility(GA->getVisibility(), Out);
+
+ Out << "alias ";
+
+ PrintLinkage(GA->getLinkage(), Out);
+
+ const Constant *Aliasee = GA->getAliasee();
+
+ if (Aliasee == 0) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << " <<NULL ALIASEE>>";
+ } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
+ TypePrinter.print(GV->getType(), Out);
+ Out << ' ';
+ PrintLLVMName(Out, GV);
+ } else if (const Function *F = dyn_cast<Function>(Aliasee)) {
+ TypePrinter.print(F->getFunctionType(), Out);
+ Out << "* ";
+
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << ' ';
+ PrintLLVMName(Out, GA);
+ } else {
+ const ConstantExpr *CE = cast<ConstantExpr>(Aliasee);
+ // The only valid GEP is an all zero GEP.
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+ writeOperand(CE, false);
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
+
+void AssemblyWriter::printTypeIdentities() {
+ if (TypePrinter.NumberedTypes.empty() &&
+ TypePrinter.NamedTypes.empty())
+ return;
+
+ Out << '\n';
+
+  // We know every number that is used and that the numbering is dense, so
+  // convert the map to an index table.
+ std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
+ for (DenseMap<StructType*, unsigned>::iterator I =
+ TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
+ I != E; ++I) {
+ assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
+ NumberedTypes[I->second] = I->first;
+ }
+
+ // Emit all numbered types.
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
+ Out << '%' << i << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %2 = type %2
+ TypePrinter.printStructBody(NumberedTypes[i], Out);
+ Out << '\n';
+ }
+
+ for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
+ PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
+ Out << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %FILE = type %FILE
+ TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
+ Out << '\n';
+ }
+}
+
+/// printFunction - Print all aspects of a function.
+///
+void AssemblyWriter::printFunction(const Function *F) {
+ // Print out the return type and name.
+ Out << '\n';
+
+ if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
+
+ if (F->isMaterializable())
+ Out << "; Materializable\n";
+
+ if (F->isDeclaration())
+ Out << "declare ";
+ else
+ Out << "define ";
+
+ PrintLinkage(F->getLinkage(), Out);
+ PrintVisibility(F->getVisibility(), Out);
+
+ // Print the calling convention.
+ switch (F->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << "fastcc "; break;
+ case CallingConv::Cold: Out << "coldcc "; break;
+ case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::X86_ThisCall: Out << "x86_thiscallcc "; break;
+ case CallingConv::ARM_APCS: Out << "arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << "ptx_kernel "; break;
+ case CallingConv::PTX_Device: Out << "ptx_device "; break;
+ default: Out << "cc" << F->getCallingConv() << " "; break;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ const AttrListPtr &Attrs = F->getAttributes();
+ Attributes RetAttrs = Attrs.getRetAttributes();
+ if (RetAttrs != Attribute::None)
+ Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
+ TypePrinter.print(F->getReturnType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ Out << '(';
+ Machine.incorporateFunction(F);
+
+ // Loop over the arguments, printing them...
+
+ unsigned Idx = 1;
+ if (!F->isDeclaration()) {
+ // If this isn't a declaration, print the argument names as well.
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (I != F->arg_begin()) Out << ", ";
+ printArgument(I, Attrs.getParamAttributes(Idx));
+ Idx++;
+ }
+ } else {
+ // Otherwise, print the types from the function type.
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (i) Out << ", ";
+
+ // Output type...
+ TypePrinter.print(FT->getParamType(i), Out);
+
+ Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
+ if (ArgAttrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(ArgAttrs);
+ }
+ }
+
+ // Finish printing arguments...
+ if (FT->isVarArg()) {
+ if (FT->getNumParams()) Out << ", ";
+ Out << "..."; // Output varargs portion of signature!
+ }
+ Out << ')';
+ if (F->hasUnnamedAddr())
+ Out << " unnamed_addr";
+ Attributes FnAttrs = Attrs.getFnAttributes();
+ if (FnAttrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
+ if (F->hasSection()) {
+ Out << " section \"";
+ PrintEscapedString(F->getSection(), Out);
+ Out << '"';
+ }
+ if (F->getAlignment())
+ Out << " align " << F->getAlignment();
+ if (F->hasGC())
+ Out << " gc \"" << F->getGC() << '"';
+ if (F->isDeclaration()) {
+ Out << '\n';
+ } else {
+ Out << " {";
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
+ printBasicBlock(I);
+
+ Out << "}\n";
+ }
+
+ Machine.purgeFunction();
+}
+
+/// printArgument - This member is called for every argument that is passed
+/// into the function.  Simply print it out.
+///
+void AssemblyWriter::printArgument(const Argument *Arg,
+ Attributes Attrs) {
+ // Output type...
+ TypePrinter.print(Arg->getType(), Out);
+
+ // Output parameter attributes list
+ if (Attrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs);
+
+ // Output name, if available...
+ if (Arg->hasName()) {
+ Out << ' ';
+ PrintLLVMName(Out, Arg);
+ }
+}
+
+/// printBasicBlock - This member is called for each basic block in a method.
+///
+void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ if (BB->hasName()) { // Print out the label if it exists...
+ Out << "\n";
+ PrintLLVMName(Out, BB->getName(), LabelPrefix);
+ Out << ':';
+  } else if (!BB->use_empty()) {      // Don't print block # if it has no uses...
+ Out << "\n; <label>:";
+ int Slot = Machine.getLocalSlot(BB);
+ if (Slot != -1)
+ Out << Slot;
+ else
+ Out << "<badref>";
+ }
+
+ if (BB->getParent() == 0) {
+ Out.PadToColumn(50);
+ Out << "; Error: Block without parent!";
+ } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ // Output predecessors for the block.
+ Out.PadToColumn(50);
+ Out << ";";
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ if (PI == PE) {
+ Out << " No predecessors!";
+ } else {
+ Out << " preds = ";
+ writeOperand(*PI, false);
+ for (++PI; PI != PE; ++PI) {
+ Out << ", ";
+ writeOperand(*PI, false);
+ }
+ }
+ }
+
+ Out << "\n";
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ printInstruction(*I);
+ Out << '\n';
+ }
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
+}
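+
+// For an unnamed block the printed header is roughly "; <label>:5", with the
+// predecessor comment padded to column 50, e.g. "; preds = %entry, %if.then"
+// (the block names and slot number here are hypothetical).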
+
+/// printInfoComment - Print a little comment after the instruction indicating
+/// which slot it occupies.
+///
+void AssemblyWriter::printInfoComment(const Value &V) {
+ if (AnnotationWriter) {
+ AnnotationWriter->printInfoComment(V, Out);
+ return;
+ }
+}
+
+// This member is called for each Instruction in a function.
+void AssemblyWriter::printInstruction(const Instruction &I) {
+ if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
+
+ // Print out indentation for an instruction.
+ Out << " ";
+
+ // Print out name if it exists...
+ if (I.hasName()) {
+ PrintLLVMName(Out, &I);
+ Out << " = ";
+ } else if (!I.getType()->isVoidTy()) {
+ // Print out the def slot taken.
+ int SlotNum = Machine.getLocalSlot(&I);
+ if (SlotNum == -1)
+ Out << "<badref> = ";
+ else
+ Out << '%' << SlotNum << " = ";
+ }
+
+ // If this is a volatile load or store, print out the volatile marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())) {
+ Out << "volatile ";
+ } else if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall()) {
+ // If this is a call, check if it's a tail call.
+ Out << "tail ";
+ }
+
+ // Print out the opcode...
+ Out << I.getOpcodeName();
+
+ // Print out optimization information.
+ WriteOptimizationInfo(Out, &I);
+
+ // Print out the compare instruction predicates
+ if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
+ Out << ' ' << getPredicateText(CI->getPredicate());
+
+ // Print out the type of the operands...
+ const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
+
+ // Special case conditional branches to swizzle the condition out to the front
+ if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
+ BranchInst &BI(cast<BranchInst>(I));
+ Out << ' ';
+ writeOperand(BI.getCondition(), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(0), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(1), true);
+
+ } else if (isa<SwitchInst>(I)) {
+ // Special case switch instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand , true);
+ Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ Out << " [";
+
+ for (unsigned op = 2, Eop = I.getNumOperands(); op < Eop; op += 2) {
+ Out << "\n ";
+ writeOperand(I.getOperand(op ), true);
+ Out << ", ";
+ writeOperand(I.getOperand(op+1), true);
+ }
+ Out << "\n ]";
+ } else if (isa<IndirectBrInst>(I)) {
+ // Special case indirectbr instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand, true);
+ Out << ", [";
+
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ if (i != 1)
+ Out << ", ";
+ writeOperand(I.getOperand(i), true);
+ }
+ Out << ']';
+ } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << ' ';
+
+ for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
+ if (op) Out << ", ";
+ Out << "[ ";
+ writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+ writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true); Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (isa<ReturnInst>(I) && !Operand) {
+ Out << " void";
+ } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Print the calling convention being used.
+ switch (CI->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << " fastcc"; break;
+ case CallingConv::Cold: Out << " coldcc"; break;
+ case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
+ case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
+ default: Out << " cc" << CI->getCallingConv(); break;
+ }
+
+ Operand = CI->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttrListPtr &PAL = CI->getAttributes();
+
+ if (PAL.getRetAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+
+ // If possible, print out the short form of the call instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ if (op > 0)
+ Out << ", ";
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
+ }
+ Out << ')';
+ if (PAL.getFnAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ Operand = II->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttrListPtr &PAL = II->getAttributes();
+
+ // Print the calling convention being used.
+ switch (II->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << " fastcc"; break;
+ case CallingConv::Cold: Out << " coldcc"; break;
+ case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
+ case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
+ default: Out << " cc" << II->getCallingConv(); break;
+ }
+
+ if (PAL.getRetAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+
+ // If possible, print out the short form of the invoke instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
+ if (op)
+ Out << ", ";
+ writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
+ }
+
+ Out << ')';
+ if (PAL.getFnAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+
+ Out << "\n to ";
+ writeOperand(II->getNormalDest(), true);
+ Out << " unwind ";
+ writeOperand(II->getUnwindDest(), true);
+
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(AI->getType()->getElementType(), Out);
+ if (!AI->getArraySize() || AI->isArrayAllocation()) {
+ Out << ", ";
+ writeOperand(AI->getArraySize(), true);
+ }
+ if (AI->getAlignment()) {
+ Out << ", align " << AI->getAlignment();
+ }
+ } else if (isa<CastInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << " to ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (isa<VAArgInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << ", ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (Operand) { // Print the normal way.
+
+    // PrintAllTypes - Instructions whose operands all have the same type
+    // omit the type from all but the first operand.  If the instruction has
+    // operands of different types (for example br), then all types are
+    // printed.
+ bool PrintAllTypes = false;
+ Type *TheType = Operand->getType();
+
+    // Select, Store, ShuffleVector and Return always print all types.
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
+ || isa<ReturnInst>(I)) {
+ PrintAllTypes = true;
+ } else {
+ for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
+ Operand = I.getOperand(i);
+        // Note that Operand shouldn't be null, but the test helps make dump()
+        // more tolerant of malformed IR.
+ if (Operand && Operand->getType() != TheType) {
+ PrintAllTypes = true; // We have differing types! Print them all!
+ break;
+ }
+ }
+ }
+
+ if (!PrintAllTypes) {
+ Out << ' ';
+ TypePrinter.print(TheType, Out);
+ }
+
+ Out << ' ';
+ for (unsigned i = 0, E = I.getNumOperands(); i != E; ++i) {
+ if (i) Out << ", ";
+ writeOperand(I.getOperand(i), PrintAllTypes);
+ }
+ }
+
+ // Print post operand alignment for load/store.
+ if (isa<LoadInst>(I) && cast<LoadInst>(I).getAlignment()) {
+ Out << ", align " << cast<LoadInst>(I).getAlignment();
+ } else if (isa<StoreInst>(I) && cast<StoreInst>(I).getAlignment()) {
+ Out << ", align " << cast<StoreInst>(I).getAlignment();
+ }
+
+ // Print Metadata info.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ I.getAllMetadata(InstMD);
+ if (!InstMD.empty()) {
+ SmallVector<StringRef, 8> MDNames;
+ I.getType()->getContext().getMDKindNames(MDNames);
+ for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
+ unsigned Kind = InstMD[i].first;
+ if (Kind < MDNames.size()) {
+ Out << ", !" << MDNames[Kind];
+ } else {
+ Out << ", !<unknown kind #" << Kind << ">";
+ }
+ Out << ' ';
+ WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+ TheModule);
+ }
+ }
+ printInfoComment(I);
+}
+
+static void WriteMDNodeComment(const MDNode *Node,
+ formatted_raw_ostream &Out) {
+ if (Node->getNumOperands() < 1)
+ return;
+ ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0));
+ if (!CI) return;
+ APInt Val = CI->getValue();
+ APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
+ if (Val.ult(LLVMDebugVersion))
+ return;
+
+ Out.PadToColumn(50);
+ if (Tag == dwarf::DW_TAG_user_base)
+ Out << "; [ DW_TAG_user_base ]";
+ else if (Tag.isIntN(32)) {
+ if (const char *TagName = dwarf::TagString(Tag.getZExtValue()))
+ Out << "; [ " << TagName << " ]";
+ }
+}
+
+void AssemblyWriter::writeAllMDNodes() {
+ SmallVector<const MDNode *, 16> Nodes;
+ Nodes.resize(Machine.mdn_size());
+ for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end();
+ I != E; ++I)
+ Nodes[I->second] = cast<MDNode>(I->first);
+
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ Out << '!' << i << " = metadata ";
+ printMDNodeBody(Nodes[i]);
+ }
+}
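+
+// Each numbered node is emitted on its own line in the form
+// "!0 = metadata !{...}"; the body between the braces comes from
+// WriteMDNodeBodyInternal via printMDNodeBody (the node number shown is an
+// example).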
+
+void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
+ WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
+ WriteMDNodeComment(Node, Out);
+ Out << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declarations
+//===----------------------------------------------------------------------===//
+
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this);
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, this, AAW);
+ W.printModule(this);
+}
+
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ W.printNamedMDNode(this);
+}
+
+void Type::print(raw_ostream &OS) const {
+ if (this == 0) {
+ OS << "<null Type>";
+ return;
+ }
+ TypePrinting TP;
+ TP.print(const_cast<Type*>(this), OS);
+
+ // If the type is a named struct type, print the body as well.
+ if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
+ if (!STy->isAnonymous()) {
+ OS << " = type ";
+ TP.printStructBody(STy, OS);
+ }
+}
+
+void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ if (this == 0) {
+ ROS << "printing a <null> value\n";
+ return;
+ }
+ formatted_raw_ostream OS(ROS);
+ if (const Instruction *I = dyn_cast<Instruction>(this)) {
+ const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW);
+ W.printInstruction(*I);
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
+ SlotTracker SlotTable(BB->getParent());
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW);
+ W.printBasicBlock(BB);
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ SlotTracker SlotTable(GV->getParent());
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
+ if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
+ W.printGlobal(V);
+ else if (const Function *F = dyn_cast<Function>(GV))
+ W.printFunction(F);
+ else
+ W.printAlias(cast<GlobalAlias>(GV));
+ } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
+ const Function *F = N->getFunction();
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
+ W.printMDNodeBody(N);
+ } else if (const Constant *C = dyn_cast<Constant>(this)) {
+ TypePrinting TypePrinter;
+ TypePrinter.print(C->getType(), OS);
+ OS << ' ';
+ WriteConstantInternal(OS, C, TypePrinter, 0, 0);
+ } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
+ isa<Argument>(this)) {
+ WriteAsOperand(OS, this, true, 0);
+ } else {
+ // Otherwise we don't know what it is. Call the virtual function to
+ // allow a subclass to print itself.
+ printCustom(OS);
+ }
+}
+
+// Value::printCustom - subclasses should override this to implement printing.
+void Value::printCustom(raw_ostream &OS) const {
+ llvm_unreachable("Unknown value to print out!");
+}
+
+// Value::dump - allow easy printing of Values from the debugger.
+void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+
+// Type::dump - allow easy printing of Types from the debugger.
+void Type::dump() const { print(dbgs()); }
+
+// Module::dump() - Allow printing of Modules from the debugger.
+void Module::dump() const { print(dbgs(), 0); }
diff --git a/contrib/llvm/lib/VMCore/Attributes.cpp b/contrib/llvm/lib/VMCore/Attributes.cpp
new file mode 100644
index 000000000000..bf6efa1645a2
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Attributes.cpp
@@ -0,0 +1,353 @@
+//===-- Attributes.cpp - Implement AttributesList -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AttributesList class and Attribute utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Attribute Function Definitions
+//===----------------------------------------------------------------------===//
+
+std::string Attribute::getAsString(Attributes Attrs) {
+ std::string Result;
+ if (Attrs & Attribute::ZExt)
+ Result += "zeroext ";
+ if (Attrs & Attribute::SExt)
+ Result += "signext ";
+ if (Attrs & Attribute::NoReturn)
+ Result += "noreturn ";
+ if (Attrs & Attribute::NoUnwind)
+ Result += "nounwind ";
+ if (Attrs & Attribute::UWTable)
+ Result += "uwtable ";
+ if (Attrs & Attribute::InReg)
+ Result += "inreg ";
+ if (Attrs & Attribute::NoAlias)
+ Result += "noalias ";
+ if (Attrs & Attribute::NoCapture)
+ Result += "nocapture ";
+ if (Attrs & Attribute::StructRet)
+ Result += "sret ";
+ if (Attrs & Attribute::ByVal)
+ Result += "byval ";
+ if (Attrs & Attribute::Nest)
+ Result += "nest ";
+ if (Attrs & Attribute::ReadNone)
+ Result += "readnone ";
+ if (Attrs & Attribute::ReadOnly)
+ Result += "readonly ";
+ if (Attrs & Attribute::OptimizeForSize)
+ Result += "optsize ";
+ if (Attrs & Attribute::NoInline)
+ Result += "noinline ";
+ if (Attrs & Attribute::InlineHint)
+ Result += "inlinehint ";
+ if (Attrs & Attribute::AlwaysInline)
+ Result += "alwaysinline ";
+ if (Attrs & Attribute::StackProtect)
+ Result += "ssp ";
+ if (Attrs & Attribute::StackProtectReq)
+ Result += "sspreq ";
+ if (Attrs & Attribute::NoRedZone)
+ Result += "noredzone ";
+ if (Attrs & Attribute::NoImplicitFloat)
+ Result += "noimplicitfloat ";
+ if (Attrs & Attribute::Naked)
+ Result += "naked ";
+ if (Attrs & Attribute::Hotpatch)
+ Result += "hotpatch ";
+ if (Attrs & Attribute::NonLazyBind)
+ Result += "nonlazybind ";
+ if (Attrs & Attribute::StackAlignment) {
+ Result += "alignstack(";
+ Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
+ Result += ") ";
+ }
+ if (Attrs & Attribute::Alignment) {
+ Result += "align ";
+ Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
+ Result += " ";
+ }
+ // Trim the trailing space.
+ assert(!Result.empty() && "Unknown attribute!");
+ Result.erase(Result.end()-1);
+ return Result;
+}
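+
+// A small usage sketch (the attribute combination is an arbitrary example):
+//
+//   std::string S = Attribute::getAsString(Attribute::NoUnwind |
+//                                          Attribute::ReadOnly);
+//   // S == "nounwind readonly"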
+
+Attributes Attribute::typeIncompatible(const Type *Ty) {
+ Attributes Incompatible = None;
+
+ if (!Ty->isIntegerTy())
+ // Attributes that only apply to integers.
+ Incompatible |= SExt | ZExt;
+
+ if (!Ty->isPointerTy())
+ // Attributes that only apply to pointers.
+ Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
+
+ return Incompatible;
+}
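+
+// For example (illustrative): for an integer type the incompatible set is the
+// pointer-only attributes (byval, nest, noalias, sret, nocapture), and for a
+// pointer type it is the integer-only attributes (signext, zeroext).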
+
+//===----------------------------------------------------------------------===//
+// AttributeListImpl Definition
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ class AttributeListImpl;
+}
+
+static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
+
+namespace llvm {
+static ManagedStatic<sys::SmartMutex<true> > ALMutex;
+
+class AttributeListImpl : public FoldingSetNode {
+ sys::cas_flag RefCount;
+
+ // AttributesList is uniqued; these should not be publicly available.
+ void operator=(const AttributeListImpl &); // Do not implement
+ AttributeListImpl(const AttributeListImpl &); // Do not implement
+ ~AttributeListImpl(); // Private implementation
+public:
+ SmallVector<AttributeWithIndex, 4> Attrs;
+
+ AttributeListImpl(const AttributeWithIndex *Attr, unsigned NumAttrs)
+ : Attrs(Attr, Attr+NumAttrs) {
+ RefCount = 0;
+ }
+
+ void AddRef() {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ ++RefCount;
+ }
+ void DropRef() {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ if (!AttributesLists.isConstructed())
+ return;
+ sys::cas_flag new_val = --RefCount;
+ if (new_val == 0)
+ delete this;
+ }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Attrs.data(), Attrs.size());
+ }
+ static void Profile(FoldingSetNodeID &ID, const AttributeWithIndex *Attr,
+ unsigned NumAttrs) {
+ for (unsigned i = 0; i != NumAttrs; ++i)
+ ID.AddInteger(uint64_t(Attr[i].Attrs) << 32 | unsigned(Attr[i].Index));
+ }
+};
+}
+
+AttributeListImpl::~AttributeListImpl() {
+ // NOTE: Lock must be acquired by caller.
+ AttributesLists->RemoveNode(this);
+}
+
+
+AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs) {
+ // If there are no attributes then return a null AttributesList pointer.
+ if (NumAttrs == 0)
+ return AttrListPtr();
+
+#ifndef NDEBUG
+ for (unsigned i = 0; i != NumAttrs; ++i) {
+ assert(Attrs[i].Attrs != Attribute::None &&
+ "Pointless attribute!");
+ assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
+ "Misordered AttributesList!");
+ }
+#endif
+
+ // Otherwise, build a key to look up the existing attributes.
+ FoldingSetNodeID ID;
+ AttributeListImpl::Profile(ID, Attrs, NumAttrs);
+ void *InsertPos;
+
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+
+ AttributeListImpl *PAL =
+ AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
+
+ // If we didn't find any existing attributes of the same shape then
+ // create a new one and insert it.
+ if (!PAL) {
+ PAL = new AttributeListImpl(Attrs, NumAttrs);
+ AttributesLists->InsertNode(PAL, InsertPos);
+ }
+
+ // Return the AttributesList that we found or created.
+ return AttrListPtr(PAL);
+}
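+
+// Illustrative sketch (hypothetical values): building a uniqued list through
+// AttrListPtr::get. Per the asserts above, entries must be sorted by index
+// and none may be Attribute::None; index ~0U denotes the function itself.
+//
+//   AttributeWithIndex AWI[2];
+//   AWI[0] = AttributeWithIndex::get(1, Attribute::NoAlias);    // param #1
+//   AWI[1] = AttributeWithIndex::get(~0U, Attribute::NoUnwind); // function
+//   AttrListPtr PAL = AttrListPtr::get(AWI, 2);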
+
+
+//===----------------------------------------------------------------------===//
+// AttrListPtr Method Implementations
+//===----------------------------------------------------------------------===//
+
+AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {
+ if (LI) LI->AddRef();
+}
+
+AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
+ if (AttrList) AttrList->AddRef();
+}
+
+const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ if (AttrList == RHS.AttrList) return *this;
+ if (AttrList) AttrList->DropRef();
+ AttrList = RHS.AttrList;
+ if (AttrList) AttrList->AddRef();
+ return *this;
+}
+
+AttrListPtr::~AttrListPtr() {
+ if (AttrList) AttrList->DropRef();
+}
+
+/// getNumSlots - Return the number of slots used in this attribute list.
+/// This is the number of arguments that have an attribute set on them
+/// (including the function itself).
+unsigned AttrListPtr::getNumSlots() const {
+ return AttrList ? AttrList->Attrs.size() : 0;
+}
+
+/// getSlot - Return the AttributeWithIndex at the specified slot. This
+/// holds a number plus a set of attributes.
+const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
+ assert(AttrList && Slot < AttrList->Attrs.size() && "Slot # out of range!");
+ return AttrList->Attrs[Slot];
+}
+
+
+/// getAttributes - The attributes for the specified index are
+/// returned. Attributes for the result are denoted with Idx = 0.
+/// Function notes are denoted with Idx = ~0.
+Attributes AttrListPtr::getAttributes(unsigned Idx) const {
+ if (AttrList == 0) return Attribute::None;
+
+ const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
+ for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
+ if (Attrs[i].Index == Idx)
+ return Attrs[i].Attrs;
+ return Attribute::None;
+}
+
+/// hasAttrSomewhere - Return true if the specified attribute is set for at
+/// least one parameter or for the return value.
+bool AttrListPtr::hasAttrSomewhere(Attributes Attr) const {
+ if (AttrList == 0) return false;
+
+ const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i)
+ if (Attrs[i].Attrs & Attr)
+ return true;
+ return false;
+}
+
+
+AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
+ Attributes OldAttrs = getAttributes(Idx);
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't change a known alignment.
+ Attributes OldAlign = OldAttrs & Attribute::Alignment;
+ Attributes NewAlign = Attrs & Attribute::Alignment;
+ assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
+ "Attempt to change alignment!");
+#endif
+
+ Attributes NewAttrs = OldAttrs | Attrs;
+ if (NewAttrs == OldAttrs)
+ return *this;
+
+ SmallVector<AttributeWithIndex, 8> NewAttrList;
+ if (AttrList == 0)
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+ else {
+ const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
+ unsigned i = 0, e = OldAttrList.size();
+ // Copy attributes for arguments before this one.
+ for (; i != e && OldAttrList[i].Index < Idx; ++i)
+ NewAttrList.push_back(OldAttrList[i]);
+
+ // If there are attributes already at this index, merge them in.
+ if (i != e && OldAttrList[i].Index == Idx) {
+ Attrs |= OldAttrList[i].Attrs;
+ ++i;
+ }
+
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+
+ // Copy attributes for arguments after this one.
+ NewAttrList.insert(NewAttrList.end(),
+ OldAttrList.begin()+i, OldAttrList.end());
+ }
+
+ return get(NewAttrList.data(), NewAttrList.size());
+}
+
+AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't pass in alignment, which no current use does.
+ assert(!(Attrs & Attribute::Alignment) && "Attempt to exclude alignment!");
+#endif
+ if (AttrList == 0) return AttrListPtr();
+
+ Attributes OldAttrs = getAttributes(Idx);
+ Attributes NewAttrs = OldAttrs & ~Attrs;
+ if (NewAttrs == OldAttrs)
+ return *this;
+
+ SmallVector<AttributeWithIndex, 8> NewAttrList;
+ const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
+ unsigned i = 0, e = OldAttrList.size();
+
+ // Copy attributes for arguments before this one.
+ for (; i != e && OldAttrList[i].Index < Idx; ++i)
+ NewAttrList.push_back(OldAttrList[i]);
+
+ // If there are attributes already at this index, merge them in.
+ assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
+ Attrs = OldAttrList[i].Attrs & ~Attrs;
+ ++i;
+ if (Attrs) // If any attributes left for this parameter, add them.
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+
+ // Copy attributes for arguments after this one.
+ NewAttrList.insert(NewAttrList.end(),
+ OldAttrList.begin()+i, OldAttrList.end());
+
+ return get(NewAttrList.data(), NewAttrList.size());
+}
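+
+// Illustrative sketch (hypothetical names): attribute lists are immutable and
+// uniqued, so addAttr/removeAttr return a fresh AttrListPtr rather than
+// modifying the receiver.
+//
+//   AttrListPtr PAL2 = PAL.addAttr(1, Attribute::NoCapture);
+//   AttrListPtr PAL3 = PAL2.removeAttr(1, Attribute::NoCapture);
+//   // PAL itself is unchanged by either call.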
+
+void AttrListPtr::dump() const {
+ dbgs() << "PAL[ ";
+ for (unsigned i = 0; i < getNumSlots(); ++i) {
+ const AttributeWithIndex &PAWI = getSlot(i);
+ dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ }
+
+ dbgs() << "]\n";
+}
diff --git a/contrib/llvm/lib/VMCore/AutoUpgrade.cpp b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
new file mode 100644
index 000000000000..9e93ff370e25
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
@@ -0,0 +1,281 @@
+//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the auto-upgrade helper functions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoUpgrade.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstring>
+using namespace llvm;
+
+
+static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
+ assert(F && "Illegal to upgrade a non-existent Function.");
+
+ // Quickly eliminate it if it's not a candidate.
+ StringRef Name = F->getName();
+ if (Name.size() <= 8 || !Name.startswith("llvm."))
+ return false;
+ Name = Name.substr(5); // Strip off "llvm."
+
+ const FunctionType *FTy = F->getFunctionType();
+ Module *M = F->getParent();
+
+ switch (Name[0]) {
+ default: break;
+ case 'p':
+ // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
+ // which is an instruction / data cache identifier. The old version only
+ // implicitly accepted the data version.
+ if (Name == "prefetch") {
+ // Don't do anything if it has the correct number of arguments already
+ if (FTy->getNumParams() == 4)
+ break;
+
+ assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
+ // and typed function below.
+ std::string NameTmp = F->getName();
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
+ FTy->getReturnType(),
+ FTy->getParamType(0),
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ FTy->getParamType(2),
+ (Type*)0));
+ return true;
+ }
+
+ break;
+ case 'x': {
+ const char *NewFnName = NULL;
+ // This fixes the poorly named crc32 intrinsics.
+ if (Name == "x86.sse42.crc32.8")
+ NewFnName = "llvm.x86.sse42.crc32.32.8";
+ else if (Name == "x86.sse42.crc32.16")
+ NewFnName = "llvm.x86.sse42.crc32.32.16";
+ else if (Name == "x86.sse42.crc32.32")
+ NewFnName = "llvm.x86.sse42.crc32.32.32";
+ else if (Name == "x86.sse42.crc64.8")
+ NewFnName = "llvm.x86.sse42.crc32.64.8";
+ else if (Name == "x86.sse42.crc64.64")
+ NewFnName = "llvm.x86.sse42.crc32.64.64";
+
+ if (NewFnName) {
+ F->setName(NewFnName);
+ NewFn = F;
+ return true;
+ }
+
+ // Calls to these instructions are transformed into unaligned loads.
+ if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
+ Name == "x86.sse2.loadu.pd")
+ return true;
+
+ // Calls to these instructions are transformed into nontemporal stores.
+ if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
+ Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
+ return true;
+
+ break;
+ }
+ }
+
+ // This may not belong here. This function is effectively being overloaded
+ // to both detect an intrinsic which needs upgrading, and to provide the
+ // upgraded form of the intrinsic. We should perhaps have two separate
+ // functions for this.
+ return false;
+}
+
+bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
+ NewFn = 0;
+ bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
+
+ // Upgrade intrinsic attributes. This does not change the function.
+ if (NewFn)
+ F = NewFn;
+ if (unsigned id = F->getIntrinsicID())
+ F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
+ return Upgraded;
+}
+
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ // Nothing to do yet.
+ return false;
+}
+
+// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
+// upgraded intrinsic. All argument and return casting must be provided in
+// order to seamlessly integrate with existing context.
+void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
+ Function *F = CI->getCalledFunction();
+ LLVMContext &C = CI->getContext();
+ ImmutableCallSite CS(CI);
+
+ assert(F && "CallInst has no function associated with it.");
+
+ if (!NewFn) {
+ if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+ F->getName() == "llvm.x86.sse2.loadu.dq" ||
+ F->getName() == "llvm.x86.sse2.loadu.pd") {
+ // Convert to a native, unaligned load.
+ const Type *VecTy = CI->getType();
+ const Type *IntTy = IntegerType::get(C, 128);
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
+ PointerType::getUnqual(IntTy),
+ "cast");
+ LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
+ LI->setAlignment(1); // Unaligned load.
+ BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
+
+ // Fix up all the uses with our new load.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(BC);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
+ F->getName() == "llvm.x86.sse2.movnt.dq" ||
+ F->getName() == "llvm.x86.sse2.movnt.pd" ||
+ F->getName() == "llvm.x86.sse2.movnt.i") {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Module *M = F->getParent();
+ SmallVector<Value *, 1> Elts;
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Arg0,
+ PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ StoreInst *SI = Builder.CreateStore(Arg1, BC);
+ SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setAlignment(16);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ } else {
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+ }
+ return;
+ }
+
+ switch (NewFn->getIntrinsicID()) {
+ case Intrinsic::prefetch: {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
+
+ // Add the extra "data cache" argument
+ Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ llvm::ConstantInt::get(I32Ty, 1) };
+ CallInst *NewCI = CallInst::Create(NewFn, Operands,
+ CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(NewCI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ break;
+ }
+ }
+}
+
+// This tests each Function to determine if it needs upgrading. When we find
+// one we are interested in, we then upgrade all calls to reflect the new
+// function.
+void llvm::UpgradeCallsToIntrinsic(Function* F) {
+ assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
+
+ // Upgrade the function and check if it is a totally new function.
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(F, NewFn)) {
+ if (NewFn != F) {
+ // Replace all uses of the old function with the new one if necessary.
+ for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
+ UI != UE; ) {
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, NewFn);
+ }
+ // Remove old function, no longer used, from the module.
+ F->eraseFromParent();
+ }
+ }
+}
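+
+// Illustrative sketch (hypothetical caller): a reader or front end would
+// typically sweep every function in the module, letting this helper rewrite
+// the calls and drop the obsolete declaration. Note the iterator is advanced
+// before the call because the function may be erased.
+//
+//   for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+//     UpgradeCallsToIntrinsic(&*FI++);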
+
+/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
+/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
+/// strips that use.
+void llvm::CheckDebugInfoIntrinsics(Module *M) {
+ if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
+ while (!FuncStart->use_empty())
+ cast<CallInst>(FuncStart->use_back())->eraseFromParent();
+ FuncStart->eraseFromParent();
+ }
+
+ if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
+ while (!StopPoint->use_empty())
+ cast<CallInst>(StopPoint->use_back())->eraseFromParent();
+ StopPoint->eraseFromParent();
+ }
+
+ if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
+ while (!RegionStart->use_empty())
+ cast<CallInst>(RegionStart->use_back())->eraseFromParent();
+ RegionStart->eraseFromParent();
+ }
+
+ if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
+ while (!RegionEnd->use_empty())
+ cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
+ RegionEnd->eraseFromParent();
+ }
+
+ if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
+ if (!Declare->use_empty()) {
+ DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
+ if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+ !isa<MDNode>(DDI->getArgOperand(1))) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/VMCore/BasicBlock.cpp b/contrib/llvm/lib/VMCore/BasicBlock.cpp
new file mode 100644
index 000000000000..70265c899d7e
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/BasicBlock.cpp
@@ -0,0 +1,346 @@
+//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BasicBlock class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/LeakDetector.h"
+#include "SymbolTableListTraitsImpl.h"
+#include <algorithm>
+using namespace llvm;
+
+ValueSymbolTable *BasicBlock::getValueSymbolTable() {
+ if (Function *F = getParent())
+ return &F->getValueSymbolTable();
+ return 0;
+}
+
+LLVMContext &BasicBlock::getContext() const {
+ return getType()->getContext();
+}
+
+// Explicit instantiation of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
+
+
+BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
+ BasicBlock *InsertBefore)
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (InsertBefore) {
+ assert(NewParent &&
+ "Cannot insert block before another block with no function!");
+ NewParent->getBasicBlockList().insert(InsertBefore, this);
+ } else if (NewParent) {
+ NewParent->getBasicBlockList().push_back(this);
+ }
+
+ setName(Name);
+}
+
+
+BasicBlock::~BasicBlock() {
+ // If the address of the block is taken and it is being deleted (e.g. because
+ // it is dead), this means that there is either a dangling constant expr
+ // hanging off the block, or an undefined use of the block (source code
+ // expecting the address of a label to keep the block alive even though there
+ // is no indirect branch). Handle these cases by zapping the BlockAddress
+ // nodes. There are no other possible uses at this point.
+ if (hasAddressTaken()) {
+ assert(!use_empty() && "There should be at least one blockaddress!");
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
+ while (!use_empty()) {
+ BlockAddress *BA = cast<BlockAddress>(use_back());
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+ }
+
+ assert(getParent() == 0 && "BasicBlock still linked into the program!");
+ dropAllReferences();
+ InstList.clear();
+}
+
+void BasicBlock::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+
+ // Set Parent=parent, updating instruction symtab entries as appropriate.
+ InstList.setSymTabObject(&Parent, parent);
+
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void BasicBlock::removeFromParent() {
+ getParent()->getBasicBlockList().remove(this);
+}
+
+void BasicBlock::eraseFromParent() {
+ getParent()->getBasicBlockList().erase(this);
+}
+
+/// moveBefore - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right before MovePos.
+void BasicBlock::moveBefore(BasicBlock *MovePos) {
+ MovePos->getParent()->getBasicBlockList().splice(MovePos,
+ getParent()->getBasicBlockList(), this);
+}
+
+/// moveAfter - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right after MovePos.
+void BasicBlock::moveAfter(BasicBlock *MovePos) {
+ Function::iterator I = MovePos;
+ MovePos->getParent()->getBasicBlockList().splice(++I,
+ getParent()->getBasicBlockList(), this);
+}
+
+
+TerminatorInst *BasicBlock::getTerminator() {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+const TerminatorInst *BasicBlock::getTerminator() const {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+Instruction* BasicBlock::getFirstNonPHI() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbg() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ BasicBlock::iterator i = begin();
+ for (;; ++i) {
+ if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
+ continue;
+
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
+ if (!II)
+ break;
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ break;
+ }
+ return &*i;
+}
+
+void BasicBlock::dropAllReferences() {
+ for(iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// return the block, otherwise return a null pointer.
+BasicBlock *BasicBlock::getSinglePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *ThePred = *PI;
+ ++PI;
+ return (PI == E) ? ThePred : 0 /*multiple preds*/;
+}
+
+/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// return the block, otherwise return a null pointer.
+/// Note that unique predecessor doesn't mean single edge, there can be
+/// multiple edges from the unique predecessor to this block (for example
+/// a switch statement with multiple cases having the same destination).
+BasicBlock *BasicBlock::getUniquePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *PredBB = *PI;
+ ++PI;
+ for (;PI != E; ++PI) {
+ if (*PI != PredBB)
+ return 0;
+ // The same predecessor appears multiple times in the predecessor list.
+ // This is OK.
+ }
+ return PredBB;
+}
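+
+// Illustrative contrast (hypothetical blocks): if a switch in %A has two cases
+// that both branch to %B, then %B->getSinglePredecessor() returns null (there
+// are two incoming edges) while %B->getUniquePredecessor() still returns %A.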
+
+/// removePredecessor - This method is used to notify a BasicBlock that the
+/// specified Predecessor of the block is no longer able to reach it. This is
+/// not used to update the Predecessor list itself, but rather is used to
+/// update the PHI nodes that reside in the block. Note that this should be
+/// called while the predecessor still refers to this block.
+///
+void BasicBlock::removePredecessor(BasicBlock *Pred,
+ bool DontDeleteUselessPHIs) {
+ assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
+ find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
+ "removePredecessor: BB is not a predecessor!");
+
+ if (InstList.empty()) return;
+ PHINode *APN = dyn_cast<PHINode>(&front());
+ if (!APN) return; // Quick exit.
+
+ // If there are exactly two predecessors, then we want to nuke the PHI nodes
+ // altogether. However, we cannot do this in the following case:
+ //
+ // Loop:
+ // %x = phi [X, Loop]
+ // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1
+ // br Loop ;; %x2 does not dominate all uses
+ //
+ // This is because the PHI node input is actually taken from the predecessor
+ // basic block. The only case this can happen is with a self loop, so we
+ // check for this case explicitly now.
+ //
+ unsigned max_idx = APN->getNumIncomingValues();
+ assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
+ if (max_idx == 2) {
+ BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);
+
+ // Disable PHI elimination!
+ if (this == Other) max_idx = 3;
+ }
+
+ // <= Two predecessors BEFORE I remove one?
+ if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+ // Yup, loop through and nuke the PHI nodes
+ while (PHINode *PN = dyn_cast<PHINode>(&front())) {
+ // Remove the predecessor first.
+ PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+
+ // If the PHI _HAD_ two incoming values, replace it with its now *single* value
+ if (max_idx == 2) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ // We are left with an infinite loop with no entries: kill the PHI.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ getInstList().pop_front(); // Remove the PHI node
+ }
+
+ // If the PHI node already only had one entry, it got deleted by
+ // removeIncomingValue.
+ }
+ } else {
+ // Okay, now we know that we need to remove the entry for Pred from all
+ // PHI nodes. Iterate over each PHI node, fixing them up.
+ PHINode *PN;
+ for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
+ ++II;
+ PN->removeIncomingValue(Pred, false);
+ // If all incoming values to the Phi are the same, we can replace the Phi
+ // with that value.
+ Value* PNV = 0;
+ if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+ if (PNV != PN) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+}
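+
+// Illustrative sketch (hypothetical names): a transform deleting the CFG edge
+// Pred -> BB is expected to fix up BB's PHI nodes first, while Pred still
+// refers to BB, and only then rewrite Pred's terminator.
+//
+//   BB->removePredecessor(Pred);
+//   // ... now replace or erase the branch from Pred to BB ...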
+
+
+/// splitBasicBlock - This splits a basic block into two at the specified
+/// instruction. Note that all instructions BEFORE the specified iterator stay
+/// as part of the original basic block, an unconditional branch is added to
+/// the new BB, and the rest of the instructions in the BB are moved to the new
+/// BB, including the old terminator. This invalidates the iterator.
+///
+/// Note that this only works on well formed basic blocks (must have a
+/// terminator), and 'I' must not be the end of instruction list (which would
+/// cause a degenerate basic block to be formed, having a terminator inside of
+/// the basic block).
+///
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
+ assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
+ assert(I != InstList.end() &&
+ "Trying to get me to create degenerate basic block!");
+
+ BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
+ .getNodePtrUnchecked();
+ BasicBlock *New = BasicBlock::Create(getContext(), BBName,
+ getParent(), InsertBefore);
+
+ // Move all of the specified instructions from the original basic block into
+ // the new basic block.
+ New->getInstList().splice(New->end(), this->getInstList(), I, end());
+
+ // Add a branch instruction to the newly formed basic block.
+ BranchInst::Create(New, this);
+
+ // Now we must loop through all of the successors of the New block (which
+ // _were_ the successors of the 'this' block), and update any PHI nodes in
+ // successors. If there were PHI nodes in the successors, then they need to
+ // know that incoming branches will be from New, not from Old.
+ //
+ for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
+ // Loop over any phi nodes in the basic block, updating the BB field of
+ // incoming values...
+ BasicBlock *Successor = *I;
+ PHINode *PN;
+ for (BasicBlock::iterator II = Successor->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ int IDX = PN->getBasicBlockIndex(this);
+ while (IDX != -1) {
+ PN->setIncomingBlock((unsigned)IDX, New);
+ IDX = PN->getBasicBlockIndex(this);
+ }
+ }
+ }
+ return New;
+}
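+
+// Illustrative sketch (hypothetical values): splitting before instruction I
+// leaves everything before I in the original block, which then ends in an
+// unconditional branch to the new block holding I and the old terminator.
+//
+//   BasicBlock *Tail = BB->splitBasicBlock(I, "split");
+//   // BB:   ... br label %split
+//   // Tail: I ... <old terminator>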
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+ TerminatorInst *TI = getTerminator();
+ if (!TI)
+ // Cope with being called on a BasicBlock that doesn't have a terminator
+ // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
+ return;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II);
+ ++II) {
+ int i;
+ while ((i = PN->getBasicBlockIndex(this)) >= 0)
+ PN->setIncomingBlock(i, New);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/VMCore/ConstantFold.cpp b/contrib/llvm/lib/VMCore/ConstantFold.cpp
new file mode 100644
index 000000000000..323e2a280999
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ConstantFold.cpp
@@ -0,0 +1,2344 @@
+//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements folding of constants for LLVM. This implements the
+// (internal) ConstantFold.h interface, which is used by the
+// ConstantExpr::get* methods to automatically fold constants when possible.
+//
+// The current constant folding is implemented in two pieces: the
+// pieces that don't need TargetData, and the pieces that do. This is to avoid
+// a dependence in VMCore on Target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConstantFold.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Operator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ConstantFold*Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+/// BitCastConstantVector - Convert the specified ConstantVector node to the
+/// specified vector type. At this point, we know that the elements of the
+/// input vector constant are all simple integer or FP values.
+static Constant *BitCastConstantVector(ConstantVector *CV,
+ const VectorType *DstTy) {
+
+ if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
+ if (CV->isNullValue()) return Constant::getNullValue(DstTy);
+
+ // If this cast changes element count then we can't handle it here:
+ // doing so requires endianness information. This should be handled by
+ // Analysis/ConstantFolding.cpp
+ unsigned NumElts = DstTy->getNumElements();
+ if (NumElts != CV->getNumOperands())
+ return 0;
+
+ // Check to verify that all elements of the input are simple.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!isa<ConstantInt>(CV->getOperand(i)) &&
+ !isa<ConstantFP>(CV->getOperand(i)))
+ return 0;
+ }
+
+ // Bitcast each element now.
+ std::vector<Constant*> Result;
+ const Type *DstEltTy = DstTy->getElementType();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i),
+ DstEltTy));
+ return ConstantVector::get(Result);
+}
+
+/// This function determines which opcode to use to fold two constant cast
+/// expressions together. It uses CastInst::isEliminableCastPair to determine
+/// the opcode. Consequently it's just a wrapper around that function.
+/// @brief Determine if it is valid to fold a cast of a cast
+static unsigned
+foldConstantCastPair(
+ unsigned opc, ///< opcode of the second cast constant expression
+ ConstantExpr *Op, ///< the first cast constant expression
+ const Type *DstTy ///< destination type of the second cast
+) {
+ assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
+ assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
+ assert(CastInst::isCast(opc) && "Invalid cast opcode");
+
+ // Get the types and opcodes for the two cast constant expressions.
+ const Type *SrcTy = Op->getOperand(0)->getType();
+ const Type *MidTy = Op->getType();
+ Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opc);
+
+ // Let CastInst::isEliminableCastPair do the heavy lifting.
+ return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
+ Type::getInt64Ty(DstTy->getContext()));
+}
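+
+// Illustrative example: folding (trunc (zext i8 %x to i64) to i16) on a
+// constant. isEliminableCastPair reduces the zext/trunc pair to a single
+// zext i8 -> i16, and that opcode is what this helper returns; a return of 0
+// means the pair cannot be collapsed.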
+
+static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
+ const Type *SrcTy = V->getType();
+ if (SrcTy == DestTy)
+ return V; // no-op cast
+
+ // Check to see if we are casting a pointer to an aggregate to a pointer to
+ // the first element. If so, return the appropriate GEP instruction.
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy))
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
+ SmallVector<Value*, 8> IdxList;
+ Value *Zero =
+ Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
+ IdxList.push_back(Zero);
+ const Type *ElTy = PTy->getElementType();
+ while (ElTy != DPTy->getElementType()) {
+ if (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (STy->getNumElements() == 0) break;
+ ElTy = STy->getElementType(0);
+ IdxList.push_back(Zero);
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(ElTy)) {
+ if (ElTy->isPointerTy()) break; // Can't index into pointers!
+ ElTy = STy->getElementType();
+ IdxList.push_back(Zero);
+ } else {
+ break;
+ }
+ }
+
+ if (ElTy == DPTy->getElementType())
+ // This GEP is inbounds because all indices are zero.
+ return ConstantExpr::getInBoundsGetElementPtr(V, &IdxList[0],
+ IdxList.size());
+ }
+
+ // Handle casts from one vector constant to another. We know that the src
+ // and dest type have the same size (otherwise it's an illegal cast).
+ if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
+ "Not cast between same sized vectors!");
+ SrcTy = NULL;
+ // First, check for null. Undef is already handled.
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(DestTy);
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return BitCastConstantVector(CV, DestPTy);
+ }
+
+ // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
+ // This allows for other simplifications (although some of them
+ // can only be handled by Analysis/ConstantFolding.cpp).
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
+ return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
+ }
+
+ // Finally, implement bitcast folding now. The code below doesn't handle
+ // all bitcasts correctly.
+ if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+
+ // Handle integral constant input.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (DestTy->isIntegerTy())
+ // Integral -> Integral. This is a no-op because the bit widths must
+ // be the same. Consequently, we just fold to V.
+ return V;
+
+ if (DestTy->isFloatingPointTy())
+ return ConstantFP::get(DestTy->getContext(),
+ APFloat(CI->getValue(),
+ !DestTy->isPPC_FP128Ty()));
+
+ // Otherwise, can't fold this (vector?)
+ return 0;
+ }
+
+ // Handle ConstantFP input: FP -> Integral.
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ return ConstantInt::get(FP->getContext(),
+ FP->getValueAPF().bitcastToAPInt());
+
+ return 0;
+}
+
+
+/// ExtractConstantBytes - V is an integer constant which only has a subset of
+/// its bytes used. The bytes used are indicated by ByteStart (which is the
+/// first byte used, counting from the least significant byte) and ByteSize,
+/// which is the number of bytes used.
+///
+/// This function analyzes the specified constant to see if the specified byte
+/// range can be returned as a simplified constant. If so, the constant is
+/// returned, otherwise null is returned.
+///
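+/// For example (hypothetical value): given the i32 ConstantInt 0xAABBCCDD,
+/// ByteStart=1 and ByteSize=2 amount to a logical shift right by 8 followed
+/// by a truncation to 16 bits, yielding the i16 constant 0xBBCC.
+///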
+static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
+ unsigned ByteSize) {
+ assert(C->getType()->isIntegerTy() &&
+ (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
+ "Non-byte sized integer input");
+ unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
+ assert(ByteSize && "Must be accessing some piece");
+ assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input");
+ assert(ByteSize != CSize && "Should not extract everything");
+
+ // Constant Integers are simple.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ APInt V = CI->getValue();
+ if (ByteStart)
+ V = V.lshr(ByteStart*8);
+ V = V.trunc(ByteSize*8);
+ return ConstantInt::get(CI->getContext(), V);
+ }
+
+ // If the input is a constant expr, we might be able to recursively simplify.
+ // If not, we definitely can't do anything.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE == 0) return 0;
+
+ switch (CE->getOpcode()) {
+ default: return 0;
+ case Instruction::Or: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X | -1 -> -1.
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
+ if (RHSC->isAllOnesValue())
+ return RHSC;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getOr(LHS, RHS);
+ }
+ case Instruction::And: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X & 0 -> 0.
+ if (RHS->isNullValue())
+ return RHS;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getAnd(LHS, RHS);
+ }
+ case Instruction::LShr: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart >= CSize-ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart+ByteSize+ShAmt <= CSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::Shl: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart+ByteSize <= ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart >= ShAmt)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::ZExt: {
+ unsigned SrcBitSize =
+ cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
+
+ // If extracting something that is completely zero, return 0.
+ if (ByteStart*8 >= SrcBitSize)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+
+ // If exactly extracting the input, return it.
+ if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
+ return CE->getOperand(0);
+
+ // If extracting something completely in the input, and the input is a
+ // multiple of 8 bits, recurse.
+ if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
+
+ // Otherwise, if extracting a subset of the input, which is not a multiple of
+ // 8 bits, do a shift and trunc to get the bits.
+ if ((ByteStart+ByteSize)*8 < SrcBitSize) {
+ assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
+ Constant *Res = CE->getOperand(0);
+ if (ByteStart)
+ Res = ConstantExpr::getLShr(Res,
+ ConstantInt::get(Res->getType(), ByteStart*8));
+ return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(),
+ ByteSize*8));
+ }
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+ }
+}
+
+/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
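+/// For example (hypothetical type): for [8 x %T] this returns
+/// sizeof(%T) * 8 as an NUW multiply, exposing the factor of 8 to later
+/// folding instead of leaving an opaque sizeof([8 x %T]).
+///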
+static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
+ bool Folded) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has size zero.
+ if (NumElems == 0)
+ return ConstantExpr::getNullValue(DestTy);
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantInt::get(DestTy, NumElems);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // Pointer size doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular sizeof expression.
+ Constant *C = ConstantExpr::getSizeOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
+ bool Folded) {
+ // The alignment of an array is equal to the alignment of the
+ // array element. Note that this is not always true for vectors.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy,
+ false),
+ C, DestTy);
+ return C;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Packed structs always have an alignment of 1.
+ if (STy->isPacked())
+ return ConstantInt::get(DestTy, 1);
+
+ // Otherwise, struct alignment is the maximum alignment of any member.
+ // Without target data, we can't compare much, but we can check to see
+ // if all the members have the same alignment.
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has minimal alignment.
+ if (NumElems == 0)
+ return ConstantInt::get(DestTy, 1);
+ // Check for a struct with all members having the same alignment.
+ Constant *MemberAlign =
+ getFoldedAlignOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberAlign;
+ }
+
+ // Pointer alignment doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
+ 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular alignof expression.
+ Constant *C = ConstantExpr::getAlignOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof
+/// on Ty and FieldNo, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
+ const Type *DestTy,
+ bool Folded) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
+ DestTy, false),
+ FieldNo, DestTy);
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has no members.
+ if (NumElems == 0)
+ return 0;
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
+ false,
+ DestTy,
+ false),
+ FieldNo, DestTy);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular offsetof expression.
+ Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
+ const Type *DestTy) {
+ if (isa<UndefValue>(V)) {
+ // zext(undef) = 0, because the top bits will be zero.
+ // sext(undef) = 0, because the top bits will all be the same.
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (opc == Instruction::ZExt || opc == Instruction::SExt ||
+ opc == Instruction::UIToFP || opc == Instruction::SIToFP)
+ return Constant::getNullValue(DestTy);
+ return UndefValue::get(DestTy);
+ }
+
+ // No compile-time operations on this type yet.
+ if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty())
+ return 0;
+
+ if (V->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+
+ // If the cast operand is a constant expression, there are a few things we can
+ // do to try to simplify it.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->isCast()) {
+ // Try hard to fold cast of cast because they are often eliminable.
+ if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
+ return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
+ } else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // If all of the indexes in the GEP are null values, there is no pointer
+ // adjustment going on. We might as well cast the source pointer.
+ bool isAllNull = true;
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!CE->getOperand(i)->isNullValue()) {
+ isAllNull = false;
+ break;
+ }
+ if (isAllNull)
+ // This is casting one pointer type to another, always BitCast
+ return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
+ }
+ }
+
+ // If the cast operand is a constant vector, perform the cast by
+ // operating on each element. In the case of bitcasts, the element
+ // count may be mismatched; don't attempt to handle that here.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ if (DestTy->isVectorTy() &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ CV->getType()->getNumElements()) {
+ std::vector<Constant*> res;
+ const VectorType *DestVecTy = cast<VectorType>(DestTy);
+ const Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc,
+ CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(res);
+ }
+
+ // We actually have to do a cast now. Perform the cast according to the
+ // opcode specified.
+ switch (opc) {
+ default:
+ llvm_unreachable("Failed to cast constant expression");
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ bool ignored;
+ APFloat Val = FPC->getValueAPF();
+ Val.convert(DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ DestTy->isDoubleTy() ? APFloat::IEEEdouble :
+ DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
+ DestTy->isFP128Ty() ? APFloat::IEEEquad :
+ APFloat::Bogus,
+ APFloat::rmNearestTiesToEven, &ignored);
+ return ConstantFP::get(V->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ const APFloat &V = FPC->getValueAPF();
+ bool ignored;
+ uint64_t x[2];
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ APInt Val(DestBitWidth, 2, x);
+ return ConstantInt::get(FPC->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::IntToPtr: // always treated as unsigned
+ if (V->isNullValue()) // Is it an integral null value?
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+ return 0; // Other pointer types cannot be cast
+ case Instruction::PtrToInt: // always treated as unsigned
+ // Is it a null pointer value?
+ if (V->isNullValue())
+ return ConstantInt::get(DestTy, 0);
+ // If this is a sizeof-like expression, pull out multiplications by
+ // known factors to expose them to subsequent folding. If it's an
+ // alignof-like expression, factor out known factors.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (CE->getNumOperands() == 2) {
+ // Handle a sizeof-like expression.
+ Constant *Idx = CE->getOperand(1);
+ bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
+ if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
+ Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
+ DestTy, false),
+ Idx, DestTy);
+ return ConstantExpr::getMul(C, Idx);
+ }
+ } else if (CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ // Handle an alignof-like expression.
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
+ }
+ }
+ // Handle an offsetof-like expression.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
+ DestTy, false))
+ return C;
+ }
+ }
+ }
+ // Other pointer types cannot be cast
+ return 0;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt api = CI->getValue();
+ APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), true);
+ (void)apf.convertFromAPInt(api,
+ opc==Instruction::SIToFP,
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(V->getContext(), apf);
+ }
+ return 0;
+ case Instruction::ZExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().zext(BitWidth));
+ }
+ return 0;
+ case Instruction::SExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().sext(BitWidth));
+ }
+ return 0;
+ case Instruction::Trunc: {
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().trunc(DestBitWidth));
+ }
+
+ // The input must be a constantexpr. See if we can simplify this based on
+ // the bytes we are demanding. Only do this if the source and dest are an
+ // even multiple of a byte.
+ if ((DestBitWidth & 7) == 0 &&
+ (cast<IntegerType>(V->getType())->getBitWidth() & 7) == 0)
+ if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8))
+ return Res;
+
+ return 0;
+ }
+ case Instruction::BitCast:
+ return FoldBitCast(V, DestTy);
+ }
+}
+
+Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
+ return CB->getZExtValue() ? V1 : V2;
+
+ // Check for zero aggregate and ConstantVector of zeros
+ if (Cond->isNullValue()) return V2;
+
+ if (ConstantVector* CondV = dyn_cast<ConstantVector>(Cond)) {
+
+ if (CondV->isAllOnesValue()) return V1;
+
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
+ ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
+
+ if ((CP1 || isa<ConstantAggregateZero>(V1)) &&
+ (CP2 || isa<ConstantAggregateZero>(V2))) {
+
+ // Find the element type of the returned vector
+ const Type *EltTy = VTy->getElementType();
+ unsigned NumElem = VTy->getNumElements();
+ std::vector<Constant*> Res(NumElem);
+
+ bool Valid = true;
+ for (unsigned i = 0; i < NumElem; ++i) {
+ ConstantInt* c = dyn_cast<ConstantInt>(CondV->getOperand(i));
+ if (!c) {
+ Valid = false;
+ break;
+ }
+ Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res[i] = c->getZExtValue() ? C1 : C2;
+ }
+ // If we were able to build the vector, return it
+ if (Valid) return ConstantVector::get(Res);
+ }
+ }
+
+
+ if (isa<UndefValue>(Cond)) {
+ if (isa<UndefValue>(V1)) return V1;
+ return V2;
+ }
+ if (isa<UndefValue>(V1)) return V2;
+ if (isa<UndefValue>(V2)) return V1;
+ if (V1 == V2) return V1;
+
+ if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
+ if (TrueVal->getOpcode() == Instruction::Select)
+ if (TrueVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
+ }
+ if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
+ if (FalseVal->getOpcode() == Instruction::Select)
+ if (FalseVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
+ }
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
+ Constant *Idx) {
+ if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
+ return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
+ if (Val->isNullValue()) // ee(zero, x) -> zero
+ return Constant::getNullValue(
+ cast<VectorType>(Val->getType())->getElementType());
+
+ if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ return CVal->getOperand(CIdx->getZExtValue());
+ } else if (isa<UndefValue>(Idx)) {
+ // ee({w,x,y,z}, undef) -> w (an arbitrary value).
+ return CVal->getOperand(0);
+ }
+ }
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
+ Constant *Elt,
+ Constant *Idx) {
+ ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
+ if (!CIdx) return 0;
+ APInt idxVal = CIdx->getValue();
+ if (isa<UndefValue>(Val)) {
+ // Insertion of scalar constant into vector undef
+ // Optimize away insertion of undef
+ if (isa<UndefValue>(Elt))
+ return Val;
+ // Otherwise break the aggregate undef into multiple undefs and do
+ // the insertion
+ unsigned numOps =
+ cast<VectorType>(Val->getType())->getNumElements();
+ std::vector<Constant*> Ops;
+ Ops.reserve(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ Constant *Op =
+ (idxVal == i) ? Elt : UndefValue::get(Elt->getType());
+ Ops.push_back(Op);
+ }
+ return ConstantVector::get(Ops);
+ }
+ if (isa<ConstantAggregateZero>(Val)) {
+ // Insertion of scalar constant into vector aggregate zero
+ // Optimize away insertion of zero
+ if (Elt->isNullValue())
+ return Val;
+ // Otherwise break the aggregate zero into multiple zeros and do
+ // the insertion
+ unsigned numOps =
+ cast<VectorType>(Val->getType())->getNumElements();
+ std::vector<Constant*> Ops;
+ Ops.reserve(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ Constant *Op =
+ (idxVal == i) ? Elt : Constant::getNullValue(Elt->getType());
+ Ops.push_back(Op);
+ }
+ return ConstantVector::get(Ops);
+ }
+ if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ // Insertion of scalar constant into vector constant
+ std::vector<Constant*> Ops;
+ Ops.reserve(CVal->getNumOperands());
+ for (unsigned i = 0; i < CVal->getNumOperands(); ++i) {
+ Constant *Op =
+ (idxVal == i) ? Elt : cast<Constant>(CVal->getOperand(i));
+ Ops.push_back(Op);
+ }
+ return ConstantVector::get(Ops);
+ }
+
+ return 0;
+}
+
+/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef
+/// return the specified element value. Otherwise return null.
+static Constant *GetVectorElement(Constant *C, unsigned EltNo) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C))
+ return CV->getOperand(EltNo);
+
+ const Type *EltTy = cast<VectorType>(C->getType())->getElementType();
+ if (isa<ConstantAggregateZero>(C))
+ return Constant::getNullValue(EltTy);
+ if (isa<UndefValue>(C))
+ return UndefValue::get(EltTy);
+ return 0;
+}
+
+Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
+ Constant *V2,
+ Constant *Mask) {
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
+
+ unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
+ unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
+ const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+
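+  // Each mask element selects from the concatenation of the two sources:
+  // indices [0, SrcNumElts) read V1, [SrcNumElts, 2*SrcNumElts) read V2, and
+  // anything beyond that range yields undef.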
+ // Loop over the shuffle mask, evaluating each element.
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ Constant *InElt = GetVectorElement(Mask, i);
+ if (InElt == 0) return 0;
+
+ if (isa<UndefValue>(InElt))
+ InElt = UndefValue::get(EltTy);
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(InElt)) {
+ unsigned Elt = CI->getZExtValue();
+ if (Elt >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (Elt >= SrcNumElts)
+ InElt = GetVectorElement(V2, Elt - SrcNumElts);
+ else
+ InElt = GetVectorElement(V1, Elt);
+ if (InElt == 0) return 0;
+ } else {
+ // Unknown value.
+ return 0;
+ }
+ Result.push_back(InElt);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so return the entire value.
+ if (Idxs.empty())
+ return Agg;
+
+ if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
+ return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs));
+
+ if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0
+ return
+ Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs));
+
+ // Otherwise recurse.
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
+ return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]),
+ Idxs.slice(1));
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
+ return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]),
+ Idxs.slice(1));
+ ConstantVector *CV = cast<ConstantVector>(Agg);
+ return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]),
+ Idxs.slice(1));
+}
+
+Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
+ Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so replace the entire value.
+ if (Idxs.empty())
+ return Val;
+
+ if (isa<UndefValue>(Agg)) {
+ // Insertion of constant into aggregate undef
+ // Optimize away insertion of undef.
+ if (isa<UndefValue>(Val))
+ return Agg;
+
+ // Otherwise break the aggregate undef into multiple undefs and do
+ // the insertion.
+ const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ unsigned numOps;
+ if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ numOps = AR->getNumElements();
+ else
+ numOps = cast<StructType>(AggTy)->getNumElements();
+
+ std::vector<Constant*> Ops(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ Constant *Op =
+ (Idxs[0] == i) ?
+ ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
+ Val, Idxs.slice(1)) :
+ UndefValue::get(MemberTy);
+ Ops[i] = Op;
+ }
+
+ if (const StructType* ST = dyn_cast<StructType>(AggTy))
+ return ConstantStruct::get(ST, Ops);
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ }
+
+ if (isa<ConstantAggregateZero>(Agg)) {
+ // Insertion of constant into aggregate zero
+ // Optimize away insertion of zero.
+ if (Val->isNullValue())
+ return Agg;
+
+ // Otherwise break the aggregate zero into multiple zeros and do
+ // the insertion.
+ const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ unsigned numOps;
+ if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ numOps = AR->getNumElements();
+ else
+ numOps = cast<StructType>(AggTy)->getNumElements();
+
+ std::vector<Constant*> Ops(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ Constant *Op =
+ (Idxs[0] == i) ?
+ ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
+ Val, Idxs.slice(1)) :
+ Constant::getNullValue(MemberTy);
+ Ops[i] = Op;
+ }
+
+ if (const StructType *ST = dyn_cast<StructType>(AggTy))
+ return ConstantStruct::get(ST, Ops);
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ }
+
+ if (isa<ConstantStruct>(Agg) || isa<ConstantArray>(Agg)) {
+ // Insertion of constant into aggregate constant.
+ std::vector<Constant*> Ops(Agg->getNumOperands());
+ for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
+ Constant *Op = cast<Constant>(Agg->getOperand(i));
+ if (Idxs[0] == i)
+ Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1));
+ Ops[i] = Op;
+ }
+
+ if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(ST, Ops);
+ return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
+ }
+
+ return 0;
+}
+
+
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
+ Constant *C1, Constant *C2) {
+ // No compile-time operations on this type yet.
+ if (C1->getType()->isPPC_FP128Ty())
+ return 0;
+
+ // Handle UndefValue up front.
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ switch (Opcode) {
+ case Instruction::Xor:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return Constant::getNullValue(C1->getType());
+ // Fallthrough
+ case Instruction::Add:
+ case Instruction::Sub:
+ return UndefValue::get(C1->getType());
+ case Instruction::And:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
+ return C1;
+ return Constant::getNullValue(C1->getType()); // undef & X -> 0
+ case Instruction::Mul: {
+ ConstantInt *CI;
+ // X * undef -> undef if X is odd or undef
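+      // (An odd multiplier is invertible mod 2^n, so the product can take any
+      // value; an even multiplier forces known-zero low bits, so we fold to 0
+      // below instead.)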
+ if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
+ ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(C1->getType());
+
+ // X * undef -> 0 otherwise
+ return Constant::getNullValue(C1->getType());
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ // undef / 1 -> undef
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
+ if (CI2->isOne())
+ return C1;
+ // FALL THROUGH
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (!isa<UndefValue>(C2)) // undef / X -> 0
+ return Constant::getNullValue(C1->getType());
+ return C2; // X / undef -> undef
+ case Instruction::Or: // X | undef -> -1
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
+ return C1;
+ return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
+ case Instruction::LShr:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef lshr undef -> undef
+ return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
+ // undef lshr X -> 0
+ case Instruction::AShr:
+ if (!isa<UndefValue>(C2)) // undef ashr X --> all ones
+ return Constant::getAllOnesValue(C1->getType());
+ else if (isa<UndefValue>(C1))
+ return C1; // undef ashr undef -> undef
+ else
+ return C1; // X ashr undef --> X
+ case Instruction::Shl:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef shl undef -> undef
+ // undef << X -> 0 or X << undef -> 0
+ return Constant::getNullValue(C1->getType());
+ }
+ }
+
+ // Handle simplifications when the RHS is a constant int.
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ if (CI2->equalsInt(0)) return C1; // X + 0 == X
+ break;
+ case Instruction::Sub:
+ if (CI2->equalsInt(0)) return C1; // X - 0 == X
+ break;
+ case Instruction::Mul:
+ if (CI2->equalsInt(0)) return C2; // X * 0 == 0
+ if (CI2->equalsInt(1))
+ return C1; // X * 1 == X
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ if (CI2->equalsInt(1))
+ return C1; // X / 1 == X
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X / 0 == undef
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (CI2->equalsInt(1))
+ return Constant::getNullValue(CI2->getType()); // X % 1 == 0
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X % 0 == undef
+ break;
+ case Instruction::And:
+ if (CI2->isZero()) return C2; // X & 0 == 0
+ if (CI2->isAllOnesValue())
+ return C1; // X & -1 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
+ if (CE1->getOpcode() == Instruction::ZExt) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth =
+ CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
+ if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
+ return C1;
+ }
+
+ // If and'ing the address of a global with a constant, fold it.
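+      // e.g. if @G is known to be at least 4-byte aligned, then
+      // (ptrtoint @G) & 3 folds to 0 because the low address bits are clear.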
+ if (CE1->getOpcode() == Instruction::PtrToInt &&
+ isa<GlobalValue>(CE1->getOperand(0))) {
+ GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
+
+ // Functions are at least 4-byte aligned.
+ unsigned GVAlign = GV->getAlignment();
+ if (isa<Function>(GV))
+ GVAlign = std::max(GVAlign, 4U);
+
+ if (GVAlign > 1) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
+ APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
+
+ // If checking bits we know are clear, return zero.
+ if ((CI2->getValue() & BitsNotSet) == CI2->getValue())
+ return Constant::getNullValue(CI2->getType());
+ }
+ }
+ }
+ break;
+ case Instruction::Or:
+ if (CI2->equalsInt(0)) return C1; // X | 0 == X
+ if (CI2->isAllOnesValue())
+ return C2; // X | -1 == -1
+ break;
+ case Instruction::Xor:
+ if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ switch (CE1->getOpcode()) {
+ default: break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // cmp pred ^ true -> cmp !pred
+ assert(CI2->equalsInt(1));
+ CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
+ pred = CmpInst::getInversePredicate(pred);
+ return ConstantExpr::getCompare(pred, CE1->getOperand(0),
+ CE1->getOperand(1));
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
+ if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
+ return ConstantExpr::getLShr(C1, C2);
+ break;
+ }
+ } else if (isa<ConstantInt>(C1)) {
+ // If C1 is a ConstantInt and C2 is not, swap the operands.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantExpr::get(Opcode, C2, C1);
+ }
+
+ // At this point we know neither constant is an UndefValue.
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ using namespace APIntOps;
+ const APInt &C1V = CI1->getValue();
+ const APInt &C2V = CI2->getValue();
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ return ConstantInt::get(CI1->getContext(), C1V + C2V);
+ case Instruction::Sub:
+ return ConstantInt::get(CI1->getContext(), C1V - C2V);
+ case Instruction::Mul:
+ return ConstantInt::get(CI1->getContext(), C1V * C2V);
+ case Instruction::UDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
+ case Instruction::SDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
+ case Instruction::URem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
+ case Instruction::SRem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
+ case Instruction::And:
+ return ConstantInt::get(CI1->getContext(), C1V & C2V);
+ case Instruction::Or:
+ return ConstantInt::get(CI1->getContext(), C1V | C2V);
+ case Instruction::Xor:
+ return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
+ case Instruction::Shl: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::LShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::AShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (CI1->equalsInt(0)) return C1;
+ break;
+ default:
+ break;
+ }
+ } else if (ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
+ if (ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
+ APFloat C1V = CFP1->getValueAPF();
+ APFloat C2V = CFP2->getValueAPF();
+ APFloat C3V = C1V; // copy for modification
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FAdd:
+ (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FSub:
+ (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FMul:
+ (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FDiv:
+ (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FRem:
+ (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ }
+ }
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
+ ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
+ ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
+ if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
+ (CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
+ std::vector<Constant*> Res;
+ const Type* EltTy = VTy->getElementType();
+ Constant *C1 = 0;
+ Constant *C2 = 0;
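+      // Fold the operation element by element; a ConstantAggregateZero
+      // operand contributes a null element of EltTy at each position.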
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAdd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FAdd:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFAdd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Sub:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSub(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FSub:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFSub(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Mul:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getMul(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FMul:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFMul(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::UDiv:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getUDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::SDiv:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FDiv:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::URem:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getURem(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::SRem:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSRem(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FRem:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFRem(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::And:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAnd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Or:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getOr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Xor:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getXor(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::LShr:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getLShr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::AShr:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAShr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Shl:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getShl(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ }
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // There are many possible foldings we could do here. We should probably
+ // at least fold add of a pointer with an integer into the appropriate
+ // getelementptr. This will improve alias analysis a bit.
+
+ // Given ((a + b) + c), if (b + c) folds to something interesting, return
+ // (a + (b + c)).
+ if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
+ Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
+ if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
+ return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
+ }
+ } else if (isa<ConstantExpr>(C2)) {
+ // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // other way if possible.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantFoldBinaryInstruction(Opcode, C2, C1);
+ }
+
+ // i1 can be simplified in many cases.
+ if (C1->getType()->isIntegerTy(1)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ return ConstantExpr::getXor(C1, C2);
+ case Instruction::Mul:
+ return ConstantExpr::getAnd(C1, C2);
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // We can assume that C2 == 0. If it were one the result would be
+ // undefined because the shift value is as large as the bitwidth.
+ return C1;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return C1;
+ case Instruction::URem:
+ case Instruction::SRem:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return ConstantInt::getFalse(C1->getContext());
+ default:
+ break;
+ }
+ }
+
+ // We don't know how to fold this.
+ return 0;
+}
+
+/// isMaybeZeroSizedType - This type may be zero sized if it's an array or
+/// structure of zero sized types, or an opaque structure whose size is not
+/// known. The only leaf zero sized type is an empty structure.
+static bool isMaybeZeroSizedType(const Type *Ty) {
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (STy->isOpaque()) return true; // Can't say.
+
+ // If all of elements have zero size, this does too.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
+ return true;
+
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ return isMaybeZeroSizedType(ATy->getElementType());
+ }
+ return false;
+}
+
+/// IdxCompare - Compare the two constants as though they were getelementptr
+/// indices. This allows coercion of the types to be the same thing.
+///
+/// If the two constants are the "same" (after coercion), return 0. If the
+/// first is less than the second, return -1, if the second is less than the
+/// first, return 1. If the constants are not integral, return -2.
+///
+static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
+ if (C1 == C2) return 0;
+
+ // Ok, we found a different index. If they are not ConstantInt, we can't do
+ // anything with them.
+ if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
+ return -2; // don't know!
+
+ // Ok, we have two differing integer indices. Sign extend them to be the same
+  // type. An i64 is always big enough, so we use it.
+ if (!C1->getType()->isIntegerTy(64))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+
+ if (!C2->getType()->isIntegerTy(64))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
+
+ if (C1 == C2) return 0; // They are equal
+
+ // If the type being indexed over is really just a zero sized type, there is
+ // no pointer difference being made here.
+ if (isMaybeZeroSizedType(ElTy))
+ return -2; // dunno.
+
+ // If they are really different, now that they are the same type, then we
+ // found a difference!
+ if (cast<ConstantInt>(C1)->getSExtValue() <
+ cast<ConstantInt>(C2)->getSExtValue())
+ return -1;
+ else
+ return 1;
+}
+
+/// evaluateFCmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like ConstantFP comparisons, but should instead handle ConstantExprs.
+/// If we can determine that the two constants have a particular relation to
+/// each other, we should return the corresponding FCmpInst predicate,
+/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
+/// ConstantFoldCompareInstruction.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider ConstantFP
+/// to be the simplest, and ConstantExprs to be the most complex.
+static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare values of different types!");
+
+ // No compile-time operations on this type yet.
+ if (V1->getType()->isPPC_FP128Ty())
+ return FCmpInst::BAD_FCMP_PREDICATE;
+
+ // Handle degenerate case quickly
+ if (V1 == V2) return FCmpInst::FCMP_OEQ;
+
+ if (!isa<ConstantExpr>(V1)) {
+ if (!isa<ConstantExpr>(V2)) {
+      // We distilled this down to a simple case; use the standard constant
+      // folder to check the few orderings we care about.
+ ConstantInt *R = 0;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OEQ;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OLT;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OGT;
+
+ // Nothing more we can do
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ }
+
+    // If the first operand is simple and the second is a ConstantExpr, swap operands.
+ FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
+ if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
+ return FCmpInst::getSwappedPredicate(SwappedRelation);
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ switch (CE1->getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ // We might be able to do something with these but we don't right now.
+ break;
+ default:
+ break;
+ }
+ }
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ return FCmpInst::BAD_FCMP_PREDICATE;
+}
+
+/// evaluateICmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like integer comparisons, but should instead handle ConstantExprs
+/// and GlobalValues. If we can determine that the two constants have a
+/// particular relation to each other, we should return the corresponding ICmp
+/// predicate, otherwise return ICmpInst::BAD_ICMP_PREDICATE.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider simple
+/// constants (like ConstantInt) to be the simplest, followed by
+/// GlobalValues, followed by ConstantExpr's (the most complex).
+///
+static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
+ bool isSigned) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare different types of values!");
+ if (V1 == V2) return ICmpInst::ICMP_EQ;
+
+ if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) &&
+ !isa<BlockAddress>(V1)) {
+ if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) &&
+ !isa<BlockAddress>(V2)) {
+ // We distilled this down to a simple case, use the standard constant
+ // folder.
+ ConstantInt *R = 0;
+ ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+
+ // If we couldn't figure it out, bail.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // If the first operand is simple, swap operands.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it's a
+ // ConstantPointerNull).
+ if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ // Don't try to decide equality of aliases.
+ if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2))
+ if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
+ return ICmpInst::ICMP_NE;
+ } else if (isa<BlockAddress>(V2)) {
+ return ICmpInst::ICMP_NE; // Globals never equal labels.
+ } else {
+ assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
+ // GlobalVals can never be null unless they have external weak linkage.
+ // We don't try to evaluate aliases here.
+ if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV))
+ return ICmpInst::ICMP_NE;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it is a
+ // ConstantPointerNull).
+ if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
+ // Block address in another function can't equal this one, but block
+ // addresses in the current function might be the same if blocks are
+ // empty.
+ if (BA2->getFunction() != BA->getFunction())
+ return ICmpInst::ICMP_NE;
+ } else {
+ // Block addresses aren't null, don't equal the address of globals.
+ assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) &&
+ "Canonicalization guarantee!");
+ return ICmpInst::ICMP_NE;
+ }
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr, a global, block address, or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ Constant *CE1Op0 = CE1->getOperand(0);
+
+ switch (CE1->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ break; // We can't evaluate floating point casts or truncations.
+
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the cast is not actually changing bits, and the second operand is a
+ // null pointer, do the comparison with the pre-casted value.
+ if (V2->isNullValue() &&
+ (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
+ if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
+ if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
+ return evaluateICmpRelation(CE1Op0,
+ Constant::getNullValue(CE1Op0->getType()),
+ isSigned);
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ // Ok, since this is a getelementptr, we know that the constant has a
+ // pointer type. Check the various cases.
+ if (isa<ConstantPointerNull>(V2)) {
+ // If we are comparing a GEP to a null pointer, check to see if the base
+ // of the GEP equals the null pointer.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV->hasExternalWeakLinkage())
+          // Weak linkage GVals could be zero or not. We're comparing that
+          // to a null pointer, so it's greater-or-equal.
+ return isSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ else
+          // If it's not weak linkage, the GVal must have a non-zero address,
+          // so the result is greater-than.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else if (isa<ConstantPointerNull>(CE1Op0)) {
+ // If we are indexing from a null pointer, check to see if we have any
+ // non-zero indices.
+ for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i)
+ if (!CE1->getOperand(i)->isNullValue())
+ // Offsetting from null, must not be equal.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ // Only zero indexes from null, must still be zero.
+ return ICmpInst::ICMP_EQ;
+ }
+ // Otherwise, we can't really say if the first operand is null or not.
+ } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ if (isa<ConstantPointerNull>(CE1Op0)) {
+ if (GV2->hasExternalWeakLinkage())
+          // Weak linkage GVals could be zero or not. We're comparing it to
+          // a null pointer, so it's less-or-equal.
+ return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ else
+          // If it's not weak linkage, the GVal must have a non-zero address,
+          // so the result is less-than.
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV == GV2) {
+ // If this is a getelementptr of the same global, then it must be
+ // different. Because the types must match, the getelementptr could
+ // only have at most one index, and because we fold getelementptr's
+ // with a single zero index, it must be nonzero.
+ assert(CE1->getNumOperands() == 2 &&
+ !CE1->getOperand(1)->isNullValue() &&
+ "Surprising getelementptr!");
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else {
+ // If they are different globals, we don't know what the value is,
+ // but they can't be equal.
+ return ICmpInst::ICMP_NE;
+ }
+ }
+ } else {
+ ConstantExpr *CE2 = cast<ConstantExpr>(V2);
+ Constant *CE2Op0 = CE2->getOperand(0);
+
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ switch (CE2->getOpcode()) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // By far the most common case to handle is when the base pointers are
+ // obviously to the same or different globals.
+ if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
+ if (CE1Op0 != CE2Op0) // Don't know relative ordering, but not equal
+ return ICmpInst::ICMP_NE;
+ // Ok, we know that both getelementptr instructions are based on the
+ // same global. From this, we can precisely determine the relative
+ // ordering of the resultant pointers.
+ unsigned i = 1;
+
+ // The logic below assumes that the result of the comparison
+ // can be determined by finding the first index that differs.
+ // This doesn't work if there is over-indexing in any
+ // subsequent indices, so check for that case first.
+ if (!CE1->isGEPWithNoNotionalOverIndexing() ||
+ !CE2->isGEPWithNoNotionalOverIndexing())
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+
+ // Compare all of the operands the GEP's have in common.
+ gep_type_iterator GTI = gep_type_begin(CE1);
+ for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
+ ++i, ++GTI)
+ switch (IdxCompare(CE1->getOperand(i),
+ CE2->getOperand(i), GTI.getIndexedType())) {
+ case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
+ case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
+ case -2: return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Ok, we ran out of things they have in common. If any leftovers
+ // are non-zero then we have a difference, otherwise we are equal.
+ for (; i < CE1->getNumOperands(); ++i)
+ if (!CE1->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE1->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+
+ for (; i < CE2->getNumOperands(); ++i)
+ if (!CE2->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE2->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+ return ICmpInst::ICMP_EQ;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
+ Constant *C1, Constant *C2) {
+ const Type *ResultTy;
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()),
+ VT->getNumElements());
+ else
+ ResultTy = Type::getInt1Ty(C1->getContext());
+
+ // Fold FCMP_FALSE/FCMP_TRUE unconditionally.
+ if (pred == FCmpInst::FCMP_FALSE)
+ return Constant::getNullValue(ResultTy);
+
+ if (pred == FCmpInst::FCMP_TRUE)
+ return Constant::getAllOnesValue(ResultTy);
+
+ // Handle some degenerate cases first
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Also, if both operands are undef, we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(ResultTy);
+ // Otherwise, pick the same value as the non-undef operand, and fold
+ // it to true or false.
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ }
+
+ // No compile-time operations on this type yet.
+ if (C1->getType()->isPPC_FP128Ty())
+ return 0;
+
+ // icmp eq/ne(null,GV) -> false/true
+ if (C1->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ // icmp eq/ne(GV,null) -> false/true
+ } else if (C2->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C1))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ }
+
+ // If the comparison is a comparison between two i1's, simplify it.
+ if (C1->getType()->isIntegerTy(1)) {
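+    // On i1, equality is the complement of xor: (a == b) <=> (a ^ ~b), and
+    // inequality is plain xor.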
+ switch(pred) {
+ case ICmpInst::ICMP_EQ:
+ if (isa<ConstantInt>(C2))
+ return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2));
+ return ConstantExpr::getXor(ConstantExpr::getNot(C1), C2);
+ case ICmpInst::ICMP_NE:
+ return ConstantExpr::getXor(C1, C2);
+ default:
+ break;
+ }
+ }
+
+ if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
+ APInt V1 = cast<ConstantInt>(C1)->getValue();
+ APInt V2 = cast<ConstantInt>(C2)->getValue();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp Predicate"); return 0;
+ case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
+ case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
+ case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
+ case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
+ case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
+ case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
+ case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
+ case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
+ case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
+ case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
+ }
+ } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
+ APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
+ APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
+ APFloat::cmpResult R = C1V.compare(C2V);
+ switch (pred) {
+ default: llvm_unreachable("Invalid FCmp Predicate"); return 0;
+ case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
+ case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
+ case FCmpInst::FCMP_UNO:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered);
+ case FCmpInst::FCMP_ORD:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered);
+ case FCmpInst::FCMP_UEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_OEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UNE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual);
+ case FCmpInst::FCMP_ONE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OLT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_UGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OLE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UGE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OGE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual);
+ }
+ } else if (C1->getType()->isVectorTy()) {
+ SmallVector<Constant*, 16> C1Elts, C2Elts;
+ C1->getVectorElements(C1Elts);
+ C2->getVectorElements(C2Elts);
+ if (C1Elts.empty() || C2Elts.empty())
+ return 0;
+
+ // If we can constant fold the comparison of each element, constant fold
+ // the whole vector comparison.
+ SmallVector<Constant*, 4> ResElts;
+ // Compare the elements, producing an i1 result or constant expr.
+ for (unsigned i = 0, e = C1Elts.size(); i != e; ++i)
+ ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
+
+ return ConstantVector::get(ResElts);
+ }
+
+ if (C1->getType()->isFloatingPointTy()) {
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateFCmpRelation(C1, C2)) {
+ default: llvm_unreachable("Unknown relation!");
+ case FCmpInst::FCMP_UNO:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_TRUE:
+ case FCmpInst::FCMP_FALSE:
+ case FCmpInst::BAD_FCMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case FCmpInst::FCMP_OEQ: // We know that C1 == C2
+ Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLT: // We know that C1 < C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE);
+ break;
+ case FCmpInst::FCMP_OGT: // We know that C1 > C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLE: // We know that C1 <= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 1;
+ break;
+    case FCmpInst::FCMP_OGE: // We know that C1 >= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_ONE: // We know that C1 != C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
+ Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ } else {
+ // Evaluate the relation between the two constants, per the predicate.
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
+ default: llvm_unreachable("Unknown relational!");
+ case ICmpInst::BAD_ICMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case ICmpInst::ICMP_EQ: // We know the constants are equal!
+ // If we know the constants are equal, we can decide the result of this
+ // computation precisely.
+ Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred);
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (pred) {
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE:
+ Result = 1; break;
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (pred) {
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (pred) {
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (pred == ICmpInst::ICMP_UGT) Result = 0;
+ if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (pred == ICmpInst::ICMP_SGT) Result = 0;
+ if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1;
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (pred == ICmpInst::ICMP_ULT) Result = 0;
+ if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (pred == ICmpInst::ICMP_SLT) Result = 0;
+ if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_NE:
+ if (pred == ICmpInst::ICMP_EQ) Result = 0;
+ if (pred == ICmpInst::ICMP_NE) Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ // If the right hand side is a bitcast, try using its inverse to simplify
+ // it by moving it to the left hand side. We can't do this if it would turn
+    // a vector compare into a scalar compare or vice versa.
+ if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
+ Constant *CE2Op0 = CE2->getOperand(0);
+ if (CE2->getOpcode() == Instruction::BitCast &&
+ CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
+ Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
+ return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
+ }
+ }
+
+ // If the left hand side is an extension, try eliminating it.
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
+ (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
+ Constant *CE1Op0 = CE1->getOperand(0);
+ Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
+ if (CE1Inverse == CE1Op0) {
+ // Check whether we can safely truncate the right hand side.
+ Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
+ if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) {
+ return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse);
+ }
+ }
+ }
+ }
+
+ if ((!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) ||
+ (C1->isNullValue() && !C2->isNullValue())) {
+ // If C2 is a constant expr and C1 isn't, flip them around and fold the
+ // other way if possible.
+ // Also, if C1 is null and C2 isn't, flip them around.
+ pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+ return ConstantExpr::getICmp(pred, C2, C1);
+ }
+ }
+ return 0;
+}
+
+/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
+/// is "inbounds".
+template<typename IndexTy>
+static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
+ // No indices means nothing that could be out of bounds.
+ if (NumIdx == 0) return true;
+
+ // If the first index is zero, it's in bounds.
+ if (cast<Constant>(Idxs[0])->isNullValue()) return true;
+
+ // If the first index is one and all the rest are zero, it's in bounds,
+ // by the one-past-the-end rule.
+ if (!cast<ConstantInt>(Idxs[0])->isOne())
+ return false;
+ for (unsigned i = 1, e = NumIdx; i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue())
+ return false;
+ return true;
+}
+
+template<typename IndexTy>
+static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+ bool inBounds,
+ IndexTy const *Idxs,
+ unsigned NumIdx) {
+ if (NumIdx == 0) return C;
+ Constant *Idx0 = cast<Constant>(Idxs[0]);
+ if ((NumIdx == 1 && Idx0->isNullValue()))
+ return C;
+
+ if (isa<UndefValue>(C)) {
+ const PointerType *Ptr = cast<PointerType>(C->getType());
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, Idxs+NumIdx);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
+ }
+
+ if (C->isNullValue()) {
+ bool isNull = true;
+ for (unsigned i = 0, e = NumIdx; i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue()) {
+ isNull = false;
+ break;
+ }
+ if (isNull) {
+ const PointerType *Ptr = cast<PointerType>(C->getType());
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs,
+ Idxs+NumIdx);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return ConstantPointerNull::get(PointerType::get(Ty,
+ Ptr->getAddressSpace()));
+ }
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
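+    // e.g. gep(gep(P, a, b), c, d) becomes gep(P, a, b+c, d), which is valid
+    // when the innermost indexed type is an array (or when c is zero).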
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ const Type *LastTy = 0;
+ for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ I != E; ++I)
+ LastTy = *I;
+
+ if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
+ SmallVector<Value*, 16> NewIndices;
+ NewIndices.reserve(NumIdx + CE->getNumOperands());
+ for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
+ NewIndices.push_back(CE->getOperand(i));
+
+ // Add the last index of the source with the first index of the new GEP.
+ // Make sure to handle the case when they are actually different types.
+ Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
+ // Otherwise it must be an array.
+ if (!Idx0->isNullValue()) {
+ const Type *IdxTy = Combined->getType();
+ if (IdxTy != Idx0->getType()) {
+ const Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
+ Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
+ Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
+ Combined = ConstantExpr::get(Instruction::Add, C1, C2);
+ } else {
+ Combined =
+ ConstantExpr::get(Instruction::Add, Idx0, Combined);
+ }
+ }
+
+ NewIndices.push_back(Combined);
+ NewIndices.append(Idxs+1, Idxs+NumIdx);
+ return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
+ ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
+ &NewIndices[0],
+ NewIndices.size()) :
+ ConstantExpr::getGetElementPtr(CE->getOperand(0),
+ &NewIndices[0],
+ NewIndices.size());
+ }
+ }
+
+ // Implement folding of:
+ // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+ // i64 0, i64 0)
+ // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
+ //
+ if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) {
+ if (const PointerType *SPT =
+ dyn_cast<PointerType>(CE->getOperand(0)->getType()))
+ if (const ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
+ if (const ArrayType *CAT =
+ dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
+ if (CAT->getElementType() == SAT->getElementType())
+ return inBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(
+ (Constant*)CE->getOperand(0), Idxs, NumIdx) :
+ ConstantExpr::getGetElementPtr(
+ (Constant*)CE->getOperand(0), Idxs, NumIdx);
+ }
+ }
+
+ // Check to see if any array indices are not within the corresponding
+ // notional array bounds. If so, try to determine if they can be factored
+ // out into preceding dimensions.
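+  // e.g. an index of 7 into a [3 x i32] array is rewritten as an index of 1,
+  // with 2 (= 7 / 3) added to the index of the preceding dimension.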
+ bool Unknown = false;
+ SmallVector<Constant *, 8> NewIdxs;
+ const Type *Ty = C->getType();
+ const Type *Prev = 0;
+ for (unsigned i = 0; i != NumIdx;
+ Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (ATy->getNumElements() <= INT64_MAX &&
+ ATy->getNumElements() != 0 &&
+ CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
+ if (isa<SequentialType>(Prev)) {
+ // It's out of range, but we can factor it into the prior
+ // dimension.
+ NewIdxs.resize(NumIdx);
+ ConstantInt *Factor = ConstantInt::get(CI->getType(),
+ ATy->getNumElements());
+ NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
+
+ Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
+ Constant *Div = ConstantExpr::getSDiv(CI, Factor);
+
+ // Before adding, extend both operands to i64 to avoid
+ // overflow trouble.
+ if (!PrevIdx->getType()->isIntegerTy(64))
+ PrevIdx = ConstantExpr::getSExt(PrevIdx,
+ Type::getInt64Ty(Div->getContext()));
+ if (!Div->getType()->isIntegerTy(64))
+ Div = ConstantExpr::getSExt(Div,
+ Type::getInt64Ty(Div->getContext()));
+
+ NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
+ } else {
+ // It's out of range, but the prior dimension is a struct
+ // so we can't do anything about it.
+ Unknown = true;
+ }
+ }
+ } else {
+ // We don't know if it's in range or not.
+ Unknown = true;
+ }
+ }
+
+ // If we did any factoring, start over with the adjusted indices.
+ if (!NewIdxs.empty()) {
+ for (unsigned i = 0; i != NumIdx; ++i)
+ if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
+ return inBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(),
+ NewIdxs.size()) :
+ ConstantExpr::getGetElementPtr(C, NewIdxs.data(), NewIdxs.size());
+ }
+
+ // If all indices are known integers and normalized, we can do a simple
+ // check for the "inbounds" property.
+ if (!Unknown && !inBounds &&
+ isa<GlobalVariable>(C) && isInBoundsIndices(Idxs, NumIdx))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs, NumIdx);
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
diff --git a/contrib/llvm/lib/VMCore/ConstantFold.h b/contrib/llvm/lib/VMCore/ConstantFold.h
new file mode 100644
index 000000000000..653a1c3f377d
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ConstantFold.h
@@ -0,0 +1,56 @@
+//===-- ConstantFold.h - Internal Constant Folding Interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the (internal) constant folding interfaces for LLVM. These
+// interfaces are used by the ConstantExpr::get* methods to automatically fold
+// constants when possible.
+//
+// These operators may return a null object if they don't know how to perform
+// the specified operation on the specified constant types.
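+//
+// For example, folding an add of two ConstantInts yields the summed
+// ConstantInt, while an add of two unrelated global addresses is not
+// simplified and null is returned.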
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CONSTANTFOLDING_H
+#define CONSTANTFOLDING_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+ class Value;
+ class Constant;
+ class Type;
+
+ // Constant fold various types of instruction...
+ Constant *ConstantFoldCastInstruction(
+ unsigned opcode, ///< The opcode of the cast
+ Constant *V, ///< The source constant
+ const Type *DestTy ///< The destination type
+ );
+ Constant *ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2);
+ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
+ Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt,
+ Constant *Idx);
+ Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
+ Constant *Mask);
+ Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
+ Constant *V2);
+ Constant *ConstantFoldCompareInstruction(unsigned short predicate,
+ Constant *C1, Constant *C2);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ Constant* const *Idxs, unsigned NumIdx);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ Value* const *Idxs, unsigned NumIdx);
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/Constants.cpp b/contrib/llvm/lib/VMCore/Constants.cpp
new file mode 100644
index 000000000000..316c8846f94f
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Constants.cpp
@@ -0,0 +1,2173 @@
+//===-- Constants.cpp - Implement Constant nodes --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Constant* classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "LLVMContextImpl.h"
+#include "ConstantFold.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Class
+//===----------------------------------------------------------------------===//
+
+bool Constant::isNegativeZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && CFP->isNegative();
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+bool Constant::isNullValue() const {
+ // 0 is null.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isZero();
+
+ // +0.0 is null.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && !CFP->isNegative();
+
+ // ConstantAggregateZero is the null value for aggregates and
+ // ConstantPointerNull is the null value for pointers.
+ return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
+}
+
+// Factory method to create a '0' constant of arbitrary type...
+Constant *Constant::getNullValue(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ return ConstantInt::get(Ty, 0);
+ case Type::FloatTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEsingle));
+ case Type::DoubleTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEdouble));
+ case Type::X86_FP80TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::x87DoubleExtended));
+ case Type::FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEquad));
+ case Type::PPC_FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat(APInt::getNullValue(128)));
+ case Type::PointerTyID:
+ return ConstantPointerNull::get(cast<PointerType>(Ty));
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return ConstantAggregateZero::get(Ty);
+ default:
+ // Function, Label, or Opaque type?
+ assert(!"Cannot create a null constant of that type!");
+ return 0;
+ }
+}
+
+Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) {
+ const Type *ScalarTy = Ty->getScalarType();
+
+ // Create the base integer constant.
+ Constant *C = ConstantInt::get(Ty->getContext(), V);
+
+ // Convert an integer to a pointer, if necessary.
+ if (const PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
+ C = ConstantExpr::getIntToPtr(C, PTy);
+
+ // Broadcast a scalar to a vector, if necessary.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+Constant *Constant::getAllOnesValue(const Type *Ty) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(Ty->getContext(),
+ APInt::getAllOnesValue(ITy->getBitWidth()));
+
+ if (Ty->isFloatingPointTy()) {
+ APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
+ !Ty->isPPC_FP128Ty());
+ return ConstantFP::get(Ty->getContext(), FL);
+ }
+
+ SmallVector<Constant*, 16> Elts;
+ const VectorType *VTy = cast<VectorType>(Ty);
+ Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
+ assert(Elts[0] && "Not a vector integer type!");
+ return cast<ConstantVector>(ConstantVector::get(Elts));
+}
+
+void Constant::destroyConstantImpl() {
+ // When a Constant is destroyed, there may be lingering
+ // references to the constant by other constants in the constant pool. These
+ // constants are implicitly dependent on the module that is being deleted,
+ // but they don't know that. Because we only find out when the CPV is
+ // deleted, we must now notify all of our users (that should only be
+ // Constants) that they are, in fact, invalid now and should be deleted.
+ //
+ while (!use_empty()) {
+ Value *V = use_back();
+#ifndef NDEBUG // Only in builds with assertions enabled...
+ if (!isa<Constant>(V)) {
+ dbgs() << "While deleting: " << *this
+ << "\n\nUse still stuck around after Def is destroyed: "
+ << *V << "\n\n";
+ }
+#endif
+ assert(isa<Constant>(V) && "References remain to Constant being destroyed");
+ Constant *CV = cast<Constant>(V);
+ CV->destroyConstant();
+
+ // The constant should remove itself from our use list...
+ assert((use_empty() || use_back() != V) && "Constant not removed!");
+ }
+
+ // Value has no outstanding references; it is safe to delete it now...
+ delete this;
+}
+
+/// canTrap - Return true if evaluation of this constant could trap. This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+ assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+ // The only thing that could possibly trap are constant exprs.
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
+ if (!CE) return false;
+
+ // ConstantExpr traps if any operands can trap.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (CE->getOperand(i)->canTrap())
+ return true;
+
+ // Otherwise, only specific operations can trap.
+ switch (CE->getOpcode()) {
+ default:
+ return false;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ // Div and rem can trap if the RHS is not known to be non-zero.
+ if (!isa<ConstantInt>(CE->getOperand(1)) || CE->getOperand(1)->isNullValue())
+ return true;
+ return false;
+ }
+}
+
+/// isConstantUsed - Return true if the constant has users other than constant
+/// exprs and other dangling things.
+bool Constant::isConstantUsed() const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ const Constant *UC = dyn_cast<Constant>(*UI);
+ if (UC == 0 || isa<GlobalValue>(UC))
+ return true;
+
+ if (UC->isConstantUsed())
+ return true;
+ }
+ return false;
+}
+
+
+
+/// getRelocationInfo - This method classifies the entry according to
+/// whether or not it may generate a relocation entry. This must be
+/// conservative, so if it might codegen to a relocatable entry, it should say
+/// so. The return values are:
+///
+/// NoRelocation: This constant pool entry is guaranteed to never have a
+/// relocation applied to it (because it holds a simple constant like
+/// '4').
+/// LocalRelocation: This entry has relocations, but the entries are
+/// guaranteed to be resolvable by the static linker, so the dynamic
+/// linker will never see them.
+/// GlobalRelocations: This entry may have arbitrary relocations.
+///
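+/// For example, a GlobalValue with internal linkage or hidden visibility is
+/// classified as LocalRelocation, while a reference to an ordinary external
+/// global is classified as GlobalRelocations.
+///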
+/// FIXME: This really should not be in VMCore.
+Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return LocalRelocation; // Local to this file/library.
+ return GlobalRelocations; // Global reference.
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
+ return BA->getFunction()->getRelocationInfo();
+
+ // While raw uses of blockaddress need to be relocated, differences between
+ // two of them don't when they are for labels in the same function. This is a
+ // common idiom when creating a table for the indirect goto extension, so we
+ // handle it efficiently here.
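+ // For example, the difference of two blockaddress constants taken from the
+ // same function, roughly
+ // sub(ptrtoint(blockaddress(@f, %a)), ptrtoint(blockaddress(@f, %b))),
+ // resolves to a fixed offset and is classified as NoRelocation below.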
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
+ if (CE->getOpcode() == Instruction::Sub) {
+ ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
+ ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
+ if (LHS && RHS &&
+ LHS->getOpcode() == Instruction::PtrToInt &&
+ RHS->getOpcode() == Instruction::PtrToInt &&
+ isa<BlockAddress>(LHS->getOperand(0)) &&
+ isa<BlockAddress>(RHS->getOperand(0)) &&
+ cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
+ cast<BlockAddress>(RHS->getOperand(0))->getFunction())
+ return NoRelocation;
+ }
+
+ PossibleRelocationsTy Result = NoRelocation;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ Result = std::max(Result,
+ cast<Constant>(getOperand(i))->getRelocationInfo());
+
+ return Result;
+}
+
+
+/// getVectorElements - This method, which is only valid on constant of vector
+/// type, returns the elements of the vector in the specified smallvector.
+/// This handles breaking down a vector undef into undef elements, etc. For
+/// constant exprs and other cases we can't handle, we return an empty vector.
+void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
+ assert(getType()->isVectorTy() && "Not a vector constant!");
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
+ Elts.push_back(CV->getOperand(i));
+ return;
+ }
+
+ const VectorType *VT = cast<VectorType>(getType());
+ if (isa<ConstantAggregateZero>(this)) {
+ Elts.assign(VT->getNumElements(),
+ Constant::getNullValue(VT->getElementType()));
+ return;
+ }
+
+ if (isa<UndefValue>(this)) {
+ Elts.assign(VT->getNumElements(), UndefValue::get(VT->getElementType()));
+ return;
+ }
+
+ // Unknown type, must be constant expr etc.
+}
+
+
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it. This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false; // Cannot remove this
+
+ while (!C->use_empty()) {
+ const Constant *User = dyn_cast<Constant>(C->use_back());
+ if (!User) return false; // Non-constant usage, cannot remove.
+ if (!removeDeadUsersOfConstant(User))
+ return false; // Constant wasn't dead
+ }
+
+ const_cast<Constant*>(C)->destroyConstant();
+ return true;
+}
+
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this constant, remove them. This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void Constant::removeDeadConstantUsers() const {
+ Value::const_use_iterator I = use_begin(), E = use_end();
+ Value::const_use_iterator LastNonDeadUser = E;
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
+ if (User == 0) {
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ if (!removeDeadUsersOfConstant(User)) {
+ // If the constant wasn't dead, remember that this was the last live use
+ // and move on to the next constant.
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ // If the constant was dead, then the iterator is invalidated.
+ if (LastNonDeadUser == E) {
+ I = use_begin();
+ if (I == E) break;
+ } else {
+ I = LastNonDeadUser;
+ ++I;
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantInt
+//===----------------------------------------------------------------------===//
+
+ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
+ : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
+ assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
+}
+
+ConstantInt *ConstantInt::getTrue(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheTrueVal)
+ pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
+ return pImpl->TheTrueVal;
+}
+
+ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheFalseVal)
+ pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
+ return pImpl->TheFalseVal;
+}
+
+Constant *ConstantInt::getTrue(const Type *Ty) {
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
+ return ConstantInt::getTrue(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "True must be vector of i1 or i1.");
+ SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
+ ConstantInt::getTrue(Ty->getContext()));
+ return ConstantVector::get(Splat);
+}
+
+Constant *ConstantInt::getFalse(const Type *Ty) {
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
+ return ConstantInt::getFalse(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "False must be vector of i1 or i1.");
+ SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
+ ConstantInt::getFalse(Ty->getContext()));
+ return ConstantVector::get(Splat);
+}
+
+
+// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
+// as the key is a DenseMapAPIntKeyInfo::KeyTy, which provides operator== and
+// operator!= so that the DenseMap never compares APInts of different widths;
+// doing so would violate an APInt class invariant and trigger an assertion.
+ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
+ // Get the corresponding integer type for the bit width of the value.
+ const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
+ // get an existing value or the insertion position
+ DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+ ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
+ if (!Slot) Slot = new ConstantInt(ITy, V);
+ return Slot;
+}
+
+Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(SmallVector<Constant*,
+ 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+ConstantInt* ConstantInt::get(const IntegerType* Ty, uint64_t V,
+ bool isSigned) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+ConstantInt* ConstantInt::getSigned(const IntegerType* Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::get(const Type* Ty, const APInt& V) {
+ ConstantInt *C = get(Ty->getContext(), V);
+ assert(C->getType() == Ty->getScalarType() &&
+ "ConstantInt type doesn't match the type implied by its value!");
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str,
+ uint8_t radix) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFP
+//===----------------------------------------------------------------------===//
+
+static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
+ if (Ty->isFloatTy())
+ return &APFloat::IEEEsingle;
+ if (Ty->isDoubleTy())
+ return &APFloat::IEEEdouble;
+ if (Ty->isX86_FP80Ty())
+ return &APFloat::x87DoubleExtended;
+ else if (Ty->isFP128Ty())
+ return &APFloat::IEEEquad;
+
+ assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
+ return &APFloat::PPCDoubleDouble;
+}
+
+/// get() - This returns a constant fp for the specified value in the
+/// specified type. This should only be used for simple constant values like
+/// 2.0/1.0 etc., which are known to be valid both as double and as the
+/// target format.
+Constant *ConstantFP::get(const Type* Ty, double V) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(V);
+ bool ignored;
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+
+Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+
+ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
+ LLVMContext &Context = Ty->getContext();
+ APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ apf.changeSign();
+ return get(Context, apf);
+}
+
+
+Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
+ if (PTy->getElementType()->isFloatingPointTy()) {
+ SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
+ getNegativeZero(PTy->getElementType()));
+ return ConstantVector::get(zeros);
+ }
+
+ if (Ty->isFloatingPointTy())
+ return getNegativeZero(Ty);
+
+ return Constant::getNullValue(Ty);
+}
+
+
+// ConstantFP accessors.
+ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
+ DenseMapAPFloatKeyInfo::KeyTy Key(V);
+
+ LLVMContextImpl* pImpl = Context.pImpl;
+
+ ConstantFP *&Slot = pImpl->FPConstants[Key];
+
+ if (!Slot) {
+ const Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::getFloatTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::getDoubleTy(Context);
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::getX86_FP80Ty(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::getFP128Ty(Context);
+ else {
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(Context);
+ }
+ Slot = new ConstantFP(Ty, V);
+ }
+
+ return Slot;
+}
+
+ConstantFP *ConstantFP::getInfinity(const Type *Ty, bool Negative) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getInf(Semantics, Negative));
+}
+
+ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
+ : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ "FP type Mismatch");
+}
+
+bool ConstantFP::isExactlyValue(const APFloat &V) const {
+ return Val.bitwiseIsEqual(V);
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantXXX Classes
+//===----------------------------------------------------------------------===//
+
+
+ConstantArray::ConstantArray(const ArrayType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantArrayVal,
+ OperandTraits<ConstantArray>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant array");
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert(C->getType() == T->getElementType() &&
+ "Initializer for array element doesn't match array element type!");
+ *OL = C;
+ }
+}
+
+Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ assert(V[i]->getType() == Ty->getElementType() &&
+ "Wrong type in array element initializer");
+ }
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ // If this is an all-zero array, return a ConstantAggregateZero object
+ if (!V.empty()) {
+ Constant *C = V[0];
+ if (!C->isNullValue())
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C)
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+ }
+
+ return ConstantAggregateZero::get(Ty);
+}
+
+/// ConstantArray::get(StringRef) - Return an array that is initialized to
+/// contain the specified string. If AddNull is true, a null terminator is
+/// appended so the result can be used as a C string; otherwise the array
+/// holds exactly the characters of Str with no terminator.
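+/// For example, get(Context, "hi", true) yields [3 x i8] c"hi\00", while
+/// get(Context, "hi", false) yields [2 x i8] c"hi".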
+///
+Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
+ bool AddNull) {
+ std::vector<Constant*> ElementVals;
+ ElementVals.reserve(Str.size() + size_t(AddNull));
+ for (unsigned i = 0; i < Str.size(); ++i)
+ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
+
+ // Add a null terminator to the string...
+ if (AddNull) {
+ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
+ }
+
+ ArrayType *ATy = ArrayType::get(Type::getInt8Ty(Context), ElementVals.size());
+ return get(ATy, ElementVals);
+}
+
+/// getTypeForElements - Return an anonymous struct type to use for a constant
+/// with the specified set of elements. The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+ ArrayRef<Constant*> V,
+ bool Packed) {
+ SmallVector<Type*, 16> EltTypes;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ EltTypes.push_back(V[i]->getType());
+
+ return StructType::get(Context, EltTypes, Packed);
+}
+
+
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed) {
+ assert(!V.empty() &&
+ "ConstantStruct::getTypeForElements cannot be called on empty list");
+ return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
+ConstantStruct::ConstantStruct(const StructType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantStructVal,
+ OperandTraits<ConstantStruct>::op_end(this) - V.size(),
+ V.size()) {
+ assert((T->isOpaque() || V.size() == T->getNumElements()) &&
+ "Invalid initializer vector for constant structure");
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) &&
+ "Initializer for struct element doesn't match struct element type!");
+ *OL = C;
+ }
+}
+
+// ConstantStruct accessors.
+Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
+ // Create a ConstantAggregateZero value if all elements are zeros.
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (!V[i]->isNullValue())
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
+
+ assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
+ "Incorrect # elements specified to ConstantStruct::get");
+ return ConstantAggregateZero::get(ST);
+}
+
+Constant* ConstantStruct::get(const StructType *T, ...) {
+ va_list ap;
+ SmallVector<Constant*, 8> Values;
+ va_start(ap, T);
+ while (Constant *Val = va_arg(ap, llvm::Constant*))
+ Values.push_back(Val);
+ va_end(ap);
+ return get(T, Values);
+}
+
+ConstantVector::ConstantVector(const VectorType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantVectorVal,
+ OperandTraits<ConstantVector>::op_end(this) - V.size(),
+ V.size()) {
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert(C->getType() == T->getElementType() &&
+ "Initializer for vector element doesn't match vector element type!");
+ *OL = C;
+ }
+}
+
+// ConstantVector accessors.
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
+ assert(!V.empty() && "Vectors can't be empty");
+ const VectorType *T = VectorType::get(V.front()->getType(), V.size());
+ LLVMContextImpl *pImpl = T->getContext().pImpl;
+
+ // If this is an all-undef or all-zero vector, return a
+ // ConstantAggregateZero or UndefValue.
+ Constant *C = V[0];
+ bool isZero = C->isNullValue();
+ bool isUndef = isa<UndefValue>(C);
+
+ if (isZero || isUndef) {
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ isZero = isUndef = false;
+ break;
+ }
+ }
+
+ if (isZero)
+ return ConstantAggregateZero::get(T);
+ if (isUndef)
+ return UndefValue::get(T);
+
+ return pImpl->VectorConstants.getOrCreate(T, V);
+}
+
+// Utility function for determining if a ConstantExpr is a CastOp or not. This
+// can't be inline because we don't want to #include Instruction.h into
+// Constant.h
+bool ConstantExpr::isCast() const {
+ return Instruction::isCast(getOpcode());
+}
+
+bool ConstantExpr::isCompare() const {
+ return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
+}
+
+bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
+ if (getOpcode() != Instruction::GetElementPtr) return false;
+
+ gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
+ User::const_op_iterator OI = llvm::next(this->op_begin());
+
+ // Skip the first index, as it has no static limit.
+ ++GEPI;
+ ++OI;
+
+ // The remaining indices must be compile-time known integers within the
+ // bounds of the corresponding notional static array types.
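+ // For example, an index of 5 into a notional [4 x i32] dimension counts as
+ // over-indexing and makes this return false.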
+ for (; GEPI != E; ++GEPI, ++OI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
+ if (!CI) return false;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
+ if (CI->getValue().getActiveBits() > 64 ||
+ CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+
+ // All the indices checked out.
+ return true;
+}
+
+bool ConstantExpr::hasIndices() const {
+ return getOpcode() == Instruction::ExtractValue ||
+ getOpcode() == Instruction::InsertValue;
+}
+
+ArrayRef<unsigned> ConstantExpr::getIndices() const {
+ if (const ExtractValueConstantExpr *EVCE =
+ dyn_cast<ExtractValueConstantExpr>(this))
+ return EVCE->Indices;
+
+ return cast<InsertValueConstantExpr>(this)->Indices;
+}
+
+unsigned ConstantExpr::getPredicate() const {
+ assert(isCompare());
+ return ((const CompareConstantExpr*)this)->predicate;
+}
+
+/// getWithOperandReplaced - Return a constant expression identical to this
+/// one, but with the specified operand set to the specified value.
+Constant *
+ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
+ assert(OpNo < getNumOperands() && "Operand num is out of range!");
+ assert(Op->getType() == getOperand(OpNo)->getType() &&
+ "Replacing operand with value of different type!");
+ if (getOperand(OpNo) == Op)
+ return const_cast<ConstantExpr*>(this);
+
+ Constant *Op0, *Op1, *Op2;
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Op, getType());
+ case Instruction::Select:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getSelect(Op0, Op1, Op2);
+ case Instruction::InsertElement:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ case Instruction::ExtractElement:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ return ConstantExpr::getExtractElement(Op0, Op1);
+ case Instruction::ShuffleVector:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ case Instruction::GetElementPtr: {
+ SmallVector<Constant*, 8> Ops;
+ Ops.resize(getNumOperands()-1);
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
+ Ops[i-1] = getOperand(i);
+ if (OpNo == 0)
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(Op, &Ops[0], Ops.size()) :
+ ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size());
+ Ops[OpNo-1] = Op;
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(getOperand(0), &Ops[0],Ops.size()):
+ ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size());
+ }
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ return ConstantExpr::get(getOpcode(), Op0, Op1, SubclassOptionalData);
+ }
+}
+
+/// getWithOperands - This returns the current constant expression with the
+/// operands replaced with the specified values. The specified array must
+/// have the same number of operands as our current one.
+Constant *ConstantExpr::
+getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
+ assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
+ bool AnyChange = Ty != getType();
+ for (unsigned i = 0; i != Ops.size(); ++i)
+ AnyChange |= Ops[i] != getOperand(i);
+
+ if (!AnyChange) // No operands changed, return self.
+ return const_cast<ConstantExpr*>(this);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) :
+ ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// isValueValidForType implementations
+
+bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
+ if (Ty == Type::getInt1Ty(Ty->getContext()))
+ return Val == 0 || Val == 1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ uint64_t Max = (1ll << NumBits) - 1;
+ return Val <= Max;
+}
+
+bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
+ if (Ty == Type::getInt1Ty(Ty->getContext()))
+ return Val == 0 || Val == 1 || Val == -1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ int64_t Min = -(1ll << (NumBits-1));
+ int64_t Max = (1ll << (NumBits-1)) - 1;
+ return (Val >= Min && Val <= Max);
+}
+
+bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ switch (Ty->getTypeID()) {
+ default:
+ return false; // These can't be represented as floating point!
+
+ // FIXME rounding mode needs to be more flexible
+ case Type::FloatTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle)
+ return true;
+ Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::DoubleTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble)
+ return true;
+ Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::X86_FP80TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::x87DoubleExtended;
+ case Type::FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::IEEEquad;
+ case Type::PPC_FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Factory Function Implementation
+
+ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ "Cannot create an aggregate zero of non-aggregate type!");
+
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->AggZeroConstants.getOrCreate(Ty, 0);
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantAggregateZero::destroyConstant() {
+ getType()->getContext().pImpl->AggZeroConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantArray::destroyConstant() {
+ getType()->getContext().pImpl->ArrayConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// isString - This method returns true if the array is an array of i8, and
+/// if the elements of the array are all ConstantInt's.
+bool ConstantArray::isString() const {
+ // Check the element type for i8...
+ if (!getType()->getElementType()->isIntegerTy(8))
+ return false;
+ // Check the elements to make sure they are all integers, not constant
+ // expressions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ return true;
+}
+
+/// isCString - This method returns true if the array is a string (see
+/// isString), ends in a null byte \\0, and does not contain any other null
+/// bytes except its terminator.
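+/// For example, c"foo\00" qualifies, while c"fo\00o" and c"foo" do not.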
+bool ConstantArray::isCString() const {
+ // Check the element type for i8...
+ if (!getType()->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Last element must be a null.
+ if (!getOperand(getNumOperands()-1)->isNullValue())
+ return false;
+ // Other elements must be non-null integers.
+ for (unsigned i = 0, e = getNumOperands()-1; i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ if (getOperand(i)->isNullValue())
+ return false;
+ }
+ return true;
+}
+
+
+/// convertToString - Helper function for getAsString() and getAsCString().
+static std::string convertToString(const User *U, unsigned len) {
+ std::string Result;
+ Result.reserve(len);
+ for (unsigned i = 0; i != len; ++i)
+ Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue());
+ return Result;
+}
+
+/// getAsString - If this array is isString(), then this method converts the
+/// array to an std::string and returns it. Otherwise, it asserts out.
+///
+std::string ConstantArray::getAsString() const {
+ assert(isString() && "Not a string!");
+ return convertToString(this, getNumOperands());
+}
+
+
+/// getAsCString - If this array is isCString(), then this method converts the
+/// array (without the trailing null byte) to an std::string and returns it.
+/// Otherwise, it asserts out.
+///
+std::string ConstantArray::getAsCString() const {
+ assert(isCString() && "Not a string!");
+ return convertToString(this, getNumOperands() - 1);
+}
+
+
+//---- ConstantStruct::get() implementation...
+//
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantStruct::destroyConstant() {
+ getType()->getContext().pImpl->StructConstants.remove(this);
+ destroyConstantImpl();
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantVector::destroyConstant() {
+ getType()->getContext().pImpl->VectorConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// This function will return true iff every element in this vector constant
+/// is set to all ones.
+/// @returns true iff this constant's elements are all set to all ones.
+/// @brief Determine if the value is all ones.
+bool ConstantVector::isAllOnesValue() const {
+ // Check the first element.
+ const Constant *Elt = getOperand(0);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI || !CI->isAllOnesValue()) return false;
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return false;
+
+ return true;
+}
+
+/// getSplatValue - If this is a splat constant, where all of the
+/// elements have the same value, return that value. Otherwise return null.
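+/// For example, the vector <i32 7, i32 7, i32 7, i32 7> returns the i32 7
+/// constant, while <i32 7, i32 8, i32 7, i32 7> returns null.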
+Constant *ConstantVector::getSplatValue() const {
+ // Check the first element.
+ Constant *Elt = getOperand(0);
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return 0;
+ return Elt;
+}
+
+//---- ConstantPointerNull::get() implementation.
+//
+
+ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
+ return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantPointerNull::destroyConstant() {
+ getType()->getContext().pImpl->NullPtrConstants.remove(this);
+ destroyConstantImpl();
+}
+
+
+//---- UndefValue::get() implementation.
+//
+
+UndefValue *UndefValue::get(const Type *Ty) {
+ return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
+}
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void UndefValue::destroyConstant() {
+ getType()->getContext().pImpl->UndefValueConstants.remove(this);
+ destroyConstantImpl();
+}
+
+//---- BlockAddress::get() implementation.
+//
+
+BlockAddress *BlockAddress::get(BasicBlock *BB) {
+ assert(BB->getParent() != 0 && "Block must have a parent");
+ return get(BB->getParent(), BB);
+}
+
+BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
+ BlockAddress *&BA =
+ F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
+ if (BA == 0)
+ BA = new BlockAddress(F, BB);
+
+ assert(BA->getFunction() == F && "Basic block moved between functions");
+ return BA;
+}
+
+BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
+: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal,
+ &Op<0>(), 2) {
+ setOperand(0, F);
+ setOperand(1, BB);
+ BB->AdjustBlockAddressRefCount(1);
+}
+
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void BlockAddress::destroyConstant() {
+ getFunction()->getType()->getContext().pImpl
+ ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+ destroyConstantImpl();
+}
+
+void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ // This could be replacing either the Basic Block or the Function. In either
+ // case, we have to remove the map entry.
+ Function *NewF = getFunction();
+ BasicBlock *NewBB = getBasicBlock();
+
+ if (U == &Op<0>())
+ NewF = cast<Function>(To);
+ else
+ NewBB = cast<BasicBlock>(To);
+
+ // See if the 'new' entry already exists, if not, just update this in place
+ // and return early.
+ BlockAddress *&NewBA =
+ getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
+ if (NewBA == 0) {
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+
+ // Remove the old entry, this can't cause the map to rehash (just a
+ // tombstone will get added).
+ getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
+ getBasicBlock()));
+ NewBA = this;
+ setOperand(0, NewF);
+ setOperand(1, NewBB);
+ getBasicBlock()->AdjustBlockAddressRefCount(1);
+ return;
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(NewBA != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(NewBA);
+
+ destroyConstant();
+}
+
+//---- ConstantExpr::get() implementations.
+//
+
+/// This is a utility function to handle folding of casts and lookup of the
+/// cast in the ExprConstants map. It is used by the various get* methods below.
+static inline Constant *getFoldedCast(
+ Instruction::CastOps opc, Constant *C, const Type *Ty) {
+ assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
+ // Fold a few common cases
+ if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
+ return FC;
+
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> argVec(1, C);
+ ExprMapKeyType Key(opc, argVec);
+
+ return pImpl->ExprConstants.getOrCreate(Ty, Key);
+}
+
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
+ Instruction::CastOps opc = Instruction::CastOps(oc);
+ assert(Instruction::isCast(opc) && "opcode out of range");
+ assert(C && Ty && "Null arguments to getCast");
+ assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!");
+
+ switch (opc) {
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ break;
+ case Instruction::Trunc: return getTrunc(C, Ty);
+ case Instruction::ZExt: return getZExt(C, Ty);
+ case Instruction::SExt: return getSExt(C, Ty);
+ case Instruction::FPTrunc: return getFPTrunc(C, Ty);
+ case Instruction::FPExt: return getFPExtend(C, Ty);
+ case Instruction::UIToFP: return getUIToFP(C, Ty);
+ case Instruction::SIToFP: return getSIToFP(C, Ty);
+ case Instruction::FPToUI: return getFPToUI(C, Ty);
+ case Instruction::FPToSI: return getFPToSI(C, Ty);
+ case Instruction::PtrToInt: return getPtrToInt(C, Ty);
+ case Instruction::IntToPtr: return getIntToPtr(C, Ty);
+ case Instruction::BitCast: return getBitCast(C, Ty);
+ }
+ return 0;
+}
+
+Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getZExt(C, Ty);
+}
+
+Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getSExt(C, Ty);
+}
+
+Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getTrunc(C, Ty);
+}
+
+Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return getPtrToInt(S, Ty);
+ return getBitCast(S, Ty);
+}
+
+Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
+ bool isSigned) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ Ty->isIntOrIntVectorTy() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ if (SrcBits == DstBits)
+ return C; // Avoid a useless cast
+ Instruction::CastOps opcode =
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer");
+ assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral");
+ assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
+ "SrcTy must be larger than DestTy for Trunc!");
+
+ return getFoldedCast(Instruction::Trunc, C, Ty);
+}
+
+Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "SrcTy must be smaller than DestTy for SExt!");
+
+ return getFoldedCast(Instruction::SExt, C, Ty);
+}
+
+Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "ZExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "SrcTy must be smaller than DestTy for ZExt!");
+
+ return getFoldedCast(Instruction::ZExt, C, Ty);
+}
+
+Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
+ "This is an illegal floating point truncation!");
+ return getFoldedCast(Instruction::FPTrunc, C, Ty);
+}
+
+Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "This is an illegal floating point extension!");
+ return getFoldedCast(Instruction::FPExt, C, Ty);
+}
+
+Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal uint to floating point cast!");
+ return getFoldedCast(Instruction::UIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal sint to floating point cast!");
+ return getFoldedCast(Instruction::SIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to uint cast!");
+ return getFoldedCast(Instruction::FPToUI, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to sint cast!");
+ return getFoldedCast(Instruction::FPToSI, C, Ty);
+}
+
+Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) {
+ assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
+ assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
+ return getFoldedCast(Instruction::PtrToInt, C, DstTy);
+}
+
+Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) {
+ assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
+ assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
+ return getFoldedCast(Instruction::IntToPtr, C, DstTy);
+}
+
+Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
+ assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
+ "Invalid constantexpr bitcast!");
+
+ // It is common to ask for a bitcast of a value to its own type, handle this
+ // speedily.
+ if (C->getType() == DstTy) return C;
+
+ return getFoldedCast(Instruction::BitCast, C, DstTy);
+}
+
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags) {
+ // Check the operands for consistency first.
+ assert(Opcode >= Instruction::BinaryOpsBegin &&
+ Opcode < Instruction::BinaryOpsEnd &&
+ "Invalid opcode in binary constant expression");
+ assert(C1->getType() == C2->getType() &&
+ "Operand types in binary constant expression should match");
+
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a shift operation on a non-integer type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ return FC; // Fold a few common cases.
+
+ std::vector<Constant*> argVec(1, C1);
+ argVec.push_back(C2);
+ ExprMapKeyType Key(Opcode, argVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C1->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
+}
+
+Constant *ConstantExpr::getSizeOf(const Type* Ty) {
+ // sizeof is implemented as: (i64) gep (Ty*)null, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
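+  // For example (illustrative), getSizeOf(i32) yields roughly
+  //   i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64)
+  // which can fold down to the allocation size of i32 once target data is
+  // known.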
+ Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getAlignOf(const Type* Ty) {
+ // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
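+  // For example (illustrative), getAlignOf(double) yields roughly
+  //   i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1)
+  //                 to i64)
+  // i.e. the offset of the second field of { i1, double }, which matches the
+  // ABI alignment of double once target data is known.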
+ const Type *AligningTy =
+ StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+ Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
+ Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
+ Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *Indices[2] = { Zero, One };
+ Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+ return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
+ FieldNo));
+}
+
+Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
+ // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
+ // Note that a non-inbounds gep is used, as null isn't within any object.
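+  // For example (illustrative), for the struct type { i8, i32 } and FieldNo 1
+  // this yields roughly
+  //   i64 ptrtoint (i32* getelementptr ({ i8, i32 }* null, i64 0, i32 1) to i64)
+  // which folds to the byte offset of field 1 once target data is known.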
+ Constant *GEPIdx[] = {
+ ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0),
+ FieldNo
+ };
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getCompare(unsigned short Predicate,
+ Constant *C1, Constant *C2) {
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid CmpInst predicate");
+ case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return getFCmp(Predicate, C1, C2);
+
+ case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return getICmp(Predicate, C1, C2);
+ }
+}
+
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
+ assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
+
+ if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ return SC; // Fold common cases
+
+ std::vector<Constant*> argVec(3, C);
+ argVec[1] = V1;
+ argVec[2] = V2;
+ ExprMapKeyType Key(Instruction::Select, argVec);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
+}
+
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+ unsigned NumIdx, bool InBounds) {
+ if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
+ return FC; // Fold a few common cases.
+
+ // Get the result type of the getelementptr!
+ const Type *Ty =
+ GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+ assert(Ty && "GEP indices invalid!");
+ unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
+ Type *ReqTy = Ty->getPointerTo(AS);
+
+ assert(C->getType()->isPointerTy() &&
+ "Non-pointer type for constant GetElementPtr expression");
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.reserve(NumIdx+1);
+ ArgVec.push_back(C);
+ for (unsigned i = 0; i != NumIdx; ++i)
+ ArgVec.push_back(cast<Constant>(Idxs[i]));
+ const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
+ InBounds ? GEPOperator::IsInBounds : 0);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *
+ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+
+ const Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *
+ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+
+ const Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create extractelement operation on non-vector type!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Extractelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
+ return FC; // Fold a few common cases.
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Idx);
+ const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ Type *ReqTy = cast<VectorType>(Val->getType())->getElementType();
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
+ Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create insertelement operation on non-vector type!");
+ assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
+ && "Insertelement types must match!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Insertelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ return FC; // Fold a few common cases.
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Elt);
+ ArgVec.push_back(Idx);
+ const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
+}
+
+Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
+ Constant *Mask) {
+ assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector constant expr operands!");
+
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ return FC; // Fold a few common cases.
+
+ unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
+ const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ const Type *ShufTy = VectorType::get(EltTy, NElts);
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, V1);
+ ArgVec.push_back(V2);
+ ArgVec.push_back(Mask);
+ const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+
+ LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
+}
+
+Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs) == Val->getType() &&
+ "insertvalue indices invalid!");
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant insertvalue expression");
+ Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
+ assert(FC && "insertvalue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getExtractValue(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ assert(Agg->getType()->isFirstClassType() &&
+ "Tried to create extractelement operation on non-first-class type!");
+
+ const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+ (void)ReqTy;
+ assert(ReqTy && "extractvalue indices invalid!");
+
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant extractvalue expression");
+ Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
+ assert(FC && "ExtractValue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NEG a nonintegral value!");
+ return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
+ C, HasNUW, HasNSW);
+}
+
+Constant *ConstantExpr::getFNeg(Constant *C) {
+ assert(C->getType()->isFPOrFPVectorTy() &&
+ "Cannot FNEG a non-floating-point value!");
+ return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+}
+
+Constant *ConstantExpr::getNot(Constant *C) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NOT a nonintegral value!");
+ return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
+}
+
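+// For example (illustrative), getAdd(C1, C2, /*HasNUW=*/false, /*HasNSW=*/true)
+// produces the constant expression "add nsw" of C1 and C2 when the operands do
+// not fold away; the NUW/NSW arguments below are simply forwarded to get() as
+// OverflowingBinaryOperator flag bits.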
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Add, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
+ return get(Instruction::FAdd, C1, C2);
+}
+
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Sub, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
+ return get(Instruction::FSub, C1, C2);
+}
+
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Mul, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
+ return get(Instruction::FMul, C1, C2);
+}
+
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::UDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::SDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::FDiv, C1, C2);
+}
+
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
+ return get(Instruction::URem, C1, C2);
+}
+
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
+ return get(Instruction::SRem, C1, C2);
+}
+
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
+ return get(Instruction::FRem, C1, C2);
+}
+
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
+ return get(Instruction::And, C1, C2);
+}
+
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
+ return get(Instruction::Or, C1, C2);
+}
+
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
+ return get(Instruction::Xor, C1, C2);
+}
+
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Shl, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::LShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::AShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantExpr::destroyConstant() {
+ getType()->getContext().pImpl->ExprConstants.remove(this);
+ destroyConstantImpl();
+}
+
+const char *ConstantExpr::getOpcodeName() const {
+ return Instruction::getOpcodeName(getOpcode());
+}
+
+
+
+GetElementPtrConstantExpr::
+GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1), IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+
+//===----------------------------------------------------------------------===//
+// replaceUsesOfWithOnConstant implementations
+
+/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
+/// 'From' to be uses of 'To'. This must update the uniquing data structures
+/// etc.
+///
+/// Note that we intentionally replace all uses of From with To here.  Consider
+/// a large array that uses 'From' 1000 times.  By handling all of those uses
+/// in one place, ConstantArray::replaceUsesOfWithOnConstant is invoked only
+/// once, and that single invocation handles all 1000 uses.  Handling them one
+/// at a time would work, but would be really slow because it would have to
+/// unique each updated array instance.
+///
+void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
+ Lookup.first.first = cast<ArrayType>(getType());
+ Lookup.second = this;
+
+ std::vector<Constant*> &Values = Lookup.first.second;
+ Values.reserve(getNumOperands()); // Build replacement array.
+
+ // Fill values with the modified operands of the constant array. Also,
+ // compute whether this turns into an all-zeros array.
+ bool isAllZeros = false;
+ unsigned NumUpdated = 0;
+ if (!ToC->isNullValue()) {
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ }
+ } else {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands();O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ }
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we have this array type already.
+ bool Exists;
+ LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
+ pImpl->ArrayConstants.InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant array, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->ArrayConstants.MoveConstantToNewSlot(this, I);
+
+ // Update to the new value. Optimize for the case when we have a single
+ // operand that we're changing, but handle bulk updates efficiently.
+ if (NumUpdated == 1) {
+ unsigned OperandToUpdate = U - OperandList;
+ assert(getOperand(OperandToUpdate) == From &&
+ "ReplaceAllUsesWith broken!");
+ setOperand(OperandToUpdate, ToC);
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) == From)
+ setOperand(i, ToC);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ unsigned OperandToUpdate = U-OperandList;
+ assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
+
+ std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
+ Lookup.first.first = cast<StructType>(getType());
+ Lookup.second = this;
+ std::vector<Constant*> &Values = Lookup.first.second;
+ Values.reserve(getNumOperands()); // Build replacement struct.
+
+
+ // Fill values with the modified operands of the constant struct. Also,
+ // compute whether this turns into an all-zeros struct.
+ bool isAllZeros = false;
+ if (!ToC->isNullValue()) {
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
+ } else {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ }
+ Values[OperandToUpdate] = ToC;
+
+ LLVMContextImpl *pImpl = getContext().pImpl;
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we have this struct type already.
+ bool Exists;
+ LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
+ pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant struct, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->StructConstants.MoveConstantToNewSlot(this, I);
+
+ // Update to the new value.
+ setOperand(OperandToUpdate, ToC);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+
+ std::vector<Constant*> Values;
+ Values.reserve(getNumOperands()); // Build replacement array...
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = cast<Constant>(To);
+ Values.push_back(Val);
+ }
+
+ Constant *Replacement = get(Values);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
+ Use *U) {
+ assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
+ Constant *To = cast<Constant>(ToV);
+
+ Constant *Replacement = 0;
+ if (getOpcode() == Instruction::GetElementPtr) {
+ SmallVector<Constant*, 8> Indices;
+ Constant *Pointer = getOperand(0);
+ Indices.reserve(getNumOperands()-1);
+ if (Pointer == From) Pointer = To;
+
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = To;
+ Indices.push_back(Val);
+ }
+ Replacement = ConstantExpr::getGetElementPtr(Pointer,
+ &Indices[0], Indices.size(),
+ cast<GEPOperator>(this)->isInBounds());
+ } else if (getOpcode() == Instruction::ExtractValue) {
+ Constant *Agg = getOperand(0);
+ if (Agg == From) Agg = To;
+
+ ArrayRef<unsigned> Indices = getIndices();
+ Replacement = ConstantExpr::getExtractValue(Agg, Indices);
+ } else if (getOpcode() == Instruction::InsertValue) {
+ Constant *Agg = getOperand(0);
+ Constant *Val = getOperand(1);
+ if (Agg == From) Agg = To;
+ if (Val == From) Val = To;
+
+ ArrayRef<unsigned> Indices = getIndices();
+ Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices);
+ } else if (isCast()) {
+    assert(getOperand(0) == From && "Cast only has one operand!");
+ Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
+ } else if (getOpcode() == Instruction::Select) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getSelect(C1, C2, C3);
+ } else if (getOpcode() == Instruction::ExtractElement) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ Replacement = ConstantExpr::getExtractElement(C1, C2);
+ } else if (getOpcode() == Instruction::InsertElement) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+    Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getInsertElement(C1, C2, C3);
+ } else if (getOpcode() == Instruction::ShuffleVector) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getShuffleVector(C1, C2, C3);
+ } else if (isCompare()) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (getOpcode() == Instruction::ICmp)
+ Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2);
+ else {
+ assert(getOpcode() == Instruction::FCmp);
+ Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
+ }
+ } else if (getNumOperands() == 2) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ Replacement = ConstantExpr::get(getOpcode(), C1, C2, SubclassOptionalData);
+ } else {
+ llvm_unreachable("Unknown ConstantExpr type!");
+ return;
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
diff --git a/contrib/llvm/lib/VMCore/ConstantsContext.h b/contrib/llvm/lib/VMCore/ConstantsContext.h
new file mode 100644
index 000000000000..bd134d9b892d
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ConstantsContext.h
@@ -0,0 +1,709 @@
+//===-- ConstantsContext.h - Constants-related Context Internals ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for creating and managing constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANTSCONTEXT_H
+#define LLVM_CONSTANTSCONTEXT_H
+
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+namespace llvm {
+template<class ValType>
+struct ConstantTraits;
+
+/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement unary constant exprs.
+class UnaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty)
+ : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
+ Op<0>() = C;
+ }
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement binary constant exprs.
+class BinaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags)
+ : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ SubclassOptionalData = Flags;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// SelectConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement select constant exprs.
+class SelectConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractelement constant exprs.
+class ExtractElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ ExtractElementConstantExpr(Constant *C1, Constant *C2)
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ Instruction::ExtractElement, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertelement constant exprs.
+class InsertElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ShuffleVectorConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// shufflevector constant exprs.
+class ShuffleVectorConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(VectorType::get(
+ cast<VectorType>(C1->getType())->getElementType(),
+ cast<VectorType>(C3->getType())->getNumElements()),
+ Instruction::ShuffleVector,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractvalue constant exprs.
+class ExtractValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ ExtractValueConstantExpr(Constant *Agg,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ }
+
+ /// Indices - These identify which value to extract.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertvalue constant exprs.
+class InsertValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+  // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ InsertValueConstantExpr(Constant *Agg, Constant *Val,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ Op<1>() = Val;
+ }
+
+ /// Indices - These identify the position for the insertion.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+
+/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
+/// used behind the scenes to implement getelementptr constant exprs.
+class GetElementPtrConstantExpr : public ConstantExpr {
+ GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy);
+public:
+ static GetElementPtrConstantExpr *Create(Constant *C,
+ const std::vector<Constant*>&IdxList,
+ const Type *DestTy,
+ unsigned Flags) {
+ GetElementPtrConstantExpr *Result =
+ new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
+ Result->SubclassOptionalData = Flags;
+ return Result;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+// CompareConstantExpr - This class is private to Constants.cpp, and is used
+// behind the scenes to implement ICmp and FCmp constant expressions. This is
+// needed in order to store the predicate value for these instructions.
+struct CompareConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ unsigned short predicate;
+ CompareConstantExpr(const Type *ty, Instruction::OtherOps opc,
+ unsigned short pred, Constant* LHS, Constant* RHS)
+ : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+template <>
+struct OperandTraits<UnaryConstantExpr> :
+ public FixedNumOperandTraits<UnaryConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<BinaryConstantExpr> :
+ public FixedNumOperandTraits<BinaryConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<SelectConstantExpr> :
+ public FixedNumOperandTraits<SelectConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractElementConstantExpr> :
+ public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertElementConstantExpr> :
+ public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<ShuffleVectorConstantExpr> :
+ public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractValueConstantExpr> :
+ public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertValueConstantExpr> :
+ public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<GetElementPtrConstantExpr> :
+ public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
+
+
+template <>
+struct OperandTraits<CompareConstantExpr> :
+ public FixedNumOperandTraits<CompareConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+
+struct ExprMapKeyType {
+ ExprMapKeyType(unsigned opc,
+ ArrayRef<Constant*> ops,
+ unsigned short flags = 0,
+ unsigned short optionalflags = 0,
+ ArrayRef<unsigned> inds = ArrayRef<unsigned>())
+ : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
+ operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
+ uint8_t opcode;
+ uint8_t subclassoptionaldata;
+ uint16_t subclassdata;
+ std::vector<Constant*> operands;
+ SmallVector<unsigned, 4> indices;
+ bool operator==(const ExprMapKeyType& that) const {
+ return this->opcode == that.opcode &&
+ this->subclassdata == that.subclassdata &&
+ this->subclassoptionaldata == that.subclassoptionaldata &&
+ this->operands == that.operands &&
+ this->indices == that.indices;
+ }
+ bool operator<(const ExprMapKeyType & that) const {
+ if (this->opcode != that.opcode) return this->opcode < that.opcode;
+ if (this->operands != that.operands) return this->operands < that.operands;
+ if (this->subclassdata != that.subclassdata)
+ return this->subclassdata < that.subclassdata;
+ if (this->subclassoptionaldata != that.subclassoptionaldata)
+ return this->subclassoptionaldata < that.subclassoptionaldata;
+ if (this->indices != that.indices) return this->indices < that.indices;
+ return false;
+ }
+
+ bool operator!=(const ExprMapKeyType& that) const {
+ return !(*this == that);
+ }
+};
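+// For example (illustrative), a non-foldable expression such as
+//   add nsw (i64 ptrtoint (i32* @G to i64), i64 8)
+// (where @G is some hypothetical global) is keyed roughly as { opcode = Add,
+// subclassoptionaldata = NoSignedWrap, subclassdata = 0,
+// operands = { the ptrtoint expr, i64 8 }, indices = {} }, so requesting the
+// same expression twice yields the same ConstantExpr object.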
+
+struct InlineAsmKeyType {
+ InlineAsmKeyType(StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack)
+ : asm_string(AsmString), constraints(Constraints),
+ has_side_effects(hasSideEffects), is_align_stack(isAlignStack) {}
+ std::string asm_string;
+ std::string constraints;
+ bool has_side_effects;
+ bool is_align_stack;
+ bool operator==(const InlineAsmKeyType& that) const {
+ return this->asm_string == that.asm_string &&
+ this->constraints == that.constraints &&
+ this->has_side_effects == that.has_side_effects &&
+ this->is_align_stack == that.is_align_stack;
+ }
+ bool operator<(const InlineAsmKeyType& that) const {
+ if (this->asm_string != that.asm_string)
+ return this->asm_string < that.asm_string;
+ if (this->constraints != that.constraints)
+ return this->constraints < that.constraints;
+ if (this->has_side_effects != that.has_side_effects)
+ return this->has_side_effects < that.has_side_effects;
+ if (this->is_align_stack != that.is_align_stack)
+ return this->is_align_stack < that.is_align_stack;
+ return false;
+ }
+
+ bool operator!=(const InlineAsmKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+// The number of operands for each ConstantCreator::create method is
+// determined by the ConstantTraits template.
+//
+template<typename T, typename Alloc>
+struct ConstantTraits< std::vector<T, Alloc> > {
+ static unsigned uses(const std::vector<T, Alloc>& v) {
+ return v.size();
+ }
+};
+
+template<>
+struct ConstantTraits<Constant *> {
+ static unsigned uses(Constant * const & v) {
+ return 1;
+ }
+};
+
+// ConstantCreator - A class that is used to create constants by
+// ConstantUniqueMap*.  This class should be partially specialized if there is
+// something strange that needs to be done to interface to the ctor for the
+// constant.
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator {
+ static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
+ return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass>
+struct ConstantKeyData {
+ typedef void ValType;
+ static ValType getValType(ConstantClass *C) {
+ llvm_unreachable("Unknown Constant type!");
+ }
+};
+
+template<>
+struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
+ static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V,
+ unsigned short pred = 0) {
+ if (Instruction::isCast(V.opcode))
+ return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
+ if ((V.opcode >= Instruction::BinaryOpsBegin &&
+ V.opcode < Instruction::BinaryOpsEnd))
+ return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
+ V.subclassoptionaldata);
+ if (V.opcode == Instruction::Select)
+ return new SelectConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ExtractElement)
+ return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::InsertElement)
+ return new InsertElementConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ShuffleVector)
+ return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::InsertValue)
+ return new InsertValueConstantExpr(V.operands[0], V.operands[1],
+ V.indices, Ty);
+ if (V.opcode == Instruction::ExtractValue)
+ return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
+ if (V.opcode == Instruction::GetElementPtr) {
+ std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
+ return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
+ V.subclassoptionaldata);
+ }
+
+    // The compare instructions are special: in addition to the opcode they
+    // carry a predicate, which the key keeps in its subclassdata field and
+    // which we pass straight through to CompareConstantExpr.
+ if (V.opcode == Instruction::ICmp)
+ return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::FCmp)
+ return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ llvm_unreachable("Invalid ConstantExpr!");
+ return 0;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantExpr> {
+ typedef ExprMapKeyType ValType;
+ static ValType getValType(ConstantExpr *CE) {
+ std::vector<Constant*> Operands;
+ Operands.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Operands.push_back(cast<Constant>(CE->getOperand(i)));
+ return ExprMapKeyType(CE->getOpcode(), Operands,
+ CE->isCompare() ? CE->getPredicate() : 0,
+ CE->getRawSubclassOptionalData(),
+ CE->hasIndices() ?
+ CE->getIndices() : ArrayRef<unsigned>());
+ }
+};
+
+// ConstantAggregateZero does not take an extra "value" argument.
+template<class ValType>
+struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
+ static ConstantAggregateZero *create(const Type *Ty, const ValType &V){
+ return new ConstantAggregateZero(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantVector> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantVector *CP) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CP->getNumOperands());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ Elements.push_back(CP->getOperand(i));
+ return Elements;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantAggregateZero> {
+ typedef char ValType;
+ static ValType getValType(ConstantAggregateZero *C) {
+ return 0;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantArray> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantArray *CA) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CA->getOperand(i)));
+ return Elements;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantStruct> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantStruct *CS) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CS->getNumOperands());
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CS->getOperand(i)));
+ return Elements;
+ }
+};
+
+// ConstantPointerNull does not take an extra "value" argument.
+template<class ValType>
+struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
+ static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){
+ return new ConstantPointerNull(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantPointerNull> {
+ typedef char ValType;
+ static ValType getValType(ConstantPointerNull *C) {
+ return 0;
+ }
+};
+
+// UndefValue does not take an extra "value" argument.
+template<class ValType>
+struct ConstantCreator<UndefValue, Type, ValType> {
+ static UndefValue *create(const Type *Ty, const ValType &V) {
+ return new UndefValue(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<UndefValue> {
+ typedef char ValType;
+ static ValType getValType(UndefValue *C) {
+ return 0;
+ }
+};
+
+template<>
+struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
+ static InlineAsm *create(const PointerType *Ty, const InlineAsmKeyType &Key) {
+ return new InlineAsm(Ty, Key.asm_string, Key.constraints,
+ Key.has_side_effects, Key.is_align_stack);
+ }
+};
+
+template<>
+struct ConstantKeyData<InlineAsm> {
+ typedef InlineAsmKeyType ValType;
+ static ValType getValType(InlineAsm *Asm) {
+ return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
+ Asm->hasSideEffects(), Asm->isAlignStack());
+ }
+};
+
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+ bool HasLargeKey = false /*true for arrays and structs*/ >
+class ConstantUniqueMap {
+public:
+ typedef std::pair<const TypeClass*, ValType> MapKey;
+ typedef std::map<MapKey, ConstantClass *> MapTy;
+ typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+ /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
+ /// from the constants to their element in Map. This is important for
+ /// removal of constants from the array, which would otherwise have to scan
+ /// through the map with very large keys.
+ InverseMapTy InverseMap;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->second;
+ }
+ }
+
+  /// InsertOrGetItem - Return an iterator for the specified element.
+  /// If the element exists in the map, the returned iterator points to the
+  /// entry and Exists is set to true.  If not, the iterator points to the
+  /// newly inserted entry and Exists is set to false.  Newly inserted entries
+  /// have I->second == 0, and should be filled in by the caller.
+ typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
+ &InsertVal,
+ bool &Exists) {
+ std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
+ Exists = !IP.second;
+ return IP.first;
+ }
+
+private:
+ typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
+ if (HasLargeKey) {
+ typename InverseMapTy::iterator IMI = InverseMap.find(CP);
+ assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
+ IMI->second->second == CP &&
+ "InverseMap corrupt!");
+ return IMI->second;
+ }
+
+ typename MapTy::iterator I =
+ Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()),
+ ConstantKeyData<ConstantClass>::getValType(CP)));
+ if (I == Map.end() || I->second != CP) {
+ // FIXME: This should not use a linear scan. If this gets to be a
+ // performance problem, someone should look at this.
+ for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
+ /* empty */;
+ }
+ return I;
+ }
+
+ ConstantClass *Create(const TypeClass *Ty, ValRefType V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.insert(std::make_pair(Result, I));
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
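+  /// For example (illustrative), ConstantExpr::get in Constants.cpp calls
+  ///   pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
+  /// after constant folding fails; requesting the same (Ty, V) pair again
+  /// returns the same ConstantClass object, which is what makes uniqued
+  /// constants pointer-comparable.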
+ ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) {
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->second;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = FindExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->second == CP && "Didn't find correct element?");
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.erase(CP);
+
+ Map.erase(I);
+ }
+
+ /// MoveConstantToNewSlot - If we are about to change C to be the element
+ /// specified by I, update our internal data structures to reflect this
+ /// fact.
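+  /// For example (illustrative), ConstantArray::replaceUsesOfWithOnConstant in
+  /// Constants.cpp calls this when it rewrites an existing array in place
+  /// instead of building a replacement constant.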
+ void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
+ // First, remove the old location of the specified constant in the map.
+ typename MapTy::iterator OldI = FindExistingElement(C);
+ assert(OldI != Map.end() && "Constant not found in constant table!");
+ assert(OldI->second == C && "Didn't find correct element?");
+
+ // Remove the old entry from the map.
+ Map.erase(OldI);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey) {
+ assert(I->second == C && "Bad inversemap entry!");
+ InverseMap[C] = I;
+ }
+ }
+
+ void dump() const {
+    DEBUG(dbgs() << "Constants.cpp: ConstantUniqueMap\n");
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/Core.cpp b/contrib/llvm/lib/VMCore/Core.cpp
new file mode 100644
index 000000000000..2a816e123a61
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Core.cpp
@@ -0,0 +1,2217 @@
+//===-- Core.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including the C bindings)
+// for libLLVMCore.a, which implements the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeCore(PassRegistry &Registry) {
+ initializeDominatorTreePass(Registry);
+ initializePrintModulePassPass(Registry);
+ initializePrintFunctionPassPass(Registry);
+ initializeVerifierPass(Registry);
+ initializePreVerifierPass(Registry);
+}
+
+void LLVMInitializeCore(LLVMPassRegistryRef R) {
+ initializeCore(*unwrap(R));
+}
+
+/*===-- Error handling ----------------------------------------------------===*/
+
+void LLVMDisposeMessage(char *Message) {
+ free(Message);
+}
+
+
+/*===-- Operations on contexts --------------------------------------------===*/
+
+LLVMContextRef LLVMContextCreate() {
+ return wrap(new LLVMContext());
+}
+
+LLVMContextRef LLVMGetGlobalContext() {
+ return wrap(&getGlobalContext());
+}
+
+void LLVMContextDispose(LLVMContextRef C) {
+ delete unwrap(C);
+}
+
+unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
+ unsigned SLen) {
+ return unwrap(C)->getMDKindID(StringRef(Name, SLen));
+}
+
+unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
+ return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
+}
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
+ return wrap(new Module(ModuleID, getGlobalContext()));
+}
+
+LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
+ LLVMContextRef C) {
+ return wrap(new Module(ModuleID, *unwrap(C)));
+}
+
+void LLVMDisposeModule(LLVMModuleRef M) {
+ delete unwrap(M);
+}
+
+/*--.. Data layout .........................................................--*/
+const char * LLVMGetDataLayout(LLVMModuleRef M) {
+ return unwrap(M)->getDataLayout().c_str();
+}
+
+void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr) {
+  unwrap(M)->setDataLayout(DataLayoutStr);
+}
+
+/*--.. Target triple .......................................................--*/
+const char * LLVMGetTarget(LLVMModuleRef M) {
+ return unwrap(M)->getTargetTriple().c_str();
+}
+
+void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
+ unwrap(M)->setTargetTriple(Triple);
+}
+
+void LLVMDumpModule(LLVMModuleRef M) {
+ unwrap(M)->dump();
+}
+
+/*--.. Operations on inline assembler ......................................--*/
+void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
+ unwrap(M)->setModuleInlineAsm(StringRef(Asm));
+}
+
+
+/*--.. Operations on module contexts ......................................--*/
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
+ return wrap(&unwrap(M)->getContext());
+}
+
+
+/*===-- Operations on types -----------------------------------------------===*/
+
+/*--.. Operations on all types (mostly) ....................................--*/
+
+LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
+ switch (unwrap(Ty)->getTypeID()) {
+ default:
+ assert(false && "Unhandled TypeID.");
+ case Type::VoidTyID:
+ return LLVMVoidTypeKind;
+ case Type::FloatTyID:
+ return LLVMFloatTypeKind;
+ case Type::DoubleTyID:
+ return LLVMDoubleTypeKind;
+ case Type::X86_FP80TyID:
+ return LLVMX86_FP80TypeKind;
+ case Type::FP128TyID:
+ return LLVMFP128TypeKind;
+ case Type::PPC_FP128TyID:
+ return LLVMPPC_FP128TypeKind;
+ case Type::LabelTyID:
+ return LLVMLabelTypeKind;
+ case Type::MetadataTyID:
+ return LLVMMetadataTypeKind;
+ case Type::IntegerTyID:
+ return LLVMIntegerTypeKind;
+ case Type::FunctionTyID:
+ return LLVMFunctionTypeKind;
+ case Type::StructTyID:
+ return LLVMStructTypeKind;
+ case Type::ArrayTyID:
+ return LLVMArrayTypeKind;
+ case Type::PointerTyID:
+ return LLVMPointerTypeKind;
+ case Type::VectorTyID:
+ return LLVMVectorTypeKind;
+ case Type::X86_MMXTyID:
+ return LLVMX86_MMXTypeKind;
+ }
+}
+
+LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
+ return wrap(&unwrap(Ty)->getContext());
+}
+
+/*--.. Operations on integer types .........................................--*/
+
+LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
+ return wrap(IntegerType::get(*unwrap(C), NumBits));
+}
+
+LLVMTypeRef LLVMInt1Type(void) {
+ return LLVMInt1TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt8Type(void) {
+ return LLVMInt8TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt16Type(void) {
+ return LLVMInt16TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt32Type(void) {
+ return LLVMInt32TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt64Type(void) {
+ return LLVMInt64TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMIntType(unsigned NumBits) {
+ return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
+}
+
+unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
+ return unwrap<IntegerType>(IntegerTy)->getBitWidth();
+}
+
+/*--.. Operations on real types ............................................--*/
+
+LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
+}
+LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C));
+}
+LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
+}
+
+LLVMTypeRef LLVMFloatType(void) {
+ return LLVMFloatTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMDoubleType(void) {
+ return LLVMDoubleTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86FP80Type(void) {
+ return LLVMX86FP80TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFP128Type(void) {
+ return LLVMFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMPPCFP128Type(void) {
+ return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86MMXType(void) {
+ return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
+}
+
+/*--.. Operations on function types ........................................--*/
+
+LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount,
+ LLVMBool IsVarArg) {
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
+}
+
+LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->isVarArg();
+}
+
+LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) {
+ return wrap(unwrap<FunctionType>(FunctionTy)->getReturnType());
+}
+
+unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->getNumParams();
+}
+
+void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
+ FunctionType *Ty = unwrap<FunctionType>(FunctionTy);
+ for (FunctionType::param_iterator I = Ty->param_begin(),
+ E = Ty->param_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
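+
+/* Illustrative usage sketch, not part of the bindings above: a client pairs
+ LLVMCountParamTypes with a caller-allocated array before calling
+ LLVMGetParamTypes. All names below are hypothetical. */
+static void ExampleQueryFunctionType(void) {
+ LLVMTypeRef Params[2] = { LLVMInt32Type(), LLVMInt32Type() };
+ LLVMTypeRef FnTy = LLVMFunctionType(LLVMInt32Type(), Params, 2, 0);
+ LLVMTypeRef Fetched[2];
+ LLVMGetParamTypes(FnTy, Fetched); /* fills Fetched[0..1] */
+ unsigned NumParams = LLVMCountParamTypes(FnTy); /* 2 */
+ LLVMTypeRef RetTy = LLVMGetReturnType(FnTy); /* i32 */
+ (void)NumParams; (void)RetTy;
+}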
+
+/*--.. Operations on struct types ..........................................--*/
+
+LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
+}
+
+LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
+ ElementCount, Packed);
+}
+
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) {
+ return wrap(StructType::createNamed(*unwrap(C), Name));
+}
+
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
+}
+
+unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->getNumElements();
+}
+
+void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
+ StructType *Ty = unwrap<StructType>(StructTy);
+ for (StructType::element_iterator I = Ty->element_begin(),
+ E = Ty->element_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isPacked();
+}
+
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isOpaque();
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getTypeByName(Name));
+}
+
+/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
+
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
+ return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
+}
+
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(VectorType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
+ return wrap(unwrap<SequentialType>(Ty)->getElementType());
+}
+
+unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
+ return unwrap<ArrayType>(ArrayTy)->getNumElements();
+}
+
+unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
+ return unwrap<PointerType>(PointerTy)->getAddressSpace();
+}
+
+unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
+ return unwrap<VectorType>(VectorTy)->getNumElements();
+}
+
+/*--.. Operations on other types ...........................................--*/
+
+LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getVoidTy(*unwrap(C)));
+}
+LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getLabelTy(*unwrap(C)));
+}
+
+LLVMTypeRef LLVMVoidType(void) {
+ return LLVMVoidTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMLabelType(void) {
+ return LLVMLabelTypeInContext(LLVMGetGlobalContext());
+}
+
+/*===-- Operations on values ----------------------------------------------===*/
+
+/*--.. Operations on all values ............................................--*/
+
+LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
+ return wrap(unwrap(Val)->getType());
+}
+
+const char *LLVMGetValueName(LLVMValueRef Val) {
+ return unwrap(Val)->getName().data();
+}
+
+void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
+ unwrap(Val)->setName(Name);
+}
+
+void LLVMDumpValue(LLVMValueRef Val) {
+ unwrap(Val)->dump();
+}
+
+void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
+ unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
+}
+
+int LLVMHasMetadata(LLVMValueRef Inst) {
+ return unwrap<Instruction>(Inst)->hasMetadata();
+}
+
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
+ return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
+ unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+}
+
+/*--.. Conversion functions ................................................--*/
+
+#define LLVM_DEFINE_VALUE_CAST(name) \
+ LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \
+ return wrap(static_cast<Value*>(dyn_cast_or_null<name>(unwrap(Val)))); \
+ }
+
+LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
+
+/*--.. Operations on Uses ..................................................--*/
+LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ Value::use_iterator I = V->use_begin();
+ if (I == V->use_end())
+ return 0;
+ return wrap(&(I.getUse()));
+}
+
+LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
+ Use *Next = unwrap(U)->getNext();
+ if (Next)
+ return wrap(Next);
+ return 0;
+}
+
+LLVMValueRef LLVMGetUser(LLVMUseRef U) {
+ return wrap(unwrap(U)->getUser());
+}
+
+LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
+ return wrap(unwrap(U)->get());
+}
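+
+/* Illustrative sketch (hypothetical helper, not from the sources): the Use
+ accessors above form a forward-iteration protocol over a value's uses. */
+static unsigned ExampleCountUses(LLVMValueRef Val) {
+ unsigned Count = 0;
+ for (LLVMUseRef U = LLVMGetFirstUse(Val); U; U = LLVMGetNextUse(U)) {
+ LLVMValueRef TheUser = LLVMGetUser(U); /* instruction or constant using Val */
+ (void)TheUser;
+ ++Count;
+ }
+ return Count;
+}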
+
+/*--.. Operations on Users .................................................--*/
+LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
+ return wrap(unwrap<User>(Val)->getOperand(Index));
+}
+
+void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
+ unwrap<User>(Val)->setOperand(Index, unwrap(Op));
+}
+
+int LLVMGetNumOperands(LLVMValueRef Val) {
+ return unwrap<User>(Val)->getNumOperands();
+}
+
+/*--.. Operations on constants of any type .................................--*/
+
+LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
+ return wrap(Constant::getNullValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) {
+ return wrap(Constant::getAllOnesValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
+ return wrap(UndefValue::get(unwrap(Ty)));
+}
+
+LLVMBool LLVMIsConstant(LLVMValueRef Val) {
+ return isa<Constant>(unwrap(Val));
+}
+
+LLVMBool LLVMIsNull(LLVMValueRef Val) {
+ if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
+ return C->isNullValue();
+ return false;
+}
+
+LLVMBool LLVMIsUndef(LLVMValueRef Val) {
+ return isa<UndefValue>(unwrap(Val));
+}
+
+LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
+ return wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
+}
+
+/*--.. Operations on metadata nodes ........................................--*/
+
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
+ return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
+}
+
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count) {
+ return wrap(MDNode::get(*unwrap(C),
+ ArrayRef<Value*>(unwrap<Value>(Vals, Count), Count)));
+}
+
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
+ return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
+}
+
+/*--.. Operations on scalar constants ......................................--*/
+
+LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
+ LLVMBool SignExtend) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
+}
+
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+ unsigned NumWords,
+ const uint64_t Words[]) {
+ IntegerType *Ty = unwrap<IntegerType>(IntTy);
+ return wrap(ConstantInt::get(Ty->getContext(),
+ APInt(Ty->getBitWidth(), NumWords, Words)));
+}
+
+LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
+ uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
+ Radix));
+}
+
+LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[],
+ unsigned SLen, uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str, SLen),
+ Radix));
+}
+
+LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
+ return wrap(ConstantFP::get(unwrap(RealTy), N));
+}
+
+LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text)));
+}
+
+LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[],
+ unsigned SLen) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen)));
+}
+
+unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getZExtValue();
+}
+
+long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
+}
+
+/*--.. Operations on composite constants ...................................--*/
+
+LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
+ unsigned Length,
+ LLVMBool DontNullTerminate) {
+ /* Inverted the sense of AddNull because ', 0)' is a
+ better mnemonic for null termination than ', 1)'. */
+ return wrap(ConstantArray::get(*unwrap(C), StringRef(Str, Length),
+ DontNullTerminate == 0));
+}
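+
+/* Illustrative sketch of the inverted flag documented above: passing 0 for
+ DontNullTerminate requests an implicit trailing NUL. Hypothetical code,
+ not part of the bindings. */
+static void ExampleConstString(void) {
+ LLVMValueRef S = LLVMConstString("hi", 2, 0);
+ unsigned Len = LLVMGetArrayLength(LLVMTypeOf(S)); /* 3: 'h', 'i', '\0' */
+ (void)Len;
+}
+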
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, LLVMBool Packed) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ return wrap(ConstantStruct::getAnon(*unwrap(C),
+ ArrayRef<Constant*>(Elements, Count),
+ Packed != 0));
+}
+
+LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
+ LLVMBool DontNullTerminate) {
+ return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
+ DontNullTerminate);
+}
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+ LLVMValueRef *ConstantVals, unsigned Length) {
+ ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
+}
+LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
+ LLVMBool Packed) {
+ return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
+ Packed);
+}
+
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+ LLVMValueRef *ConstantVals,
+ unsigned Count) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ const StructType *Ty = cast<StructType>(unwrap(StructTy));
+
+ return wrap(ConstantStruct::get(Ty, ArrayRef<Constant*>(Elements, Count)));
+}
+
+LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
+ return wrap(ConstantVector::get(ArrayRef<Constant*>(
+ unwrap<Constant>(ScalarConstantVals, Size), Size)));
+}
+/*--.. Constant expressions ................................................--*/
+
+LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
+ return (LLVMOpcode)unwrap<ConstantExpr>(ConstantVal)->getOpcode();
+}
+
+LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getSizeOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+
+LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getICmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFCmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
+ unwrap<Constant>(ConstantIndices,
+ NumIndices),
+ NumIndices));
+}
+
+LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices) {
+ Constant* Val = unwrap<Constant>(ConstantVal);
+ Constant** Idxs = unwrap<Constant>(ConstantIndices, NumIndices);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, Idxs, NumIndices));
+}
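+
+/* Illustrative sketch: forming a constant pointer to element 1 of a
+ hypothetical global [4 x i32] array G. The first index steps through the
+ pointer, the second selects the element. */
+static LLVMValueRef ExampleConstGEP(LLVMValueRef G) {
+ LLVMValueRef Idx[2] = { LLVMConstInt(LLVMInt32Type(), 0, 0),
+ LLVMConstInt(LLVMInt32Type(), 1, 0) };
+ return LLVMConstInBoundsGEP(G, Idx, 2);
+}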
+
+LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
+ LLVMBool isSigned) {
+ return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType), isSigned));
+}
+
+LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
+ LLVMValueRef ConstantIfTrue,
+ LLVMValueRef ConstantIfFalse) {
+ return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
+ unwrap<Constant>(ConstantIfTrue),
+ unwrap<Constant>(ConstantIfFalse)));
+}
+
+LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
+ LLVMValueRef ElementValueConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(ElementValueConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
+ LLVMValueRef VectorBConstant,
+ LLVMValueRef MaskConstant) {
+ return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
+ unwrap<Constant>(VectorBConstant),
+ unwrap<Constant>(MaskConstant)));
+}
+
+LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
+ unsigned NumIdx) {
+ return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
+ ArrayRef<unsigned>(IdxList,
+ NumIdx)));
+}
+
+LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
+ LLVMValueRef ElementValueConstant,
+ unsigned *IdxList, unsigned NumIdx) {
+ return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
+ unwrap<Constant>(ElementValueConstant),
+ ArrayRef<unsigned>(IdxList,
+ NumIdx)));
+}
+
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ const char *Constraints,
+ LLVMBool HasSideEffects,
+ LLVMBool IsAlignStack) {
+ return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
+ Constraints, HasSideEffects, IsAlignStack));
+}
+
+LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
+ return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
+}
+
+/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
+
+LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getParent());
+}
+
+LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->isDeclaration();
+}
+
+LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
+ switch (unwrap<GlobalValue>(Global)->getLinkage()) {
+ default:
+ assert(false && "Unhandled Linkage Type.");
+ case GlobalValue::ExternalLinkage:
+ return LLVMExternalLinkage;
+ case GlobalValue::AvailableExternallyLinkage:
+ return LLVMAvailableExternallyLinkage;
+ case GlobalValue::LinkOnceAnyLinkage:
+ return LLVMLinkOnceAnyLinkage;
+ case GlobalValue::LinkOnceODRLinkage:
+ return LLVMLinkOnceODRLinkage;
+ case GlobalValue::WeakAnyLinkage:
+ return LLVMWeakAnyLinkage;
+ case GlobalValue::WeakODRLinkage:
+ return LLVMWeakODRLinkage;
+ case GlobalValue::AppendingLinkage:
+ return LLVMAppendingLinkage;
+ case GlobalValue::InternalLinkage:
+ return LLVMInternalLinkage;
+ case GlobalValue::PrivateLinkage:
+ return LLVMPrivateLinkage;
+ case GlobalValue::LinkerPrivateLinkage:
+ return LLVMLinkerPrivateLinkage;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ return LLVMLinkerPrivateWeakLinkage;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ return LLVMLinkerPrivateWeakDefAutoLinkage;
+ case GlobalValue::DLLImportLinkage:
+ return LLVMDLLImportLinkage;
+ case GlobalValue::DLLExportLinkage:
+ return LLVMDLLExportLinkage;
+ case GlobalValue::ExternalWeakLinkage:
+ return LLVMExternalWeakLinkage;
+ case GlobalValue::CommonLinkage:
+ return LLVMCommonLinkage;
+ }
+
+ // Should never get here.
+ return static_cast<LLVMLinkage>(0);
+}
+
+void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
+ GlobalValue *GV = unwrap<GlobalValue>(Global);
+
+ switch (Linkage) {
+ default:
+ assert(false && "Unhandled Linkage Type.");
+ case LLVMExternalLinkage:
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ break;
+ case LLVMAvailableExternallyLinkage:
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ break;
+ case LLVMLinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ break;
+ case LLVMLinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ break;
+ case LLVMWeakAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case LLVMWeakODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ case LLVMAppendingLinkage:
+ GV->setLinkage(GlobalValue::AppendingLinkage);
+ break;
+ case LLVMInternalLinkage:
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ break;
+ case LLVMPrivateLinkage:
+ GV->setLinkage(GlobalValue::PrivateLinkage);
+ break;
+ case LLVMLinkerPrivateLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
+ break;
+ case LLVMLinkerPrivateWeakLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+ break;
+ case LLVMLinkerPrivateWeakDefAutoLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage);
+ break;
+ case LLVMDLLImportLinkage:
+ GV->setLinkage(GlobalValue::DLLImportLinkage);
+ break;
+ case LLVMDLLExportLinkage:
+ GV->setLinkage(GlobalValue::DLLExportLinkage);
+ break;
+ case LLVMExternalWeakLinkage:
+ GV->setLinkage(GlobalValue::ExternalWeakLinkage);
+ break;
+ case LLVMGhostLinkage:
+ DEBUG(errs()
+ << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported.");
+ break;
+ case LLVMCommonLinkage:
+ GV->setLinkage(GlobalValue::CommonLinkage);
+ break;
+ }
+}
+
+const char *LLVMGetSection(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getSection().c_str();
+}
+
+void LLVMSetSection(LLVMValueRef Global, const char *Section) {
+ unwrap<GlobalValue>(Global)->setSection(Section);
+}
+
+LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
+ return static_cast<LLVMVisibility>(
+ unwrap<GlobalValue>(Global)->getVisibility());
+}
+
+void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
+ unwrap<GlobalValue>(Global)
+ ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
+}
+
+unsigned LLVMGetAlignment(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getAlignment();
+}
+
+void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
+ unwrap<GlobalValue>(Global)->setAlignment(Bytes);
+}
+
+/*--.. Operations on global variables ......................................--*/
+
+LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name));
+}
+
+LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
+ const char *Name,
+ unsigned AddressSpace) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name, 0,
+ false, AddressSpace));
+}
+
+LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getNamedGlobal(Name));
+}
+
+LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_begin();
+ if (I == Mod->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_end();
+ if (I == Mod->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (++I == GV->getParent()->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (I == GV->getParent()->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
+ unwrap<GlobalVariable>(GlobalVar)->eraseFromParent();
+}
+
+LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
+ GlobalVariable* GV = unwrap<GlobalVariable>(GlobalVar);
+ if (!GV->hasInitializer())
+ return 0;
+ return wrap(GV->getInitializer());
+}
+
+void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
+ unwrap<GlobalVariable>(GlobalVar)
+ ->setInitializer(unwrap<Constant>(ConstantVal));
+}
+
+LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
+}
+
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) {
+ unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
+}
+
+LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isConstant();
+}
+
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
+ unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
+}
+
+/*--.. Operations on aliases ......................................--*/
+
+LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap(M)));
+}
+
+/*--.. Operations on functions .............................................--*/
+
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+ LLVMTypeRef FunctionTy) {
+ return wrap(Function::Create(unwrap<FunctionType>(FunctionTy),
+ GlobalValue::ExternalLinkage, Name, unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getFunction(Name));
+}
+
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->begin();
+ if (I == Mod->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->end();
+ if (I == Mod->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (++I == Func->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (I == Func->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
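+
+/* Illustrative sketch (hypothetical helper): the First/Next pairs above give
+ forward iteration over a module's functions. */
+static void ExampleForEachFunction(LLVMModuleRef M) {
+ LLVMValueRef Fn;
+ for (Fn = LLVMGetFirstFunction(M); Fn; Fn = LLVMGetNextFunction(Fn)) {
+ const char *Name = LLVMGetValueName(Fn); /* "" for unnamed functions */
+ (void)Name;
+ }
+}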
+
+void LLVMDeleteFunction(LLVMValueRef Fn) {
+ unwrap<Function>(Fn)->eraseFromParent();
+}
+
+unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
+ if (Function *F = dyn_cast<Function>(unwrap(Fn)))
+ return F->getIntrinsicID();
+ return 0;
+}
+
+unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
+ return unwrap<Function>(Fn)->getCallingConv();
+}
+
+void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
+ return unwrap<Function>(Fn)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
+}
+
+const char *LLVMGetGC(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ return F->hasGC()? F->getGC() : 0;
+}
+
+void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
+ Function *F = unwrap<Function>(Fn);
+ if (GC)
+ F->setGC(GC);
+ else
+ F->clearGC();
+}
+
+void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ const AttrListPtr PALnew = PAL.addAttr(~0U, PA);
+ Func->setAttributes(PALnew);
+}
+
+void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ const AttrListPtr PALnew = PAL.removeAttr(~0U, PA);
+ Func->setAttributes(PALnew);
+}
+
+LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ Attributes attr = PAL.getFnAttributes();
+ return (LLVMAttribute)attr;
+}
+
+/*--.. Operations on parameters ............................................--*/
+
+unsigned LLVMCountParams(LLVMValueRef FnRef) {
+ // This function is strictly redundant to
+ // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef)))
+ return unwrap<Function>(FnRef)->arg_size();
+}
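+
+/* Illustrative sketch of the equivalence noted in the comment above, for a
+ hypothetical function value F. */
+static void ExampleCountParamsEquivalence(LLVMValueRef F) {
+ unsigned A = LLVMCountParams(F);
+ unsigned B = LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(F)));
+ (void)A; (void)B; /* A == B for any function value */
+}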
+
+void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::arg_iterator I = Fn->arg_begin(),
+ E = Fn->arg_end(); I != E; I++)
+ *ParamRefs++ = wrap(I);
+}
+
+LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
+ Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
+ while (index-- > 0)
+ AI++;
+ return wrap(AI);
+}
+
+LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
+ return wrap(unwrap<Argument>(V)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_begin();
+ if (I == Func->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_end();
+ if (I == Func->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (++I == A->getParent()->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (I == A->getParent()->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ unwrap<Argument>(Arg)->addAttr(PA);
+}
+
+void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ unwrap<Argument>(Arg)->removeAttr(PA);
+}
+
+LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Attributes attr = A->getParent()->getAttributes().getParamAttributes(
+ A->getArgNo()+1);
+ return (LLVMAttribute)attr;
+}
+
+
+void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
+ unwrap<Argument>(Arg)->addAttr(
+ Attribute::constructAlignmentFromInt(align));
+}
+
+/*--.. Operations on basic blocks ..........................................--*/
+
+LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
+ return wrap(static_cast<Value*>(unwrap(BB)));
+}
+
+LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) {
+ return isa<BasicBlock>(unwrap(Val));
+}
+
+LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) {
+ return wrap(unwrap<BasicBlock>(Val));
+}
+
+LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getParent());
+}
+
+unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
+ return unwrap<Function>(FnRef)->size();
+}
+
+void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
+ *BasicBlocksRefs++ = wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
+ return wrap(&unwrap<Function>(Fn)->getEntryBlock());
+}
+
+LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->begin();
+ if (I == Func->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->end();
+ if (I == Func->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (++I == Block->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (I == Block->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
+ LLVMValueRef FnRef,
+ const char *Name) {
+ return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap<Function>(FnRef)));
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
+ return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name);
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
+ LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ BasicBlock *BB = unwrap(BBRef);
+ return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB));
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name);
+}
+
+void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->eraseFromParent();
+}
+
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveBefore(unwrap(MovePos));
+}
+
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveAfter(unwrap(MovePos));
+}
+
+/*--.. Operations on instructions ..........................................--*/
+
+LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
+ return wrap(unwrap<Instruction>(Inst)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->begin();
+ if (I == Block->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->end();
+ if (I == Block->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (++I == Instr->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (I == Instr->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+/*--.. Call and invoke instructions ........................................--*/
+
+unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->getCallingConv();
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->getCallingConv();
+ llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
+ return 0;
+}
+
+void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->setCallingConv(static_cast<CallingConv::ID>(CC));
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->setCallingConv(static_cast<CallingConv::ID>(CC));
+ llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!");
+}
+
+void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().addAttr(index, PA));
+}
+
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().removeAttr(index, PA));
+}
+
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+ unsigned align) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().addAttr(index,
+ Attribute::constructAlignmentFromInt(align)));
+}
+
+/*--.. Operations on call instructions (only) ..............................--*/
+
+LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
+ return unwrap<CallInst>(Call)->isTailCall();
+}
+
+void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
+ unwrap<CallInst>(Call)->setTailCall(isTailCall);
+}
+
+/*--.. Operations on phi nodes .............................................--*/
+
+void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
+ LLVMBasicBlockRef *IncomingBlocks, unsigned Count) {
+ PHINode *PhiVal = unwrap<PHINode>(PhiNode);
+ for (unsigned I = 0; I != Count; ++I)
+ PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I]));
+}
+
+unsigned LLVMCountIncoming(LLVMValueRef PhiNode) {
+ return unwrap<PHINode>(PhiNode)->getNumIncomingValues();
+}
+
+LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingValue(Index));
+}
+
+LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingBlock(Index));
+}
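+
+/* Illustrative sketch: wiring a phi node's incoming edges with LLVMAddIncoming.
+ Phi, the values, and the blocks are all hypothetical; the two arrays must be
+ the same length. */
+static void ExampleAddIncoming(LLVMValueRef Phi, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMBasicBlockRef BB1,
+ LLVMBasicBlockRef BB2) {
+ LLVMValueRef Vals[2] = { V1, V2 };
+ LLVMBasicBlockRef Blocks[2] = { BB1, BB2 };
+ LLVMAddIncoming(Phi, Vals, Blocks, 2); /* Phi now has two incoming edges */
+}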
+
+
+/*===-- Instruction builders ----------------------------------------------===*/
+
+LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) {
+ return wrap(new IRBuilder<>(*unwrap(C)));
+}
+
+LLVMBuilderRef LLVMCreateBuilder(void) {
+ return LLVMCreateBuilderInContext(LLVMGetGlobalContext());
+}
+
+void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
+ LLVMValueRef Instr) {
+ BasicBlock *BB = unwrap(Block);
+ // A null Instr means "insert at the end of the block".
+ Instruction *I = Instr? unwrap<Instruction>(Instr) : (Instruction*) BB->end();
+ unwrap(Builder)->SetInsertPoint(BB, I);
+}
+
+void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ Instruction *I = unwrap<Instruction>(Instr);
+ unwrap(Builder)->SetInsertPoint(I->getParent(), I);
+}
+
+void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
+ BasicBlock *BB = unwrap(Block);
+ unwrap(Builder)->SetInsertPoint(BB);
+}
+
+LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->GetInsertBlock());
+}
+
+void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
+ unwrap(Builder)->ClearInsertionPoint();
+}
+
+void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
+}
+
+void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
+ const char *Name) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr), Name);
+}
+
+void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
+ delete unwrap(Builder);
+}
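+
+/* Illustrative sketch of the builder lifecycle: create, position at the end of
+ a block, emit instructions, dispose. F is a hypothetical function of type
+ i32 (i32, i32). */
+static void ExampleEmitAdd(LLVMValueRef F) {
+ LLVMBuilderRef B = LLVMCreateBuilder();
+ LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(F, "entry");
+ LLVMPositionBuilderAtEnd(B, Entry);
+ LLVMValueRef Sum = LLVMBuildAdd(B, LLVMGetParam(F, 0), LLVMGetParam(F, 1),
+ "sum");
+ LLVMBuildRet(B, Sum);
+ LLVMDisposeBuilder(B);
+}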
+
+/*--.. Metadata builders ...................................................--*/
+
+void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
+ MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
+}
+
+LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation()
+ .getAsMDNode(unwrap(Builder)->getContext()));
+}
+
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
+}
+
+
+/*--.. Instruction builders ................................................--*/
+
+LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateRetVoid());
+}
+
+LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
+ return wrap(unwrap(B)->CreateRet(unwrap(V)));
+}
+
+LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals,
+ unsigned N) {
+ return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N));
+}
+
+LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
+ return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
+}
+
+LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) {
+ return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else)));
+}
+
+LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
+ LLVMBasicBlockRef Else, unsigned NumCases) {
+ return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
+}
+
+LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
+ unsigned NumDests) {
+ return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
+}
+
+LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
+ ArrayRef<Value *>(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildUnwind(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnwind());
+}
+
+LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnreachable());
+}
+
+void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
+ LLVMBasicBlockRef Dest) {
+ unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
+}
+
+void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
+ unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
+}
+
+/*--.. Arithmetic ..........................................................--*/
+
+LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS),
+ unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
+}
+
+/*--.. Memory ..............................................................--*/
+
+LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ 0, 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ unwrap(Val), 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->Insert(
+ CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
+}
+
+
+LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+}
+
+LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
+}
+
+LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), unwrap(Indices),
+ unwrap(Indices) + NumIndices, Name));
+}
+
+LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), unwrap(Indices),
+ unwrap(Indices) + NumIndices, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ unsigned Idx, const char *Name) {
+ return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalString(Str, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
+}
+
+/*--.. Casts ...............................................................--*/
+
+LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val),
+ unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
+ /*isSigned*/true, Name));
+}
+
+LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+/*--.. Comparisons .........................................................--*/
+
+LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateICmp(static_cast<ICmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFCmp(static_cast<FCmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+/*--.. Miscellaneous instructions ..........................................--*/
+
+LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateCall(unwrap(Fn),
+ ArrayRef<Value *>(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMValueRef Then, LLVMValueRef Else,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else),
+ Name));
+}
+
+LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List,
+ LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name));
+}
+
+LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index),
+ Name));
+}
+
+LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef EltVal, LLVMValueRef Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal),
+ unwrap(Index), Name));
+}
+
+LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMValueRef Mask,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2),
+ unwrap(Mask), Name));
+}
+
+LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ unsigned Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name));
+}
+
+LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ LLVMValueRef EltVal, unsigned Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal),
+ Index, Name));
+}
+
+LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+}
+
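The builder wrappers above map one-to-one onto IRBuilder calls, so a C-API front end composes them the same way C++ clients use IRBuilder. A minimal sketch of that usage (the function and variable names are illustrative; LLVMAddFunction, LLVMBuildAdd, LLVMBuildRet and the other helpers come from earlier sections of the same C API, not from the excerpt above):

  #include "llvm-c/Core.h"

  /* Build: i32 @add1(i32 %x) { ret i32 (add %x, 1) } */
  static LLVMValueRef buildAdd1(LLVMModuleRef M) {
    LLVMTypeRef I32 = LLVMInt32Type();
    LLVMTypeRef FnTy = LLVMFunctionType(I32, &I32, 1, /*IsVarArg=*/0);
    LLVMValueRef Fn = LLVMAddFunction(M, "add1", FnTy);
    LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(Fn, "entry");

    LLVMBuilderRef B = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(B, Entry);
    LLVMValueRef Sum = LLVMBuildAdd(B, LLVMGetParam(Fn, 0),
                                    LLVMConstInt(I32, 1, 0), "sum");
    LLVMBuildRet(B, Sum);
    LLVMDisposeBuilder(B);
    return Fn;
  }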
+
+/*===-- Module providers --------------------------------------------------===*/
+
+LLVMModuleProviderRef
+LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) {
+ return reinterpret_cast<LLVMModuleProviderRef>(M);
+}
+
+void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
+ delete unwrap(MP);
+}
+
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
+ const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getFile(Path, MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
+ delete unwrap(MemBuf);
+}
+
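The two creation functions follow the usual C-API error convention: a zero return means success, while a non-zero return leaves a strdup'd message in *OutMessage that the caller must release. A hedged usage sketch (the wrapper name is invented for illustration):

  #include "llvm-c/Core.h"
  #include <stdio.h>
  #include <stdlib.h>

  static LLVMMemoryBufferRef readFileOrDie(const char *Path) {
    LLVMMemoryBufferRef Buf;
    char *Err = 0;
    if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Err)) {
      fprintf(stderr, "cannot read %s: %s\n", Path, Err);
      free(Err);   /* message was allocated with strdup() above */
      exit(1);
    }
    return Buf;    /* release later with LLVMDisposeMemoryBuffer() */
  }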
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
+ return wrap(PassRegistry::getPassRegistry());
+}
+
+/*===-- Pass Manager ------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
+}
+
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
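Taken together, a C-API client typically creates a FunctionPassManager for a module, runs it over each function, and disposes of it afterwards. A sketch under the assumption that llvm-c/Transforms/Scalar.h is available for pass registration; LLVMGetFirstFunction and LLVMGetNextFunction are the module iteration helpers from the same C API, and the function name here is illustrative:

  #include "llvm-c/Core.h"
  #include "llvm-c/Transforms/Scalar.h"

  static void runFunctionPasses(LLVMModuleRef M) {
    LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
    LLVMAddPromoteMemoryToRegisterPass(FPM);   /* mem2reg, as an example */
    LLVMInitializeFunctionPassManager(FPM);
    for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
         F = LLVMGetNextFunction(F))
      LLVMRunFunctionPassManager(FPM, F);
    LLVMFinalizeFunctionPassManager(FPM);
    LLVMDisposePassManager(FPM);
  }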
diff --git a/contrib/llvm/lib/VMCore/DebugInfoProbe.cpp b/contrib/llvm/lib/VMCore/DebugInfoProbe.cpp
new file mode 100644
index 000000000000..d1275ff58caa
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/DebugInfoProbe.cpp
@@ -0,0 +1,225 @@
+//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DebugInfoProbe. This probe can be used by a pass
+// manager to analyze how the optimizer is treating debugging information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "debuginfoprobe"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include <set>
+#include <string>
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden,
+ cl::desc("Enable debug info probe"));
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeImpl - This class implements an interface to monitor
+// how an optimization pass is preserving debugging information.
+
+namespace llvm {
+
+ class DebugInfoProbeImpl {
+ public:
+ DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {}
+ void initialize(StringRef PName, Function &F);
+ void finalize(Function &F);
+ void report();
+ private:
+ unsigned NumDbgLineLost, NumDbgValueLost;
+ std::string PassName;
+ Function *TheFn;
+ std::set<MDNode *> DbgVariables;
+ std::set<Instruction *> MissingDebugLoc;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeImpl
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ PassName = PName;
+
+ DbgVariables.clear();
+ MissingDebugLoc.clear();
+ TheFn = &F;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown())
+ MissingDebugLoc.insert(BI);
+ if (!isa<DbgInfoIntrinsic>(BI)) continue;
+ Value *Addr = NULL;
+ MDNode *Node = NULL;
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+ Addr = DDI->getAddress();
+ Node = DDI->getVariable();
+ } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ Addr = DVI->getValue();
+ Node = DVI->getVariable();
+ }
+ if (Addr)
+ DbgVariables.insert(Node);
+ }
+}
+
+/// report - Report findings. This should be invoked after finalize.
+void DebugInfoProbeImpl::report() {
+ if (!EnableDebugInfoProbe) return;
+ if (NumDbgLineLost || NumDbgValueLost) {
+ raw_ostream *OutStream = CreateInfoOutputFile();
+ if (NumDbgLineLost)
+ *OutStream << NumDbgLineLost
+ << "\t times line number info lost by "
+ << PassName << "\n";
+ if (NumDbgValueLost)
+ *OutStream << NumDbgValueLost
+ << "\t times variable info lost by "
+ << PassName << "\n";
+ delete OutStream;
+ }
+ NumDbgLineLost = 0;
+ NumDbgValueLost = 0;
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbeImpl::finalize(Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ assert (TheFn == &F && "Invalid function to measure!");
+
+ std::set<MDNode *>DbgVariables2;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown() &&
+ MissingDebugLoc.count(BI) == 0) {
+ ++NumDbgLineLost;
+ DEBUG(dbgs() << "DebugInfoProbe (" << PassName << "): --- ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ if (!isa<DbgInfoIntrinsic>(BI)) continue;
+ Value *Addr = NULL;
+ MDNode *Node = NULL;
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+ Addr = DDI->getAddress();
+ Node = DDI->getVariable();
+ } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ Addr = DVI->getValue();
+ Node = DVI->getVariable();
+ }
+ if (Addr)
+ DbgVariables2.insert(Node);
+ }
+
+ for (std::set<MDNode *>::iterator I = DbgVariables.begin(),
+ E = DbgVariables.end(); I != E; ++I) {
+ if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) {
+ DEBUG(dbgs()
+ << "DebugInfoProbe("
+ << PassName
+ << "): Losing dbg info for variable: ";
+ if (MDString *MDS = dyn_cast_or_null<MDString>(
+ (*I)->getOperand(2)))
+ dbgs() << MDS->getString();
+ else
+ dbgs() << "...";
+ dbgs() << "\n");
+ ++NumDbgValueLost;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+DebugInfoProbe::DebugInfoProbe() {
+ pImpl = new DebugInfoProbeImpl();
+}
+
+DebugInfoProbe::~DebugInfoProbe() {
+ delete pImpl;
+}
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbe::initialize(StringRef PName, Function &F) {
+ pImpl->initialize(PName, F);
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbe::finalize(Function &F) {
+ pImpl->finalize(F);
+}
+
+/// report - Report findings. This should be invoked after finalize.
+void DebugInfoProbe::report() {
+ pImpl->report();
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeInfo
+
+/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
+/// them.
+DebugInfoProbeInfo::~DebugInfoProbeInfo() {
+ if (!EnableDebugInfoProbe) return;
+ for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(),
+ E = Probes.end(); I != E; ++I) {
+ I->second->report();
+ delete I->second;
+ }
+}
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeInfo::initialize(Pass *P, Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ if (P->getAsPMDataManager())
+ return;
+
+ StringMapEntry<DebugInfoProbe *> &Entry =
+ Probes.GetOrCreateValue(P->getPassName());
+ DebugInfoProbe *&Probe = Entry.getValue();
+ if (!Probe)
+ Probe = new DebugInfoProbe();
+ Probe->initialize(P->getPassName(), F);
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbeInfo::finalize(Pass *P, Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ if (P->getAsPMDataManager())
+ return;
+ StringMapEntry<DebugInfoProbe *> &Entry =
+ Probes.GetOrCreateValue(P->getPassName());
+ DebugInfoProbe *&Probe = Entry.getValue();
+ assert (Probe && "DebugInfoProbe is not initialized!");
+ Probe->finalize(F);
+}
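DebugInfoProbeInfo is driven from the pass managers: initialize() is called with the pass and function before the pass runs, finalize() afterwards, and the destructor prints the per-pass totals. A rough sketch of that driving loop; the wrapper function below is invented for illustration, and the real call sites live inside the pass-manager implementation:

  #include "llvm/DebugInfoProbe.h"
  #include "llvm/Function.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  // Snapshot debug info, run one function pass, then diff the snapshots.
  static void runPassWithProbe(DebugInfoProbeInfo &Probes, FunctionPass *P,
                               Function &F) {
    Probes.initialize(P, F);   // records !dbg locations and dbg.* variables
    P->runOnFunction(F);
    Probes.finalize(P, F);     // bumps the lost-line/lost-variable counters
  }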
diff --git a/contrib/llvm/lib/VMCore/DebugLoc.cpp b/contrib/llvm/lib/VMCore/DebugLoc.cpp
new file mode 100644
index 000000000000..4ff6b2cd80e8
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/DebugLoc.cpp
@@ -0,0 +1,344 @@
+//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "LLVMContextImpl.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DebugLoc Implementation
+//===----------------------------------------------------------------------===//
+
+MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) return 0;
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+}
+
+MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified. Zero is invalid.
+ if (ScopeIdx >= 0) return 0;
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+/// Return both the Scope and the InlinedAt values.
+void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
+ const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) {
+ Scope = IA = 0;
+ return;
+ }
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ IA = 0;
+ return;
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+ IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+
+DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
+ MDNode *Scope, MDNode *InlinedAt) {
+ DebugLoc Result;
+
+ // If no scope is available, this is an unknown location.
+ if (Scope == 0) return Result;
+
+ // Saturate line and col to "unknown".
+ if (Col > 255) Col = 0;
+ if (Line >= (1 << 24)) Line = 0;
+ Result.LineCol = Line | (Col << 24);
+
+ LLVMContext &Ctx = Scope->getContext();
+
+ // If there is no inlined-at location, use the ScopeRecords array.
+ if (InlinedAt == 0)
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
+ else
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
+ InlinedAt, 0);
+
+ return Result;
+}
+
+/// getAsMDNode - This method converts the compressed DebugLoc node into a
+/// DILocation-compatible MDNode.
+MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
+ if (isUnknown()) return 0;
+
+ MDNode *Scope, *IA;
+ getScopeAndInlinedAt(Scope, IA, Ctx);
+ assert(Scope && "If scope is null, this should be isUnknown()");
+
+ LLVMContext &Ctx2 = Scope->getContext();
+ const Type *Int32 = Type::getInt32Ty(Ctx2);
+ Value *Elts[] = {
+ ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
+ Scope, IA
+ };
+ return MDNode::get(Ctx2, Elts);
+}
+
+/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
+DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
+ if (N == 0 || N->getNumOperands() != 4) return DebugLoc();
+
+ MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(2));
+ if (Scope == 0) return DebugLoc();
+
+ unsigned LineNo = 0, ColNo = 0;
+ if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(0)))
+ LineNo = Line->getZExtValue();
+ if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(1)))
+ ColNo = Col->getZExtValue();
+
+ return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3)));
+}
+
+/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
+ if (N == 0 || N->getNumOperands() < 3) return DebugLoc();
+
+ MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(1));
+ if (Scope == 0) return DebugLoc();
+
+ unsigned LineNo = 0, ColNo = 0;
+ if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(2)))
+ LineNo = Line->getZExtValue();
+ if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(3)))
+ ColNo = Col->getZExtValue();
+
+ return get(LineNo, ColNo, Scope, NULL);
+}
+
+void DebugLoc::dump(const LLVMContext &Ctx) const {
+#ifndef NDEBUG
+ if (!isUnknown()) {
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump(Ctx);
+ } else
+ dbgs() << "\n";
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// DenseMap specialization
+//===----------------------------------------------------------------------===//
+
+DebugLoc DenseMapInfo<DebugLoc>::getEmptyKey() {
+ return DebugLoc::getEmptyKey();
+}
+
+DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() {
+ return DebugLoc::getTombstoneKey();
+}
+
+unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(Key.LineCol);
+ ID.AddInteger(Key.ScopeIdx);
+ return ID.ComputeHash();
+}
+
+bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) {
+ return LHS == RHS;
+}
+
+//===----------------------------------------------------------------------===//
+// LLVMContextImpl Implementation
+//===----------------------------------------------------------------------===//
+
+int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope,
+ int ExistingIdx) {
+ // If we already have an entry for this scope, return it.
+ int &Idx = ScopeRecordIdx[Scope];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Otherwise add a new entry.
+
+  // Give ScopeRecords a reasonable initial size to avoid excessive
+  // reallocation early on.
+ if (ScopeRecords.empty())
+ ScopeRecords.reserve(128);
+
+  // The index is biased by 1 so that 0 can mean 'no entry'.
+ Idx = ScopeRecords.size()+1;
+ ScopeRecords.push_back(DebugRecVH(Scope, this, Idx));
+ return Idx;
+}
+
+int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
+ int ExistingIdx) {
+ // If we already have an entry, return it.
+ int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+  // Give ScopeInlinedAtRecords a reasonable initial size to avoid excessive
+  // reallocation early on.
+ if (ScopeInlinedAtRecords.empty())
+ ScopeInlinedAtRecords.reserve(128);
+
+ // Index is biased by 1 and negated.
+ Idx = -ScopeInlinedAtRecords.size()-1;
+ ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx),
+ DebugRecVH(IA, this, Idx)));
+ return Idx;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugRecVH Implementation
+//===----------------------------------------------------------------------===//
+
+/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
+/// to drop to null and we need to remove our entry from the DenseMap.
+void DebugRecVH::deleted() {
+  // If this is a non-canonical reference, just drop the value to null; we know
+  // it doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(0);
+ return;
+ }
+
+ MDNode *Cur = get();
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(Cur);
+ // Reset this VH to null and we're done.
+ setValPtr(0);
+ Idx = 0;
+ return;
+ }
+
+  // Otherwise, it is an entry in ScopeInlinedAtRecords; we don't know whether
+  // it is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+  // We do have an entry for this pair; nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to null. Drop both 'Idx' values to null to indicate that
+ // we're in non-canonical form now.
+ setValPtr(0);
+ Entry.first.Idx = Entry.second.Idx = 0;
+}
+
+void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
+ // If being replaced with a non-mdnode value (e.g. undef) handle this as if
+ // the mdnode got deleted.
+ MDNode *NewVal = dyn_cast<MDNode>(NewVa);
+ if (NewVal == 0) return deleted();
+
+  // If this is a non-canonical reference, just change it; we know it already
+  // doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(NewVa);
+ return;
+ }
+
+ MDNode *OldVal = get();
+ assert(OldVal != NewVa && "Node replaced with self?");
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(OldVal);
+ setValPtr(NewVal);
+
+ int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx);
+
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewEntry != Idx)
+ Idx = 0;
+ return;
+ }
+
+  // Otherwise, it is an entry in ScopeInlinedAtRecords; we don't know whether
+  // it is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+  // We do have an entry for this pair; nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to the new value.
+ setValPtr(NewVal);
+
+ int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(),
+ Entry.second.get(), Idx);
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewIdx != Idx) {
+ std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1];
+ Entry.first.Idx = Entry.second.Idx = 0;
+ }
+}
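In client code the compressed form is what travels on instructions: DebugLoc::get() builds it from a scope (and optional inlined-at) MDNode, and getAsMDNode() expands it back into the four-operand DILocation layout. An illustrative snippet, assuming the scope node has already been produced elsewhere (for example by DIBuilder); the function name and line/column values are made up:

  #include "llvm/Instruction.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Metadata.h"
  #include "llvm/Support/DebugLoc.h"
  using namespace llvm;

  static void tagWithLocation(Instruction *I, MDNode *Scope, LLVMContext &Ctx) {
    DebugLoc Loc = DebugLoc::get(/*Line=*/42, /*Col=*/7, Scope, /*InlinedAt=*/0);
    I->setDebugLoc(Loc);                         // stores the compressed indices
    MDNode *AsDILocation = Loc.getAsMDNode(Ctx); // !{i32 42, i32 7, scope, null}
    (void)AsDILocation;                          // e.g. attach as !dbg metadata
  }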
diff --git a/contrib/llvm/lib/VMCore/Dominators.cpp b/contrib/llvm/lib/VMCore/Dominators.cpp
new file mode 100644
index 000000000000..08b845ef9d6b
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Dominators.cpp
@@ -0,0 +1,106 @@
+//===- Dominators.cpp - Dominator Calculation -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators. Postdominators are available in libanalysis but are not
+// included in libvmcore, because they are not needed there. Forward dominators
+// are needed to support the Verifier pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify dominfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyDomInfo = true;
+#else
+static bool VerifyDomInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
+ cl::desc("Verify dominator info (time consuming)"));
+
+//===----------------------------------------------------------------------===//
+// DominatorTree Implementation
+//===----------------------------------------------------------------------===//
+//
+// Provide public access to DominatorTree information. Implementation details
+// can be found in DominatorInternals.h.
+//
+//===----------------------------------------------------------------------===//
+
+TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
+TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
+
+char DominatorTree::ID = 0;
+INITIALIZE_PASS(DominatorTree, "domtree",
+ "Dominator Tree Construction", true, true)
+
+bool DominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+void DominatorTree::verifyAnalysis() const {
+ if (!VerifyDomInfo) return;
+
+ Function &F = *getRoot()->getParent();
+
+ DominatorTree OtherDT;
+ OtherDT.getBase().recalculate(F);
+ if (compare(OtherDT)) {
+ errs() << "DominatorTree is not up to date!\nComputed:\n";
+ print(errs());
+ errs() << "\nActual:\n";
+ OtherDT.print(errs());
+ abort();
+ }
+}
+
+void DominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+// dominates - Return true if A dominates a use in B. This performs the
+// special checks necessary if A and B are in the same basic block.
+bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
+ const BasicBlock *BBA = A->getParent(), *BBB = B->getParent();
+
+  // If A is an invoke instruction, its value is only available in its normal
+  // successor block.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(A))
+ BBA = II->getNormalDest();
+
+ if (BBA != BBB) return dominates(BBA, BBB);
+
+ // It is not possible to determine dominance between two PHI nodes
+ // based on their ordering.
+ if (isa<PHINode>(A) && isa<PHINode>(B))
+ return false;
+
+ // Loop through the basic block until we find A or B.
+ BasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ /*empty*/;
+
+ return &*I == A;
+}
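Client passes normally do not build the tree themselves; they declare a dependency and query the cached analysis. A schematic, deliberately do-nothing pass showing the usual pattern (the pass name is made up and registration boilerplate is omitted):

  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Function.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  namespace {
    struct DomQueryExample : public FunctionPass {
      static char ID;
      DomQueryExample() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<DominatorTree>();   // let the pass manager build it
        AU.setPreservesAll();
      }

      virtual bool runOnFunction(Function &F) {
        DominatorTree &DT = getAnalysis<DominatorTree>();
        // The entry block dominates every reachable block; count them.
        BasicBlock &Entry = F.getEntryBlock();
        unsigned Dominated = 0;
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
          if (DT.dominates(&Entry, &*BB))
            ++Dominated;
        (void)Dominated;
        return false;                      // analysis only, IR is unchanged
      }
    };
  }
  char DomQueryExample::ID = 0;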
diff --git a/contrib/llvm/lib/VMCore/Function.cpp b/contrib/llvm/lib/VMCore/Function.cpp
new file mode 100644
index 000000000000..6536bcd0e2ed
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Function.cpp
@@ -0,0 +1,445 @@
+//===-- Function.cpp - Implement the Function and Argument classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Function class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/StringPool.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/Threading.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Argument, Function>;
+template class llvm::SymbolTableListTraits<BasicBlock, Function>;
+
+//===----------------------------------------------------------------------===//
+// Argument Implementation
+//===----------------------------------------------------------------------===//
+
+Argument::Argument(const Type *Ty, const Twine &Name, Function *Par)
+ : Value(Ty, Value::ArgumentVal) {
+ Parent = 0;
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (Par)
+ Par->getArgumentList().push_back(this);
+ setName(Name);
+}
+
+void Argument::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getArgNo - Return the index of this formal argument in its containing
+/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
+unsigned Argument::getArgNo() const {
+ const Function *F = getParent();
+ assert(F && "Argument is not in a function");
+
+ Function::const_arg_iterator AI = F->arg_begin();
+ unsigned ArgIdx = 0;
+ for (; &*AI != this; ++AI)
+ ++ArgIdx;
+
+ return ArgIdx;
+}
+
+/// hasByValAttr - Return true if this argument has the byval attribute on it
+/// in its containing function.
+bool Argument::hasByValAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
+}
+
+unsigned Argument::getParamAlignment() const {
+ assert(getType()->isPointerTy() && "Only pointers have alignments");
+ return getParent()->getParamAlignment(getArgNo()+1);
+
+}
+
+/// hasNestAttr - Return true if this argument has the nest attribute on
+/// it in its containing function.
+bool Argument::hasNestAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
+}
+
+/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
+/// it in its containing function.
+bool Argument::hasNoAliasAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
+}
+
+/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
+/// on it in its containing function.
+bool Argument::hasNoCaptureAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
+}
+
+/// hasSRetAttr - Return true if this argument has the sret attribute on
+/// it in its containing function.
+bool Argument::hasStructRetAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ if (this != getParent()->arg_begin())
+ return false; // StructRet param must be first param
+ return getParent()->paramHasAttr(1, Attribute::StructRet);
+}
+
+/// addAttr - Add an Attribute to an argument
+void Argument::addAttr(Attributes attr) {
+ getParent()->addAttribute(getArgNo() + 1, attr);
+}
+
+/// removeAttr - Remove an Attribute from an argument
+void Argument::removeAttr(Attributes attr) {
+ getParent()->removeAttribute(getArgNo() + 1, attr);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Methods in Function
+//===----------------------------------------------------------------------===//
+
+LLVMContext &Function::getContext() const {
+ return getType()->getContext();
+}
+
+FunctionType *Function::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+bool Function::isVarArg() const {
+ return getFunctionType()->isVarArg();
+}
+
+Type *Function::getReturnType() const {
+ return getFunctionType()->getReturnType();
+}
+
+void Function::removeFromParent() {
+ getParent()->getFunctionList().remove(this);
+}
+
+void Function::eraseFromParent() {
+ getParent()->getFunctionList().erase(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Function Implementation
+//===----------------------------------------------------------------------===//
+
+Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &name, Module *ParentModule)
+ : GlobalValue(PointerType::getUnqual(Ty),
+ Value::FunctionVal, 0, 0, Linkage, name) {
+ assert(FunctionType::isValidReturnType(getReturnType()) &&
+ "invalid return type");
+ SymTab = new ValueSymbolTable();
+
+ // If the function has arguments, mark them as lazily built.
+ if (Ty->getNumParams())
+ setValueSubclassData(1); // Set the "has lazy arguments" bit.
+
+  // Make sure that we get added to a module
+ LeakDetector::addGarbageObject(this);
+
+ if (ParentModule)
+ ParentModule->getFunctionList().push_back(this);
+
+ // Ensure intrinsics have the right parameter attributes.
+ if (unsigned IID = getIntrinsicID())
+ setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID)));
+
+}
+
+Function::~Function() {
+ dropAllReferences(); // After this it is safe to delete instructions.
+
+ // Delete all of the method arguments and unlink from symbol table...
+ ArgumentList.clear();
+ delete SymTab;
+
+ // Remove the function from the on-the-side GC table.
+ clearGC();
+}
+
+void Function::BuildLazyArguments() const {
+ // Create the arguments vector, all arguments start out unnamed.
+ const FunctionType *FT = getFunctionType();
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ assert(!FT->getParamType(i)->isVoidTy() &&
+ "Cannot have void typed arguments!");
+ ArgumentList.push_back(new Argument(FT->getParamType(i)));
+ }
+
+ // Clear the lazy arguments bit.
+ unsigned SDC = getSubclassDataFromValue();
+ const_cast<Function*>(this)->setValueSubclassData(SDC &= ~1);
+}
+
+size_t Function::arg_size() const {
+ return getFunctionType()->getNumParams();
+}
+bool Function::arg_empty() const {
+ return getFunctionType()->getNumParams() == 0;
+}
+
+void Function::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+// dropAllReferences() - This function causes all the subinstructions to "let
+// go" of all references that they are maintaining. This allows one to
+// 'delete' a whole class at a time, even though there may be circular
+// references... first all references are dropped, and all use counts go to
+// zero. Then everything is deleted for real. Note that no operations are
+// valid on an object that has "dropped all references", except operator
+// delete.
+//
+void Function::dropAllReferences() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ // Delete all basic blocks. They are now unused, except possibly by
+ // blockaddresses, but BasicBlock's destructor takes care of those.
+ while (!BasicBlocks.empty())
+ BasicBlocks.begin()->eraseFromParent();
+}
+
+void Function::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void Function::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+// Maintain the GC name for each function in an on-the-side table. This saves
+// allocating an additional word in Function for programs which do not use GC
+// (i.e., most programs) at the cost of increased overhead for clients which do
+// use GC.
+static DenseMap<const Function*,PooledStringPtr> *GCNames;
+static StringPool *GCNamePool;
+static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
+
+bool Function::hasGC() const {
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return GCNames && GCNames->count(this);
+}
+
+const char *Function::getGC() const {
+ assert(hasGC() && "Function has no collector");
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return *(*GCNames)[this];
+}
+
+void Function::setGC(const char *Str) {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (!GCNamePool)
+ GCNamePool = new StringPool();
+ if (!GCNames)
+ GCNames = new DenseMap<const Function*,PooledStringPtr>();
+ (*GCNames)[this] = GCNamePool->intern(Str);
+}
+
+void Function::clearGC() {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (GCNames) {
+ GCNames->erase(this);
+ if (GCNames->empty()) {
+ delete GCNames;
+ GCNames = 0;
+ if (GCNamePool->empty()) {
+ delete GCNamePool;
+ GCNamePool = 0;
+ }
+ }
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a Function) from the Function Src to this one.
+void Function::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<Function>(Src) && "Expected a Function!");
+ GlobalValue::copyAttributesFrom(Src);
+ const Function *SrcF = cast<Function>(Src);
+ setCallingConv(SrcF->getCallingConv());
+ setAttributes(SrcF->getAttributes());
+ if (SrcF->hasGC())
+ setGC(SrcF->getGC());
+ else
+ clearGC();
+}
+
+/// getIntrinsicID - This method returns the ID number of the specified
+/// function, or Intrinsic::not_intrinsic if the function is not an
+/// intrinsic, or if the pointer is null. This value is always defined to be
+/// zero to allow easy checking for whether a function is intrinsic or not. The
+/// particular intrinsic functions which correspond to this value are defined in
+/// llvm/Intrinsics.h.
+///
+unsigned Function::getIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ if (!ValName)
+ return 0;
+ unsigned Len = ValName->getKeyLength();
+ const char *Name = ValName->getKeyData();
+
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "llvm/Intrinsics.gen"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
+ assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ static const char * const Table[] = {
+ "not_intrinsic",
+#define GET_INTRINSIC_NAME_TABLE
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+ if (Tys.empty())
+ return Table[id];
+ std::string Result(Table[id]);
+ for (unsigned i = 0; i < Tys.size(); ++i) {
+ if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
+ Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
+ EVT::getEVT(PTyp->getElementType()).getEVTString();
+ }
+ else if (Tys[i])
+ Result += "." + EVT::getEVT(Tys[i]).getEVTString();
+ }
+ return Result;
+}
+
+const FunctionType *Intrinsic::getType(LLVMContext &Context,
+ ID id, ArrayRef<Type*> Tys) {
+ const Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+bool Intrinsic::isOverloaded(ID id) {
+ static const bool OTable[] = {
+ false,
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ return OTable[id];
+}
+
+/// This defines the "Intrinsic::getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
+  // There can never be multiple globals with the same name but different types,
+  // because intrinsics must have a specific type.
+ return
+ cast<Function>(M->getOrInsertFunction(getName(id, Tys),
+ getType(M->getContext(), id, Tys)));
+}
+
+// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "llvm/Intrinsics.gen"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+/// hasAddressTaken - returns true if there are any uses of this function
+/// other than direct calls or invokes to it.
+bool Function::hasAddressTaken(const User* *PutOffender) const {
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const User *U = *I;
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(I))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ }
+ return false;
+}
+
+/// callsFunctionThatReturnsTwice - Return true if the function has a call to
+/// setjmp or another function that gcc recognizes as "returning twice".
+///
+/// FIXME: Remove after <rdar://problem/8031714> is fixed.
+/// FIXME: Is the above FIXME valid?
+bool Function::callsFunctionThatReturnsTwice() const {
+ const Module *M = this->getParent();
+ static const char *ReturnsTwiceFns[] = {
+ "_setjmp",
+ "setjmp",
+ "sigsetjmp",
+ "setjmp_syscall",
+ "savectx",
+ "qsetjmp",
+ "vfork",
+ "getcontext"
+ };
+
+ for (unsigned I = 0; I < array_lengthof(ReturnsTwiceFns); ++I)
+ if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+ if (!Callee->use_empty())
+ for (Value::const_use_iterator
+ I = Callee->use_begin(), E = Callee->use_end();
+ I != E; ++I)
+ if (const CallInst *CI = dyn_cast<CallInst>(*I))
+ if (CI->getParent()->getParent() == this)
+ return true;
+ }
+
+ return false;
+}
+
+// vim: sw=2 ai
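A short, assumed-typical consumer of the helpers above: the Argument predicates just forward to the containing function's attribute list, and Intrinsic::getDeclaration() materializes the mangled intrinsic the same way IRBuilder does for memcpy later in this patch. The helper names and the I8Ptr/SizeTy parameters below are invented for illustration:

  #include "llvm/Function.h"
  #include "llvm/Intrinsics.h"
  #include "llvm/Module.h"
  using namespace llvm;

  // True if the first formal parameter carries the 'byval' attribute.
  static bool firstParamIsByVal(const Function &F) {
    return !F.arg_empty() && F.arg_begin()->hasByValAttr();
  }

  // Declare (or look up) llvm.memcpy overloaded on <dst*, src*, size> types.
  static Function *getMemCpyDecl(Module &M, Type *I8Ptr, Type *SizeTy) {
    Type *Tys[] = { I8Ptr, I8Ptr, SizeTy };
    return Intrinsic::getDeclaration(&M, Intrinsic::memcpy, Tys);
  }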
diff --git a/contrib/llvm/lib/VMCore/GVMaterializer.cpp b/contrib/llvm/lib/VMCore/GVMaterializer.cpp
new file mode 100644
index 000000000000..f77a9c908d54
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/GVMaterializer.cpp
@@ -0,0 +1,18 @@
+//===-- GVMaterializer.cpp - Base implementation for GV materializers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Minimal implementation of the abstract interface for materializing
+// GlobalValues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/GVMaterializer.h"
+using namespace llvm;
+
+GVMaterializer::~GVMaterializer() {}
diff --git a/contrib/llvm/lib/VMCore/Globals.cpp b/contrib/llvm/lib/VMCore/Globals.cpp
new file mode 100644
index 000000000000..db008e09d1c8
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Globals.cpp
@@ -0,0 +1,263 @@
+//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GlobalValue & GlobalVariable classes for the VMCore
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Class
+//===----------------------------------------------------------------------===//
+
+bool GlobalValue::isMaterializable() const {
+ return getParent() && getParent()->isMaterializable(this);
+}
+bool GlobalValue::isDematerializable() const {
+ return getParent() && getParent()->isDematerializable(this);
+}
+bool GlobalValue::Materialize(std::string *ErrInfo) {
+ return getParent()->Materialize(this, ErrInfo);
+}
+void GlobalValue::Dematerialize() {
+ getParent()->Dematerialize(this);
+}
+
+/// Override destroyConstant to make sure it doesn't get called on
+/// GlobalValues because they shouldn't be treated like other constants.
+void GlobalValue::destroyConstant() {
+ llvm_unreachable("You can't GV->destroyConstant()!");
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalValue) from the GlobalValue Src to this one.
+void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
+ setAlignment(Src->getAlignment());
+ setSection(Src->getSection());
+ setVisibility(Src->getVisibility());
+ setUnnamedAddr(Src->hasUnnamedAddr());
+}
+
+void GlobalValue::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ Alignment = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool GlobalValue::isDeclaration() const {
+ // Globals are definitions if they have an initializer.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return GV->getNumOperands() == 0;
+
+ // Functions are definitions if they have a body.
+ if (const Function *F = dyn_cast<Function>(this))
+ return F->empty();
+
+ // Aliases are always definitions.
+ assert(isa<GlobalAlias>(this));
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalVariable Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
+ Constant *InitVal, const Twine &Name,
+ bool ThreadLocal, unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+}
+
+GlobalVariable::GlobalVariable(Module &M, const Type *Ty, bool constant,
+ LinkageTypes Link, Constant *InitVal,
+ const Twine &Name,
+ GlobalVariable *Before, bool ThreadLocal,
+ unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+
+ if (Before)
+ Before->getParent()->getGlobalList().insert(Before, this);
+ else
+ M.getGlobalList().push_back(this);
+}
+
+void GlobalVariable::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalVariable::removeFromParent() {
+ getParent()->getGlobalList().remove(this);
+}
+
+void GlobalVariable::eraseFromParent() {
+ getParent()->getGlobalList().erase(this);
+}
+
+void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ // If you call this, then you better know this GVar has a constant
+ // initializer worth replacing. Enforce that here.
+ assert(getNumOperands() == 1 &&
+ "Attempt to replace uses of Constants on a GVar with no initializer");
+
+ // And, since you know it has an initializer, the From value better be
+ // the initializer :)
+ assert(getOperand(0) == From &&
+ "Attempt to replace wrong constant initializer in GVar");
+
+ // And, you better have a constant for the replacement value
+ assert(isa<Constant>(To) &&
+ "Attempt to replace GVar initializer with non-constant");
+
+ // Okay, preconditions out of the way, replace the constant initializer.
+ this->setOperand(0, cast<Constant>(To));
+}
+
+void GlobalVariable::setInitializer(Constant *InitVal) {
+ if (InitVal == 0) {
+ if (hasInitializer()) {
+ Op<0>().set(0);
+ NumOperands = 0;
+ }
+ } else {
+ assert(InitVal->getType() == getType()->getElementType() &&
+ "Initializer type must match GlobalVariable type");
+ if (!hasInitializer())
+ NumOperands = 1;
+ Op<0>().set(InitVal);
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalVariable) from the GlobalVariable Src to this one.
+void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
+ GlobalValue::copyAttributesFrom(Src);
+ const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
+ setThreadLocal(SrcVar->isThreadLocal());
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalAlias Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalAlias::GlobalAlias(const Type *Ty, LinkageTypes Link,
+ const Twine &Name, Constant* aliasee,
+ Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+ LeakDetector::addGarbageObject(this);
+
+ if (aliasee)
+ assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
+ Op<0>() = aliasee;
+
+ if (ParentModule)
+ ParentModule->getAliasList().push_back(this);
+}
+
+void GlobalAlias::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalAlias::removeFromParent() {
+ getParent()->getAliasList().remove(this);
+}
+
+void GlobalAlias::eraseFromParent() {
+ getParent()->getAliasList().erase(this);
+}
+
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ assert((!Aliasee || Aliasee->getType() == getType()) &&
+ "Alias and aliasee types should match!");
+
+ setOperand(0, Aliasee);
+}
+
+const GlobalValue *GlobalAlias::getAliasedGlobal() const {
+ const Constant *C = getAliasee();
+ if (C == 0) return 0;
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GV;
+
+ const ConstantExpr *CE = cast<ConstantExpr>(C);
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+
+ return dyn_cast<GlobalValue>(CE->getOperand(0));
+}
+
+const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
+ SmallPtrSet<const GlobalValue*, 3> Visited;
+
+ // Check if we need to stop early.
+ if (stopOnWeak && mayBeOverridden())
+ return this;
+
+ const GlobalValue *GV = getAliasedGlobal();
+ Visited.insert(GV);
+
+ // Iterate over aliasing chain, stopping on weak alias if necessary.
+ while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (stopOnWeak && GA->mayBeOverridden())
+ break;
+
+ GV = GA->getAliasedGlobal();
+
+ if (!Visited.insert(GV))
+ return 0;
+ }
+
+ return GV;
+}
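A hedged example of the constructors implemented above: create an internal i32 counter in a module and point an alias at it, so that resolveAliasedGlobal() walks back to the variable. The names and linkage choices are illustrative only:

  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/GlobalAlias.h"
  #include "llvm/GlobalVariable.h"
  #include "llvm/Module.h"
  using namespace llvm;

  static GlobalVariable *makeCounter(Module &M) {
    Type *I32 = Type::getInt32Ty(M.getContext());
    GlobalVariable *GV =
        new GlobalVariable(M, I32, /*isConstant=*/false,
                           GlobalValue::InternalLinkage,
                           ConstantInt::get(I32, 0), "counter");
    // The alias type must match the aliasee type (i32* here).
    new GlobalAlias(GV->getType(), GlobalValue::ExternalLinkage,
                    "counter_alias", GV, &M);
    return GV;
  }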
diff --git a/contrib/llvm/lib/VMCore/IRBuilder.cpp b/contrib/llvm/lib/VMCore/IRBuilder.cpp
new file mode 100644
index 000000000000..ffe961fee7c2
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/IRBuilder.cpp
@@ -0,0 +1,149 @@
+//===---- IRBuilder.cpp - Builder for LLVM Instrs -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IRBuilder class, which is used as a convenient way
+// to create LLVM instructions with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+using namespace llvm;
+
+/// CreateGlobalString - Make a new global variable with an initializer that
+/// has an array-of-i8 type filled in with the nul-terminated string value
+/// specified. If Name is given, it becomes the name of the global variable
+/// created.
+Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
+ Constant *StrConstant = ConstantArray::get(Context, Str, true);
+ Module &M = *BB->getParent()->getParent();
+ GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
+ true, GlobalValue::InternalLinkage,
+ StrConstant, "", 0, false);
+ GV->setName(Name);
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+Type *IRBuilderBase::getCurrentFunctionReturnType() const {
+ assert(BB && BB->getParent() && "No current function!");
+ return BB->getParent()->getReturnType();
+}
+
+Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
+ const PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getElementType()->isIntegerTy(8))
+ return Ptr;
+
+ // Otherwise, we need to insert a bitcast.
+ PT = getInt8PtrTy(PT->getAddressSpace());
+ BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
+ BB->getInstList().insert(InsertPt, BCI);
+ SetInstDebugLocation(BCI);
+ return BCI;
+}
+
+static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+ IRBuilderBase *Builder) {
+ CallInst *CI = CallInst::Create(Callee, Ops, "");
+ Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
+ Builder->SetInstDebugLocation(CI);
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Ptr = getCastedInt8PtrValue(Ptr);
+ Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Ptr->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
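+
+// Usage sketch (assuming a Builder with a valid insert point and the usual
+// getInt8/getInt64 helpers; Dst may be any pointer value):
+//
+//   Builder.CreateMemSet(Dst, Builder.getInt8(0), Builder.getInt64(128),
+//                        /*Align=*/16, /*isVolatile=*/false, /*TBAATag=*/0);
+//
+// Dst is bitcast to i8* automatically and the call targets llvm.memset.*.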
+
+CallInst *IRBuilderBase::
+CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.start only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.start requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
+ return createCallHelper(TheFn, Ops, this);
+}
+
+CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.end only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.end requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
+ return createCallHelper(TheFn, Ops, this);
+}
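+
+// Sketch of bracketing a stack object with lifetime markers (AI is assumed
+// to be an alloca covering 64 bytes; Builder has a valid insert point):
+//
+//   Builder.CreateLifetimeStart(AI, Builder.getInt64(64));
+//   // ... uses of AI ...
+//   Builder.CreateLifetimeEnd(AI, Builder.getInt64(64));
+//
+// Passing a null Size marks the whole object (emitted as i64 -1).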
diff --git a/contrib/llvm/lib/VMCore/InlineAsm.cpp b/contrib/llvm/lib/VMCore/InlineAsm.cpp
new file mode 100644
index 000000000000..4a03b395e98e
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/InlineAsm.cpp
@@ -0,0 +1,294 @@
+//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InlineAsm class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InlineAsm.h"
+#include "ConstantsContext.h"
+#include "LLVMContextImpl.h"
+#include "llvm/DerivedTypes.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Implement the first virtual method in this class in this file so the
+// InlineAsm vtable is emitted here.
+InlineAsm::~InlineAsm() {
+}
+
+
+InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack) {
+ InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack);
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
+}
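+
+// A small usage sketch (Ctx is an assumed LLVMContext; the asm and
+// constraint strings must of course be target-appropriate):
+//
+//   FunctionType *FTy =
+//       FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
+//   InlineAsm *IA = InlineAsm::get(FTy, "nop", "",
+//                                  /*hasSideEffects=*/true,
+//                                  /*isAlignStack=*/false);
+//   // IA can then be used as the callee of a call instruction.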
+
+InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
+ const std::string &constraints, bool hasSideEffects,
+ bool isAlignStack)
+ : Value(Ty, Value::InlineAsmVal),
+ AsmString(asmString),
+ Constraints(constraints), HasSideEffects(hasSideEffects),
+ IsAlignStack(isAlignStack) {
+
+ // Do various checks on the constraint string and type.
+ assert(Verify(getFunctionType(), constraints) &&
+ "Function type not legal for constraints!");
+}
+
+void InlineAsm::destroyConstant() {
+ getType()->getContext().pImpl->InlineAsms.remove(this);
+ delete this;
+}
+
+FunctionType *InlineAsm::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+/// Default constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo() :
+ Type(isInput), isEarlyClobber(false),
+ MatchingInput(-1), isCommutative(false),
+ isIndirect(false), isMultipleAlternative(false),
+ currentAlternativeIndex(0) {
+}
+
+/// Copy constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
+ Type(other.Type), isEarlyClobber(other.isEarlyClobber),
+ MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
+ isIndirect(other.isIndirect), Codes(other.Codes),
+ isMultipleAlternative(other.isMultipleAlternative),
+ multipleAlternatives(other.multipleAlternatives),
+ currentAlternativeIndex(other.currentAlternativeIndex) {
+}
+
+/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
+/// fields in this structure. If the constraint string is not understood,
+/// return true, otherwise return false.
+bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
+ InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
+ StringRef::iterator I = Str.begin(), E = Str.end();
+ unsigned multipleAlternativeCount = Str.count('|') + 1;
+ unsigned multipleAlternativeIndex = 0;
+ ConstraintCodeVector *pCodes = &Codes;
+
+ // Initialize
+ isMultipleAlternative = multipleAlternativeCount > 1;
+ if (isMultipleAlternative) {
+ multipleAlternatives.resize(multipleAlternativeCount);
+ pCodes = &multipleAlternatives[0].Codes;
+ }
+ Type = isInput;
+ isEarlyClobber = false;
+ MatchingInput = -1;
+ isCommutative = false;
+ isIndirect = false;
+ currentAlternativeIndex = 0;
+
+ // Parse prefixes.
+ if (*I == '~') {
+ Type = isClobber;
+ ++I;
+ } else if (*I == '=') {
+ ++I;
+ Type = isOutput;
+ }
+
+ if (*I == '*') {
+ isIndirect = true;
+ ++I;
+ }
+
+ if (I == E) return true; // Just a prefix, like "==" or "~".
+
+ // Parse the modifiers.
+ bool DoneWithModifiers = false;
+ while (!DoneWithModifiers) {
+ switch (*I) {
+ default:
+ DoneWithModifiers = true;
+ break;
+ case '&': // Early clobber.
+ if (Type != isOutput || // Cannot early clobber anything but output.
+ isEarlyClobber) // Reject &&&&&&
+ return true;
+ isEarlyClobber = true;
+ break;
+ case '%': // Commutative.
+ if (Type == isClobber || // Cannot commute clobbers.
+ isCommutative) // Reject %%%%%
+ return true;
+ isCommutative = true;
+ break;
+ case '#': // Comment.
+ case '*': // Register preferencing.
+ return true; // Not supported.
+ }
+
+ if (!DoneWithModifiers) {
+ ++I;
+ if (I == E) return true; // Just prefixes and modifiers!
+ }
+ }
+
+ // Parse the various constraints.
+ while (I != E) {
+ if (*I == '{') { // Physical register reference.
+ // Find the end of the register name.
+ StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
+ if (ConstraintEnd == E) return true; // "{foo"
+ pCodes->push_back(std::string(I, ConstraintEnd+1));
+ I = ConstraintEnd+1;
+ } else if (isdigit(*I)) { // Matching Constraint
+ // Maximal munch numbers.
+ StringRef::iterator NumStart = I;
+ while (I != E && isdigit(*I))
+ ++I;
+ pCodes->push_back(std::string(NumStart, I));
+ unsigned N = atoi(pCodes->back().c_str());
+ // Check that this is a valid matching constraint!
+ if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
+ Type != isInput)
+ return true; // Invalid constraint number.
+
+ // If Operand N already has a matching input, reject this. An output
+ // can't be constrained to the same value as multiple inputs.
+ if (isMultipleAlternative) {
+ InlineAsm::SubConstraintInfo &scInfo =
+ ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
+ if (scInfo.MatchingInput != -1)
+ return true;
+ // Note that operand #n has a matching input.
+ scInfo.MatchingInput = ConstraintsSoFar.size();
+ } else {
+ if (ConstraintsSoFar[N].hasMatchingInput())
+ return true;
+ // Note that operand #n has a matching input.
+ ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ }
+ } else if (*I == '|') {
+ multipleAlternativeIndex++;
+ pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
+ ++I;
+ } else if (*I == '^') {
+ // Multi-letter constraint
+ // FIXME: For now assuming these are 2-character constraints.
+ pCodes->push_back(std::string(I+1, I+3));
+ I += 3;
+ } else {
+ // Single letter constraint.
+ pCodes->push_back(std::string(I, I+1));
+ ++I;
+ }
+ }
+
+ return false;
+}
+
+/// selectAlternative - Point this constraint to the alternative constraint
+/// indicated by the index.
+void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
+ if (index < multipleAlternatives.size()) {
+ currentAlternativeIndex = index;
+ InlineAsm::SubConstraintInfo &scInfo =
+ multipleAlternatives[currentAlternativeIndex];
+ MatchingInput = scInfo.MatchingInput;
+ Codes = scInfo.Codes;
+ }
+}
+
+InlineAsm::ConstraintInfoVector
+InlineAsm::ParseConstraints(StringRef Constraints) {
+ ConstraintInfoVector Result;
+
+ // Scan the constraints string.
+ for (StringRef::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ) {
+ ConstraintInfo Info;
+
+ // Find the end of this constraint.
+ StringRef::iterator ConstraintEnd = std::find(I, E, ',');
+
+ if (ConstraintEnd == I || // Empty constraint like ",,"
+ Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
+ Result.clear(); // Erroneous constraint?
+ break;
+ }
+
+ Result.push_back(Info);
+
+ // ConstraintEnd may be either the next comma or the end of the string. In
+ // the former case, we skip the comma.
+ I = ConstraintEnd;
+ if (I != E) {
+ ++I;
+ if (I == E) { Result.clear(); break; } // don't allow "xyz,"
+ }
+ }
+
+ return Result;
+}
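+
+// Worked example, derived from the parser above: for the constraint string
+// "=r,r,~{memory}", ParseConstraints returns three ConstraintInfo entries:
+//   [0] Type == isOutput,  Codes == {"r"}
+//   [1] Type == isInput,   Codes == {"r"}
+//   [2] Type == isClobber, Codes == {"{memory}"}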
+
+/// Verify - Verify that the specified constraint string is reasonable for the
+/// specified function type, and otherwise validate the constraint string.
+bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
+ if (Ty->isVarArg()) return false;
+
+ ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
+
+ // Error parsing constraints.
+ if (Constraints.empty() && !ConstStr.empty()) return false;
+
+ unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
+ unsigned NumIndirect = 0;
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ switch (Constraints[i].Type) {
+ case InlineAsm::isOutput:
+ if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
+ return false; // outputs before inputs and clobbers.
+ if (!Constraints[i].isIndirect) {
+ ++NumOutputs;
+ break;
+ }
+ ++NumIndirect;
+ // FALLTHROUGH for Indirect Outputs.
+ case InlineAsm::isInput:
+ if (NumClobbers) return false; // inputs before clobbers.
+ ++NumInputs;
+ break;
+ case InlineAsm::isClobber:
+ ++NumClobbers;
+ break;
+ }
+ }
+
+ switch (NumOutputs) {
+ case 0:
+ if (!Ty->getReturnType()->isVoidTy()) return false;
+ break;
+ case 1:
+ if (Ty->getReturnType()->isStructTy()) return false;
+ break;
+ default:
+ const StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
+ if (STy == 0 || STy->getNumElements() != NumOutputs)
+ return false;
+ break;
+ }
+
+ if (Ty->getNumParams() != NumInputs) return false;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/VMCore/Instruction.cpp b/contrib/llvm/lib/VMCore/Instruction.cpp
new file mode 100644
index 000000000000..02c075743959
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Instruction.cpp
@@ -0,0 +1,414 @@
+//===-- Instruction.cpp - Implement the Instruction class -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Instruction class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instruction.h"
+#include "llvm/Type.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basic block
+ LeakDetector::addGarbageObject(this);
+
+ // If requested, insert this instruction into a basic block...
+ if (InsertBefore) {
+ assert(InsertBefore->getParent() &&
+ "Instruction to insert before is not in a basic block!");
+ InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
+ }
+}
+
+Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ BasicBlock *InsertAtEnd)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basic block
+ LeakDetector::addGarbageObject(this);
+
+ // Append this instruction to the end of the basic block.
+ assert(InsertAtEnd && "Basic block to append to may not be NULL!");
+ InsertAtEnd->getInstList().push_back(this);
+}
+
+
+// Out of line virtual method, so the vtable, etc has a home.
+Instruction::~Instruction() {
+ assert(Parent == 0 && "Instruction still linked in the program!");
+ if (hasMetadataHashEntry())
+ clearMetadataHashEntries();
+}
+
+
+void Instruction::setParent(BasicBlock *P) {
+ if (getParent()) {
+ if (!P) LeakDetector::addGarbageObject(this);
+ } else {
+ if (P) LeakDetector::removeGarbageObject(this);
+ }
+
+ Parent = P;
+}
+
+void Instruction::removeFromParent() {
+ getParent()->getInstList().remove(this);
+}
+
+void Instruction::eraseFromParent() {
+ getParent()->getInstList().erase(this);
+}
+
+/// insertBefore - Insert an unlinked instruction into a basic block
+/// immediately before the specified instruction.
+void Instruction::insertBefore(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insert(InsertPos, this);
+}
+
+/// insertAfter - Insert an unlinked instruction into a basic block
+/// immediately after the specified instruction.
+void Instruction::insertAfter(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
+}
+
+/// moveBefore - Unlink this instruction from its current basic block and
+/// insert it into the basic block that MovePos lives in, right before
+/// MovePos.
+void Instruction::moveBefore(Instruction *MovePos) {
+ MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
+ this);
+}
+
+
+const char *Instruction::getOpcodeName(unsigned OpCode) {
+ switch (OpCode) {
+ // Terminators
+ case Ret: return "ret";
+ case Br: return "br";
+ case Switch: return "switch";
+ case IndirectBr: return "indirectbr";
+ case Invoke: return "invoke";
+ case Unwind: return "unwind";
+ case Unreachable: return "unreachable";
+
+ // Standard binary operators...
+ case Add: return "add";
+ case FAdd: return "fadd";
+ case Sub: return "sub";
+ case FSub: return "fsub";
+ case Mul: return "mul";
+ case FMul: return "fmul";
+ case UDiv: return "udiv";
+ case SDiv: return "sdiv";
+ case FDiv: return "fdiv";
+ case URem: return "urem";
+ case SRem: return "srem";
+ case FRem: return "frem";
+
+ // Logical operators...
+ case And: return "and";
+ case Or : return "or";
+ case Xor: return "xor";
+
+ // Memory instructions...
+ case Alloca: return "alloca";
+ case Load: return "load";
+ case Store: return "store";
+ case GetElementPtr: return "getelementptr";
+
+ // Convert instructions...
+ case Trunc: return "trunc";
+ case ZExt: return "zext";
+ case SExt: return "sext";
+ case FPTrunc: return "fptrunc";
+ case FPExt: return "fpext";
+ case FPToUI: return "fptoui";
+ case FPToSI: return "fptosi";
+ case UIToFP: return "uitofp";
+ case SIToFP: return "sitofp";
+ case IntToPtr: return "inttoptr";
+ case PtrToInt: return "ptrtoint";
+ case BitCast: return "bitcast";
+
+ // Other instructions...
+ case ICmp: return "icmp";
+ case FCmp: return "fcmp";
+ case PHI: return "phi";
+ case Select: return "select";
+ case Call: return "call";
+ case Shl: return "shl";
+ case LShr: return "lshr";
+ case AShr: return "ashr";
+ case VAArg: return "va_arg";
+ case ExtractElement: return "extractelement";
+ case InsertElement: return "insertelement";
+ case ShuffleVector: return "shufflevector";
+ case ExtractValue: return "extractvalue";
+ case InsertValue: return "insertvalue";
+
+ default: return "<Invalid operator> ";
+ }
+
+ return 0;
+}
+
+/// isIdenticalTo - Return true if the specified instruction is exactly
+/// identical to the current one. This means that all operands match and any
+/// extra information (e.g. load is volatile) agrees.
+bool Instruction::isIdenticalTo(const Instruction *I) const {
+ return isIdenticalToWhenDefined(I) &&
+ SubclassOptionalData == I->SubclassOptionalData;
+}
+
+/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+/// ignores the SubclassOptionalData flags, which specify conditions
+/// under which the instruction's result is undefined.
+bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) != I->getOperand(i))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+
+ return true;
+}
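+
+// Illustrative example: these two instructions compare equal under
+// isIdenticalToWhenDefined but not under isIdenticalTo, because the nsw flag
+// is kept in SubclassOptionalData:
+//   %a = add nsw i32 %x, %y
+//   %b = add i32 %x, %y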
+
+// isSameOperationAs
+// This should be kept in sync with isEquivalentOperation in
+// lib/Transforms/IPO/MergeFunctions.cpp.
+bool Instruction::isSameOperationAs(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i)->getType() != I->getOperand(i)->getType())
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() ==
+ cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+
+ return true;
+}
+
+/// isUsedOutsideOfBlock - Return true if there are any uses of this
+/// instruction outside of the specified block. Note that PHI nodes are
+/// considered to evaluate their operands in the corresponding predecessor
+/// block.
+bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ // A PHI node uses its values in the corresponding predecessor block. For
+ // other instructions, just check whether the parent of the use matches up.
+ const User *U = *UI;
+ const PHINode *PN = dyn_cast<PHINode>(U);
+ if (PN == 0) {
+ if (cast<Instruction>(U)->getParent() != BB)
+ return true;
+ continue;
+ }
+
+ if (PN->getIncomingBlock(UI) != BB)
+ return true;
+ }
+ return false;
+}
+
+/// mayReadFromMemory - Return true if this instruction may read memory.
+///
+bool Instruction::mayReadFromMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::VAArg:
+ case Instruction::Load:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->doesNotAccessMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->doesNotAccessMemory();
+ case Instruction::Store:
+ return cast<StoreInst>(this)->isVolatile();
+ }
+}
+
+/// mayWriteToMemory - Return true if this instruction may modify memory.
+///
+bool Instruction::mayWriteToMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::Store:
+ case Instruction::VAArg:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->onlyReadsMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->onlyReadsMemory();
+ case Instruction::Load:
+ return cast<LoadInst>(this)->isVolatile();
+ }
+}
+
+/// mayThrow - Return true if this instruction may throw an exception.
+///
+bool Instruction::mayThrow() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotThrow();
+ return false;
+}
+
+/// isAssociative - Return true if the instruction is associative:
+///
+/// Associative operators satisfy: x op (y op z) === (x op y) op z
+///
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
+///
+bool Instruction::isAssociative(unsigned Opcode) {
+ return Opcode == And || Opcode == Or || Opcode == Xor ||
+ Opcode == Add || Opcode == Mul;
+}
+
+/// isCommutative - Return true if the instruction is commutative:
+///
+/// Commutative operators satisfy: (x op y) === (y op x)
+///
+/// In LLVM, these are the associative operators plus FAdd and FMul, when
+/// applied to any type.
+///
+bool Instruction::isCommutative(unsigned op) {
+ switch (op) {
+ case Add:
+ case FAdd:
+ case Mul:
+ case FMul:
+ case And:
+ case Or:
+ case Xor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Instruction::isSafeToSpeculativelyExecute() const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (getOpcode()) {
+ default:
+ return true;
+ case UDiv:
+ case URem: {
+ // x / y is undefined if y == 0, but calculations like x / 3 are safe.
+ ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
+ return Op && !Op->isNullValue();
+ }
+ case SDiv:
+ case SRem: {
+ // x / y is undefined if y == 0, and can overflow if y == -1 (INT_MIN / -1),
+ // but calculations like x / 3 are safe.
+ ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
+ return Op && !Op->isNullValue() && !Op->isAllOnesValue();
+ }
+ case Load: {
+ const LoadInst *LI = cast<LoadInst>(this);
+ if (LI->isVolatile())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Call:
+ return false; // The called function could have undefined behavior or
+ // side-effects.
+ // FIXME: We should special-case some intrinsics (bswap,
+ // overflow-checking arithmetic, etc.)
+ case VAArg:
+ case Alloca:
+ case Invoke:
+ case PHI:
+ case Store:
+ case Ret:
+ case Br:
+ case IndirectBr:
+ case Switch:
+ case Unwind:
+ case Unreachable:
+ return false; // Misc instructions which have effects
+ }
+}
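+
+// Quick examples of the policy above (informal):
+//   udiv i32 %x, 3   -- safe to speculate: the divisor is a non-zero constant
+//   udiv i32 %x, %y  -- not safe: %y may be zero at run time
+//   a volatile load  -- never speculated
+// Calls are rejected wholesale, even intrinsics that cannot trap (see the
+// FIXME above).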
+
+Instruction *Instruction::clone() const {
+ Instruction *New = clone_impl();
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (!hasMetadata())
+ return New;
+
+ // Otherwise, enumerate and copy over metadata from the old instruction to the
+ // new one.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
+ getAllMetadataOtherThanDebugLoc(TheMDs);
+ for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
+ New->setMetadata(TheMDs[i].first, TheMDs[i].second);
+
+ New->setDebugLoc(getDebugLoc());
+ return New;
+}
diff --git a/contrib/llvm/lib/VMCore/Instructions.cpp b/contrib/llvm/lib/VMCore/Instructions.cpp
new file mode 100644
index 000000000000..9baad09cb272
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Instructions.cpp
@@ -0,0 +1,3187 @@
+//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements all of the non-inline methods for the LLVM instruction
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CallSite Class
+//===----------------------------------------------------------------------===//
+
+User::op_iterator CallSite::getCallee() const {
+ Instruction *II(getInstruction());
+ return isCall()
+ ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
+}
+
+//===----------------------------------------------------------------------===//
+// TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+TerminatorInst::~TerminatorInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+UnaryInstruction::~UnaryInstruction() {
+}
+
+//===----------------------------------------------------------------------===//
+// SelectInst Class
+//===----------------------------------------------------------------------===//
+
+/// areInvalidOperands - Return a string if the specified operands are invalid
+/// for a select operation, otherwise return null.
+const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
+ if (Op1->getType() != Op2->getType())
+ return "both values to select must have same type";
+
+ if (const VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
+ // Vector select.
+ if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
+ return "vector select condition element type must be i1";
+ const VectorType *ET = dyn_cast<VectorType>(Op1->getType());
+ if (ET == 0)
+ return "selected values for vector select must be vectors";
+ if (ET->getNumElements() != VT->getNumElements())
+ return "vector select requires selected vectors to have "
+ "the same vector length as select condition";
+ } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
+ return "select condition must be i1 or <n x i1>";
+ }
+ return 0;
+}
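+
+// Examples of the checks above (informal):
+//   select i1 %c, i32 %a, i32 %b                    -- valid
+//   select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b  -- valid vector select
+//   select i32 %c, i32 %a, i32 %b                   -- rejected: condition
+//                                                      must be i1 or <n x i1>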
+
+
+//===----------------------------------------------------------------------===//
+// PHINode Class
+//===----------------------------------------------------------------------===//
+
+PHINode::PHINode(const PHINode &PN)
+ : Instruction(PN.getType(), Instruction::PHI,
+ allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
+ ReservedSpace(PN.getNumOperands()) {
+ std::copy(PN.op_begin(), PN.op_end(), op_begin());
+ std::copy(PN.block_begin(), PN.block_end(), block_begin());
+ SubclassOptionalData = PN.SubclassOptionalData;
+}
+
+PHINode::~PHINode() {
+ dropHungoffUses();
+}
+
+Use *PHINode::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses of the incoming values, followed by a pointer
+ // (with bottom bit set) to the User, followed by the array of pointers to
+ // the incoming basic blocks.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+ + N * sizeof(BasicBlock*);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+// removeIncomingValue - Remove an incoming value. This is useful if a
+// predecessor basic block is deleted.
+Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
+ Value *Removed = getIncomingValue(Idx);
+
+ // Move everything after this operand down.
+ //
+ // FIXME: we could just swap with the end of the list, then erase. However,
+ // clients might not expect this to happen. The code as it is thrashes the
+ // use/def lists, which is kinda lame.
+ std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+ std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
+
+ // Nuke the last value.
+ Op<-1>().set(0);
+ --NumOperands;
+
+ // If the PHI node is dead, because it has zero entries, nuke it now.
+ if (getNumOperands() == 0 && DeletePHIIfEmpty) {
+ // If anyone is using this PHI, make them use a dummy value instead...
+ replaceAllUsesWith(UndefValue::get(getType()));
+ eraseFromParent();
+ }
+ return Removed;
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, increasing the number of operands by 1.5 times.
+///
+void PHINode::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e + e / 2;
+ if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
+
+ Use *OldOps = op_begin();
+ BasicBlock **OldBlocks = block_begin();
+
+ ReservedSpace = NumOps;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ std::copy(OldOps, OldOps + e, op_begin());
+ std::copy(OldBlocks, OldBlocks + e, block_begin());
+
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+/// hasConstantValue - If the specified PHI node always merges together the same
+/// value, return the value, otherwise return null.
+Value *PHINode::hasConstantValue() const {
+ // Exploit the fact that phi nodes always have at least one entry.
+ Value *ConstantValue = getIncomingValue(0);
+ for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
+ if (getIncomingValue(i) != ConstantValue)
+ return 0; // Incoming values not all the same.
+ return ConstantValue;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallInst Implementation
+//===----------------------------------------------------------------------===//
+
+CallInst::~CallInst() {
+}
+
+void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert((Args.size() == FTy->getNumParams() ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Calling a function with bad signature!");
+
+ for (unsigned i = 0; i != Args.size(); ++i)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Calling a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+void CallInst::init(Value *Func, const Twine &NameStr) {
+ assert(NumOperands == 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
+#endif
+
+ setName(NameStr);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(const CallInst &CI)
+ : Instruction(CI.getType(), Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
+ setAttributes(CI.getAttributes());
+ setTailCall(CI.isTailCall());
+ setCallingConv(CI.getCallingConv());
+
+ std::copy(CI.op_begin(), CI.op_end(), op_begin());
+ SubclassOptionalData = CI.SubclassOptionalData;
+}
+
+void CallInst::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void CallInst::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+bool CallInst::paramHasAttr(unsigned i, Attributes attr) const {
+ if (AttributeList.paramHasAttr(i, attr))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->paramHasAttr(i, attr);
+ return false;
+}
+
+/// IsConstantOne - Return true only if val is the constant int 1.
+static bool IsConstantOne(Value *val) {
+ assert(val && "IsConstantOne does not work with NULL val");
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+static Instruction *createMalloc(Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd, const Type *IntPtrTy,
+ const Type *AllocTy, Value *AllocSize,
+ Value *ArraySize, Function *MallocF,
+ const Twine &Name) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createMalloc needs either InsertBefore or InsertAtEnd");
+
+ // malloc(type) becomes:
+ // bitcast (i8* malloc(typeSize)) to type*
+ // malloc(type, arraySize) becomes:
+ // bitcast (i8 *malloc(typeSize*arraySize)) to type*
+ if (!ArraySize)
+ ArraySize = ConstantInt::get(IntPtrTy, 1);
+ else if (ArraySize->getType() != IntPtrTy) {
+ if (InsertBefore)
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertBefore);
+ else
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertAtEnd);
+ }
+
+ if (!IsConstantOne(ArraySize)) {
+ if (IsConstantOne(AllocSize)) {
+ AllocSize = ArraySize; // Operand * 1 = Operand
+ } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
+ Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
+ false /*ZExt*/);
+ // Malloc arg is constant product of type size and array size
+ AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
+ } else {
+ // Multiply type size by the array size...
+ if (InsertBefore)
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertBefore);
+ else
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertAtEnd);
+ }
+ }
+
+ assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+ // Create the call to Malloc.
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+ Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+ Value *MallocFunc = MallocF;
+ if (!MallocFunc)
+ // prototype malloc as "void *malloc(size_t)"
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
+ const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
+ CallInst *MCall = NULL;
+ Instruction *Result = NULL;
+ if (InsertBefore) {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
+ Result = MCall;
+ if (Result->getType() != AllocPtrType)
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
+ } else {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
+ Result = MCall;
+ if (Result->getType() != AllocPtrType) {
+ InsertAtEnd->getInstList().push_back(MCall);
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name);
+ }
+ }
+ MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ MCall->setCallingConv(F->getCallingConv());
+ if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+ }
+ assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
+
+ return Result;
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
+ const Type *IntPtrTy, const Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function * MallocF,
+ const Twine &Name) {
+ return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
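+
+// Usage sketch (assumed caller: InsertPt is an existing instruction, Ctx an
+// LLVMContext, and the target is 64-bit so that IntPtrTy is i64); this
+// allocates an array of ten i32s and bitcasts the result to i32*:
+//
+//   Instruction *ArrMalloc =
+//       CallInst::CreateMalloc(InsertPt,
+//                              Type::getInt64Ty(Ctx),               // IntPtrTy
+//                              Type::getInt32Ty(Ctx),               // AllocTy
+//                              ConstantExpr::getSizeOf(Type::getInt32Ty(Ctx)),
+//                              ConstantInt::get(Type::getInt64Ty(Ctx), 10),
+//                              /*MallocF=*/0, "arr");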
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+/// Note: This function does not add the bitcast to the basic block; that is
+/// the responsibility of the caller.
+Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
+ const Type *IntPtrTy, const Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function *MallocF, const Twine &Name) {
+ return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
+
+static Instruction* createFree(Value* Source, Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createFree needs either InsertBefore or InsertAtEnd");
+ assert(Source->getType()->isPointerTy() &&
+ "Can not free something of nonpointer type!");
+
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+
+ const Type *VoidTy = Type::getVoidTy(M->getContext());
+ const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
+ // prototype free as "void free(void*)"
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
+ CallInst* Result = NULL;
+ Value *PtrCast = Source;
+ if (InsertBefore) {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
+ Result = CallInst::Create(FreeFunc, PtrCast, "", InsertBefore);
+ } else {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
+ Result = CallInst::Create(FreeFunc, PtrCast, "");
+ }
+ Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc))
+ Result->setCallingConv(F->getCallingConv());
+
+ return Result;
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+ return createFree(Source, InsertBefore, NULL);
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+/// Note: This function does not add the call to the basic block; that is the
+/// responsibility of the caller.
+Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
+ Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
+ assert(FreeCall && "CreateFree did not create a CallInst");
+ return FreeCall;
+}
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Implementation
+//===----------------------------------------------------------------------===//
+
+void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
+ Op<-3>() = Fn;
+ Op<-2>() = IfNormal;
+ Op<-1>() = IfException;
+
+#ifndef NDEBUG
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
+
+ assert(((Args.size() == FTy->getNumParams()) ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Invoking a function with bad signature");
+
+ for (unsigned i = 0, e = Args.size(); i != e; i++)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Invoking a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+InvokeInst::InvokeInst(const InvokeInst &II)
+ : TerminatorInst(II.getType(), Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this)
+ - II.getNumOperands(),
+ II.getNumOperands()) {
+ setAttributes(II.getAttributes());
+ setCallingConv(II.getCallingConv());
+ std::copy(II.op_begin(), II.op_end(), op_begin());
+ SubclassOptionalData = II.SubclassOptionalData;
+}
+
+BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned InvokeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ return setSuccessor(idx, B);
+}
+
+bool InvokeInst::paramHasAttr(unsigned i, Attributes attr) const {
+ if (AttributeList.paramHasAttr(i, attr))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->paramHasAttr(i, attr);
+ return false;
+}
+
+void InvokeInst::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+
+//===----------------------------------------------------------------------===//
+// ReturnInst Implementation
+//===----------------------------------------------------------------------===//
+
+ReturnInst::ReturnInst(const ReturnInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) -
+ RI.getNumOperands(),
+ RI.getNumOperands()) {
+ if (RI.getNumOperands())
+ Op<0>() = RI.Op<0>();
+ SubclassOptionalData = RI.SubclassOptionalData;
+}
+
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
+}
+
+unsigned ReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
+/// emit the vtable for the class in this translation unit.
+void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ReturnInst has no successors!");
+}
+
+BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ReturnInst has no successors!");
+ return 0;
+}
+
+ReturnInst::~ReturnInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnwindInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnwindInst::UnwindInst(LLVMContext &Context, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
+ 0, 0, InsertBefore) {
+}
+UnwindInst::UnwindInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
+ 0, 0, InsertAtEnd) {
+}
+
+
+unsigned UnwindInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnwindInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("UnwindInst has no successors!");
+}
+
+BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("UnwindInst has no successors!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnreachableInst::UnreachableInst(LLVMContext &Context,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertBefore) {
+}
+UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertAtEnd) {
+}
+
+unsigned UnreachableInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("UnwindInst has no successors!");
+}
+
+BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("UnwindInst has no successors!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// BranchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void BranchInst::AssertOK() {
+ if (isConditional())
+ assert(getCondition()->getType()->isIntegerTy(1) &&
+ "May only branch on boolean predicates!");
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertBefore) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertAtEnd) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+
+BranchInst::BranchInst(const BranchInst &BI) :
+ TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
+ Op<-1>() = BI.Op<-1>();
+ if (BI.getNumOperands() != 1) {
+ assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
+ Op<-3>() = BI.Op<-3>();
+ Op<-2>() = BI.Op<-2>();
+ }
+ SubclassOptionalData = BI.SubclassOptionalData;
+}
+
+BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned BranchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AllocaInst Implementation
+//===----------------------------------------------------------------------===//
+
+static Value *getAISize(LLVMContext &Context, Value *Amt) {
+ if (!Amt)
+ Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ else {
+ assert(!isa<BasicBlock>(Amt) &&
+ "Passed basic block into allocation size parameter! Use other ctor");
+ assert(Amt->getType()->isIntegerTy() &&
+ "Allocation array size is not an integer!");
+ }
+ return Amt;
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+// Out of line virtual method, so the vtable, etc has a home.
+AllocaInst::~AllocaInst() {
+}
+
+void AllocaInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData(Log2_32(Align) + 1);
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
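+
+// Encoding note, inferred from the code above: the alignment is stored as
+// Log2(Align) + 1 in the instruction's subclass data, so an unspecified
+// alignment (0) round-trips as 0 while, e.g., Align == 16 is stored as 5.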
+
+bool AllocaInst::isArrayAllocation() const {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
+ return !CI->isOne();
+ return true;
+}
+
+Type *AllocaInst::getAllocatedType() const {
+ return getType()->getElementType();
+}
+
+/// isStaticAlloca - Return true if this alloca is in the entry block of the
+/// function and is a constant size. If so, the code generator will fold it
+/// into the prolog/epilog code, so it is basically free.
+bool AllocaInst::isStaticAlloca() const {
+ // Must be constant size.
+ if (!isa<ConstantInt>(getArraySize())) return false;
+
+ // Must be in the entry block.
+ const BasicBlock *Parent = getParent();
+ return Parent == &Parent->getParent()->front();
+}
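+
+// Examples (informal): an "alloca i32" emitted in the function's entry block
+// is static and is folded into the fixed stack frame, while an
+// "alloca i32, i32 %n" (run-time array size) or an alloca placed in any
+// later block is treated as dynamic.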
+
+//===----------------------------------------------------------------------===//
+// LoadInst Implementation
+//===----------------------------------------------------------------------===//
+
+void LoadInst::AssertOK() {
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type.");
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ Instruction *InsertBef)
+: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+void LoadInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ ((Log2_32(Align)+1)<<1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// StoreInst Implementation
+//===----------------------------------------------------------------------===//
+
+void StoreInst::AssertOK() {
+ assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
+ assert(getOperand(1)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(0)->getType() ==
+ cast<PointerType>(getOperand(1)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+}
+
+
+StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+}
+
+void StoreInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ ((Log2_32(Align)+1) << 1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Implementation
+//===----------------------------------------------------------------------===//
+
+static unsigned retrieveAddrSpace(const Value *Val) {
+ return cast<PointerType>(Val->getType())->getAddressSpace();
+}
+
+void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
+ const Twine &Name) {
+ assert(NumOperands == 1+NumIdx && "NumOperands not initialized?");
+ Use *OL = OperandList;
+ OL[0] = Ptr;
+
+ for (unsigned i = 0; i != NumIdx; ++i)
+ OL[i+1] = Idx[i];
+
+ setName(Name);
+}
+
+void GetElementPtrInst::init(Value *Ptr, Value *Idx, const Twine &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+ Use *OL = OperandList;
+ OL[0] = Ptr;
+ OL[1] = Idx;
+
+ setName(Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
+ : Instruction(GEPI.getType(), GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this)
+ - GEPI.getNumOperands(),
+ GEPI.getNumOperands()) {
+ Use *OL = OperandList;
+ Use *GEPIOL = GEPI.OperandList;
+ for (unsigned i = 0, E = NumOperands; i != E; ++i)
+ OL[i] = GEPIOL[i];
+ SubclassOptionalData = GEPI.SubclassOptionalData;
+}
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
+ const Twine &Name, Instruction *InBe)
+ : Instruction(PointerType::get(
+ checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - 2,
+ 2, InBe) {
+ init(Ptr, Idx, Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
+ const Twine &Name, BasicBlock *IAE)
+ : Instruction(PointerType::get(
+ checkGEPType(getIndexedType(Ptr->getType(),Idx)),
+ retrieveAddrSpace(Ptr)),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - 2,
+ 2, IAE) {
+ init(Ptr, Idx, Name);
+}
+
+/// getIndexedType - Returns the type of the element that would be accessed with
+/// a gep instruction with the specified parameters.
+///
+/// The Idxs pointer should point to a contiguous piece of memory containing the
+/// indices, either as Value* or uint64_t.
+///
+/// A null type is returned if the indices are invalid for the specified
+/// pointer type.
+///
+template <typename IndexTy>
+static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
+ unsigned NumIdx) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ if (!PTy) return 0; // Type isn't a pointer type!
+ Type *Agg = PTy->getElementType();
+
+ // Handle the special case of an empty index set, which is always valid.
+ if (NumIdx == 0)
+ return Agg;
+
+ // If there is at least one index, the top level type must be sized, otherwise
+ // it cannot be 'stepped over'.
+ if (!Agg->isSized())
+ return 0;
+
+ unsigned CurIdx = 1;
+ for (; CurIdx != NumIdx; ++CurIdx) {
+ CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ if (!CT || CT->isPointerTy()) return 0;
+ IndexTy Index = Idxs[CurIdx];
+ if (!CT->indexValid(Index)) return 0;
+ Agg = CT->getTypeAtIndex(Index);
+ }
+ return CurIdx == NumIdx ? Agg : 0;
+}
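+// Worked example: for %T = type { i32, [4 x i8] } and a pointer operand of
+// type %T*, the index list (i64 0, i32 1, i64 2) walks %T -> [4 x i8] -> i8,
+// so the indexed type is i8 and the resulting GEP produces an i8*. Indexing
+// any further (into the i8, which is not a composite type) returns null.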
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+ uint64_t const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ if (!PTy) return 0; // Type isn't a pointer type!
+
+ // Check the pointer index.
+ if (!PTy->indexValid(Idx)) return 0;
+
+ return PTy->getElementType();
+}
+
+
+/// hasAllZeroIndices - Return true if all of the indices of this GEP are
+/// zeros. If so, the result pointer and the first operand have the same
+/// value, just potentially different types.
+bool GetElementPtrInst::hasAllZeroIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(i))) {
+ if (!CI->isZero()) return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
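+// Example: "getelementptr { i32, i32 }* %p, i32 0, i32 0" has all-zero
+// indices, so it computes the same address as %p, merely retyped as i32*.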
+
+/// hasAllConstantIndices - Return true if all of the indices of this GEP are
+/// constant integers. If so, the result pointer and the first operand have
+/// a constant offset between them.
+bool GetElementPtrInst::hasAllConstantIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ }
+ return true;
+}
+
+void GetElementPtrInst::setIsInBounds(bool B) {
+ cast<GEPOperator>(this)->setIsInBounds(B);
+}
+
+bool GetElementPtrInst::isInBounds() const {
+ return cast<GEPOperator>(this)->isInBounds();
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertBef) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertAE) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+
+bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
+ return false;
+ return true;
+}
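+// Example: "extractelement <4 x float> %v, i32 2" passes this check; a
+// non-vector first operand or an index whose type is not i32 does not.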
+
+
+//===----------------------------------------------------------------------===//
+// InsertElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertBef) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertAE) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+ const Value *Index) {
+ if (!Vec->getType()->isVectorTy())
+ return false; // First operand of insertelement must be vector type.
+
+ if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
+ return false; // Second operand of insertelement must be vector element type.
+
+ if (!Index->getType()->isIntegerTy(32))
+ return false; // Third operand of insertelement must be i32.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ShuffleVectorInst Implementation
+//===----------------------------------------------------------------------===//
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ Instruction *InsertBefore)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertBefore) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertAtEnd) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
+ const Value *Mask) {
+ if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
+ return false;
+
+ const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Check to see if Mask is valid.
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(VTy->getNumElements()*2))
+ return false;
+ } else if (!isa<UndefValue>(MV->getOperand(i))) {
+ return false;
+ }
+ }
+ }
+ else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask))
+ return false;
+
+ return true;
+}
+
+/// getMaskValue - Return the index from the shuffle mask for the specified
+/// output result. This is either -1 if the element is undef or a number less
+/// than 2*numelements.
+int ShuffleVectorInst::getMaskValue(unsigned i) const {
+ const Constant *Mask = cast<Constant>(getOperand(2));
+ if (isa<UndefValue>(Mask)) return -1;
+ if (isa<ConstantAggregateZero>(Mask)) return 0;
+ const ConstantVector *MaskCV = cast<ConstantVector>(Mask);
+ assert(i < MaskCV->getNumOperands() && "Index out of range");
+
+ if (isa<UndefValue>(MaskCV->getOperand(i)))
+ return -1;
+ return cast<ConstantInt>(MaskCV->getOperand(i))->getZExtValue();
+}
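+// Example: for a shuffle of two <4 x i32> vectors with mask
+// <i32 0, i32 4, i32 undef, i32 7>, getMaskValue(1) returns 4 (element 0 of
+// the second input vector) and getMaskValue(2) returns -1 (undef lane).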
+
+//===----------------------------------------------------------------------===//
+// InsertValueInst Class
+//===----------------------------------------------------------------------===//
+
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index
+ // (other than weirdness with &*IdxBegin being invalid; see
+ // getelementptr's init routine for example). But there's no
+ // present need to support it.
+ assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
+
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
+ Val->getType() && "Inserted value must match indexed type!");
+ Op<0>() = Agg;
+ Op<1>() = Val;
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
+ : Instruction(IVI.getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this), 2),
+ Indices(IVI.Indices) {
+ Op<0>() = IVI.getOperand(0);
+ Op<1>() = IVI.getOperand(1);
+ SubclassOptionalData = IVI.SubclassOptionalData;
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
+ assert(NumOperands == 1 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index.
+ // But there's no present need to support it.
+ assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
+ : UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
+ Indices(EVI.Indices) {
+ SubclassOptionalData = EVI.SubclassOptionalData;
+}
+
+// getIndexedType - Returns the type of the element that would be extracted
+// with an extractvalue instruction with the specified parameters.
+//
+// A null type is returned if the indices are invalid for the specified
+// aggregate type.
+//
+Type *ExtractValueInst::getIndexedType(const Type *Agg,
+ ArrayRef<unsigned> Idxs) {
+ for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
+ unsigned Index = Idxs[CurIdx];
+ // We can't use CompositeType::indexValid(Index) here.
+ // indexValid() always returns true for arrays because getelementptr allows
+ // out-of-bounds indices. Since we don't allow those for extractvalue and
+ // insertvalue we need to check array indexing manually.
+ // Since the only other types we can index into are struct types it's just
+ // as easy to check those manually as well.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+ if (Index >= AT->getNumElements())
+ return 0;
+ } else if (const StructType *ST = dyn_cast<StructType>(Agg)) {
+ if (Index >= ST->getNumElements())
+ return 0;
+ } else {
+ // Not a valid type to index into.
+ return 0;
+ }
+
+ Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
+ }
+ return const_cast<Type*>(Agg);
+}
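+// Example: for the aggregate type { i32, [2 x float] }, the index list {1, 1}
+// yields float, whereas {1, 2} yields null because index 2 is out of range
+// for the two-element array (unlike getelementptr, out-of-bounds array
+// indices are rejected here).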
+
+//===----------------------------------------------------------------------===//
+// BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ const Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertBefore) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ const Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+
+void BinaryOperator::init(BinaryOps iType) {
+ Value *LHS = getOperand(0), *RHS = getOperand(1);
+ (void)LHS; (void)RHS; // Silence warnings.
+ assert(LHS->getType() == RHS->getType() &&
+ "Binary operator operand types must match!");
+#ifndef NDEBUG
+ switch (iType) {
+ case Add: case Sub:
+ case Mul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case FAdd: case FSub:
+ case FMul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case UDiv:
+ case SDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UDIV");
+ break;
+ case FDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FDIV");
+ break;
+ case URem:
+ case SRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UREM");
+ break;
+ case FRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FREM");
+ break;
+ case Shl:
+ case LShr:
+ case AShr:
+ assert(getType() == LHS->getType() &&
+ "Shift operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a shift operation on a non-integral type!");
+ break;
+ case And: case Or:
+ case Xor:
+ assert(getType() == LHS->getType() &&
+ "Logical operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ default:
+ break;
+ }
+#endif
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S1->getType() == S2->getType() &&
+ "Cannot create binary operator with two operands of differing type!");
+ return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ BinaryOperator *Res = Create(Op, S1, S2, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Constant *C;
+ if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ C = Constant::getAllOnesValue(PTy->getElementType());
+ C = ConstantVector::get(
+ std::vector<Constant*>(PTy->getNumElements(), C));
+ } else {
+ C = Constant::getAllOnesValue(Op->getType());
+ }
+
+ return new BinaryOperator(Instruction::Xor, Op, C,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Constant *AllOnes;
+ if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ // Create a vector of all ones values.
+ Constant *Elt = Constant::getAllOnesValue(PTy->getElementType());
+ AllOnes = ConstantVector::get(
+ std::vector<Constant*>(PTy->getNumElements(), Elt));
+ } else {
+ AllOnes = Constant::getAllOnesValue(Op->getType());
+ }
+
+ return new BinaryOperator(Instruction::Xor, Op, AllOnes,
+ Op->getType(), Name, InsertAtEnd);
+}
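+// Example: CreateNot on an i32 value produces "xor %x, -1"; on a <2 x i32>
+// value it xors with the all-ones vector <i32 -1, i32 -1>.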
+
+
+// isConstantAllOnes - Helper function for several functions below
+static inline bool isConstantAllOnes(const Value *V) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->isAllOnesValue();
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return CV->isAllOnesValue();
+ return false;
+}
+
+bool BinaryOperator::isNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::Sub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
+ return false;
+}
+
+bool BinaryOperator::isFNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::FSub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
+ return false;
+}
+
+bool BinaryOperator::isNot(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ return (Bop->getOpcode() == Instruction::Xor &&
+ (isConstantAllOnes(Bop->getOperand(1)) ||
+ isConstantAllOnes(Bop->getOperand(0))));
+ return false;
+}
+
+Value *BinaryOperator::getNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
+ return getNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getFNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
+ return getFNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getNotArgument(Value *BinOp) {
+ assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
+ BinaryOperator *BO = cast<BinaryOperator>(BinOp);
+ Value *Op0 = BO->getOperand(0);
+ Value *Op1 = BO->getOperand(1);
+ if (isConstantAllOnes(Op0)) return Op1;
+
+ assert(isConstantAllOnes(Op1));
+ return Op0;
+}
+
+const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
+ return getNotArgument(const_cast<Value*>(BinOp));
+}
+
+
+// swapOperands - Exchange the two operands of this instruction. This is safe
+// to call on any binary instruction and does not modify its semantics: if the
+// operation is not commutative, the operands are left untouched and true is
+// returned to signal failure.
+//
+bool BinaryOperator::swapOperands() {
+ if (!isCommutative())
+ return true; // Can't commute operands
+ Op<0>().swap(Op<1>());
+ return false;
+}
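+// Example: on "add %a, %b", swapOperands() exchanges %a and %b and returns
+// false; on "sub %a, %b" it returns true and leaves the operands alone,
+// because subtraction is not commutative.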
+
+void BinaryOperator::setHasNoUnsignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(b);
+}
+
+void BinaryOperator::setHasNoSignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(b);
+}
+
+void BinaryOperator::setIsExact(bool b) {
+ cast<PossiblyExactOperator>(this)->setIsExact(b);
+}
+
+bool BinaryOperator::hasNoUnsignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
+}
+
+bool BinaryOperator::hasNoSignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
+}
+
+bool BinaryOperator::isExact() const {
+ return cast<PossiblyExactOperator>(this)->isExact();
+}
+
+//===----------------------------------------------------------------------===//
+// CastInst Class
+//===----------------------------------------------------------------------===//
+
+// Just determine if this cast only deals with integral->integral conversion.
+bool CastInst::isIntegerCast() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
+ return true;
+ case Instruction::BitCast:
+ return getOperand(0)->getType()->isIntegerTy() &&
+ getType()->isIntegerTy();
+ }
+}
+
+bool CastInst::isLosslessCast() const {
+ // Only BitCast can be lossless; exit fast if we're not BitCast.
+ if (getOpcode() != Instruction::BitCast)
+ return false;
+
+ // Identity cast is always lossless
+ const Type* SrcTy = getOperand(0)->getType();
+ const Type* DstTy = getType();
+ if (SrcTy == DstTy)
+ return true;
+
+ // Pointer to pointer is always lossless.
+ if (SrcTy->isPointerTy())
+ return DstTy->isPointerTy();
+ return false; // Other types have no identity values
+}
+
+/// This function determines if the CastInst does not require any bits to be
+/// changed in order to effect the cast. Essentially, it identifies cases where
+/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// example, the following are all no-op casts:
+/// # bitcast i32* %x to i8*
+/// # bitcast <2 x i32> %x to <4 x i16>
+/// # ptrtoint i32* %x to i32 ; on 32-bit platforms only
+/// @brief Determine if the described cast is a no-op.
+bool CastInst::isNoopCast(Instruction::CastOps Opcode,
+ const Type *SrcTy,
+ const Type *DestTy,
+ const Type *IntPtrTy) {
+ switch (Opcode) {
+ default:
+ assert(!"Invalid CastOp");
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false; // These always modify bits
+ case Instruction::BitCast:
+ return true; // BitCast never modifies bits.
+ case Instruction::PtrToInt:
+ return IntPtrTy->getScalarSizeInBits() ==
+ DestTy->getScalarSizeInBits();
+ case Instruction::IntToPtr:
+ return IntPtrTy->getScalarSizeInBits() ==
+ SrcTy->getScalarSizeInBits();
+ }
+}
+
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(const Type *IntPtrTy) const {
+ return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
+
+/// This function determines if a pair of casts can be eliminated and what
+/// opcode should be used in the elimination. This assumes that there are two
+/// instructions like this:
+/// * %F = firstOpcode SrcTy %x to MidTy
+/// * %S = secondOpcode MidTy %F to DstTy
+/// The function returns a resultOpcode so these two casts can be replaced with:
+/// * %Replacement = resultOpcode SrcTy %x to DstTy
+/// If no such cast is permitted, the function returns 0.
+unsigned CastInst::isEliminableCastPair(
+ Instruction::CastOps firstOp, Instruction::CastOps secondOp,
+ const Type *SrcTy, const Type *MidTy, const Type *DstTy, const Type *IntPtrTy)
+{
+ // Define the 144 possibilities for these two cast instructions. The values
+ // in this matrix determine what to do in a given situation and select the
+ // case in the switch below. The rows correspond to firstOp, the columns
+ // correspond to secondOp. In looking at the table below, keep in mind
+ // the following cast properties:
+ //
+ // Size Compare Source Destination
+ // Operator Src ? Size Type Sign Type Sign
+ // -------- ------------ ------------------- ---------------------
+ // TRUNC > Integer Any Integral Any
+ // ZEXT < Integral Unsigned Integer Any
+ // SEXT < Integral Signed Integer Any
+ // FPTOUI n/a FloatPt n/a Integral Unsigned
+ // FPTOSI n/a FloatPt n/a Integral Signed
+ // UITOFP n/a Integral Unsigned FloatPt n/a
+ // SITOFP n/a Integral Signed FloatPt n/a
+ // FPTRUNC > FloatPt n/a FloatPt n/a
+ // FPEXT < FloatPt n/a FloatPt n/a
+ // PTRTOINT n/a Pointer n/a Integral Unsigned
+ // INTTOPTR n/a Integral Unsigned Pointer n/a
+ // BITCAST = FirstClass n/a FirstClass n/a
+ //
+ // NOTE: some transforms are safe, but we consider them to be non-profitable.
+ // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
+ // into "fptoui double to i64", but this loses information about the range
+ // of the produced value (we no longer know the top-part is all zeros).
+ // Further this conversion is often much more expensive for typical hardware,
+ // and causes issues when building libgcc. We disallow fptosi+sext for the
+ // same reason.
+ const unsigned numCastOps =
+ Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+ static const uint8_t CastResults[numCastOps][numCastOps] = {
+ // T F F U S F F P I B -+
+ // R Z S P P I I T P 2 N T |
+ // U E E 2 2 2 2 R E I T C +- secondOp
+ // N X X U S F F N X N 2 V |
+ // C T T I I P P C T T P T -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
+ { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
+ { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
+ { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
+ { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
+ };
+
+ // If either of the casts is a bitcast between a scalar and a vector type,
+ // disallow the merging.
+ if ((firstOp == Instruction::BitCast &&
+ isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (secondOp == Instruction::BitCast &&
+ isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+ return 0; // Disallowed
+
+ int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
+ [secondOp-Instruction::CastOpsBegin];
+ switch (ElimCase) {
+ case 0:
+ // categorically disallowed
+ return 0;
+ case 1:
+ // allowed, use first cast's opcode
+ return firstOp;
+ case 2:
+ // allowed, use second cast's opcode
+ return secondOp;
+ case 3:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is integer and we are not converting between a vector and a
+ // non-vector type.
+ if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
+ return firstOp;
+ return 0;
+ case 4:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is floating point.
+ if (DstTy->isFloatingPointTy())
+ return firstOp;
+ return 0;
+ case 5:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is an integer.
+ if (SrcTy->isIntegerTy())
+ return secondOp;
+ return 0;
+ case 6:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is a floating point.
+ if (SrcTy->isFloatingPointTy())
+ return secondOp;
+ return 0;
+ case 7: {
+ // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
+ if (!IntPtrTy)
+ return 0;
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned MidSize = MidTy->getScalarSizeInBits();
+ if (MidSize >= PtrSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 8: {
+ // ext, trunc -> bitcast, if the SrcTy and DstTy are same size
+ // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
+ // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize == DstSize)
+ return Instruction::BitCast;
+ else if (SrcSize < DstSize)
+ return firstOp;
+ return secondOp;
+ }
+ case 9: // zext, sext -> zext, because sext can't sign extend after zext
+ return Instruction::ZExt;
+ case 10:
+ // fpext followed by fptrunc is allowed if the destination type is the
+ // same as the original source type, in which case it's just a bitcast.
+ if (SrcTy == DstTy)
+ return Instruction::BitCast;
+ return 0; // If the types are not the same we can't eliminate it.
+ case 11:
+ // bitcast followed by ptrtoint is allowed as long as the bitcast
+ // is a pointer to pointer cast.
+ if (SrcTy->isPointerTy() && MidTy->isPointerTy())
+ return secondOp;
+ return 0;
+ case 12:
+ // inttoptr, bitcast -> inttoptr if the bitcast is a ptr-to-ptr cast
+ if (MidTy->isPointerTy() && DstTy->isPointerTy())
+ return firstOp;
+ return 0;
+ case 13: {
+ // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ if (!IntPtrTy)
+ return 0;
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize <= PtrSize && SrcSize == DstSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 99:
+ // cast combination can't happen (error in input). This is for all cases
+ // where the MidTy is not the same for the two cast instructions.
+ assert(!"Invalid Cast Combination");
+ return 0;
+ default:
+ assert(!"Error in CastResults table!!!");
+ return 0;
+ }
+ return 0;
+}
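+// Worked example: "zext i16 %x to i32" followed by "trunc i32 %y to i8" hits
+// case 8 with SrcSize(16) > DstSize(8), so the pair folds to a single
+// "trunc i16 %x to i8"; if the final destination were i16 instead, the sizes
+// would match and the pair would fold to a bitcast. By contrast, the FPToUI
+// row holds 0 in the ZExt column, so fptoui+zext is never merged, matching
+// the profitability note above.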
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+ const Twine &Name, Instruction *InsertBefore) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
+ case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
+ default:
+ assert(!"Invalid opcode provided");
+ }
+ return 0;
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
+ case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
+ default:
+ assert(!"Invalid opcode provided");
+ }
+ return 0;
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+}
+
+/// @brief Create a BitCast or a PtrToInt cast instruction
+CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+ bool isSigned, const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid integer cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+ bool isSigned, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
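+// Example: casting an i8 value to i32 selects SExt when isSigned is true and
+// ZExt otherwise; i64 to i32 selects Trunc; i32 to i32 degenerates to a
+// BitCast.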
+
+CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
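+// Example: float to double selects FPExt, double to float selects FPTrunc,
+// and equal-width source and destination types fall back to BitCast.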
+
+// Check whether it is valid to call getCastOpcode for these types.
+// This routine must be kept in sync with getCastOpcode.
+bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
+ if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
+ return false;
+
+ if (SrcTy == DestTy)
+ return true;
+
+ if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Valid if casting the elements is valid.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return SrcTy->isPointerTy();
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isVectorTy()) { // Casting to vector
+ return DestBits == SrcBits;
+ } else if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) { // Casting from pointer
+ return true;
+ } else if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ return DestBits == SrcBits; // 64-bit vector to MMX
+ } else {
+ return false;
+ }
+ } else { // Casting to something else
+ return false;
+ }
+}
+
+// Provide a way to get a "cast" where the cast opcode is inferred from the
+// types and size of the operand. This, basically, is a parallel of the
+// logic in the castIsValid function below. The following should always hold:
+// castIsValid(getCastOpcode(Val, ..., Ty, ...), Val, Ty) == true.
+// In other words, this produces a "correct"
+// casting opcode for the arguments passed to it.
+// This routine must be kept in sync with isCastable.
+Instruction::CastOps
+CastInst::getCastOpcode(
+ const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) {
+ const Type *SrcTy = Src->getType();
+
+ assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
+ "Only first class types are castable!");
+
+ if (SrcTy == DestTy)
+ return BitCast;
+
+ if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Find the appropriate opcode based on the
+ // element types.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (DestBits < SrcBits)
+ return Trunc; // int -> smaller int
+ else if (DestBits > SrcBits) { // its an extension
+ if (SrcIsSigned)
+ return SExt; // signed -> SEXT
+ else
+ return ZExt; // unsigned -> ZEXT
+ } else {
+ return BitCast; // Same size, No-op cast
+ }
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestIsSigned)
+ return FPToSI; // FP -> sint
+ else
+ return FPToUI; // FP -> uint
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to integer of different width");
+ return BitCast; // Same size, no-op cast
+ } else {
+ assert(SrcTy->isPointerTy() &&
+ "Casting from a value that is not first-class type");
+ return PtrToInt; // ptr -> int
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (SrcIsSigned)
+ return SIToFP; // sint -> FP
+ else
+ return UIToFP; // uint -> FP
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestBits < SrcBits) {
+ return FPTrunc; // FP -> smaller FP
+ } else if (DestBits > SrcBits) {
+ return FPExt; // FP -> larger FP
+ } else {
+ return BitCast; // same size, no-op cast
+ }
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to floating point of different width");
+ return BitCast; // same size, no-op cast
+ } else {
+ llvm_unreachable("Casting pointer or non-first class to float");
+ }
+ } else if (DestTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Illegal cast to vector (wrong type or size)");
+ return BitCast;
+ } else if (DestTy->isPointerTy()) {
+ if (SrcTy->isPointerTy()) {
+ return BitCast; // ptr -> ptr
+ } else if (SrcTy->isIntegerTy()) {
+ return IntToPtr; // int -> ptr
+ } else {
+ assert(!"Casting pointer to other than pointer or int");
+ }
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
+ return BitCast; // 64-bit vector to MMX
+ } else {
+ assert(!"Illegal cast to X86_MMX");
+ }
+ } else {
+ assert(!"Casting to type that is not first-class");
+ }
+
+ // If we fall through to here we probably hit an assertion cast above
+ // and assertions are not turned on. Anything we return is an error, so
+ // BitCast is as good a choice as any.
+ return BitCast;
+}
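+// Example: an i8* source with an i64 destination yields PtrToInt; an i32
+// source with SrcIsSigned set and a double destination yields SIToFP; and
+// <2 x i32> -> <2 x float> is resolved element-wise, giving SIToFP or UIToFP
+// depending on SrcIsSigned.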
+
+//===----------------------------------------------------------------------===//
+// CastInst SubClass Constructors
+//===----------------------------------------------------------------------===//
+
+/// Check that the construction parameters for a CastInst are correct. This
+/// could be broken out into the separate constructors but it is useful to have
+/// it in one place and to eliminate the redundant code for getting the sizes
+/// of the types involved.
+bool
+CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
+
+ // Check for type sanity on the arguments
+ const Type *SrcTy = S->getType();
+ if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
+ SrcTy->isAggregateType() || DstTy->isAggregateType())
+ return false;
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DstBitSize = DstTy->getScalarSizeInBits();
+
+ // If these are vector types, get the lengths of the vectors (using zero for
+ // scalar types means that checking that vector lengths match also checks that
+ // scalars are not being converted to vectors or vectors to scalars).
+ unsigned SrcLength = SrcTy->isVectorTy() ?
+ cast<VectorType>(SrcTy)->getNumElements() : 0;
+ unsigned DstLength = DstTy->isVectorTy() ?
+ cast<VectorType>(DstTy)->getNumElements() : 0;
+
+ // Switch on the opcode provided
+ switch (op) {
+ default: return false; // This is an input error
+ case Instruction::Trunc:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::ZExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::SExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::FPTrunc:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::FPExt:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::PtrToInt:
+ return SrcTy->isPointerTy() && DstTy->isIntegerTy();
+ case Instruction::IntToPtr:
+ return SrcTy->isIntegerTy() && DstTy->isPointerTy();
+ case Instruction::BitCast:
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ if (SrcTy->isPointerTy() != DstTy->isPointerTy())
+ return false;
+
+ // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
+ // these cases, the cast is okay if the source and destination bit widths
+ // are identical.
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
+ }
+}
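+// Example: "trunc <4 x i32> to <4 x i16>" is accepted (matching vector
+// lengths, 32 > 16 bits), "trunc <4 x i32> to <2 x i16>" is rejected because
+// the vector lengths differ, and "bitcast i32* to i64" is rejected because a
+// pointer may only be bitcast to another pointer.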
+
+TruncInst::TruncInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+TruncInst::TruncInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+SExtInst::SExtInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+SExtInst::SExtInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+//===----------------------------------------------------------------------===//
+// CmpInst Classes
+//===----------------------------------------------------------------------===//
+
+void CmpInst::Anchor() const {}
+
+CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate,
+ Value *S1, Value *S2,
+ const Twine &Name, Instruction *InsertBefore) {
+ if (Op == Instruction::ICmp) {
+ if (InsertBefore)
+ return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new ICmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+
+ if (InsertBefore)
+ return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new FCmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ if (Op == Instruction::ICmp) {
+ return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+ return new FCmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
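+
+// A minimal usage sketch (illustrative only; LHS, RHS, X, Y, BB and IP are
+// assumed to be supplied by the caller):
+//
+//   // Signed less-than, appended to the end of basic block BB:
+//   CmpInst *Lt = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SLT,
+//                                 LHS, RHS, "lt", BB);
+//   // Ordered floating-point equality, inserted before instruction IP:
+//   CmpInst *Eq = CmpInst::Create(Instruction::FCmp, FCmpInst::FCMP_OEQ,
+//                                 X, Y, "eq", IP);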
+
+void CmpInst::swapOperands() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ IC->swapOperands();
+ else
+ cast<FCmpInst>(this)->swapOperands();
+}
+
+bool CmpInst::isCommutative() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isCommutative();
+ return cast<FCmpInst>(this)->isCommutative();
+}
+
+bool CmpInst::isEquality() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isEquality();
+ return cast<FCmpInst>(this)->isEquality();
+}
+
+
+CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(!"Unknown cmp predicate!");
+ case ICMP_EQ: return ICMP_NE;
+ case ICMP_NE: return ICMP_EQ;
+ case ICMP_UGT: return ICMP_ULE;
+ case ICMP_ULT: return ICMP_UGE;
+ case ICMP_UGE: return ICMP_ULT;
+ case ICMP_ULE: return ICMP_UGT;
+ case ICMP_SGT: return ICMP_SLE;
+ case ICMP_SLT: return ICMP_SGE;
+ case ICMP_SGE: return ICMP_SLT;
+ case ICMP_SLE: return ICMP_SGT;
+
+ case FCMP_OEQ: return FCMP_UNE;
+ case FCMP_ONE: return FCMP_UEQ;
+ case FCMP_OGT: return FCMP_ULE;
+ case FCMP_OLT: return FCMP_UGE;
+ case FCMP_OGE: return FCMP_ULT;
+ case FCMP_OLE: return FCMP_UGT;
+ case FCMP_UEQ: return FCMP_ONE;
+ case FCMP_UNE: return FCMP_OEQ;
+ case FCMP_UGT: return FCMP_OLE;
+ case FCMP_ULT: return FCMP_OGE;
+ case FCMP_UGE: return FCMP_OLT;
+ case FCMP_ULE: return FCMP_OGT;
+ case FCMP_ORD: return FCMP_UNO;
+ case FCMP_UNO: return FCMP_ORD;
+ case FCMP_TRUE: return FCMP_FALSE;
+ case FCMP_FALSE: return FCMP_TRUE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ return pred;
+ case ICMP_UGT: return ICMP_SGT;
+ case ICMP_ULT: return ICMP_SLT;
+ case ICMP_UGE: return ICMP_SGE;
+ case ICMP_ULE: return ICMP_SLE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ return pred;
+ case ICMP_SGT: return ICMP_UGT;
+ case ICMP_SLT: return ICMP_ULT;
+ case ICMP_SGE: return ICMP_UGE;
+ case ICMP_SLE: return ICMP_ULE;
+ }
+}
+
+/// makeConstantRange - Return the ConstantRange of values X for which the
+/// comparison (X pred C) is true.
+///
+ConstantRange
+ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
+ APInt Lower(C);
+ APInt Upper(C);
+ uint32_t BitWidth = C.getBitWidth();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
+ case ICmpInst::ICMP_EQ: Upper++; break;
+ case ICmpInst::ICMP_NE: Lower++; break;
+ case ICmpInst::ICMP_ULT:
+ Lower = APInt::getMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SLT:
+ Lower = APInt::getSignedMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_UGT:
+ Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SGT:
+ Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_ULE:
+ Lower = APInt::getMinValue(BitWidth); Upper++;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SLE:
+ Lower = APInt::getSignedMinValue(BitWidth); Upper++;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_UGE:
+ Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SGE:
+ Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ }
+ return ConstantRange(Lower, Upper);
+}
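+
+// Worked example (illustrative, assuming an 8-bit constant C == 10):
+//   ICMP_ULT  ->  [0, 10)      unsigned 0..9
+//   ICMP_SGT  ->  [11, -128)   signed 11..127 (wrapped range)
+//   ICMP_NE   ->  [11, 10)     every value except 10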
+
+CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(!"Unknown cmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ return pred;
+ case ICMP_SGT: return ICMP_SLT;
+ case ICMP_SLT: return ICMP_SGT;
+ case ICMP_SGE: return ICMP_SLE;
+ case ICMP_SLE: return ICMP_SGE;
+ case ICMP_UGT: return ICMP_ULT;
+ case ICMP_ULT: return ICMP_UGT;
+ case ICMP_UGE: return ICMP_ULE;
+ case ICMP_ULE: return ICMP_UGE;
+
+ case FCMP_FALSE: case FCMP_TRUE:
+ case FCMP_OEQ: case FCMP_ONE:
+ case FCMP_UEQ: case FCMP_UNE:
+ case FCMP_ORD: case FCMP_UNO:
+ return pred;
+ case FCMP_OGT: return FCMP_OLT;
+ case FCMP_OLT: return FCMP_OGT;
+ case FCMP_OGE: return FCMP_OLE;
+ case FCMP_OLE: return FCMP_OGE;
+ case FCMP_UGT: return FCMP_ULT;
+ case FCMP_ULT: return FCMP_UGT;
+ case FCMP_UGE: return FCMP_ULE;
+ case FCMP_ULE: return FCMP_UGE;
+ }
+}
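+
+// Note the difference between the inverse and swapped forms (for example):
+//   getInversePredicate(ICMP_SLT) == ICMP_SGE   // !(a < b)  <=>  a >= b
+//   getSwappedPredicate(ICMP_SLT) == ICMP_SGT   //  (a < b)  <=>  b > a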
+
+bool CmpInst::isUnsigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return true;
+ }
+}
+
+bool CmpInst::isSigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return true;
+ }
+}
+
+bool CmpInst::isOrdered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_ORD: return true;
+ }
+}
+
+bool CmpInst::isUnordered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UNO: return true;
+ }
+}
+
+bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ default: return false;
+ case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
+ case FCMP_TRUE: case FCMP_UEQ: case FCMP_UGE: case FCMP_ULE: return true;
+ }
+}
+
+bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
+ case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
+ default: return false;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// SwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
+ assert(Value && Default && NumReserved);
+ ReservedSpace = NumReserved;
+ NumOperands = 2;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Value;
+ OperandList[1] = Default;
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor can also autoinsert before another instruction.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertBefore) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor also autoinserts at the end of the specified BasicBlock.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertAtEnd) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+SwitchInst::SwitchInst(const SwitchInst &SI)
+ : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+ init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
+ NumOperands = SI.getNumOperands();
+ Use *OL = OperandList, *InOL = SI.OperandList;
+ for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
+ OL[i] = InOL[i];
+ OL[i+1] = InOL[i+1];
+ }
+ SubclassOptionalData = SI.SubclassOptionalData;
+}
+
+SwitchInst::~SwitchInst() {
+ dropHungoffUses();
+}
+
+
+/// addCase - Add an entry to the switch instruction...
+///
+void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+2 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+2;
+ OperandList[OpNo] = OnVal;
+ OperandList[OpNo+1] = Dest;
+}
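+
+// A minimal usage sketch (illustrative; Cond, DefaultBB, CaseBB, BB and Ctx
+// are assumed to exist, and SwitchInst::Create is declared in Instructions.h):
+//
+//   SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, /*NumCases=*/1, BB);
+//   SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 42), CaseBB);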
+
+/// removeCase - This method removes the specified successor from the switch
+/// instruction. Note that this cannot be used to remove the default
+/// destination (successor #0).
+///
+void SwitchInst::removeCase(unsigned idx) {
+ assert(idx != 0 && "Cannot remove the default case!");
+ assert(idx*2 < getNumOperands() && "Successor index out of range!!!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Overwrite this case with the end of the list.
+ if ((idx + 1) * 2 != NumOps) {
+ OL[idx * 2] = OL[NumOps - 2];
+ OL[idx * 2 + 1] = OL[NumOps - 1];
+ }
+
+ // Nuke the last value.
+ OL[NumOps-2].set(0);
+ OL[NumOps-2+1].set(0);
+ NumOperands = NumOps-2;
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, tripling the number of reserved operand slots.
+///
+void SwitchInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*3;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i) {
+ NewOps[i] = OldOps[i];
+ }
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+
+BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned SwitchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void IndirectBrInst::init(Value *Address, unsigned NumDests) {
+ assert(Address && Address->getType()->isPointerTy() &&
+ "Address of indirectbr must be a pointer");
+ ReservedSpace = 1+NumDests;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Address;
+}
+
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, doubling the number of reserved operand slots.
+///
+void IndirectBrInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*2;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ Instruction *InsertBefore)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertBefore) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertAtEnd) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
+ : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ allocHungoffUses(IBI.getNumOperands()),
+ IBI.getNumOperands()) {
+ Use *OL = OperandList, *InOL = IBI.OperandList;
+ for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
+ OL[i] = InOL[i];
+ SubclassOptionalData = IBI.SubclassOptionalData;
+}
+
+IndirectBrInst::~IndirectBrInst() {
+ dropHungoffUses();
+}
+
+/// addDestination - Add a destination.
+///
+void IndirectBrInst::addDestination(BasicBlock *DestBB) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+1 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+1;
+ OperandList[OpNo] = DestBB;
+}
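+
+// A minimal usage sketch (illustrative; Address is assumed to be a pointer
+// value, Dest1/Dest2 existing blocks, and IndirectBrInst::Create is declared
+// in Instructions.h):
+//
+//   IndirectBrInst *IBI = IndirectBrInst::Create(Address, /*NumDests=*/2, BB);
+//   IBI->addDestination(Dest1);
+//   IBI->addDestination(Dest2);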
+
+/// removeDestination - This method removes the specified successor from the
+/// indirectbr instruction.
+void IndirectBrInst::removeDestination(unsigned idx) {
+ assert(idx < getNumOperands()-1 && "Successor index out of range!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Replace this value with the last one.
+ OL[idx+1] = OL[NumOps-1];
+
+ // Nuke the last value.
+ OL[NumOps-1].set(0);
+ NumOperands = NumOps-1;
+}
+
+BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned IndirectBrInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// clone_impl() implementations
+//===----------------------------------------------------------------------===//
+
+// Define these methods here so vtables don't get emitted into every translation
+// unit that uses these classes.
+
+GetElementPtrInst *GetElementPtrInst::clone_impl() const {
+ return new (getNumOperands()) GetElementPtrInst(*this);
+}
+
+BinaryOperator *BinaryOperator::clone_impl() const {
+ return Create(getOpcode(), Op<0>(), Op<1>());
+}
+
+FCmpInst* FCmpInst::clone_impl() const {
+ return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ICmpInst* ICmpInst::clone_impl() const {
+ return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ExtractValueInst *ExtractValueInst::clone_impl() const {
+ return new ExtractValueInst(*this);
+}
+
+InsertValueInst *InsertValueInst::clone_impl() const {
+ return new InsertValueInst(*this);
+}
+
+AllocaInst *AllocaInst::clone_impl() const {
+ return new AllocaInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
+}
+
+LoadInst *LoadInst::clone_impl() const {
+ return new LoadInst(getOperand(0),
+ Twine(), isVolatile(),
+ getAlignment());
+}
+
+StoreInst *StoreInst::clone_impl() const {
+ return new StoreInst(getOperand(0), getOperand(1),
+ isVolatile(), getAlignment());
+}
+
+TruncInst *TruncInst::clone_impl() const {
+ return new TruncInst(getOperand(0), getType());
+}
+
+ZExtInst *ZExtInst::clone_impl() const {
+ return new ZExtInst(getOperand(0), getType());
+}
+
+SExtInst *SExtInst::clone_impl() const {
+ return new SExtInst(getOperand(0), getType());
+}
+
+FPTruncInst *FPTruncInst::clone_impl() const {
+ return new FPTruncInst(getOperand(0), getType());
+}
+
+FPExtInst *FPExtInst::clone_impl() const {
+ return new FPExtInst(getOperand(0), getType());
+}
+
+UIToFPInst *UIToFPInst::clone_impl() const {
+ return new UIToFPInst(getOperand(0), getType());
+}
+
+SIToFPInst *SIToFPInst::clone_impl() const {
+ return new SIToFPInst(getOperand(0), getType());
+}
+
+FPToUIInst *FPToUIInst::clone_impl() const {
+ return new FPToUIInst(getOperand(0), getType());
+}
+
+FPToSIInst *FPToSIInst::clone_impl() const {
+ return new FPToSIInst(getOperand(0), getType());
+}
+
+PtrToIntInst *PtrToIntInst::clone_impl() const {
+ return new PtrToIntInst(getOperand(0), getType());
+}
+
+IntToPtrInst *IntToPtrInst::clone_impl() const {
+ return new IntToPtrInst(getOperand(0), getType());
+}
+
+BitCastInst *BitCastInst::clone_impl() const {
+ return new BitCastInst(getOperand(0), getType());
+}
+
+CallInst *CallInst::clone_impl() const {
+ return new(getNumOperands()) CallInst(*this);
+}
+
+SelectInst *SelectInst::clone_impl() const {
+ return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
+}
+
+VAArgInst *VAArgInst::clone_impl() const {
+ return new VAArgInst(getOperand(0), getType());
+}
+
+ExtractElementInst *ExtractElementInst::clone_impl() const {
+ return ExtractElementInst::Create(getOperand(0), getOperand(1));
+}
+
+InsertElementInst *InsertElementInst::clone_impl() const {
+ return InsertElementInst::Create(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+}
+
+ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
+ return new ShuffleVectorInst(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+}
+
+PHINode *PHINode::clone_impl() const {
+ return new PHINode(*this);
+}
+
+ReturnInst *ReturnInst::clone_impl() const {
+ return new(getNumOperands()) ReturnInst(*this);
+}
+
+BranchInst *BranchInst::clone_impl() const {
+ return new(getNumOperands()) BranchInst(*this);
+}
+
+SwitchInst *SwitchInst::clone_impl() const {
+ return new SwitchInst(*this);
+}
+
+IndirectBrInst *IndirectBrInst::clone_impl() const {
+ return new IndirectBrInst(*this);
+}
+
+
+InvokeInst *InvokeInst::clone_impl() const {
+ return new(getNumOperands()) InvokeInst(*this);
+}
+
+UnwindInst *UnwindInst::clone_impl() const {
+ LLVMContext &Context = getContext();
+ return new UnwindInst(Context);
+}
+
+UnreachableInst *UnreachableInst::clone_impl() const {
+ LLVMContext &Context = getContext();
+ return new UnreachableInst(Context);
+}
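+
+// These hooks are reached through the public Instruction::clone() entry
+// point; a minimal sketch (I and BB assumed to exist; the clone is unnamed
+// and parentless until the caller inserts it):
+//
+//   Instruction *Copy = I->clone();
+//   BB->getInstList().push_back(Copy);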
diff --git a/contrib/llvm/lib/VMCore/IntrinsicInst.cpp b/contrib/llvm/lib/VMCore/IntrinsicInst.cpp
new file mode 100644
index 000000000000..ac8ec2086b18
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/IntrinsicInst.cpp
@@ -0,0 +1,73 @@
+//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods that make it really easy to deal with intrinsic
+// functions.
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of this gross/neat
+// hack working.
+//
+// In some cases, arguments to intrinsics need to be generic and are given the
+// type { }* (pointer to an empty struct). To access the real item of interest,
+// the cast instruction needs to be stripped away.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Metadata.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+///
+
+static Value *CastOperand(Value *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->isCast())
+ return CE->getOperand(0);
+ return NULL;
+}
+
+Value *DbgInfoIntrinsic::StripCast(Value *C) {
+ if (Value *CO = CastOperand(C)) {
+ C = StripCast(CO);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasInitializer())
+ if (Value *CO = CastOperand(GV->getInitializer()))
+ C = StripCast(CO);
+ }
+ return dyn_cast<GlobalVariable>(C);
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
+///
+
+Value *DbgDeclareInst::getAddress() const {
+ if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
+ return MD->getOperand(0);
+ else
+ return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgValueInst - This represents the llvm.dbg.value instruction.
+///
+
+const Value *DbgValueInst::getValue() const {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
+
+Value *DbgValueInst::getValue() {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
diff --git a/contrib/llvm/lib/VMCore/LLVMContext.cpp b/contrib/llvm/lib/VMCore/LLVMContext.cpp
new file mode 100644
index 000000000000..ebd1e0aa1b0f
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LLVMContext.cpp
@@ -0,0 +1,147 @@
+//===-- LLVMContext.cpp - Implement LLVMContext -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LLVMContext, as a wrapper around the opaque
+// class LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Constants.h"
+#include "llvm/Instruction.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/SourceMgr.h"
+#include "LLVMContextImpl.h"
+#include <cctype>
+using namespace llvm;
+
+static ManagedStatic<LLVMContext> GlobalContext;
+
+LLVMContext& llvm::getGlobalContext() {
+ return *GlobalContext;
+}
+
+LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
+ // Create the fixed metadata kinds. This is done in the same order as the
+ // MD_* enum values so that they correspond.
+
+ // Create the 'dbg' metadata kind.
+ unsigned DbgID = getMDKindID("dbg");
+ assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+
+ // Create the 'tbaa' metadata kind.
+ unsigned TBAAID = getMDKindID("tbaa");
+ assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
+
+ // Create the 'prof' metadata kind.
+ unsigned ProfID = getMDKindID("prof");
+ assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
+}
+LLVMContext::~LLVMContext() { delete pImpl; }
+
+void LLVMContext::addModule(Module *M) {
+ pImpl->OwnedModules.insert(M);
+}
+
+void LLVMContext::removeModule(Module *M) {
+ pImpl->OwnedModules.erase(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Recoverable Backend Errors
+//===----------------------------------------------------------------------===//
+
+void LLVMContext::
+setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+ void *DiagContext) {
+ pImpl->InlineAsmDiagHandler = DiagHandler;
+ pImpl->InlineAsmDiagContext = DiagContext;
+}
+
+/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
+/// setInlineAsmDiagnosticHandler.
+LLVMContext::InlineAsmDiagHandlerTy
+LLVMContext::getInlineAsmDiagnosticHandler() const {
+ return pImpl->InlineAsmDiagHandler;
+}
+
+/// getInlineAsmDiagnosticContext - Return the diagnostic context set by
+/// setInlineAsmDiagnosticHandler.
+void *LLVMContext::getInlineAsmDiagnosticContext() const {
+ return pImpl->InlineAsmDiagContext;
+}
+
+void LLVMContext::emitError(StringRef ErrorStr) {
+ emitError(0U, ErrorStr);
+}
+
+void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
+ unsigned LocCookie = 0;
+ if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
+ if (SrcLoc->getNumOperands() != 0)
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
+ LocCookie = CI->getZExtValue();
+ }
+ return emitError(LocCookie, ErrorStr);
+}
+
+void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
+ // If there is no error handler installed, just print the error and exit.
+ if (pImpl->InlineAsmDiagHandler == 0) {
+ errs() << "error: " << ErrorStr << "\n";
+ exit(1);
+ }
+
+ // If we do have an error handler, we can report the error and keep going.
+ SMDiagnostic Diag("", "error: " + ErrorStr.str());
+
+ pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
+}
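+
+// A minimal handler sketch (illustrative; the signature follows
+// InlineAsmDiagHandlerTy):
+//
+//   static void handleAsmDiag(const SMDiagnostic &D, void *Ctx, unsigned Loc) {
+//     // record or report D; Loc is the !srcloc cookie passed in above.
+//   }
+//   ...
+//   Context.setInlineAsmDiagnosticHandler(handleAsmDiag, /*DiagContext=*/0);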
+
+//===----------------------------------------------------------------------===//
+// Metadata Kind Uniquing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+/// isValidName - Return true if Name is a valid custom metadata kind name.
+static bool isValidName(StringRef MDName) {
+ if (MDName.empty())
+ return false;
+
+ if (!std::isalpha(MDName[0]))
+ return false;
+
+ for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
+ ++I) {
+ if (!std::isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
+ return false;
+ }
+ return true;
+}
+#endif
+
+/// getMDKindID - Return a unique ID for the specified metadata kind.
+unsigned LLVMContext::getMDKindID(StringRef Name) const {
+ assert(isValidName(Name) && "Invalid MDNode name");
+
+ // If this is new, assign it its ID.
+ return
+ pImpl->CustomMDKindNames.GetOrCreateValue(
+ Name, pImpl->CustomMDKindNames.size()).second;
+}
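+
+// A minimal usage sketch (illustrative; Ctx, an instruction I and an MDNode N
+// are assumed, and "my.note" is a hypothetical custom kind name):
+//
+//   unsigned KindID = Ctx.getMDKindID("my.note");
+//   I->setMetadata(KindID, N);              // attach
+//   MDNode *Back = I->getMetadata(KindID);  // retrieve, 0 if absent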
+
+/// getMDKindNames - Populate the client-supplied SmallVector with the custom
+/// metadata kind names, indexed by kind ID.
+void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
+ Names.resize(pImpl->CustomMDKindNames.size());
+ for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
+ E = pImpl->CustomMDKindNames.end(); I != E; ++I)
+ Names[I->second] = I->first();
+}
diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp b/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
new file mode 100644
index 000000000000..504b37267f70
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
@@ -0,0 +1,94 @@
+//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the opaque LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
+ : TheTrueVal(0), TheFalseVal(0),
+ VoidTy(C, Type::VoidTyID),
+ LabelTy(C, Type::LabelTyID),
+ FloatTy(C, Type::FloatTyID),
+ DoubleTy(C, Type::DoubleTyID),
+ MetadataTy(C, Type::MetadataTyID),
+ X86_FP80Ty(C, Type::X86_FP80TyID),
+ FP128Ty(C, Type::FP128TyID),
+ PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ X86_MMXTy(C, Type::X86_MMXTyID),
+ Int1Ty(C, 1),
+ Int8Ty(C, 8),
+ Int16Ty(C, 16),
+ Int32Ty(C, 32),
+ Int64Ty(C, 64) {
+ InlineAsmDiagHandler = 0;
+ InlineAsmDiagContext = 0;
+ NamedStructTypesUniqueID = 0;
+}
+
+namespace {
+struct DropReferences {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.second->dropAllReferences();
+ }
+};
+}
+
+LLVMContextImpl::~LLVMContextImpl() {
+ // NOTE: We need to delete the contents of OwnedModules, but we have to
+ // duplicate it into a temporary vector, because the destructor of Module
+ // will try to remove itself from OwnedModules set. This would cause
+ // iterator invalidation if we iterated on the set directly.
+ std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
+ DeleteContainerPointers(Modules);
+
+ std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
+ DropReferences());
+ std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
+ DropReferences());
+ std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
+ DropReferences());
+ std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
+ DropReferences());
+ ExprConstants.freeConstants();
+ ArrayConstants.freeConstants();
+ StructConstants.freeConstants();
+ VectorConstants.freeConstants();
+ AggZeroConstants.freeConstants();
+ NullPtrConstants.freeConstants();
+ UndefValueConstants.freeConstants();
+ InlineAsms.freeConstants();
+ DeleteContainerSeconds(IntConstants);
+ DeleteContainerSeconds(FPConstants);
+
+ // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
+ // and the NonUniquedMDNodes sets, so copy the values out first.
+ SmallVector<MDNode*, 8> MDNodes;
+ MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E; ++I)
+ MDNodes.push_back(&*I);
+ MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
+ for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
+ E = MDNodes.end(); I != E; ++I)
+ (*I)->destroy();
+ assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
+ "Destroying all MDNodes didn't empty the Context's sets.");
+ // Destroy MDStrings.
+ DeleteContainerSeconds(MDStringCache);
+}
diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.h b/contrib/llvm/lib/VMCore/LLVMContextImpl.h
new file mode 100644
index 000000000000..06a6f2a25a38
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.h
@@ -0,0 +1,238 @@
+//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LLVMContextImpl, the opaque implementation
+// of LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LLVMCONTEXT_IMPL_H
+#define LLVM_LLVMCONTEXT_IMPL_H
+
+#include "llvm/LLVMContext.h"
+#include "ConstantsContext.h"
+#include "LeaksContext.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
+
+namespace llvm {
+
+class ConstantInt;
+class ConstantFP;
+class LLVMContext;
+class Type;
+class Value;
+
+struct DenseMapAPIntKeyInfo {
+ struct KeyTy {
+ APInt val;
+ const Type* type;
+ KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {}
+ KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
+ bool operator==(const KeyTy& that) const {
+ return type == that.type && this->val == that.val;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return DenseMapInfo<void*>::getHashValue(Key.type) ^
+ Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct DenseMapAPFloatKeyInfo {
+ struct KeyTy {
+ APFloat val;
+ KeyTy(const APFloat& V) : val(V){}
+ KeyTy(const KeyTy& that) : val(that.val) {}
+ bool operator==(const KeyTy& that) const {
+ return this->val.bitwiseIsEqual(that.val);
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() {
+ return KeyTy(APFloat(APFloat::Bogus,1));
+ }
+ static inline KeyTy getTombstoneKey() {
+ return KeyTy(APFloat(APFloat::Bogus,2));
+ }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
+/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
+class DebugRecVH : public CallbackVH {
+ /// Ctx - This is the LLVM Context being referenced.
+ LLVMContextImpl *Ctx;
+
+ /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that
+ /// this reference lives in. If this is zero, then it represents a
+ /// non-canonical entry that has no DenseMap value. This can happen due to
+ /// RAUW.
+ int Idx;
+public:
+ DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx)
+ : CallbackVH(n), Ctx(ctx), Idx(idx) {}
+
+ MDNode *get() const {
+ return cast_or_null<MDNode>(getValPtr());
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *VNew);
+};
+
+class LLVMContextImpl {
+public:
+ /// OwnedModules - The set of modules instantiated in this context, and which
+ /// will be automatically deleted if this context is deleted.
+ SmallPtrSet<Module*, 4> OwnedModules;
+
+ LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
+ void *InlineAsmDiagContext;
+
+ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
+ DenseMapAPIntKeyInfo> IntMapTy;
+ IntMapTy IntConstants;
+
+ typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
+ DenseMapAPFloatKeyInfo> FPMapTy;
+ FPMapTy FPConstants;
+
+ StringMap<MDString*> MDStringCache;
+
+ FoldingSet<MDNode> MDNodeSet;
+ // MDNodes may be uniqued or not uniqued. When they're not uniqued, they
+ // aren't in the MDNodeSet, but they're still shared between objects, so no
+ // one object can destroy them. This set allows us to at least destroy them
+ // on Context destruction.
+ SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
+
+ ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
+
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
+ ArrayConstantsTy ArrayConstants;
+
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
+ StructConstantsTy StructConstants;
+
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ VectorType, ConstantVector> VectorConstantsTy;
+ VectorConstantsTy VectorConstants;
+
+ ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
+ NullPtrConstants;
+ ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
+
+ DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
+ ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+ ExprConstants;
+
+ ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+ InlineAsm> InlineAsms;
+
+ ConstantInt *TheTrueVal;
+ ConstantInt *TheFalseVal;
+
+ LeakDetectorImpl<Value> LLVMObjects;
+
+ // Basic type instances.
+ Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy;
+ Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+ IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+
+
+ /// TypeAllocator - All dynamically allocated types are allocated from this.
+ /// They live forever until the context is torn down.
+ BumpPtrAllocator TypeAllocator;
+
+ DenseMap<unsigned, IntegerType*> IntegerTypes;
+
+ // TODO: Optimize FunctionTypes/AnonStructTypes!
+ std::map<std::vector<Type*>, FunctionType*> FunctionTypes;
+ std::map<std::vector<Type*>, StructType*> AnonStructTypes;
+ StringMap<StructType*> NamedStructTypes;
+ unsigned NamedStructTypesUniqueID;
+
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
+ DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+ DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
+
+
+ /// ValueHandles - This map keeps track of all of the value handles that are
+ /// watching a Value*. The Value::HasValueHandle bit is used to know
+ /// whether or not a value has an entry in this map.
+ typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+ ValueHandlesTy ValueHandles;
+
+ /// CustomMDKindNames - Map to hold the metadata string to ID mapping.
+ StringMap<unsigned> CustomMDKindNames;
+
+ typedef std::pair<unsigned, TrackingVH<MDNode> > MDPairTy;
+ typedef SmallVector<MDPairTy, 2> MDMapTy;
+
+ /// MetadataStore - Collection of per-instruction metadata used in this
+ /// context.
+ DenseMap<const Instruction *, MDMapTy> MetadataStore;
+
+ /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope
+ /// entry with no "inlined at" element.
+ DenseMap<MDNode*, int> ScopeRecordIdx;
+
+ /// ScopeRecords - These are the actual mdnodes (in a value handle) for an
+ /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if
+ /// the MDNode is RAUW'd.
+ std::vector<DebugRecVH> ScopeRecords;
+
+ /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for a
+ /// scope/inlined-at pair.
+ DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx;
+
+ /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles)
+ /// for an index. The ValueHandle ensures that ScopeInlinedAtIdx stays up
+ /// to date.
+ std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
+
+ int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
+ int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
+
+ LLVMContextImpl(LLVMContext &C);
+ ~LLVMContextImpl();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/LeakDetector.cpp b/contrib/llvm/lib/VMCore/LeakDetector.cpp
new file mode 100644
index 000000000000..f6651e93e273
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LeakDetector.cpp
@@ -0,0 +1,69 @@
+//===-- LeakDetector.cpp - Implement LeakDetector interface ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LeakDetector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Value.h"
+using namespace llvm;
+
+static ManagedStatic<sys::SmartMutex<true> > ObjectsLock;
+static ManagedStatic<LeakDetectorImpl<void> > Objects;
+
+static void clearGarbage(LLVMContext &Context) {
+ Objects->clear();
+ Context.pImpl->LLVMObjects.clear();
+}
+
+void LeakDetector::addGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->addGarbage(Object);
+}
+
+void LeakDetector::addGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.addGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->removeGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.removeGarbage(Object);
+}
+
+void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
+ const std::string &Message) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+
+ Objects->setName("GENERIC");
+ pImpl->LLVMObjects.setName("LLVM");
+
+ // use non-short-circuit version so that both checks are performed
+ if (Objects->hasGarbage(Message) |
+ pImpl->LLVMObjects.hasGarbage(Message))
+ errs() << "\nThis is probably because you removed an object, but didn't "
+ << "delete it. Please check your code for memory leaks.\n";
+
+ // Clear out results so we don't get duplicate warnings on
+ // next call...
+ clearGarbage(Context);
+}
diff --git a/contrib/llvm/lib/VMCore/LeaksContext.h b/contrib/llvm/lib/VMCore/LeaksContext.h
new file mode 100644
index 000000000000..b9e59d46b7ad
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LeaksContext.h
@@ -0,0 +1,92 @@
+//===- LeaksContext.h - LeakDetector Implementation ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for leak detection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Value.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+template <class T>
+struct PrinterTrait {
+ static void print(const T* P) { errs() << P; }
+};
+
+template<>
+struct PrinterTrait<Value> {
+ static void print(const Value* P) { errs() << *P; }
+};
+
+template <typename T>
+struct LeakDetectorImpl {
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
+ // Because the most common usage pattern, by far, is to add a garbage
+ // object and then remove it immediately, we optimize this case. When an
+ // object is added, it is not inserted into the set immediately; it is held
+ // in the single-element Cache instead. If it is immediately removed, no
+ // set search needs to be performed.
+ void addGarbage(const T* o) {
+ assert(Ts.count(o) == 0 && "Object already in set!");
+ if (Cache) {
+ assert(Cache != o && "Object already in set!");
+ Ts.insert(Cache);
+ }
+ Cache = o;
+ }
+
+ void removeGarbage(const T* o) {
+ if (o == Cache)
+ Cache = 0; // Cache hit
+ else
+ Ts.erase(o);
+ }
+
+ bool hasGarbage(const std::string& Message) {
+ addGarbage(0); // Flush the Cache
+
+ assert(Cache == 0 && "No value should be cached anymore!");
+
+ if (!Ts.empty()) {
+ errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
+ for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
+ E = Ts.end(); I != E; ++I) {
+ errs() << '\t';
+ PrinterTrait<T>::print(*I);
+ errs() << '\n';
+ }
+ errs() << '\n';
+
+ return true;
+ }
+
+ return false;
+ }
+
+private:
+ SmallPtrSet<const T*, 8> Ts;
+ const T* Cache;
+ const char* Name;
+};
+
+}
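+
+// A minimal sketch of the add-then-remove fast path described above
+// (illustrative; V is an assumed Value*):
+//
+//   LeakDetectorImpl<Value> Detector("example");
+//   Detector.addGarbage(V);              // V only lands in the one-slot Cache
+//   Detector.removeGarbage(V);           // cache hit: no set search performed
+//   Detector.hasGarbage("after pass");   // reports and returns true on leaks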
diff --git a/contrib/llvm/lib/VMCore/Metadata.cpp b/contrib/llvm/lib/VMCore/Metadata.cpp
new file mode 100644
index 000000000000..ace4dc2de271
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Metadata.cpp
@@ -0,0 +1,562 @@
+//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Metadata classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Metadata.h"
+#include "LLVMContextImpl.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Instruction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MDString implementation.
+//
+
+MDString::MDString(LLVMContext &C, StringRef S)
+ : Value(Type::getMetadataTy(C), Value::MDStringVal), Str(S) {}
+
+MDString *MDString::get(LLVMContext &Context, StringRef Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ StringMapEntry<MDString *> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str);
+ MDString *&S = Entry.getValue();
+ if (!S) S = new MDString(Context, Entry.getKey());
+ return S;
+}
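+
+// A minimal usage sketch (Ctx assumed): MDStrings are uniqued per context, so
+// repeated lookups of the same text yield the identical object:
+//
+//   MDString *A = MDString::get(Ctx, "deopt");
+//   MDString *B = MDString::get(Ctx, "deopt");
+//   assert(A == B && "uniqued via MDStringCache");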
+
+//===----------------------------------------------------------------------===//
+// MDNodeOperand implementation.
+//
+
+// Use CallbackVH to hold MDNode operands.
+namespace llvm {
+class MDNodeOperand : public CallbackVH {
+ MDNode *Parent;
+public:
+ MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {}
+ ~MDNodeOperand() {}
+
+ void set(Value *V) {
+ setValPtr(V);
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *NV);
+};
+} // end namespace llvm.
+
+
+void MDNodeOperand::deleted() {
+ Parent->replaceOperand(this, 0);
+}
+
+void MDNodeOperand::allUsesReplacedWith(Value *NV) {
+ Parent->replaceOperand(this, NV);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// MDNode implementation.
+//
+
+/// getOperandPtr - Helper function to get the MDNodeOperands co-allocated at
+/// the end of the MDNode.
+static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
+ // Use <= instead of < to permit a one-past-the-end address.
+ assert(Op <= N->getNumOperands() && "Invalid operand number");
+ return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
+}
+
+MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
+: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
+ NumOperands = Vals.size();
+
+ if (isFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
+
+ // Initialize the operand list, which is co-allocated on the end of the node.
+ unsigned i = 0;
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op, ++i)
+ new (Op) MDNodeOperand(Vals[i], this);
+}
+
+
+/// ~MDNode - Destroy MDNode.
+MDNode::~MDNode() {
+ assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
+ "Not being destroyed through destroy()?");
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ if (isNotUniqued()) {
+ pImpl->NonUniquedMDNodes.erase(this);
+ } else {
+ pImpl->MDNodeSet.RemoveNode(this);
+ }
+
+ // Destroy the operands.
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op)
+ Op->~MDNodeOperand();
+}
+
+static const Function *getFunctionForValue(Value *V) {
+ if (!V) return NULL;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ BasicBlock *BB = I->getParent();
+ return BB ? BB->getParent() : 0;
+ }
+ if (Argument *A = dyn_cast<Argument>(V))
+ return A->getParent();
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent();
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getFunction();
+ return NULL;
+}
+
+#ifndef NDEBUG
+static const Function *assertLocalFunction(const MDNode *N) {
+ if (!N->isFunctionLocal()) return 0;
+
+ // FIXME: This does not handle cyclic function local metadata.
+ const Function *F = 0, *NewF = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ NewF = assertLocalFunction(MD);
+ else
+ NewF = getFunctionForValue(V);
+ }
+ if (F == 0)
+ F = NewF;
+ else
+ assert((NewF == 0 || F == NewF) &&"inconsistent function-local metadata");
+ }
+ return F;
+}
+#endif
+
+// getFunction - If this metadata is function-local and recursively has a
+// function-local operand, return the first such operand's parent function.
+// Otherwise, return null. getFunction() should not be used for performance-
+// critical code because it recursively visits all the MDNode's operands.
+const Function *MDNode::getFunction() const {
+#ifndef NDEBUG
+ return assertLocalFunction(this);
+#endif
+ if (!isFunctionLocal()) return NULL;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (const Function *F = getFunctionForValue(getOperand(i)))
+ return F;
+ return NULL;
+}
+
+// destroy - Delete this node. Only when there are no uses.
+void MDNode::destroy() {
+ setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
+ // Placement delete, then free the memory.
+ this->~MDNode();
+ free(this);
+}
+
+/// isFunctionLocalValue - Return true if this is a value that would require a
+/// function-local MDNode.
+static bool isFunctionLocalValue(Value *V) {
+ return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+ (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
+}
+
+MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
+ FunctionLocalness FL, bool Insert) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != Vals.size(); ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ MDNode *N = NULL;
+
+ if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ return N;
+
+ bool isFunctionLocal = false;
+ switch (FL) {
+ case FL_Unknown:
+ for (unsigned i = 0; i != Vals.size(); ++i) {
+ Value *V = Vals[i];
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isFunctionLocal = true;
+ break;
+ }
+ }
+ break;
+ case FL_No:
+ isFunctionLocal = false;
+ break;
+ case FL_Yes:
+ isFunctionLocal = true;
+ break;
+ }
+
+ // Co-allocate space for the node and its operands, then placement-new it.
+ void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
+ N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+
+ return N;
+}
+
+MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown);
+}
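+
+// A minimal usage sketch (illustrative; Ctx and a Value *Weight are assumed):
+// build a uniqued node from a small array of operands.
+//
+//   Value *Elts[] = { MDString::get(Ctx, "branch_weights"), Weight };
+//   MDNode *N = MDNode::get(Ctx, Elts);   // ArrayRef formed implicitly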
+
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context,
+ ArrayRef<Value*> Vals,
+ bool isFunctionLocal) {
+ return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No);
+}
+
+MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown, false);
+}
+
+MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ MDNode *N =
+ (MDNode *)malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
+ N = new (N) MDNode(Context, Vals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() |
+ NotUniquedBit);
+ LeakDetector::addGarbageObject(N);
+ return N;
+}
+
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->use_empty() && "Temporary MDNode has uses!");
+ assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
+ "Deleting a non-temporary uniqued node!");
+ assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
+ "Deleting a non-temporary non-uniqued node!");
+ assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
+ "Temporary MDNode does not have NotUniquedBit set!");
+ assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
+ "Temporary MDNode has DestroyFlag set!");
+ LeakDetector::removeGarbageObject(N);
+ N->destroy();
+}
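+
+// A minimal sketch of the temporary-node protocol (illustrative; Ctx and a
+// final node Real are assumed): forward references are built as temporaries,
+// RAUW'd once the real node exists, and then explicitly deleted.
+//
+//   MDNode *Temp = MDNode::getTemporary(Ctx, ArrayRef<Value*>());
+//   // ... hand Temp out as a placeholder operand ...
+//   Temp->replaceAllUsesWith(Real);
+//   MDNode::deleteTemporary(Temp);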
+
+/// getOperand - Return specified operand.
+Value *MDNode::getOperand(unsigned i) const {
+ return *getOperandPtr(const_cast<MDNode*>(this), i);
+}
+
+void MDNode::Profile(FoldingSetNodeID &ID) const {
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ ID.AddPointer(getOperand(i));
+}
+
+void MDNode::setIsNotUniqued() {
+ setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ pImpl->NonUniquedMDNodes.insert(this);
+}
+
+// Replace value from this node's operand list.
+void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
+ Value *From = *Op;
+
+ // It is possible that someone did GV->RAUW(inst), replacing a global variable
+ // with an instruction or some other function-local object. If this is a
+ // non-function-local MDNode, it can't point to a function-local object.
+ // Handle this case by implicitly dropping the MDNode reference to null.
+ // Likewise if the MDNode is function-local but for a different function.
+ if (To && isFunctionLocalValue(To)) {
+ if (!isFunctionLocal())
+ To = 0;
+ else {
+ const Function *F = getFunction();
+ const Function *FV = getFunctionForValue(To);
+ // Metadata can be function-local without having an associated function.
+ // So only consider functions to have changed if non-null.
+ if (F && FV && F != FV)
+ To = 0;
+ }
+ }
+
+ if (From == To)
+ return;
+
+ // Update the operand.
+ Op->set(To);
+
+ // If this node is already not being uniqued (because one of the operands
+ // already went to null), then there is nothing else to do here.
+ if (isNotUniqued()) return;
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ // Remove "this" from the context map. FoldingSet doesn't have to reprofile
+ // this node to remove it, so we don't care what state the operands are in.
+ pImpl->MDNodeSet.RemoveNode(this);
+
+ // If we are dropping an argument to null, we choose to not unique the MDNode
+ // anymore. This commonly occurs during destruction, and uniquing these
+ // brings little reuse. Also, this means we don't need to include
+ // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
+ if (To == 0) {
+ setIsNotUniqued();
+ return;
+ }
+
+ // Now that the node is out of the folding set, get ready to reinsert it.
+ // First, check to see if another node with the same operands already exists
+ // in the set. If so, then this node is redundant.
+ FoldingSetNodeID ID;
+ Profile(ID);
+ void *InsertPoint;
+ if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
+ replaceAllUsesWith(N);
+ destroy();
+ return;
+ }
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+
+ // If this MDValue was previously function-local but no longer is, clear
+ // its function-local flag.
+ if (isFunctionLocal() && !isFunctionLocalValue(To)) {
+ bool isStillFunctionLocal = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Value *V = getOperand(i);
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isStillFunctionLocal = true;
+ break;
+ }
+ }
+ if (!isStillFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NamedMDNode implementation.
+//
+
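+// getNMDOps - Access the opaque Operands pointer as the underlying vector of
+// tracking handles to this named metadata's MDNode operands.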
+static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
+}
+
+NamedMDNode::NamedMDNode(const Twine &N)
+ : Name(N.str()), Parent(0),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
+}
+
+NamedMDNode::~NamedMDNode() {
+ dropAllReferences();
+ delete &getNMDOps(Operands);
+}
+
+/// getNumOperands - Return number of NamedMDNode operands.
+unsigned NamedMDNode::getNumOperands() const {
+ return (unsigned)getNMDOps(Operands).size();
+}
+
+/// getOperand - Return specified operand.
+MDNode *NamedMDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid Operand number!");
+ return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
+}
+
+/// addOperand - Add metadata Operand.
+void NamedMDNode::addOperand(MDNode *M) {
+ assert(!M->isFunctionLocal() &&
+ "NamedMDNode operands must not be function-local!");
+ getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
+}
+
+/// eraseFromParent - Drop all references and remove the node from parent
+/// module.
+void NamedMDNode::eraseFromParent() {
+ getParent()->eraseNamedMetadata(this);
+}
+
+/// dropAllReferences - Remove all uses and clear node vector.
+void NamedMDNode::dropAllReferences() {
+ getNMDOps(Operands).clear();
+}
+
+/// getName - Return a constant reference to this named metadata's name.
+StringRef NamedMDNode::getName() const {
+ return StringRef(Name);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Metadata method implementations.
+//
+
+void Instruction::setMetadata(const char *Kind, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+ setMetadata(getContext().getMDKindID(Kind), Node);
+}
+
+MDNode *Instruction::getMetadataImpl(const char *Kind) const {
+ return getMetadataImpl(getContext().getMDKindID(Kind));
+}
+
+/// setMetadata - Set the metadata of the specified kind to the specified
+/// node. This updates/replaces metadata if already present, or removes it if
+/// Node is null.
+void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg) {
+ DbgLoc = DebugLoc::getFromDILocation(Node);
+ return;
+ }
+
+ // Handle the case when we're adding/updating metadata on an instruction.
+ if (Node) {
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() == hasMetadataHashEntry() &&
+ "HasMetadata bit is wonked");
+ if (Info.empty()) {
+ setHasMetadataHashEntry(true);
+ } else {
+ // Handle replacement of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i].second = Node;
+ return;
+ }
+ }
+
+ // No replacement, just add it to the list.
+ Info.push_back(std::make_pair(KindID, Node));
+ return;
+ }
+
+ // Otherwise, we're removing metadata from an instruction.
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "HasMetadata bit out of date!");
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+
+ // Common case is removing the only entry.
+ if (Info.size() == 1 && Info[0].first == KindID) {
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+ return;
+ }
+
+ // Handle removal of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i] = Info.back();
+ Info.pop_back();
+ assert(!Info.empty() && "Removing last entry should be handled above");
+ return;
+ }
+ // Otherwise, removing an entry that doesn't exist on the instruction.
+}
+
+MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg)
+ return DbgLoc.getAsMDNode(getContext());
+
+ if (!hasMetadataHashEntry()) return 0;
+
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+
+ for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
+ I != E; ++I)
+ if (I->first == KindID)
+ return I->second;
+ return 0;
+}
+
+void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (!DbgLoc.isUnknown()) {
+ Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg,
+ DbgLoc.getAsMDNode(getContext())));
+ if (!hasMetadataHashEntry()) return;
+ }
+
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+void Instruction::
+getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+
+/// clearMetadataHashEntries - Clear all hashtable-based metadata from
+/// this instruction.
+void Instruction::clearMetadataHashEntries() {
+ assert(hasMetadataHashEntry() && "Caller should check");
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+}
+
diff --git a/contrib/llvm/lib/VMCore/Module.cpp b/contrib/llvm/lib/VMCore/Module.cpp
new file mode 100644
index 000000000000..be2fcb8ac6c0
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Module.cpp
@@ -0,0 +1,553 @@
+//===-- Module.cpp - Implement the Module class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Module class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/LeakDetector.h"
+#include "SymbolTableListTraitsImpl.h"
+#include <algorithm>
+#include <cstdarg>
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Methods to implement the globals and functions lists.
+//
+
+GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
+ GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()),
+ false, GlobalValue::ExternalLinkage);
+ // This should not be garbage monitored.
+ LeakDetector::removeGarbageObject(Ret);
+ return Ret;
+}
+GlobalAlias *ilist_traits<GlobalAlias>::createSentinel() {
+ GlobalAlias *Ret = new GlobalAlias(Type::getInt32Ty(getGlobalContext()),
+ GlobalValue::ExternalLinkage);
+ // This should not be garbage monitored.
+ LeakDetector::removeGarbageObject(Ret);
+ return Ret;
+}
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file.
+template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
+template class llvm::SymbolTableListTraits<Function, Module>;
+template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+
+//===----------------------------------------------------------------------===//
+// Primitive Module methods.
+//
+
+Module::Module(StringRef MID, LLVMContext& C)
+ : Context(C), Materializer(NULL), ModuleID(MID) {
+ ValSymTab = new ValueSymbolTable();
+ NamedMDSymTab = new StringMap<NamedMDNode *>();
+ Context.addModule(this);
+}
+
+Module::~Module() {
+ Context.removeModule(this);
+ dropAllReferences();
+ GlobalList.clear();
+ FunctionList.clear();
+ AliasList.clear();
+ LibraryList.clear();
+ NamedMDList.clear();
+ delete ValSymTab;
+ delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
+}
+
+/// Target endian information.
+Module::Endianness Module::getEndianness() const {
+ StringRef temp = DataLayout;
+ Module::Endianness ret = AnyEndianness;
+
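+ // Walk the '-'-separated specifications of the data layout string; an 'e'
+ // token selects little-endian and an 'E' token selects big-endian.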
+ while (!temp.empty()) {
+ StringRef token = DataLayout;
+ tie(token, temp) = getToken(temp, "-");
+
+ if (token[0] == 'e') {
+ ret = LittleEndian;
+ } else if (token[0] == 'E') {
+ ret = BigEndian;
+ }
+ }
+
+ return ret;
+}
+
+/// Target Pointer Size information...
+Module::PointerSize Module::getPointerSize() const {
+ StringRef temp = DataLayout;
+ Module::PointerSize ret = AnyPointerSize;
+
+ while (!temp.empty()) {
+ StringRef token, signalToken;
+ tie(token, temp) = getToken(temp, "-");
+ tie(signalToken, token) = getToken(token, ":");
+
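+ // A pointer specification looks like "p:<size>:...", so take the first
+ // field after the 'p' marker as the pointer size in bits.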
+ if (signalToken[0] == 'p') {
+ int size = 0;
+ getToken(token, ":").first.getAsInteger(10, size);
+ if (size == 32)
+ ret = Pointer32;
+ else if (size == 64)
+ ret = Pointer64;
+ }
+ }
+
+ return ret;
+}
+
+/// getNamedValue - Return the first global value in the module with
+/// the specified name, of arbitrary type. This method returns null
+/// if a global with the specified name is not found.
+GlobalValue *Module::getNamedValue(StringRef Name) const {
+ return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
+}
+
+/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+/// This ID is uniqued across modules in the current LLVMContext.
+unsigned Module::getMDKindID(StringRef Name) const {
+ return Context.getMDKindID(Name);
+}
+
+/// getMDKindNames - Populate the client-supplied SmallVector with the names of
+/// the custom metadata IDs registered in this LLVMContext. ID #0 is not used,
+/// so it is filled in as an empty string.
+void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
+ return Context.getMDKindNames(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the functions in the module.
+//
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return
+// it. This is nice because it allows most passes to get away with not handling
+// the symbol table directly for this common task.
+//
+Constant *Module::getOrInsertFunction(StringRef Name,
+ const FunctionType *Ty,
+ AttrListPtr AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Okay, the function exists. Does it have externally visible linkage?
+ if (F->hasLocalLinkage()) {
+ // Clear the function's name.
+ F->setName("");
+ // Retry, now there won't be a conflict.
+ Constant *NewF = getOrInsertFunction(Name, Ty);
+ F->setName(Name);
+ return NewF;
+ }
+
+ // If the function exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (F->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
+ const FunctionType *Ty,
+ AttrListPtr AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ const FunctionType *Ty) {
+ AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
+ return getOrInsertFunction(Name, Ty, AttributeList);
+}
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return it.
+// This version of the method takes a null terminated list of function
+// arguments, which makes it easier for clients to use.
+//
+Constant *Module::getOrInsertFunction(StringRef Name,
+ AttrListPtr AttributeList,
+ const Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeList);
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ const Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttrListPtr::get((AttributeWithIndex *)0, 0));
+}
+
+// getFunction - Look up the specified function in the module symbol table.
+// If it does not exist, return null.
+//
+Function *Module::getFunction(StringRef Name) const {
+ return dyn_cast_or_null<Function>(getNamedValue(Name));
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+/// getGlobalVariable - Look up the specified global variable in the module
+/// symbol table. If it does not exist, return null. If AllowLocal is set to
+/// true, this function will also return globals that have local linkage;
+/// by default, such globals are not returned.
+///
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
+ bool AllowLocal) const {
+ if (GlobalVariable *Result =
+ dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
+ if (AllowLocal || !Result->hasLocalLinkage())
+ return Result;
+ return 0;
+}
+
+/// getOrInsertGlobal - Look up the specified global in the module symbol table.
+/// 1. If it does not exist, add a declaration of the global and return it.
+/// 2. Else, the global exists but has the wrong type: return the global
+/// with a constantexpr cast to the right type.
+/// 3. Finally, if the existing global is the correct declaration, return the
+/// existing global.
+Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) {
+ // See if we have a definition for the specified global already.
+ GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
+ if (GV == 0) {
+ // Nope, add it
+ GlobalVariable *New =
+ new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ 0, Name);
+ return New; // Return the new declaration.
+ }
+
+ // If the variable exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (GV->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing global or a declaration.
+ return GV;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the aliases and named metadata in the module.
+//
+
+// getNamedAlias - Look up the specified alias in the module symbol table.
+// If it does not exist, return null.
+//
+GlobalAlias *Module::getNamedAlias(StringRef Name) const {
+ return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
+}
+
+/// getNamedMetadata - Return the first NamedMDNode in the module with the
+/// specified name. This method returns null if a NamedMDNode with the
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+ SmallString<256> NameData;
+ StringRef NameRef = Name.toStringRef(NameData);
+ return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
+}
+
+/// getOrInsertNamedMetadata - Return the first named MDNode in the module
+/// with the specified name. This method returns a new NamedMDNode if a
+/// NamedMDNode with the specified name is not found.
+NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
+ NamedMDNode *&NMD =
+ (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
+ if (!NMD) {
+ NMD = new NamedMDNode(Name);
+ NMD->setParent(this);
+ NamedMDList.push_back(NMD);
+ }
+ return NMD;
+}
+
+void Module::eraseNamedMetadata(NamedMDNode *NMD) {
+ static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
+ NamedMDList.erase(NMD);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to control the materialization of GlobalValues in the Module.
+//
+void Module::setMaterializer(GVMaterializer *GVM) {
+ assert(!Materializer &&
+ "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+ " to clear it out before setting another one.");
+ Materializer.reset(GVM);
+}
+
+bool Module::isMaterializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isMaterializable(GV);
+ return false;
+}
+
+bool Module::isDematerializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isDematerializable(GV);
+ return false;
+}
+
+bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ if (Materializer)
+ return Materializer->Materialize(GV, ErrInfo);
+ return false;
+}
+
+void Module::Dematerialize(GlobalValue *GV) {
+ if (Materializer)
+ return Materializer->Dematerialize(GV);
+}
+
+bool Module::MaterializeAll(std::string *ErrInfo) {
+ if (!Materializer)
+ return false;
+ return Materializer->MaterializeModule(this, ErrInfo);
+}
+
+bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
+ if (MaterializeAll(ErrInfo))
+ return true;
+ Materializer.reset();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other module related stuff.
+//
+
+
+// dropAllReferences() - This function causes all the subelements to "let go"
+// of all references that they are maintaining. This allows one to 'delete' a
+// whole module at a time, even though there may be circular references... first
+// all references are dropped, and all use counts go to zero. Then everything
+// is deleted for real. Note that no operations are valid on an object that
+// has "dropped all references", except operator delete.
+//
+void Module::dropAllReferences() {
+ for(Module::iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for(Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+void Module::addLibrary(StringRef Lib) {
+ for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
+ if (*I == Lib)
+ return;
+ LibraryList.push_back(Lib);
+}
+
+void Module::removeLibrary(StringRef Lib) {
+ LibraryListType::iterator I = LibraryList.begin();
+ LibraryListType::iterator E = LibraryList.end();
+ for (;I != E; ++I)
+ if (*I == Lib) {
+ LibraryList.erase(I);
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Type finding functionality.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// TypeFinder - Walk over a module, identifying all of the types that are
+ /// used by the module.
+ class TypeFinder {
+ // To avoid walking constant expressions multiple times and other IR
+ // objects, we keep several helper maps.
+ DenseSet<const Value*> VisitedConstants;
+ DenseSet<const Type*> VisitedTypes;
+
+ std::vector<StructType*> &StructTypes;
+ public:
+ TypeFinder(std::vector<StructType*> &structTypes)
+ : StructTypes(structTypes) {}
+
+ void run(const Module &M) {
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (I->hasInitializer())
+ incorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (const Value *Aliasee = I->getAliasee())
+ incorporateValue(Aliasee);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Get types from functions.
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ incorporateType(FI->getType());
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+ // Incorporate the type of the instruction and all its operands.
+ incorporateType(I.getType());
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ incorporateValue(*OI);
+
+ // Incorporate types hiding in metadata.
+ I.getAllMetadataOtherThanDebugLoc(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ incorporateMDNode(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ incorporateMDNode(NMD->getOperand(i));
+ }
+ }
+
+ private:
+ void incorporateType(Type *Ty) {
+ // Check to see if we've already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+ // If this is a structure type, record it.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ StructTypes.push_back(STy);
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ incorporateType(*I);
+ }
+
+ /// incorporateValue - This method is used to walk operand lists finding
+ /// types hiding in constant expressions and other operands that won't be
+ /// walked in other ways. GlobalValues, basic blocks, instructions, and
+ /// inst operands are all explicitly enumerated.
+ void incorporateValue(const Value *V) {
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ return incorporateMDNode(M);
+ if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ incorporateType(V->getType());
+
+ // Look in operands for types.
+ const User *U = cast<User>(V);
+ for (Constant::const_op_iterator I = U->op_begin(),
+ E = U->op_end(); I != E;++I)
+ incorporateValue(*I);
+ }
+
+ void incorporateMDNode(const MDNode *V) {
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (Value *Op = V->getOperand(i))
+ incorporateValue(Op);
+ }
+ };
+} // end anonymous namespace
+
+void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const {
+ TypeFinder(StructTypes).run(*this);
+}
+
+
diff --git a/contrib/llvm/lib/VMCore/Pass.cpp b/contrib/llvm/lib/VMCore/Pass.cpp
new file mode 100644
index 000000000000..9afc54063321
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Pass.cpp
@@ -0,0 +1,293 @@
+//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass infrastructure. It is primarily
+// responsible for ensuring that passes are executed and batched together
+// optimally.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Pass Implementation
+//
+
+Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
+
+// Force out-of-line virtual method.
+Pass::~Pass() {
+ delete Resolver;
+}
+
+// Force out-of-line virtual method.
+ModulePass::~ModulePass() { }
+
+Pass *ModulePass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+}
+
+PassManagerType ModulePass::getPotentialPassManagerType() const {
+ return PMT_ModulePassManager;
+}
+
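+// mustPreserveAnalysisID - Return true if the analysis identified by AID is
+// still available through this pass's resolver.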
+bool Pass::mustPreserveAnalysisID(char &AID) const {
+ return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
+}
+
+// dumpPassStructure - Implement the -debug-pass=Structure option
+void Pass::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << getPassName() << "\n";
+}
+
+/// getPassName - Return a nice clean name for a pass. This is usually
+/// implemented in terms of the name that is registered by one of the
+/// Registration templates, but can be overridden directly.
+///
+const char *Pass::getPassName() const {
+ AnalysisID AID = getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+ if (PI)
+ return PI->getPassName();
+ return "Unnamed pass: implement Pass::getPassName()";
+}
+
+void Pass::preparePassManager(PMStack &) {
+ // By default, don't do anything.
+}
+
+PassManagerType Pass::getPotentialPassManagerType() const {
+ // Default implementation.
+ return PMT_Unknown;
+}
+
+void Pass::getAnalysisUsage(AnalysisUsage &) const {
+ // By default, no analysis results are used, all are invalidated.
+}
+
+void Pass::releaseMemory() {
+ // By default, don't do anything.
+}
+
+void Pass::verifyAnalysis() const {
+ // By default, don't do anything.
+}
+
+void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
+ return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+ return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+ return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+ assert(!Resolver && "Resolver is already set");
+ Resolver = AR;
+}
+
+// print - Print out the internal state of the pass. This is called by Analyze
+// to print out the contents of an analysis. Otherwise it is not necessary to
+// implement this method.
+//
+void Pass::print(raw_ostream &O,const Module*) const {
+ O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
+}
+
+// dump - call print(cerr);
+void Pass::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// ImmutablePass Implementation
+//
+// Force out-of-line virtual method.
+ImmutablePass::~ImmutablePass() { }
+
+void ImmutablePass::initializePass() {
+ // By default, don't do anything.
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPass Implementation
+//
+
+Pass *FunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+}
+
+bool FunctionPass::doInitialization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool FunctionPass::doFinalization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType FunctionPass::getPotentialPassManagerType() const {
+ return PMT_FunctionPassManager;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicBlockPass Implementation
+//
+
+Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+
+ llvm_unreachable("BasicBlockPass printing unsupported.");
+ return 0;
+}
+
+bool BasicBlockPass::doInitialization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doInitialization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+}
+
+const PassInfo *Pass::lookupPassInfo(const void *TI) {
+ return PassRegistry::getPassRegistry()->getPassInfo(TI);
+}
+
+const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
+ return PassRegistry::getPassRegistry()->getPassInfo(Arg);
+}
+
+Pass *PassInfo::createPass() const {
+ assert((!isAnalysisGroup() || NormalCtor) &&
+ "No default implementation found for analysis group!");
+ assert(NormalCtor &&
+ "Cannot call createPass on PassInfo without default ctor!");
+ return NormalCtor();
+}
+
+//===----------------------------------------------------------------------===//
+// Analysis Group Implementation Code
+//===----------------------------------------------------------------------===//
+
+// RegisterAGBase implementation
+//
+RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+ const void *PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID) {
+ PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
+ *this, isDefault);
+}
+
+//===----------------------------------------------------------------------===//
+// PassRegistrationListener implementation
+//
+
+// PassRegistrationListener ctor - Add the current object to the list of
+// PassRegistrationListeners...
+PassRegistrationListener::PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
+}
+
+// dtor - Remove object from list of listeners...
+PassRegistrationListener::~PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->removeRegistrationListener(this);
+}
+
+// enumeratePasses - Iterate over the registered passes, calling the
+// passEnumerate callback on each PassInfo object.
+//
+void PassRegistrationListener::enumeratePasses() {
+ PassRegistry::getPassRegistry()->enumerateWith(this);
+}
+
+PassNameParser::~PassNameParser() {}
+
+//===----------------------------------------------------------------------===//
+// AnalysisUsage Class Implementation
+//
+
+namespace {
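+ // GetCFGOnlyPasses - Registration listener that collects the IDs of all
+ // registered passes that only depend on the CFG.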
+ struct GetCFGOnlyPasses : public PassRegistrationListener {
+ typedef AnalysisUsage::VectorType VectorType;
+ VectorType &CFGOnlyList;
+ GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
+
+ void passEnumerate(const PassInfo *P) {
+ if (P->isCFGOnlyPass())
+ CFGOnlyList.push_back(P->getTypeInfo());
+ }
+ };
+}
+
+// setPreservesCFG - This function should be called by a pass if and only if
+// it does not:
+//
+// 1. Add or remove basic blocks from the function
+// 2. Modify terminator instructions in any way.
+//
+// This function annotates the AnalysisUsage info object to say that analyses
+// that only depend on the CFG are preserved by this pass.
+//
+void AnalysisUsage::setPreservesCFG() {
+ // Since this transformation doesn't modify the CFG, it preserves all analyses
+ // that only depend on the CFG (like dominators, loop info, etc...)
+ GetCFGOnlyPasses(Preserved).enumeratePasses();
+}
+
+AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI->getTypeInfo());
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
+ Required.push_back(ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
+ Required.push_back(&ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
+ Required.push_back(&ID);
+ RequiredTransitive.push_back(&ID);
+ return *this;
+}
diff --git a/contrib/llvm/lib/VMCore/PassManager.cpp b/contrib/llvm/lib/VMCore/PassManager.cpp
new file mode 100644
index 000000000000..5cf2905733ac
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/PassManager.cpp
@@ -0,0 +1,1882 @@
+//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/PassManagers.h"
+#include "llvm/PassManager.h"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/StringMap.h"
+#include <algorithm>
+#include <cstdio>
+#include <map>
+using namespace llvm;
+
+// See PassManagers.h for Pass Manager infrastructure overview.
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Pass debugging information. Often it is useful to find out what pass is
+// running when a crash occurs in a utility. When this library is compiled with
+// debugging on, a command line option (--debug-pass) is enabled that causes the
+// pass name to be printed before it executes.
+//
+
+// Different debug levels that can be enabled...
+enum PassDebugLevel {
+ None, Arguments, Structure, Executions, Details
+};
+
+static cl::opt<enum PassDebugLevel>
+PassDebugging("debug-pass", cl::Hidden,
+ cl::desc("Print PassManager debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
+ clEnumVal(Structure , "print pass structure before run()"),
+ clEnumVal(Executions, "print pass name before it is executed"),
+ clEnumVal(Details , "print pass details when it is executed"),
+ clEnumValEnd));
+
+typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
+PassOptionList;
+
+// Print IR out before/after specified passes.
+static PassOptionList
+PrintBefore("print-before",
+ llvm::cl::desc("Print IR before specified passes"),
+ cl::Hidden);
+
+static PassOptionList
+PrintAfter("print-after",
+ llvm::cl::desc("Print IR after specified passes"),
+ cl::Hidden);
+
+static cl::opt<bool>
+PrintBeforeAll("print-before-all",
+ llvm::cl::desc("Print IR before each pass"),
+ cl::init(false));
+static cl::opt<bool>
+PrintAfterAll("print-after-all",
+ llvm::cl::desc("Print IR after each pass"),
+ cl::init(false));
+
+/// This is a helper to determine whether to print IR before or
+/// after a pass.
+
+static bool ShouldPrintBeforeOrAfterPass(const void *PassID,
+ PassOptionList &PassesToPrint) {
+ if (const llvm::PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(PassID)) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// This is a utility to check whether a pass should have IR dumped
+/// before it.
+static bool ShouldPrintBeforePass(const void *PassID) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// after it.
+static bool ShouldPrintAfterPass(const void *PassID) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
+}
+
+} // End of llvm namespace
+
+/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+/// or higher is specified.
+bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
+ return PassDebugging >= Executions;
+}
+
+
+
+
+void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
+ if (V == 0 && M == 0)
+ OS << "Releasing pass '";
+ else
+ OS << "Running pass '";
+
+ OS << P->getPassName() << "'";
+
+ if (M) {
+ OS << " on module '" << M->getModuleIdentifier() << "'.\n";
+ return;
+ }
+ if (V == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << " on ";
+ if (isa<Function>(V))
+ OS << "function";
+ else if (isa<BasicBlock>(V))
+ OS << "basic block";
+ else
+ OS << "value";
+
+ OS << " '";
+ WriteAsOperand(OS, V, /*PrintTy=*/false, M);
+ OS << "'\n";
+}
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BBPassManager
+//
+/// BBPassManager manages BasicBlockPasses. It batches all the
+/// passes together and sequences them to process one basic block before
+/// processing the next basic block.
+class BBPassManager : public PMDataManager, public FunctionPass {
+
+public:
+ static char ID;
+ explicit BBPassManager(int Depth)
+ : PMDataManager(Depth), FunctionPass(ID) {}
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool runOnFunction(Function &F);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M);
+ bool doInitialization(Function &F);
+ bool doFinalization(Module &M);
+ bool doFinalization(Function &F);
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ virtual const char *getPassName() const {
+ return "BasicBlock Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ BP->dumpPassStructure(Offset + 1);
+ dumpLastUses(BP, Offset+1);
+ }
+ }
+
+ BasicBlockPass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
+ return BP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+ }
+};
+
+char BBPassManager::ID = 0;
+}
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl
+//
+/// FunctionPassManagerImpl manages FPPassManagers
+class FunctionPassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+private:
+ bool wasRun;
+public:
+ static char ID;
+ explicit FunctionPassManagerImpl(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new FPPassManager(1)), wasRun(false) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a function printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+ }
+
+ // Prepare for running an on the fly pass, freeing memory if needed
+ // from a previous run.
+ void releaseMemoryOnTheFly();
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Function &F);
+
+ /// doInitialization - Run all of the initializers for the function passes.
+ ///
+ bool doInitialization(Module &M);
+
+ /// doFinalization - Run all of the finalizers for the function passes.
+ ///
+ bool doFinalization(Module &M);
+
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ void addTopLevelPass(Pass *P) {
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is an immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+ initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ recordAvailableAnalysis(IP);
+ } else {
+ P->assignPassManager(activeStack, PMT_FunctionPassManager);
+ }
+
+ }
+
+ FPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
+ return FP;
+ }
+};
+
+char FunctionPassManagerImpl::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// MPPassManager
+//
+/// MPPassManager manages ModulePasses and function pass managers.
+/// It batches all Module passes and function pass managers together and
+/// sequences them to process one module.
+class MPPassManager : public Pass, public PMDataManager {
+public:
+ static char ID;
+ explicit MPPassManager(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth) { }
+
+ // Delete on the fly managers.
+ virtual ~MPPassManager() {
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ delete FPP;
+ }
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ /// Add RequiredPass into list of lower level passes required by pass P.
+ /// RequiredPass is run on the fly by Pass Manager when P requests it
+ /// through getAnalysis interface.
+ virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+ /// Return the function pass corresponding to PassInfo PI that is required
+ /// by module pass MP. Instantiate the analysis pass by using its
+ /// runOnFunction() for function F.
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
+
+ virtual const char *getPassName() const {
+ return "Module Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ MP->dumpPassStructure(Offset + 1);
+ std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
+ OnTheFlyManagers.find(MP);
+ if (I != OnTheFlyManagers.end())
+ I->second->dumpPassStructure(Offset + 2);
+ dumpLastUses(MP, Offset+1);
+ }
+ }
+
+ ModulePass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<ModulePass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
+ }
+
+ private:
+ /// Collection of on the fly FPPassManagers. These managers manage
+ /// function passes that are required by module passes.
+ std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
+};
+
+char MPPassManager::ID = 0;
+//===----------------------------------------------------------------------===//
+// PassManagerImpl
+//
+
+/// PassManagerImpl manages MPPassManagers
+class PassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+
+public:
+ static char ID;
+ explicit PassManagerImpl(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new MPPassManager(1)) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ void addTopLevelPass(Pass *P) {
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is an immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+ initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ recordAvailableAnalysis(IP);
+ } else {
+ P->assignPassManager(activeStack, PMT_ModulePassManager);
+ }
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ MPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
+ return MP;
+ }
+};
+
+char PassManagerImpl::ID = 0;
+} // End of llvm namespace
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+ if (TheDebugProbe) return;
+
+ // Constructed the first time this is called. This guarantees that the
+ // object will be constructed, if -enable-debug-info-probe is set,
+ // before static globals, thus it will be destroyed before them.
+ static ManagedStatic<DebugInfoProbeInfo> DIP;
+ TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
+/// TimingInfo Class - This class is used to calculate information about the
+/// amount of time each pass takes to execute. This only happens when
+/// -time-passes is enabled on the command line.
+///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
+class TimingInfo {
+ DenseMap<Pass*, Timer*> TimingData;
+ TimerGroup TG;
+public:
+ // Use 'create' member to get this.
+ TimingInfo() : TG("... Pass execution timing report ...") {}
+
+ // ~TimingInfo - Print out the collected timing information.
+ ~TimingInfo() {
+ // Delete all of the timers, which accumulate their info into the
+ // TimerGroup.
+ for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
+ E = TimingData.end(); I != E; ++I)
+ delete I->second;
+ // TimerGroup is deleted next, printing the report.
+ }
+
+ // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+ // to a non null value (if the -time-passes option is enabled) or it leaves it
+ // null. It may be called multiple times.
+ static void createTheTimeInfo();
+
+ /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) {
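+ // Pass managers are containers for other passes; they do not get a timer.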
+ if (P->getAsPMDataManager())
+ return 0;
+
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ Timer *&T = TimingData[P];
+ if (T == 0)
+ T = new Timer(P->getPassName(), TG);
+ return T;
+ }
+};
+
+} // End of anon namespace
+
+static TimingInfo *TheTimeInfo;
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager implementation
+
+/// Initialize top level manager. Create first pass manager.
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
+}
+
+/// Set pass P as the last user of the given analysis passes.
+void
+PMTopLevelManager::setLastUser(const SmallVectorImpl<Pass *> &AnalysisPasses,
+ Pass *P) {
+ unsigned PDepth = 0;
+ if (P->getResolver())
+ PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+ for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
+ E = AnalysisPasses.end(); I != E; ++I) {
+ Pass *AP = *I;
+ LastUser[AP] = P;
+
+ if (P == AP)
+ continue;
+
+ // Update the last users of passes that are required transitively by AP.
+ AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 12> LastPMUses;
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ assert(AnalysisPass && "Expected analysis pass to exist.");
+ AnalysisResolver *AR = AnalysisPass->getResolver();
+ assert(AR && "Expected analysis resolver to exist.");
+ unsigned APDepth = AR->getPMDataManager().getDepth();
+
+ if (PDepth == APDepth)
+ LastUses.push_back(AnalysisPass);
+ else if (PDepth > APDepth)
+ LastPMUses.push_back(AnalysisPass);
+ }
+
+ setLastUser(LastUses, P);
+
+ // If this pass has a corresponding pass manager, push higher level
+ // analysis to this pass manager.
+ if (P->getResolver())
+ setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
+ // If AP is the last user of other passes then make P last user of
+ // such passes.
+ for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
+ LUE = LastUser.end(); LUI != LUE; ++LUI) {
+ if (LUI->second == AP)
+ // DenseMap iterator is not invalidated here because
+ // this is just updating existing entries.
+ LastUser[LUI->first] = P;
+ }
+ }
+}
+
+/// Collect passes whose last user is P
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
+ Pass *P) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ InversedLastUser.find(P);
+ if (DMI == InversedLastUser.end())
+ return;
+
+ SmallPtrSet<Pass *, 8> &LU = DMI->second;
+ for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
+ E = LU.end(); I != E; ++I) {
+ LastUses.push_back(*I);
+ }
+
+}
+
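+/// Return the cached AnalysisUsage for pass P, computing and caching it on
+/// the first request.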
+AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
+ AnalysisUsage *AnUsage = NULL;
+ DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ if (DMI != AnUsageMap.end())
+ AnUsage = DMI->second;
+ else {
+ AnUsage = new AnalysisUsage();
+ P->getAnalysisUsage(*AnUsage);
+ AnUsageMap[P] = AnUsage;
+ }
+ return AnUsage;
+}
+
+/// Schedule pass P for execution. Make sure that passes required by
+/// P are run before P is run. Update analysis info maintained by
+/// the manager. Remove dead passes. This is a recursive function.
+void PMTopLevelManager::schedulePass(Pass *P) {
+
+ // TODO: Allocate a function manager for this pass, otherwise the required
+ // set may be inserted into the previous function manager.
+
+ // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack);
+
+ // If P is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ delete P;
+ return;
+ }
+
+ AnalysisUsage *AnUsage = findAnalysisUsage(P);
+
+ bool checkAnalysis = true;
+ while (checkAnalysis) {
+ checkAnalysis = false;
+
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
+ E = RequiredSet.end(); I != E; ++I) {
+
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ if (!AnalysisPass) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ assert(PI && "Expected required passes to be initialized");
+ AnalysisPass = PI->createPass();
+ if (P->getPotentialPassManagerType () ==
+ AnalysisPass->getPotentialPassManagerType())
+ // Schedule analysis pass that is managed by the same pass manager.
+ schedulePass(AnalysisPass);
+ else if (P->getPotentialPassManagerType () >
+ AnalysisPass->getPotentialPassManagerType()) {
+ // Schedule analysis pass that is managed by a new manager.
+ schedulePass(AnalysisPass);
+ // Recheck analysis passes to ensure that required analyses that
+ // are already checked are still available.
+ checkAnalysis = true;
+ }
+ else
+ // Do not schedule this analysis. Lower level analysis
+ // passes are run on the fly.
+ delete AnalysisPass;
+ }
+ }
+ }
+
+ // Now all required passes are available.
+ addTopLevelPass(P);
+}
+
+/// Find the pass that implements Analysis AID. Search immutable
+/// passes and all pass managers. If desired pass is not found
+/// then return NULL.
+Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+
+ // Check pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check other pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check the immutable passes. Iterate in reverse order so that we find
+ // the most recently registered passes first.
+ for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
+ ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
+ AnalysisID PI = (*I)->getPassID();
+ if (PI == AID)
+ return *I;
+
+ // If the pass is not found, check the interfaces implemented by the immutable pass.
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
+ assert(PassInf && "Expected all immutable passes to be initialized");
+ const std::vector<const PassInfo*> &ImmPI =
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ return *I;
+ }
+ }
+
+ return 0;
+}
+
+// Print passes managed by this top level manager.
+void PMTopLevelManager::dumpPasses() const {
+
+ if (PassDebugging < Structure)
+ return;
+
+ // Print out the immutable passes
+ for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
+ ImmutablePasses[i]->dumpPassStructure(0);
+ }
+
+ // Every class that derives from PMDataManager also derives from Pass
+ // (sometimes indirectly), but there's no inheritance relationship
+ // between PMDataManager and Pass, so we have to getAsPass to get
+ // from a PMDataManager* to a Pass*.
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->getAsPass()->dumpPassStructure(1);
+}
+
+void PMTopLevelManager::dumpArguments() const {
+
+ if (PassDebugging < Arguments)
+ return;
+
+ dbgs() << "Pass Arguments: ";
+ for (SmallVector<ImmutablePass *, 8>::const_iterator I =
+ ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
+ assert(PI && "Expected all immutable passes to be initialized");
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->dumpPassArguments();
+ dbgs() << "\n";
+}
+
+void PMTopLevelManager::initializeAllAnalysisInfo() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ // Initialize the indirect pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
+ DME = LastUser.end(); DMI != DME; ++DMI) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ InversedLastUser.find(DMI->second);
+ if (InvDMI != InversedLastUser.end()) {
+ SmallPtrSet<Pass *, 8> &L = InvDMI->second;
+ L.insert(DMI->first);
+ } else {
+ SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
+ InversedLastUser[DMI->second] = L;
+ }
+ }
+}
+
+/// Destructor
+PMTopLevelManager::~PMTopLevelManager() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ delete *I;
+
+ for (SmallVectorImpl<ImmutablePass *>::iterator
+ I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ delete *I;
+
+ for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
+ DME = AnUsageMap.end(); DMI != DME; ++DMI)
+ delete DMI->second;
+}
+
+//===----------------------------------------------------------------------===//
+// PMDataManager implementation
+
+/// Augment AvailableAnalysis by adding the analysis made available by pass P.
+void PMDataManager::recordAvailableAnalysis(Pass *P) {
+ AnalysisID PI = P->getPassID();
+
+ AvailableAnalysis[PI] = P;
+
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i)
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
+}
+
+// Return true if P preserves high level analysis used by other
+// passes managed by this manager
+bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return true;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
+ E = HigherLevelAnalysis.end(); I != E; ++I) {
+ Pass *P1 = *I;
+ if (P1->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(),
+ P1->getPassID()) ==
+ PreservedSet.end())
+ return false;
+ }
+
+ return true;
+}
+
+/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
+void PMDataManager::verifyPreservedAnalysis(Pass *P) {
+ // Don't do this unless assertions are enabled.
+#ifdef NDEBUG
+ return;
+#endif
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+
+ // Verify preserved analysis
+ for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
+ E = PreservedSet.end(); I != E; ++I) {
+ AnalysisID AID = *I;
+ if (Pass *AP = findAnalysisPass(AID, true)) {
+ TimeRegion PassTimer(getPassTimer(AP));
+ AP->verifyAnalysis();
+ }
+ }
+}
+
+/// Remove Analysis not preserved by Pass P
+void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (std::map<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
+ E = AvailableAnalysis.end(); I != E; ) {
+ std::map<AnalysisID, Pass*>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ AvailableAnalysis.erase(Info);
+ }
+ }
+
+ // Check inherited analysis also. If P is not preserving analysis
+ // provided by parent manager then remove it here.
+ for (unsigned Index = 0; Index < PMT_Last; ++Index) {
+
+ if (!InheritedAnalysis[Index])
+ continue;
+
+ for (std::map<AnalysisID, Pass*>::iterator
+ I = InheritedAnalysis[Index]->begin(),
+ E = InheritedAnalysis[Index]->end(); I != E; ) {
+ std::map<AnalysisID, Pass *>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ InheritedAnalysis[Index]->erase(Info);
+ }
+ }
+ }
+}
+
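+// Illustrative sketch (not part of this file): the preserved set consulted by
+// the two routines above comes from the pass's own getAnalysisUsage().  A
+// transform that keeps the dominator tree valid would declare (DominatorTree
+// is the standard analysis from llvm/Analysis/Dominators.h):
+//
+//   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addPreserved<DominatorTree>();  // kept in AvailableAnalysis
+//     // AU.setPreservesAll();           // alternative: preserve everything
+//   }
+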
+/// Remove analysis passes that are not used any longer
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+
+ SmallVector<Pass *, 12> DeadPasses;
+
+ // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(DeadPasses, P);
+
+ if (PassDebugging >= Details && !DeadPasses.empty()) {
+ dbgs() << " -*- '" << P->getPassName();
+ dbgs() << "' is the last user of following pass instances.";
+ dbgs() << " Free these instances\n";
+ }
+
+ for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
+ E = DeadPasses.end(); I != E; ++I)
+ freePass(*I, Msg, DBG_STR);
+}
+
+void PMDataManager::freePass(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+ dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
+
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
+ P->releaseMemory();
+ }
+
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
+ // Remove the pass itself (if it is not already removed).
+ AvailableAnalysis.erase(PI);
+
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ std::map<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(II[i]->getTypeInfo());
+ if (Pos != AvailableAnalysis.end() && Pos->second == P)
+ AvailableAnalysis.erase(Pos);
+ }
+ }
+}
+
+/// Add pass P into the PassVector. Update
+/// AvailableAnalysis appropriately if ProcessAnalysis is true.
+void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
+ // This manager is going to manage pass P. Set up analysis resolver
+ // to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+
+ // If a FunctionPass F is the last user of ModulePass info M
+ // then F's manager, not F, records itself as the last user of M.
+ SmallVector<Pass *, 12> TransferLastUses;
+
+ if (!ProcessAnalysis) {
+ // Add pass
+ PassVector.push_back(P);
+ return;
+ }
+
+ // At the moment, this pass is the last user of all required passes.
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
+
+ unsigned PDepth = this->getDepth();
+
+ collectRequiredAnalysis(RequiredPasses,
+ ReqAnalysisNotAvailable, P);
+ for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
+ E = RequiredPasses.end(); I != E; ++I) {
+ Pass *PRequired = *I;
+ unsigned RDepth = 0;
+
+ assert(PRequired->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ RDepth = DM.getDepth();
+
+ if (PDepth == RDepth)
+ LastUses.push_back(PRequired);
+ else if (PDepth > RDepth) {
+ // Let the parent claim responsibility of last use
+ TransferLastUses.push_back(PRequired);
+ // Keep track of higher level analysis used by this manager.
+ HigherLevelAnalysis.push_back(PRequired);
+ } else
+ llvm_unreachable("Unable to accommodate Required Pass");
+ }
+
+ // Set P as P's last user until someone starts using P.
+ // However, if P is a Pass Manager then it does not need
+ // to record its last user.
+ if (P->getAsPMDataManager() == 0)
+ LastUses.push_back(P);
+ TPM->setLastUser(LastUses, P);
+
+ if (!TransferLastUses.empty()) {
+ Pass *My_PM = getAsPass();
+ TPM->setLastUser(TransferLastUses, My_PM);
+ TransferLastUses.clear();
+ }
+
+ // Now, take care of required analyses that are not available.
+ for (SmallVectorImpl<AnalysisID>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
+ E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
+ this->addLowerLevelRequiredPass(P, AnalysisPass);
+ }
+
+ // Take a note of analysis required and made available by this pass.
+ // Remove the analysis not preserved by this pass
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+
+ // Add pass
+ PassVector.push_back(P);
+}
+
+
+/// Populate RP with the analysis passes that are required by
+/// pass P and are available. Populate RP_NotAvail with the analysis
+/// passes that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+ SmallVectorImpl<AnalysisID> &RP_NotAvail,
+ Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator
+ I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+}
+
+// All Required analyses should be available to the pass as it runs! Here
+// we fill in the AnalysisImpls member of the pass so that it can
+// successfully use the getAnalysis() method to retrieve the
+// implementations it needs.
+//
+void PMDataManager::initializeAnalysisImpl(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+
+ for (AnalysisUsage::VectorType::const_iterator
+ I = AnUsage->getRequiredSet().begin(),
+ E = AnUsage->getRequiredSet().end(); I != E; ++I) {
+ Pass *Impl = findAnalysisPass(*I, true);
+ if (Impl == 0)
+ // This may be an analysis pass that is initialized on the fly.
+ // If that is not the case then it will raise an assert when it is used.
+ continue;
+ AnalysisResolver *AR = P->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->addAnalysisImplsPair(*I, Impl);
+ }
+}
+
+/// Find the pass that implements Analysis AID. If desired pass is not found
+/// then return NULL.
+Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
+
+ // Check if the AvailableAnalysis map has an entry for this analysis.
+ std::map<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
+
+ if (I != AvailableAnalysis.end())
+ return I->second;
+
+ // Search Parents through TopLevelManager
+ if (SearchParent)
+ return TPM->findAnalysisPass(AID);
+
+ return NULL;
+}
+
+// Print list of passes that are last used by P.
+void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
+
+ SmallVector<Pass *, 12> LUses;
+
+ // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(LUses, P);
+
+ for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
+ E = LUses.end(); I != E; ++I) {
+ llvm::dbgs() << "--" << std::string(Offset*2, ' ');
+ (*I)->dumpPassStructure(0);
+ }
+}
+
+void PMDataManager::dumpPassArguments() const {
+ for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I) {
+ if (PMDataManager *PMD = (*I)->getAsPMDataManager())
+ PMD->dumpPassArguments();
+ else
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+}
+
+void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+ enum PassDebuggingString S2,
+ StringRef Msg) {
+ if (PassDebugging < Executions)
+ return;
+ dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ switch (S1) {
+ case EXECUTION_MSG:
+ dbgs() << "Executing Pass '" << P->getPassName();
+ break;
+ case MODIFICATION_MSG:
+ dbgs() << "Made Modification '" << P->getPassName();
+ break;
+ case FREEING_MSG:
+ dbgs() << " Freeing Pass '" << P->getPassName();
+ break;
+ default:
+ break;
+ }
+ switch (S2) {
+ case ON_BASICBLOCK_MSG:
+ dbgs() << "' on BasicBlock '" << Msg << "'...\n";
+ break;
+ case ON_FUNCTION_MSG:
+ dbgs() << "' on Function '" << Msg << "'...\n";
+ break;
+ case ON_MODULE_MSG:
+ dbgs() << "' on Module '" << Msg << "'...\n";
+ break;
+ case ON_REGION_MSG:
+ dbgs() << "' on Region '" << Msg << "'...\n";
+ break;
+ case ON_LOOP_MSG:
+ dbgs() << "' on Loop '" << Msg << "'...\n";
+ break;
+ case ON_CG_MSG:
+ dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
+ break;
+ default:
+ break;
+ }
+}
+
+void PMDataManager::dumpRequiredSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
+}
+
+void PMDataManager::dumpPreservedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
+}
+
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const {
+ assert(PassDebugging >= Details);
+ if (Set.empty())
+ return;
+ dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ if (i) dbgs() << ',';
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ if (!PInf) {
+ // Some preserved passes, such as AliasAnalysis, may not be initialized by
+ // all drivers.
+ dbgs() << " Uninitialized Pass";
+ continue;
+ }
+ dbgs() << ' ' << PInf->getPassName();
+ }
+ dbgs() << '\n';
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ if (TPM) {
+ TPM->dumpArguments();
+ TPM->dumpPasses();
+ }
+
+ // A module level pass may require function level analysis info
+ // (e.g. dominator info). The pass manager uses an on-the-fly function pass
+ // manager to provide this on demand. In that case, in pass manager
+ // terminology, the module level pass is requiring lower level analysis info
+ // managed by a lower level pass manager.
+
+ // When the pass manager is not able to order the required analysis info, it
+ // checks whether any lower level manager will be able to provide this
+ // analysis info on demand.
+#ifndef NDEBUG
+ dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+ dbgs() << "' required by '" << P->getPassName() << "'\n";
+#endif
+ llvm_unreachable("Unable to schedule pass");
+}
+
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
+ assert(0 && "Unable to find on the fly pass");
+ return NULL;
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+ for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I)
+ delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+ return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
+ Function &F) {
+ return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = doInitialization(F);
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+ dumpRequiredSet(BP);
+
+ initializeAnalysisImpl(BP);
+
+ {
+ // If the pass crashes, remember this.
+ PassManagerPrettyStackEntry X(BP, *I);
+ TimeRegion PassTimer(getPassTimer(BP));
+
+ LocalChanged |= BP->runOnBasicBlock(*I);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+ I->getName());
+ dumpPreservedSet(BP);
+
+ verifyPreservedAnalysis(BP);
+ removeNotPreservedAnalysis(BP);
+ recordAvailableAnalysis(BP);
+ removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+ }
+
+ return doFinalization(F) || Changed;
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doInitialization(F);
+ }
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doFinalization(F);
+ }
+
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
+ FPM = new FunctionPassManagerImpl(0);
+ // FPM is the top level manager.
+ FPM->setTopLevelManager(FPM);
+
+ AnalysisResolver *AR = new AnalysisResolver(*FPM);
+ FPM->setResolver(AR);
+}
+
+FunctionPassManager::~FunctionPassManager() {
+ delete FPM;
+}
+
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void FunctionPassManager::addImpl(Pass *P) {
+ FPM->add(P);
+}
+
+/// add - Add a pass to the queue of passes to run. This passes
+/// ownership of the Pass to the PassManager. When the
+/// PassManager_X is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+ // If this is not a function pass, don't add a printer for it.
+ const void *PassID = P->getPassID();
+ if (P->getPassKind() == PT_Function)
+ if (ShouldPrintBeforePass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ + P->getPassName() + " ***"));
+
+ addImpl(P);
+
+ if (P->getPassKind() == PT_Function)
+ if (ShouldPrintAfterPass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ + P->getPassName() + " ***"));
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
+ if (F.isMaterializable()) {
+ std::string errstr;
+ if (F.Materialize(&errstr))
+ report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+ }
+ return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+ return FPM->doInitialization(*M);
+}
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+ return FPM->doFinalization(*M);
+}
+
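+// Illustrative sketch (not part of this file): typical use of
+// FunctionPassManager by a driver that processes functions one at a time.
+// `M` is assumed to be a valid Module* owned by the caller; the banner string
+// is arbitrary.  createPrintFunctionPass() is defined later in this change.
+//
+//   FunctionPassManager FPM(M);
+//   FPM.add(createPrintFunctionPass("*** example banner ***", &dbgs(), false));
+//   FPM.doInitialization();
+//   for (Module::iterator F = M->begin(), E = M->end(); F != E; ++F)
+//     FPM.run(*F);
+//   FPM.doFinalization();
+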
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+ bool Changed = false;
+
+ dumpArguments();
+ dumpPasses();
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ AnalysisResolver *AR = FP->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->clearAnalysisImpls();
+ }
+}
+
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+ if (!wasRun)
+ return;
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+ FPPassManager *FPPM = getContainedManager(Index);
+ for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+ FPPM->getContainedPass(Index)->releaseMemory();
+ }
+ }
+ wasRun = false;
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if any function is modified by a pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+ createDebugInfoProbe();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnFunction(F);
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ getContainedManager(Index)->cleanup();
+
+ wasRun = true;
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+ llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ FP->dumpPassStructure(Offset + 1);
+ dumpLastUses(FP, Offset+1);
+ }
+}
+
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpRequiredSet(FP);
+
+ initializeAnalysisImpl(FP);
+ if (TheDebugProbe)
+ TheDebugProbe->initialize(FP, F);
+ {
+ PassManagerPrettyStackEntry X(FP, F);
+ TimeRegion PassTimer(getPassTimer(FP));
+
+ LocalChanged |= FP->runOnFunction(F);
+ }
+ if (TheDebugProbe)
+ TheDebugProbe->finalize(FP, F);
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPreservedSet(FP);
+
+ verifyPreservedAnalysis(FP);
+ removeNotPreservedAnalysis(FP);
+ recordAvailableAnalysis(FP);
+ removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+ }
+ return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+ bool Changed = doInitialization(M);
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ runOnFunction(*I);
+
+ return doFinalization(M) || Changed;
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Initialize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ Changed |= FPP->doInitialization(M);
+ }
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
+ dumpRequiredSet(MP);
+
+ initializeAnalysisImpl(MP);
+
+ {
+ PassManagerPrettyStackEntry X(MP, M);
+ TimeRegion PassTimer(getPassTimer(MP));
+
+ LocalChanged |= MP->runOnModule(M);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier());
+ dumpPreservedSet(MP);
+
+ verifyPreservedAnalysis(MP);
+ removeNotPreservedAnalysis(MP);
+ recordAvailableAnalysis(MP);
+ removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
+ }
+
+ // Finalize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ // We don't know when an on-the-fly pass will be run for the last time,
+ // so we need to releaseMemory / finalize here.
+ FPP->releaseMemoryOnTheFly();
+ Changed |= FPP->doFinalization(M);
+ }
+ return Changed;
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+ assert((P->getPotentialPassManagerType() <
+ RequiredPass->getPotentialPassManagerType()) &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+ if (!FPP) {
+ FPP = new FunctionPassManagerImpl(0);
+ // FPP is the top level manager.
+ FPP->setTopLevelManager(FPP);
+
+ OnTheFlyManagers[P] = FPP;
+ }
+ FPP->add(RequiredPass);
+
+ // Register P as the last user of RequiredPass.
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+}
+
+/// Return the function pass corresponding to PassInfo PI that is
+/// required by module pass MP. Instantiate the analysis pass and run
+/// it on function F on demand.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+ assert(FPP && "Unable to find on the fly pass");
+
+ FPP->releaseMemoryOnTheFly();
+ FPP->run(F);
+ return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
+}
+
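+// Illustrative sketch (not part of this file): the on-the-fly machinery above
+// is what serves a module pass that requires a function level analysis.  The
+// pass name below is a placeholder; DominatorTree is the standard function
+// level analysis from llvm/Analysis/Dominators.h:
+//
+//   struct ExampleModulePass : public ModulePass {
+//     static char ID;                          // define as 0 at namespace scope
+//     ExampleModulePass() : ModulePass(ID) {}
+//     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+//       AU.addRequired<DominatorTree>();       // lower level analysis
+//     }
+//     virtual bool runOnModule(Module &M) {
+//       for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+//         if (!F->isDeclaration())
+//           getAnalysis<DominatorTree>(*F);    // served via getOnTheFlyPass()
+//       return false;
+//     }
+//   };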
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+//
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+ createDebugInfoProbe();
+
+ dumpArguments();
+ dumpPasses();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnModule(M);
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+ PM = new PassManagerImpl(0);
+ // PM is the top level manager
+ PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+ delete PM;
+}
+
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void PassManager::addImpl(Pass *P) {
+ PM->add(P);
+}
+
+/// add - Add a pass to the queue of passes to run. This passes ownership of
+/// the Pass to the PassManager. When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass. This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+ const void* PassID = P->getPassID();
+ if (ShouldPrintBeforePass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ + P->getPassName() + " ***"));
+
+ addImpl(P);
+
+ if (ShouldPrintAfterPass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ + P->getPassName() + " ***"));
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+ return PM->run(M);
+}
+
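+// Illustrative sketch (not part of this file): whole-module use of
+// PassManager.  `M` is assumed to be a valid Module& owned by the caller;
+// createPrintModulePass() is defined later in this change.
+//
+//   PassManager Passes;
+//   Passes.add(createPrintModulePass(&dbgs(), false, "*** example banner ***"));
+//   bool Modified = Passes.run(M);
+//   (void)Modified;
+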
+//===----------------------------------------------------------------------===//
+// TimingInfo Class - This class is used to calculate information about the
+// amount of time each pass takes to execute. This only happens when
+// -time-passes is enabled on the command line.
+//
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
+// a non null value (if the -time-passes option is enabled) or it leaves it
+// null. It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+ if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed before static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<TimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// If TimingInfo is enabled then return the timer for pass P, otherwise null.
+Timer *llvm::getPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ return TheTimeInfo->getPassTimer(P);
+ return 0;
+}
+
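+// Illustrative note (not part of this file): timing can be enabled either with
+// the -time-passes flag defined above, or programmatically before a pass
+// manager is run:
+//
+//   llvm::TimePassesIsEnabled = true;   // same effect as -time-passes
+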
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+ PMDataManager *Top = this->top();
+ Top->initializeAnalysisInfo();
+
+ S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+ assert(PM && "Unable to push. Pass Manager expected");
+
+ if (!this->empty()) {
+ PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+ assert(TPM && "Unable to find top level manager");
+ TPM->addIndirectPassManager(PM);
+ PM->setTopLevelManager(TPM);
+ }
+
+ S.push_back(PM);
+}
+
+// Dump content of the pass manager stack.
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
+ E = S.end(); I != E; ++I)
+ printf("%s ", (*I)->getAsPass()->getPassName());
+
+ if (!S.empty())
+ printf("\n");
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find Module Pass Manager
+ while (!PMS.empty()) {
+ PassManagerType TopPMType = PMS.top()->getPassManagerType();
+ if (TopPMType == PreferredType)
+ break; // We found desired pass manager
+ else if (TopPMType > PMT_ModulePassManager)
+ PMS.pop(); // Pop children pass managers
+ else
+ break;
+ }
+ assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+
+ // Find Function Pass Manager
+ while (!PMS.empty()) {
+ if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
+ else
+ break;
+ }
+
+ // Create new Function Pass Manager if needed.
+ FPPassManager *FPP;
+ if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
+ FPP = (FPPassManager *)PMS.top();
+ } else {
+ assert(!PMS.empty() && "Unable to create Function Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Function Pass Manager
+ FPP = new FPPassManager(PMD->getDepth() + 1);
+ FPP->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(FPP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+ // [4] Push new manager into PMS
+ PMS.push(FPP);
+ }
+
+ // Assign FPP as the manager of this pass.
+ FPP->add(this);
+}
+
+/// Find an appropriate Basic Block Pass Manager in the PM Stack
+/// and add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ BBPassManager *BBP;
+
+ // A Basic Block Pass Manager is a leaf pass manager. It does not contain
+ // any other pass manager.
+ if (!PMS.empty() &&
+ PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
+ BBP = (BBPassManager *)PMS.top();
+ } else {
+ // If the leaf manager is not a Basic Block Pass Manager then create a
+ // new Basic Block Pass Manager.
+ assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Basic Block Manager
+ BBP = new BBPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ // Basic Block Pass Manager does not live by itself
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(BBP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ BBP->assignPassManager(PMS, PreferredType);
+
+ // [4] Push new manager into PMS
+ PMS.push(BBP);
+ }
+
+ // Assign BBP as the manager of this pass.
+ BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
diff --git a/contrib/llvm/lib/VMCore/PassRegistry.cpp b/contrib/llvm/lib/VMCore/PassRegistry.cpp
new file mode 100644
index 000000000000..fa92620b288e
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/PassRegistry.cpp
@@ -0,0 +1,208 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
+
+using namespace llvm;
+
+// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful resurrection after
+// llvm_shutdown is run. Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
+static ManagedStatic<PassRegistry> PassRegistryObj;
+PassRegistry *PassRegistry::getPassRegistry() {
+ return &*PassRegistryObj;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > Lock;
+
+//===----------------------------------------------------------------------===//
+// PassRegistryImpl
+//
+
+namespace {
+struct PassRegistryImpl {
+ /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+ typedef DenseMap<const void*, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ SmallPtrSet<const PassInfo *, 8> Implementations;
+ };
+ DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+ std::vector<const PassInfo*> ToFree;
+ std::vector<PassRegistrationListener*> Listeners;
+};
+} // end anonymous namespace
+
+void *PassRegistry::getImpl() const {
+ if (!pImpl)
+ pImpl = new PassRegistryImpl();
+ return pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// Accessors
+//
+
+PassRegistry::~PassRegistry() {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
+
+ for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
+ E = Impl->ToFree.end(); I != E; ++I)
+ delete *I;
+
+ delete Impl;
+ pImpl = 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
+ return I != Impl->PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::StringMapType::const_iterator
+ I = Impl->PassInfoStringMap.find(Arg);
+ return I != Impl->PassInfoStringMap.end() ? I->second : 0;
+}
+
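+// Illustrative sketch (not part of this file): both lookup forms above are
+// commonly used by drivers, e.g. to resolve a pass named on the command line.
+// "print-module" is the argument string registered later in this change:
+//
+//   const PassInfo *PI =
+//       PassRegistry::getPassRegistry()->getPassInfo(StringRef("print-module"));
+//   if (PI) {
+//     Pass *P = PI->createPass();   // caller owns P until it is added to a PM
+//     delete P;
+//   }
+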
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ bool Inserted =
+ Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!");
+ (void)Inserted;
+ Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+ // Notify any listeners.
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
+ (*I)->passRegistered(&PI);
+
+ if (ShouldFree) Impl->ToFree.push_back(&PI);
+}
+
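+// Illustrative note (not part of this file): passes normally reach
+// registerPass() through the INITIALIZE_PASS macro from llvm/PassSupport.h,
+// exactly as PrintModulePass does later in this change (the pass name and
+// argument below are placeholders):
+//
+//   INITIALIZE_PASS(ExamplePass, "example-pass",
+//                   "An example pass", false, false)
+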
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::iterator I =
+ Impl->PassInfoMap.find(PI.getTypeInfo());
+ assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ Impl->PassInfoMap.erase(I);
+ Impl->PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
+ E = Impl->PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+ const void *PassID,
+ PassInfo& Registeree,
+ bool isDefault,
+ bool ShouldFree) {
+ PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass(Registeree);
+ InterfaceInfo = &Registeree;
+ }
+ assert(Registeree.isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::AnalysisGroupInfo &AGI =
+ Impl->AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ if (ShouldFree) Impl->ToFree.push_back(&Registeree);
+}
+
+void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ Impl->Listeners.push_back(L);
+}
+
+void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // NOTE: This is necessary, because removeRegistrationListener() can be called
+ // as part of the llvm_shutdown sequence. Since we have no control over the
+ // order of that sequence, we need to gracefully handle the case where the
+ // PassRegistry is destructed before the object that triggers this call.
+ if (!pImpl) return;
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ std::vector<PassRegistrationListener*>::iterator I =
+ std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
+ assert(I != Impl->Listeners.end() &&
+ "PassRegistrationListener not registered!");
+ Impl->Listeners.erase(I);
+}
diff --git a/contrib/llvm/lib/VMCore/PrintModulePass.cpp b/contrib/llvm/lib/VMCore/PrintModulePass.cpp
new file mode 100644
index 000000000000..1f1fbc91bc31
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/PrintModulePass.cpp
@@ -0,0 +1,101 @@
+//===--- VMCore/PrintModulePass.cpp - Module/Function Printer -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PrintModulePass and PrintFunctionPass implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/PrintModulePass.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class PrintModulePass : public ModulePass {
+ std::string Banner;
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintModulePass() : ModulePass(ID), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
+ : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintModulePass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnModule(Module &M) {
+ (*Out) << Banner << M;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintFunctionPass : public FunctionPass {
+ std::string Banner; // String to print before each function
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
+ : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintFunctionPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ // runOnFunction - This pass just prints a banner followed by the
+ // function as it's processed.
+ //
+ bool runOnFunction(Function &F) {
+ (*Out) << Banner << static_cast<Value&>(F);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char PrintModulePass::ID = 0;
+INITIALIZE_PASS(PrintModulePass, "print-module",
+ "Print module to stderr", false, false)
+char PrintFunctionPass::ID = 0;
+INITIALIZE_PASS(PrintFunctionPass, "print-function",
+ "Print function to stderr", false, false)
+
+/// createPrintModulePass - Create and return a pass that writes the
+/// module to the specified raw_ostream.
+ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,
+ bool DeleteStream,
+ const std::string &Banner) {
+ return new PrintModulePass(Banner, OS, DeleteStream);
+}
+
+/// createPrintFunctionPass - Create and return a pass that prints
+/// functions to the specified raw_ostream as they are processed.
+FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner,
+ llvm::raw_ostream *OS,
+ bool DeleteStream) {
+ return new PrintFunctionPass(Banner, OS, DeleteStream);
+}
+
diff --git a/contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h
new file mode 100644
index 000000000000..72687bb5e0b2
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h
@@ -0,0 +1,118 @@
+//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stickier parts of the SymbolTableListTraits class,
+// and is explicitly instantiated where needed to avoid defining all this code
+// in a widely used header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+
+#include "llvm/SymbolTableListTraits.h"
+#include "llvm/ValueSymbolTable.h"
+
+namespace llvm {
+
+/// setSymTabObject - This is called when (e.g.) the parent of a basic block
+/// changes. This requires us to remove all the instruction symtab entries from
+/// the current function and reinsert them into the new function.
+template<typename ValueSubClass, typename ItemParentClass>
+template<typename TPtr>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::setSymTabObject(TPtr *Dest, TPtr Src) {
+ // Get the old symtab and value list before doing the assignment.
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+
+ // Do it.
+ *Dest = Src;
+
+ // Get the new SymTab object.
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+
+ // If there is nothing to do, quick exit.
+ if (OldST == NewST) return;
+
+ // Move all the elements from the old symtab to the new one.
+ iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+ if (ItemList.empty()) return;
+
+ if (OldST) {
+ // Remove all entries from the previous symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ OldST->removeValueName(I->getValueName());
+ }
+
+ if (NewST) {
+ // Add all of the items to the new symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ NewST->reinsertValue(I);
+ }
+
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::addNodeToList(ValueSubClass *V) {
+ assert(V->getParent() == 0 && "Value already in a container!!");
+ ItemParentClass *Owner = getListOwner();
+ V->setParent(Owner);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+ ST->reinsertValue(V);
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::removeNodeFromList(ValueSubClass *V) {
+ V->setParent(0);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+ ST->removeValueName(V->getValueName());
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last) {
+ // We only have to do work here if transferring instructions between BBs
+ ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
+ if (NewIP == OldIP) return; // No work to do at all...
+
+ // We only have to update symbol table entries if we are transferring the
+ // instructions to a different symtab object...
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
+ if (NewST != OldST) {
+ for (; first != last; ++first) {
+ ValueSubClass &V = *first;
+ bool HasName = V.hasName();
+ if (OldST && HasName)
+ OldST->removeValueName(V.getValueName());
+ V.setParent(NewIP);
+ if (NewST && HasName)
+ NewST->reinsertValue(&V);
+ }
+ } else {
+ // Just transferring between blocks in the same function, simply update the
+ // parent fields in the instructions...
+ for (; first != last; ++first)
+ first->setParent(NewIP);
+ }
+}
+
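+// Illustrative sketch (not part of this header): transferNodesFromList() above
+// is what fires when instructions are spliced between two basic blocks, e.g.
+// (From and To are placeholder BasicBlock*):
+//
+//   To->getInstList().splice(To->end(), From->getInstList(),
+//                            From->begin(), From->end());
+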
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/Type.cpp b/contrib/llvm/lib/VMCore/Type.cpp
new file mode 100644
index 000000000000..f874d1b28302
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Type.cpp
@@ -0,0 +1,687 @@
+//===-- Type.cpp - Implement the Type class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Type class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Module.h"
+#include <algorithm>
+#include <cstdarg>
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Type Class Implementation
+//===----------------------------------------------------------------------===//
+
+Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
+ switch (IDNumber) {
+ case VoidTyID : return getVoidTy(C);
+ case FloatTyID : return getFloatTy(C);
+ case DoubleTyID : return getDoubleTy(C);
+ case X86_FP80TyID : return getX86_FP80Ty(C);
+ case FP128TyID : return getFP128Ty(C);
+ case PPC_FP128TyID : return getPPC_FP128Ty(C);
+ case LabelTyID : return getLabelTy(C);
+ case MetadataTyID : return getMetadataTy(C);
+ case X86_MMXTyID : return getX86_MMXTy(C);
+ default:
+ return 0;
+ }
+}
+
+/// getScalarType - If this is a vector type, return the element type,
+/// otherwise return this.
+const Type *Type::getScalarType() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
+/// isIntegerTy - Return true if this is an IntegerType of the specified width.
+bool Type::isIntegerTy(unsigned Bitwidth) const {
+ return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
+}
+
+/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of
+/// integer types.
+///
+bool Type::isIntOrIntVectorTy() const {
+ if (isIntegerTy())
+ return true;
+ if (ID != Type::VectorTyID) return false;
+
+ return cast<VectorType>(this)->getElementType()->isIntegerTy();
+}
+
+/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types.
+///
+bool Type::isFPOrFPVectorTy() const {
+ if (ID == Type::FloatTyID || ID == Type::DoubleTyID ||
+ ID == Type::FP128TyID || ID == Type::X86_FP80TyID ||
+ ID == Type::PPC_FP128TyID)
+ return true;
+ if (ID != Type::VectorTyID) return false;
+
+ return cast<VectorType>(this)->getElementType()->isFloatingPointTy();
+}
+
+// canLosslesslyBitCastTo - Return true if this type can be converted to
+// 'Ty' without any reinterpretation of bits. For example, i8* to i32*.
+//
+bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
+ // Identity cast means no change so return true
+ if (this == Ty)
+ return true;
+
+ // They are not convertible unless they are at least first class types
+ if (!this->isFirstClassType() || !Ty->isFirstClassType())
+ return false;
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not. Also, 64-bit vector types can be
+ // converted to x86mmx.
+ if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+ if (Ty->getTypeID() == Type::X86_MMXTyID &&
+ thisPTy->getBitWidth() == 64)
+ return true;
+ }
+
+ if (this->getTypeID() == Type::X86_MMXTyID)
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (thatPTy->getBitWidth() == 64)
+ return true;
+
+ // At this point we have only various mismatches of the first class types
+ // remaining and ptr->ptr. Just select the lossless conversions. Everything
+ // else is not lossless.
+ if (this->isPointerTy())
+ return Ty->isPointerTy();
+ return false; // Other types have no identity values
+}
+
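+// Illustrative note (not part of this file): for example, with an LLVMContext
+// C, a <2 x i32> vector can be losslessly bitcast to a <4 x i16> vector since
+// both are 64 bits wide (needs llvm/DerivedTypes.h):
+//
+//   Type *V2i32 = VectorType::get(Type::getInt32Ty(C), 2);
+//   Type *V4i16 = VectorType::get(Type::getInt16Ty(C), 4);
+//   bool OK = V2i32->canLosslesslyBitCastTo(V4i16);   // true
+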
+bool Type::isEmptyTy() const {
+ const ArrayType *ATy = dyn_cast<ArrayType>(this);
+ if (ATy) {
+ unsigned NumElements = ATy->getNumElements();
+ return NumElements == 0 || ATy->getElementType()->isEmptyTy();
+ }
+
+ const StructType *STy = dyn_cast<StructType>(this);
+ if (STy) {
+ unsigned NumElements = STy->getNumElements();
+ for (unsigned i = 0; i < NumElements; ++i)
+ if (!STy->getElementType(i)->isEmptyTy())
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+unsigned Type::getPrimitiveSizeInBits() const {
+ switch (getTypeID()) {
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::X86_FP80TyID: return 80;
+ case Type::FP128TyID: return 128;
+ case Type::PPC_FP128TyID: return 128;
+ case Type::X86_MMXTyID: return 64;
+ case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
+ case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
+ default: return 0;
+ }
+}
+
+/// getScalarSizeInBits - If this is a vector type, return the
+/// getPrimitiveSizeInBits value for the element type. Otherwise return the
+/// getPrimitiveSizeInBits value for this type.
+unsigned Type::getScalarSizeInBits() const {
+ return getScalarType()->getPrimitiveSizeInBits();
+}
+
+/// getFPMantissaWidth - Return the width of the mantissa of this type. This
+/// is only valid on floating point types. If the FP type does not
+/// have a stable mantissa (e.g. ppc long double), this method returns -1.
+int Type::getFPMantissaWidth() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->getFPMantissaWidth();
+ assert(isFloatingPointTy() && "Not a floating point type!");
+ if (ID == FloatTyID) return 24;
+ if (ID == DoubleTyID) return 53;
+ if (ID == X86_FP80TyID) return 64;
+ if (ID == FP128TyID) return 113;
+ assert(ID == PPC_FP128TyID && "unknown fp type");
+ return -1;
+}
+
+/// isSizedDerivedType - Derived types like structures and arrays are sized
+/// iff all of the members of the type are sized as well. Since asking for
+/// their size is relatively uncommon, move this operation out of line.
+bool Type::isSizedDerivedType() const {
+ if (this->isIntegerTy())
+ return true;
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isSized();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->isSized();
+
+ if (!this->isStructTy())
+ return false;
+
+ // Opaque structs have no size.
+ if (cast<StructType>(this)->isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are.
+ for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ return true;
+}
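+
+// Illustrative sketch of the sizedness rules above (assumes an LLVMContext
+// 'Ctx' is in scope):
+//
+//   StructType *S = StructType::createNamed(Ctx, "box");
+//   S->isSized();                              // false: opaque, no body yet
+//   Type *Fields[] = { Type::getInt32Ty(Ctx), Type::getDoubleTy(Ctx) };
+//   S->setBody(Fields, /*isPacked=*/false);
+//   S->isSized();                              // true: every field is sized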
+
+//===----------------------------------------------------------------------===//
+// Primitive 'Type' data
+//===----------------------------------------------------------------------===//
+
+Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
+Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
+Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
+Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
+Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
+Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
+Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+
+IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
+IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
+IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
+IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
+IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+
+IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+ return IntegerType::get(C, N);
+}
+
+PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+ return getFloatTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+ return getDoubleTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_FP80Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getFP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getPPC_FP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_MMXTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+ return getIntNTy(C, N)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt1Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt8Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt16Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt32Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt64Ty(C)->getPointerTo(AS);
+}
+
+
+//===----------------------------------------------------------------------===//
+// IntegerType Implementation
+//===----------------------------------------------------------------------===//
+
+IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
+ assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+ assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+
+ // Check for the built-in integer types
+ switch (NumBits) {
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
+ }
+
+ IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
+
+ if (Entry == 0)
+ Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+
+ return Entry;
+}
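+
+// Usage sketch (assumes an LLVMContext 'Ctx'): the common widths come back as
+// the context's built-in singletons, and odd widths are created on demand and
+// uniqued, so repeated requests return the same object:
+//
+//   IntegerType::get(Ctx, 32) == Type::getInt32Ty(Ctx);   // always true
+//   IntegerType *A = IntegerType::get(Ctx, 17);
+//   IntegerType *B = IntegerType::get(Ctx, 17);
+//   // A == B: non-standard widths are uniqued per context as well.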
+
+bool IntegerType::isPowerOf2ByteWidth() const {
+ unsigned BitWidth = getBitWidth();
+ return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+}
+
+APInt IntegerType::getMask() const {
+ return APInt::getAllOnesValue(getBitWidth());
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionType Implementation
+//===----------------------------------------------------------------------===//
+
+FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params,
+ bool IsVarArgs)
+ : Type(Result->getContext(), FunctionTyID) {
+ Type **SubTys = reinterpret_cast<Type**>(this+1);
+ assert(isValidReturnType(Result) && "invalid return type for function");
+ setSubclassData(IsVarArgs);
+
+ SubTys[0] = const_cast<Type*>(Result);
+
+ for (unsigned i = 0, e = Params.size(); i != e; ++i) {
+ assert(isValidArgumentType(Params[i]) &&
+ "Not a valid type for function argument!");
+ SubTys[i+1] = Params[i];
+ }
+
+ ContainedTys = SubTys;
+ NumContainedTys = Params.size() + 1; // + 1 for result type
+}
+
+// FunctionType::get - The factory function for the FunctionType class.
+FunctionType *FunctionType::get(const Type *ReturnType,
+ ArrayRef<Type*> Params, bool isVarArg) {
+ // TODO: This is brutally slow.
+ std::vector<Type*> Key;
+ Key.reserve(Params.size()+2);
+ Key.push_back(const_cast<Type*>(ReturnType));
+ for (unsigned i = 0, e = Params.size(); i != e; ++i)
+ Key.push_back(const_cast<Type*>(Params[i]));
+ if (isVarArg)
+ Key.push_back(0);
+
+ LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+ FunctionType *&FT = pImpl->FunctionTypes[Key];
+
+ if (FT == 0) {
+ FT = (FunctionType*) pImpl->TypeAllocator.
+ Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1),
+ AlignOf<FunctionType>::Alignment);
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ }
+
+ return FT;
+}
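+
+// Usage sketch (assumes an LLVMContext 'Ctx'): building the type of a
+// printf-like function, i32 (i8*, ...).  The result is uniqued, so an
+// identical request returns the same FunctionType pointer:
+//
+//   Type *ArgTys[] = { Type::getInt8PtrTy(Ctx) };
+//   FunctionType *FT =
+//       FunctionType::get(Type::getInt32Ty(Ctx), ArgTys, /*isVarArg=*/true);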
+
+
+FunctionType *FunctionType::get(const Type *Result, bool isVarArg) {
+ return get(Result, ArrayRef<Type *>(), isVarArg);
+}
+
+
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(const Type *RetTy) {
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
+}
+
+/// isValidArgumentType - Return true if the specified type is valid as an
+/// argument type.
+bool FunctionType::isValidArgumentType(const Type *ArgTy) {
+ return ArgTy->isFirstClassType();
+}
+
+//===----------------------------------------------------------------------===//
+// StructType Implementation
+//===----------------------------------------------------------------------===//
+
+// Primitive Constructors.
+
+StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
+ bool isPacked) {
+ // FIXME: std::vector is horribly inefficient for this probe.
+ std::vector<Type*> Key;
+ for (unsigned i = 0, e = ETypes.size(); i != e; ++i) {
+ assert(isValidElementType(ETypes[i]) &&
+ "Invalid type for structure element!");
+ Key.push_back(ETypes[i]);
+ }
+ if (isPacked)
+ Key.push_back(0);
+
+ StructType *&ST = Context.pImpl->AnonStructTypes[Key];
+ if (ST) return ST;
+
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsAnonymous); // Anonymous struct.
+ ST->setBody(ETypes, isPacked);
+ return ST;
+}
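+
+// Usage sketch (assumes an LLVMContext 'Ctx'): anonymous structs are uniqued
+// by their element list and packedness, so structurally identical requests
+// return the same StructType:
+//
+//   Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
+//   StructType *A = StructType::get(Ctx, Elts, /*isPacked=*/false);
+//   StructType *B = StructType::get(Ctx, Elts, /*isPacked=*/false);
+//   // A == B, and A->isAnonymous() is true.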
+
+void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+ assert(isOpaque() && "Struct body already set!");
+
+ setSubclassData(getSubclassData() | SCDB_HasBody);
+ if (isPacked)
+ setSubclassData(getSubclassData() | SCDB_Packed);
+
+ Type **Elts = getContext().pImpl->
+ TypeAllocator.Allocate<Type*>(Elements.size());
+ memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size());
+
+ ContainedTys = Elts;
+ NumContainedTys = Elements.size();
+}
+
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) {
+ StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ if (!Name.empty())
+ ST->setName(Name);
+ return ST;
+}
+
+void StructType::setName(StringRef Name) {
+ if (Name == getName()) return;
+
+ // If this struct already had a name, remove its symbol table entry.
+ if (SymbolTableEntry) {
+ getContext().pImpl->NamedStructTypes.erase(getName());
+ SymbolTableEntry = 0;
+ }
+
+ // If this is just removing the name, we're done.
+ if (Name.empty())
+ return;
+
+ // Look up the entry for the name.
+ StringMapEntry<StructType*> *Entry =
+ &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+
+ // While we have a name collision, rename by appending a unique numeric suffix.
+ if (Entry->getValue()) {
+ SmallString<64> TempStr(Name);
+ TempStr.push_back('.');
+ raw_svector_ostream TmpStream(TempStr);
+
+ do {
+ TempStr.resize(Name.size()+1);
+ TmpStream.resync();
+ TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
+
+ Entry = &getContext().pImpl->
+ NamedStructTypes.GetOrCreateValue(TmpStream.str());
+ } while (Entry->getValue());
+ }
+
+ // Okay, we found an entry that isn't used. It's us!
+ Entry->setValue(this);
+
+ SymbolTableEntry = Entry;
+}
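+
+// Illustrative sketch of the collision handling above (assumes an LLVMContext
+// 'Ctx'); the exact numeric suffix depends on the context's counter, so the
+// "foo.0" shown here is only an example:
+//
+//   StructType *A = StructType::createNamed(Ctx, "foo");  // named "foo"
+//   StructType *B = StructType::createNamed(Ctx, "foo");  // renamed, e.g. "foo.0"
+//   // A->getName() == "foo", B->getName() != "foo".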
+
+//===----------------------------------------------------------------------===//
+// StructType Helper functions.
+
+StructType *StructType::get(LLVMContext &Context, bool isPacked) {
+ return get(Context, llvm::ArrayRef<Type*>(), isPacked);
+}
+
+StructType *StructType::get(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::get(Ctx, StructFields);
+}
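+
+// Usage sketch: this variadic overload walks the va_list until it sees a null
+// Type*, so the argument list must be terminated with NULL (assumes an
+// LLVMContext 'Ctx'):
+//
+//   StructType *Pair =
+//       StructType::get(Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx), NULL);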
+
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name,
+ ArrayRef<Type*> Elements, bool isPacked) {
+ StructType *ST = createNamed(Context, Name);
+ ST->setBody(Elements, isPacked);
+ return ST;
+}
+
+StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements,
+ bool isPacked) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return createNamed(Elements[0]->getContext(), Name, Elements, isPacked);
+}
+
+StructType *StructType::createNamed(StringRef Name, Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::createNamed(Ctx, Name, StructFields);
+}
+
+StringRef StructType::getName() const {
+ assert(!isAnonymous() && "Anonymous structs never have names");
+ if (SymbolTableEntry == 0) return StringRef();
+
+ return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
+}
+
+void StructType::setBody(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ setBody(StructFields);
+}
+
+bool StructType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+/// isLayoutIdentical - Return true if this is layout identical to the
+/// specified struct.
+bool StructType::isLayoutIdentical(const StructType *Other) const {
+ if (this == Other) return true;
+
+ if (isPacked() != Other->isPacked() ||
+ getNumElements() != Other->getNumElements())
+ return false;
+
+ return std::equal(element_begin(), element_end(), Other->element_begin());
+}
+
+
+/// getTypeByName - Return the type with the specified name, or null if there
+/// is none by that name.
+StructType *Module::getTypeByName(StringRef Name) const {
+ StringMap<StructType*>::iterator I =
+ getContext().pImpl->NamedStructTypes.find(Name);
+ if (I != getContext().pImpl->NamedStructTypes.end())
+ return I->second;
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CompositeType Implementation
+//===----------------------------------------------------------------------===//
+
+Type *CompositeType::getTypeAtIndex(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ // Structure indexes require 32-bit integer constants.
+ if (V->getType()->isIntegerTy(32))
+ if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+ return CU->getZExtValue() < STy->getNumElements();
+ return false;
+ }
+
+ // Sequential types can be indexed by any integer.
+ return V->getType()->isIntegerTy();
+}
+
+bool CompositeType::indexValid(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this))
+ return Idx < STy->getNumElements();
+ // Sequential types can be indexed by any integer.
+ return true;
+}
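+
+// Illustrative sketch of the indexing rules above (assumes an LLVMContext
+// 'Ctx'): struct indices must be in-range constants, while sequential types
+// accept any integer index and always yield the element type:
+//
+//   Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getDoubleTy(Ctx) };
+//   StructType *ST = StructType::get(Ctx, Elts, /*isPacked=*/false);
+//   ST->indexValid(1u);        // true
+//   ST->indexValid(2u);        // false: only two elements
+//   ST->getTypeAtIndex(1u);    // the double type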
+
+
+//===----------------------------------------------------------------------===//
+// ArrayType Implementation
+//===----------------------------------------------------------------------===//
+
+ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
+ : SequentialType(ArrayTyID, ElType) {
+ NumElements = NumEl;
+}
+
+
+ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(isValidElementType(ElementType) && "Invalid type for array element!");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ ArrayType *&Entry =
+ pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+ return Entry;
+}
+
+bool ArrayType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+//===----------------------------------------------------------------------===//
+// VectorType Implementation
+//===----------------------------------------------------------------------===//
+
+VectorType::VectorType(Type *ElType, unsigned NumEl)
+ : SequentialType(VectorTyID, ElType) {
+ NumElements = NumEl;
+}
+
+VectorType *VectorType::get(const Type *elementType, unsigned NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+ assert(isValidElementType(ElementType) &&
+ "Elements of a VectorType must be a primitive type");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ VectorType *&Entry = ElementType->getContext().pImpl
+ ->VectorTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+ return Entry;
+}
+
+bool VectorType::isValidElementType(const Type *ElemTy) {
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
+}
+
+//===----------------------------------------------------------------------===//
+// PointerType Implementation
+//===----------------------------------------------------------------------===//
+
+PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) {
+ Type *EltTy = const_cast<Type*>(eltTy);
+ assert(EltTy && "Can't get a pointer to <null> type!");
+ assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+ LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
+
+ // Since AddressSpace #0 is the common case, we special case it.
+ PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
+ : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
+
+ if (Entry == 0)
+ Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+ return Entry;
+}
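+
+// Usage sketch (assumes an LLVMContext 'Ctx'): pointer types are uniqued per
+// (pointee, address space) pair, with address space 0 kept in its own map as
+// the common case:
+//
+//   PointerType *P0 = Type::getInt8PtrTy(Ctx);     // i8* in addrspace(0)
+//   PointerType *P1 = Type::getInt8PtrTy(Ctx, 1);  // i8 addrspace(1)*
+//   // P0 != P1: different address spaces give distinct types.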
+
+
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+ : SequentialType(PointerTyID, E) {
+ setSubclassData(AddrSpace);
+}
+
+PointerType *Type::getPointerTo(unsigned addrs) const {
+ return PointerType::get(this, addrs);
+}
+
+bool PointerType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
+}
diff --git a/contrib/llvm/lib/VMCore/Use.cpp b/contrib/llvm/lib/VMCore/Use.cpp
new file mode 100644
index 000000000000..359a1517ab79
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Use.cpp
@@ -0,0 +1,144 @@
+//===-- Use.cpp - Implement the Use class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the algorithm for finding the User of a Use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Value.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Use swap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::swap(Use &RHS) {
+ Value *V1(Val);
+ Value *V2(RHS.Val);
+ if (V1 != V2) {
+ if (V1) {
+ removeFromList();
+ }
+
+ if (V2) {
+ RHS.removeFromList();
+ Val = V2;
+ V2->addUse(*this);
+ } else {
+ Val = 0;
+ }
+
+ if (V1) {
+ RHS.Val = V1;
+ V1->addUse(RHS);
+ } else {
+ RHS.Val = 0;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use getImpliedUser Implementation
+//===----------------------------------------------------------------------===//
+
+const Use *Use::getImpliedUser() const {
+ const Use *Current = this;
+
+ while (true) {
+ unsigned Tag = (Current++)->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ continue;
+
+ case stopTag: {
+ ++Current;
+ ptrdiff_t Offset = 1;
+ while (true) {
+ unsigned Tag = Current->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ ++Current;
+ Offset = (Offset << 1) + Tag;
+ continue;
+ default:
+ return Current + Offset;
+ }
+ }
+ }
+
+ case fullStopTag:
+ return Current;
+ }
+ }
+}
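+
+// A brief sketch of the "waymarking" scheme decoded above: the Uses of a User
+// are allocated in an array immediately before the User object, and instead of
+// storing a back-pointer in every Use, two spare bits of each Use's Prev
+// pointer hold a tag.  Walking towards higher addresses, a fullStopTag means
+// the User sits directly past the current Use, while a stopTag says that the
+// digit tags which follow encode, in binary with an implicit leading one, how
+// many Use slots remain between the end of that digit run and the User.
+// getImpliedUser() replays that encoding; initTags() below is what writes it
+// when a Use array is laid out.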
+
+//===----------------------------------------------------------------------===//
+// Use initTags Implementation
+//===----------------------------------------------------------------------===//
+
+Use *Use::initTags(Use * const Start, Use *Stop) {
+ ptrdiff_t Done = 0;
+ while (Done < 20) {
+ if (Start == Stop--)
+ return Start;
+ static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, stopTag,
+ zeroDigitTag, oneDigitTag, oneDigitTag,
+ stopTag, zeroDigitTag, oneDigitTag,
+ zeroDigitTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, oneDigitTag,
+ oneDigitTag, stopTag
+ };
+ new(Stop) Use(tags[Done++]);
+ }
+
+ ptrdiff_t Count = Done;
+ while (Start != Stop) {
+ --Stop;
+ if (!Count) {
+ new(Stop) Use(stopTag);
+ ++Done;
+ Count = Done;
+ } else {
+ new(Stop) Use(PrevPtrTag(Count & 1));
+ Count >>= 1;
+ ++Done;
+ }
+ }
+
+ return Start;
+}
+
+//===----------------------------------------------------------------------===//
+// Use zap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::zap(Use *Start, const Use *Stop, bool del) {
+ while (Start != Stop)
+ (--Stop)->~Use();
+ if (del)
+ ::operator delete(Start);
+}
+
+//===----------------------------------------------------------------------===//
+// Use getUser Implementation
+//===----------------------------------------------------------------------===//
+
+User *Use::getUser() const {
+ const Use *End = getImpliedUser();
+ const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+ return ref->getInt()
+ ? ref->getPointer()
+ : (User*)End;
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/VMCore/User.cpp b/contrib/llvm/lib/VMCore/User.cpp
new file mode 100644
index 000000000000..f01fa349adfd
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/User.cpp
@@ -0,0 +1,79 @@
+//===-- User.cpp - Implement the User class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constant.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/User.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// User Class
+//===----------------------------------------------------------------------===//
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+ if (From == To) return; // Replacing a value with itself is a no-op.
+
+ assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+ "Cannot call User::replaceUsesOfWith on a constant!");
+
+ for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+ if (getOperand(i) == From) { // Is this operand pointing to oldval?
+ // The side effects of this setOperand call include linking to
+ // "To", adding "this" to the uses list of To, and
+ // most importantly, removing "this" from the use list of "From".
+ setOperand(i, To); // Fix it now...
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+ // the User.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+// User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+ void *Storage = ::operator new(s + sizeof(Use) * Us);
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
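+
+// Memory layout produced by the allocator above (sketch): the Use array is
+// co-allocated directly in front of the User object, which is what lets
+// Use::getUser() recover the User by walking forward through the waymark tags
+// instead of storing a per-Use back-pointer.
+//
+//   |--- Use[0] ... Use[Us-1] ---|--- User (and its subclass data) ---|
+//   ^ Start                      ^ End == pointer returned to the caller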
+
+//===----------------------------------------------------------------------===//
+// User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+ User *Start = static_cast<User*>(Usr);
+ Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+ // If there were hung-off uses, they will have been freed already and
+ // NumOperands reset to 0, so here we just free the User itself.
+ ::operator delete(Storage);
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/VMCore/Value.cpp b/contrib/llvm/lib/VMCore/Value.cpp
new file mode 100644
index 000000000000..f1815e377edc
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Value.cpp
@@ -0,0 +1,632 @@
+//===-- Value.cpp - Implement the Value class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Value, ValueHandle, and User classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Value Class
+//===----------------------------------------------------------------------===//
+
+static inline Type *checkType(const Type *Ty) {
+ assert(Ty && "Value defined with a null type: Error!");
+ return const_cast<Type*>(Ty);
+}
+
+Value::Value(const Type *ty, unsigned scid)
+ : SubclassID(scid), HasValueHandle(0),
+ SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
+ UseList(0), Name(0) {
+ // FIXME: Why isn't this in the subclass gunk??
+ if (isa<CallInst>(this) || isa<InvokeInst>(this))
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
+ "invalid CallInst type!");
+ else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
+ assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
+ "Cannot create non-first-class values except for constants!");
+}
+
+Value::~Value() {
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsDeleted(this);
+
+#ifndef NDEBUG // Only in +Asserts mode...
+ // Check to make sure that there are no uses of this value that are still
+ // around when the value is destroyed. If there are, then we have a dangling
+ // reference and something is wrong. This code is here to print out what is
+ // still being referenced. The value in question should be printed as
+ // a <badref>
+ //
+ if (!use_empty()) {
+ dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ dbgs() << "Use still stuck around after Def is destroyed:"
+ << **I << "\n";
+ }
+#endif
+ assert(use_empty() && "Uses remain when a value is destroyed!");
+
+ // If this value is named, destroy the name. This should not be in a symtab
+ // at this point.
+ if (Name)
+ Name->Destroy();
+
+ // There should be no uses of this object anymore, remove it.
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// hasNUses - Return true if this Value has exactly N users.
+///
+bool Value::hasNUses(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+ return UI == E;
+}
+
+/// hasNUsesOrMore - Return true if this value has N users or more. This is
+/// logically equivalent to getNumUses() >= N.
+///
+bool Value::hasNUsesOrMore(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+
+ return true;
+}
+
+/// isUsedInBasicBlock - Return true if this value is used in the specified
+/// basic block.
+bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const Instruction *User = dyn_cast<Instruction>(*I);
+ if (User && User->getParent() == BB)
+ return true;
+ }
+ return false;
+}
+
+
+/// getNumUses - This method computes the number of uses of this Value. This
+/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
+/// values.
+unsigned Value::getNumUses() const {
+ return (unsigned)std::distance(use_begin(), use_end());
+}
+
+static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
+ ST = 0;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (BasicBlock *P = I->getParent())
+ if (Function *PP = P->getParent())
+ ST = &PP->getValueSymbolTable();
+ } else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ if (Function *P = BB->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (Module *P = GV->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (Argument *A = dyn_cast<Argument>(V)) {
+ if (Function *P = A->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (isa<MDString>(V))
+ return true;
+ else {
+ assert(isa<Constant>(V) && "Unknown value type!");
+ return true; // no name is settable for this.
+ }
+ return false;
+}
+
+StringRef Value::getName() const {
+ // Make sure the empty string is still a C string. For historical reasons,
+ // some clients want to call .data() on the result and expect it to be null
+ // terminated.
+ if (!Name) return StringRef("", 0);
+ return Name->getKey();
+}
+
+std::string Value::getNameStr() const {
+ return getName().str();
+}
+
+void Value::setName(const Twine &NewName) {
+ // Fast path for common IRBuilder case of setName("") when there is no name.
+ if (NewName.isTriviallyEmpty() && !hasName())
+ return;
+
+ SmallString<256> NameData;
+ StringRef NameRef = NewName.toStringRef(NameData);
+
+ // Name isn't changing?
+ if (getName() == NameRef)
+ return;
+
+ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
+
+ // Get the symbol table to update for this object.
+ ValueSymbolTable *ST;
+ if (getSymTab(this, ST))
+ return; // Cannot set a name on this value (e.g. constant).
+
+ if (!ST) { // No symbol table to update? Just do the change.
+ if (NameRef.empty()) {
+ // Free the name for this value.
+ Name->Destroy();
+ Name = 0;
+ return;
+ }
+
+ if (Name)
+ Name->Destroy();
+
+ // NOTE: Could optimize the case where the name is shrinking by reusing the
+ // existing allocation instead of deallocating and then reallocating.
+
+ // Create the new name.
+ Name = ValueName::Create(NameRef.begin(), NameRef.end());
+ Name->setValue(this);
+ return;
+ }
+
+ // NOTE: Could optimize the case where the name is shrinking by reusing the
+ // existing allocation instead of deallocating and then reallocating.
+ if (hasName()) {
+ // Remove old name.
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+
+ if (NameRef.empty())
+ return;
+ }
+
+ // Name is changing to something new.
+ Name = ST->createValueName(NameRef, this);
+}
+
+
+/// takeName - transfer the name from V to this value, setting V's name to
+/// empty. It is an error to call V->takeName(V).
+void Value::takeName(Value *V) {
+ ValueSymbolTable *ST = 0;
+ // If this value has a name, drop it.
+ if (hasName()) {
+ // Get the symtab this is in.
+ if (getSymTab(this, ST)) {
+ // We can't set a name on this value, but we need to clear V's name if
+ // it has one.
+ if (V->hasName()) V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+
+ // Remove old name.
+ if (ST)
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+ }
+
+ // Now we know that this has no name.
+
+ // If V has no name either, we're done.
+ if (!V->hasName()) return;
+
+ // Get this's symtab if we didn't before.
+ if (!ST) {
+ if (getSymTab(this, ST)) {
+ // Clear V's name.
+ V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+ }
+
+ // Get V's ST; this should always succeed because V has a name.
+ ValueSymbolTable *VST;
+ bool Failure = getSymTab(V, VST);
+ assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
+
+ // If these values are both in the same symtab, we can do this very fast.
+ // This works even if both values have no symtab yet.
+ if (ST == VST) {
+ // Take the name!
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+ return;
+ }
+
+ // Otherwise, things are slightly more complex. Remove V's name from VST and
+ // then reinsert it into ST.
+
+ if (VST)
+ VST->removeValueName(V->Name);
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+
+ if (ST)
+ ST->reinsertValue(this);
+}
+
+
+void Value::replaceAllUsesWith(Value *New) {
+ assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+ assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(New->getType() == getType() &&
+ "replaceAllUses of value with new value of different type!");
+
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsRAUWd(this, New);
+
+ while (!use_empty()) {
+ Use &U = *UseList;
+ // Must handle Constants specially; we cannot call replaceUsesOfWith on a
+ // constant because constants are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ C->replaceUsesOfWithOnConstant(this, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ }
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+ BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
+}
+
+Value *Value::stripPointerCasts() {
+ if (!getType()->isPointerTy())
+ return this;
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+
+ Value *V = this;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ return V;
+}
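+
+// Usage sketch: peeling casts off a pointer to reach the underlying object.
+// 'Ptr' here is a hypothetical Value* of pointer type, e.g. the pointer
+// operand of a load instruction:
+//
+//   Value *Underlying = Ptr->stripPointerCasts();
+//   // For "bitcast ([4 x i8]* @g to i8*)" this returns @g; a GEP with a
+//   // non-zero index or an overridable alias stops the walk instead.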
+
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+ // It's also not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. Some cases could
+ // be handled using TargetData to check sizes and alignments though.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(this)) return true;
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are ok.
+ if (const Argument *A = dyn_cast<Argument>(this))
+ return A->hasByValAttr();
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(this)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!GEP->getOperand(0)->isDereferenceablePointer())
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ const Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+ // Indices check out; this is dereferenceable.
+ return true;
+ }
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
+/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+/// return the value in the PHI node corresponding to PredBB. If not, return
+/// ourself. This is useful if you want to know the value something has in a
+/// predecessor block.
+Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) {
+ PHINode *PN = dyn_cast<PHINode>(this);
+ if (PN && PN->getParent() == CurBB)
+ return PN->getIncomingValueForBlock(PredBB);
+ return this;
+}
+
+LLVMContext &Value::getContext() const { return VTy->getContext(); }
+
+//===----------------------------------------------------------------------===//
+// ValueHandleBase Class
+//===----------------------------------------------------------------------===//
+
+/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
+/// List is known to point into the existing use list.
+void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
+ assert(List && "Handle list is null?");
+
+ // Splice ourselves into the list.
+ Next = *List;
+ *List = this;
+ setPrevPtr(List);
+ if (Next) {
+ Next->setPrevPtr(&Next);
+ assert(VP == Next->VP && "Added to wrong list?");
+ }
+}
+
+void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
+ assert(List && "Must insert after existing node");
+
+ Next = List->Next;
+ setPrevPtr(&List->Next);
+ List->Next = this;
+ if (Next)
+ Next->setPrevPtr(&Next);
+}
+
+/// AddToUseList - Add this ValueHandle to the use list for VP.
+void ValueHandleBase::AddToUseList() {
+ assert(VP && "Null pointer doesn't have a use list!");
+
+ LLVMContextImpl *pImpl = VP->getContext().pImpl;
+
+ if (VP->HasValueHandle) {
+ // If this value already has a ValueHandle, then it must be in the
+ // ValueHandles map already.
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP];
+ assert(Entry != 0 && "Value doesn't have any handles?");
+ AddToExistingUseList(&Entry);
+ return;
+ }
+
+ // Ok, it doesn't have any handles yet, so we must insert it into the
+ // DenseMap. However, doing this insertion could cause the DenseMap to
+ // reallocate itself, which would invalidate all of the PrevP pointers that
+ // point into the old table. Handle this by checking for reallocation and
+ // updating the stale pointers only if needed.
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
+
+ ValueHandleBase *&Entry = Handles[VP];
+ assert(Entry == 0 && "Value really did already have handles?");
+ AddToExistingUseList(&Entry);
+ VP->HasValueHandle = true;
+
+ // If reallocation didn't happen or if this was the first insertion, don't
+ // walk the table.
+ if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
+ Handles.size() == 1) {
+ return;
+ }
+
+ // Okay, reallocation did happen. Fix the Prev Pointers.
+ for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
+ E = Handles.end(); I != E; ++I) {
+ assert(I->second && I->first == I->second->VP && "List invariant broken!");
+ I->second->setPrevPtr(&I->second);
+ }
+}
+
+/// RemoveFromUseList - Remove this ValueHandle from its current use list.
+void ValueHandleBase::RemoveFromUseList() {
+ assert(VP && VP->HasValueHandle && "Pointer doesn't have a use list!");
+
+ // Unlink this from its use list.
+ ValueHandleBase **PrevPtr = getPrevPtr();
+ assert(*PrevPtr == this && "List invariant broken");
+
+ *PrevPtr = Next;
+ if (Next) {
+ assert(Next->getPrevPtr() == &Next && "List invariant broken");
+ Next->setPrevPtr(PrevPtr);
+ return;
+ }
+
+ // If the Next pointer was null, then it is possible that this was the last
+ // ValueHandle watching VP. If so, delete its entry from the ValueHandles
+ // map.
+ LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
+ Handles.erase(VP);
+ VP->HasValueHandle = false;
+ }
+}
+
+
+void ValueHandleBase::ValueIsDeleted(Value *V) {
+ assert(V->HasValueHandle && "Should only be called if ValueHandles present");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = V->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[V];
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+ // and remove themselves from the list without breaking our iteration. This
+ // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+ // Note that we deliberately do not support the case where dropping a value
+ // handle results in a new value handle being permanently added to the list
+ // (as might occur in theory for CallbackVH's): the new value handle will not
+ // be processed and the checking code will mete out righteous punishment if
+ // the handle is still present once we have finished processing all the other
+ // value handles (it is fine to momentarily add then remove a value handle).
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ break;
+ case Tracking:
+ // Mark that this value has been deleted by setting it to an invalid Value
+ // pointer.
+ Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
+ break;
+ case Weak:
+ // Weak just goes to null, which will unlink it from the list.
+ Entry->operator=(0);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->deleted();
+ break;
+ }
+ }
+
+ // All callbacks, weak references, and assertingVHs should be dropped by now.
+ if (V->HasValueHandle) {
+#ifndef NDEBUG // Only in +Asserts mode...
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ << "\n";
+ if (pImpl->ValueHandles[V]->getKind() == Assert)
+ llvm_unreachable("An asserting value handle still pointed to this"
+ " value!");
+
+#endif
+ llvm_unreachable("All references to V were not removed?");
+ }
+}
+
+
+void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
+ assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
+ assert(Old != New && "Changing value into itself!");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = Old->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[Old];
+
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that
+ // ValueHandles can add and remove themselves from the list without
+ // breaking our iteration. This is not really an AssertingVH; we
+ // just have to give ValueHandleBase some kind.
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ // Asserting handle does not follow RAUW implicitly.
+ break;
+ case Tracking:
+ // Tracking goes to new value like a WeakVH. Note that this may make it
+ // something incompatible with its templated type. We don't want to have a
+ // virtual (or inline) interface to handle this though, so instead we make
+ // the TrackingVH accessors guarantee that a client never sees this value.
+
+ // FALLTHROUGH
+ case Weak:
+ // Weak goes to the new value, which will unlink it from Old's list.
+ Entry->operator=(New);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->allUsesReplacedWith(New);
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ // If any new tracking or weak value handles were added while processing the
+ // list, then complain about it now.
+ if (Old->HasValueHandle)
+ for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+ switch (Entry->getKind()) {
+ case Tracking:
+ case Weak:
+ dbgs() << "After RAUW from " << *Old->getType() << " %"
+ << Old->getNameStr() << " to " << *New->getType() << " %"
+ << New->getNameStr() << "\n";
+ llvm_unreachable("A tracking or weak value handle still pointed to the"
+ " old value!\n");
+ default:
+ break;
+ }
+#endif
+}
+
+/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
+/// more than once.
+CallbackVH::~CallbackVH() {}
diff --git a/contrib/llvm/lib/VMCore/ValueSymbolTable.cpp b/contrib/llvm/lib/VMCore/ValueSymbolTable.cpp
new file mode 100644
index 000000000000..f1c970361a50
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ValueSymbolTable.cpp
@@ -0,0 +1,117 @@
+//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueSymbolTable class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "valuesymtab"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Class destructor
+ValueSymbolTable::~ValueSymbolTable() {
+#ifndef NDEBUG // Only do this in +Asserts mode...
+ for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
+ dbgs() << "Value still in symbol table! Type = '"
+ << *VI->getValue()->getType() << "' Name = '"
+ << VI->getKeyData() << "'\n";
+ assert(vmap.empty() && "Values remain in symbol table!");
+#endif
+}
+
+// Insert a value into the symbol table with the specified name...
+//
+void ValueSymbolTable::reinsertValue(Value* V) {
+ assert(V->hasName() && "Can't insert nameless Value into symbol table");
+
+ // Try inserting the name, assuming it won't conflict.
+ if (vmap.insert(V->Name)) {
+ //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n");
+ return;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
+
+ // The name is already used; just free it so we can allocate a new one.
+ V->Name->Destroy();
+
+ unsigned BaseSize = UniqueName.size();
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(BaseSize);
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ V->Name = &NewName;
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return;
+ }
+ }
+}
+
+void ValueSymbolTable::removeValueName(ValueName *V) {
+ //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n");
+ // Remove the value from the symbol table.
+ vmap.remove(V);
+}
+
+/// createValueName - This method attempts to create a value name and insert
+/// it into the symbol table with the specified name. If it conflicts, it
+/// auto-renames the name and returns that instead.
+ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
+ // In the common case, the name is not already in the symbol table.
+ ValueName &Entry = vmap.GetOrCreateValue(Name);
+ if (Entry.getValue() == 0) {
+ Entry.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
+ // << *V << "\n");
+ return &Entry;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(Name.begin(), Name.end());
+
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(Name.size());
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return &NewName;
+ }
+ }
+}
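+
+// Illustrative sketch of the renaming loop above, for two hypothetical
+// instructions A and B in the same function (the suffix digits depend on the
+// table's LastUnique counter, so only the shape of the name is guaranteed):
+//
+//   A->setName("x");    // the symbol table now holds "x"
+//   B->setName("x");    // conflict: B is auto-renamed, e.g. to "x1"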
+
+
+// dump - print out the symbol table
+//
+void ValueSymbolTable::dump() const {
+ //DEBUG(dbgs() << "ValueSymbolTable:\n");
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
+ I->getValue()->dump();
+ //DEBUG(dbgs() << "\n");
+ }
+}
diff --git a/contrib/llvm/lib/VMCore/ValueTypes.cpp b/contrib/llvm/lib/VMCore/ValueTypes.cpp
new file mode 100644
index 000000000000..21a1f034446a
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ValueTypes.cpp
@@ -0,0 +1,212 @@
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods in the CodeGen/ValueTypes.h header.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ unsigned NumElements) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool EVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVectorTy();
+}
+
+bool EVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVectorTy();
+}
+
+bool EVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isVectorTy();
+}
+
+bool EVT::isExtended64BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 64;
+}
+
+bool EVT::isExtended128BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 128;
+}
+
+bool EVT::isExtended256BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 256;
+}
+
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 512;
+}
+
+EVT EVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned EVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getNumElements();
+}
+
+unsigned EVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return ITy->getBitWidth();
+ if (const VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getBitWidth();
+ assert(false && "Unrecognized extended type!");
+ return 0; // Suppress warnings.
+}
+
+/// getEVTString - This function returns the value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
+ default:
+ if (isVector())
+ return "v" + utostr(getVectorNumElements()) +
+ getVectorElementType().getEVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ llvm_unreachable("Invalid EVT!");
+ return "?";
+ case MVT::i1: return "i1";
+ case MVT::i8: return "i8";
+ case MVT::i16: return "i16";
+ case MVT::i32: return "i32";
+ case MVT::i64: return "i64";
+ case MVT::i128: return "i128";
+ case MVT::f32: return "f32";
+ case MVT::f64: return "f64";
+ case MVT::f80: return "f80";
+ case MVT::f128: return "f128";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::v2i8: return "v2i8";
+ case MVT::v4i8: return "v4i8";
+ case MVT::v8i8: return "v8i8";
+ case MVT::v16i8: return "v16i8";
+ case MVT::v32i8: return "v32i8";
+ case MVT::v2i16: return "v2i16";
+ case MVT::v4i16: return "v4i16";
+ case MVT::v8i16: return "v8i16";
+ case MVT::v16i16: return "v16i16";
+ case MVT::v2i32: return "v2i32";
+ case MVT::v4i32: return "v4i32";
+ case MVT::v8i32: return "v8i32";
+ case MVT::v1i64: return "v1i64";
+ case MVT::v2i64: return "v2i64";
+ case MVT::v4i64: return "v4i64";
+ case MVT::v8i64: return "v8i64";
+ case MVT::v2f32: return "v2f32";
+ case MVT::v4f32: return "v4f32";
+ case MVT::v8f32: return "v8f32";
+ case MVT::v2f64: return "v2f64";
+ case MVT::v4f64: return "v4f64";
+ case MVT::Metadata:return "Metadata";
+ case MVT::untyped: return "untyped";
+ }
+}
+
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ switch (V.SimpleTy) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
+ }
+}
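+
+// For illustration only (assuming an LLVMContext named Ctx): the switch above
+// maps a simple value type back to the corresponding IR type, e.g.
+//
+//   EVT(MVT::f64).getTypeForEVT(Ctx);    // Type::getDoubleTy(Ctx)
+//   EVT(MVT::v2i64).getTypeForEVT(Ctx);  // <2 x i64>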
+
+/// getEVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+EVT EVT::getEVT(const Type *Ty, bool HandleUnknown) {
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
+ return MVT::isVoid;
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
+ VTy->getNumElements());
+ }
+ }
+}
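+
+// Rough sketch of the forward mapping (Ctx stands in for an LLVMContext):
+// pointers always become the target-dependent MVT::iPTR, and integer widths
+// without a simple MVT become extended EVTs.
+//
+//   EVT::getEVT(Type::getInt32Ty(Ctx));                     // MVT::i32
+//   EVT::getEVT(IntegerType::get(Ctx, 42)).getEVTString();  // "i42"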
diff --git a/contrib/llvm/lib/VMCore/Verifier.cpp b/contrib/llvm/lib/VMCore/Verifier.cpp
new file mode 100644
index 000000000000..b146b896cbfb
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Verifier.cpp
@@ -0,0 +1,1870 @@
+//===-- Verifier.cpp - Implement the Module Verifier -------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function verifier interface, that can be used for some
+// sanity checking of input to the system.
+//
+// Note that this does not provide full `Java style' security and verification;
+// instead it just tries to ensure that code is well-formed.
+//
+// * Both of a binary operator's parameters are of the same type
+// * Verify that the indices of mem access instructions match other operands
+// * Verify that arithmetic and other things are only performed on first-class
+// types. Verify that shifts & logicals only happen on integral types.
+// * All of the constants in a switch statement are of the correct type
+// * The code is in valid SSA form
+// * It should be illegal to put a label into any other type (like a structure)
+// or to return one. [except constant arrays!]
+// * Only phi nodes can be self referential: 'add i32 %0, %0 ; <int>:0' is bad
+// * PHI nodes must have an entry for each predecessor, with no extras.
+// * PHI nodes must be the first thing in a basic block, all grouped together
+// * PHI nodes must have at least one entry
+// * All basic blocks should only end with terminator insts, not contain them
+// * The entry node to a function must not have predecessors
+// * All Instructions must be embedded into a basic block
+// * Functions cannot take a void-typed parameter
+// * Verify that a function's argument list agrees with its declared type.
+// * It is illegal to specify a name for a void value.
+// * It is illegal to have an internal global value with no initializer
+// * It is illegal to have a ret instruction that returns a value that does not
+// agree with the function return value type.
+// * Function call argument types match the function prototype
+// * All other things that are tested by asserts spread about the code...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+namespace { // Anonymous namespace for class
+ struct PreVerifier : public FunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ PreVerifier() : FunctionPass(ID) {
+ initializePreVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // Check that the prerequisites for successful DominatorTree construction
+ // are satisfied.
+ bool runOnFunction(Function &F) {
+ bool Broken = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ if (I->empty() || !I->back().isTerminator()) {
+ dbgs() << "Basic Block in function '" << F.getName()
+ << "' does not have terminator!\n";
+ WriteAsOperand(dbgs(), I, true);
+ dbgs() << "\n";
+ Broken = true;
+ }
+ }
+
+ if (Broken)
+ report_fatal_error("Broken module, no Basic Block terminator!");
+
+ return false;
+ }
+ };
+}
+
+char PreVerifier::ID = 0;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+ false, false)
+static char &PreVerifyID = PreVerifier::ID;
+
+namespace {
+ struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
+ static char ID; // Pass ID, replacement for typeid
+ bool Broken; // Is this module found to be broken?
+ bool RealPass; // Are we not being run by a PassManager?
+ VerifierFailureAction action;
+ // What to do if verification fails.
+ Module *Mod; // Module we are verifying right now
+ LLVMContext *Context; // Context within which we are verifying
+ DominatorTree *DT; // Dominator Tree, caution can be null!
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ /// InstsInThisBlock - when verifying a basic block, keep track of all of the
+ /// instructions we have seen so far. This allows us to do efficient
+ /// dominance checks for the case when an instruction has an operand that is
+ /// an instruction in the same block.
+ SmallPtrSet<Instruction*, 16> InstsInThisBlock;
+
+ /// MDNodes - keep track of the metadata nodes that have been checked
+ /// already.
+ SmallPtrSet<MDNode *, 32> MDNodes;
+
+ Verifier()
+ : FunctionPass(ID),
+ Broken(false), RealPass(true), action(AbortProcessAction),
+ Mod(0), Context(0), DT(0), MessagesStr(Messages) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+ explicit Verifier(VerifierFailureAction ctn)
+ : FunctionPass(ID),
+ Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
+ MessagesStr(Messages) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) {
+ Mod = &M;
+ Context = &M.getContext();
+
+ // If this is a real pass, in a pass manager, we must abort before
+ // returning back to the pass manager, or else the pass manager may try to
+ // run other passes on the broken module.
+ if (RealPass)
+ return abortIfBroken();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) {
+ // Get dominator information if we are being run by PassManager
+ if (RealPass) DT = &getAnalysis<DominatorTree>();
+
+ Mod = F.getParent();
+ if (!Context) Context = &F.getContext();
+
+ visit(F);
+ InstsInThisBlock.clear();
+
+ // If this is a real pass, in a pass manager, we must abort before
+ // returning back to the pass manager, or else the pass manager may try to
+ // run other passes on the broken module.
+ if (RealPass)
+ return abortIfBroken();
+
+ return false;
+ }
+
+ bool doFinalization(Module &M) {
+ // Scan through, checking all of the external functions' linkage now...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ visitGlobalValue(*I);
+
+ // Check to make sure function prototypes are okay.
+ if (I->isDeclaration()) visitFunction(*I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ visitGlobalVariable(*I);
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ visitGlobalAlias(*I);
+
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I)
+ visitNamedMDNode(*I);
+
+ // If the module is broken, abort at this time.
+ return abortIfBroken();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredID(PreVerifyID);
+ if (RealPass)
+ AU.addRequired<DominatorTree>();
+ }
+
+ /// abortIfBroken - If the module is broken and we are supposed to abort on
+ /// this condition, do so.
+ ///
+ bool abortIfBroken() {
+ if (!Broken) return false;
+ MessagesStr << "Broken module found, ";
+ switch (action) {
+ default: llvm_unreachable("Unknown action");
+ case AbortProcessAction:
+ MessagesStr << "compilation aborted!\n";
+ dbgs() << MessagesStr.str();
+ // Client should choose different reaction if abort is not desired
+ abort();
+ case PrintMessageAction:
+ MessagesStr << "verification continues.\n";
+ dbgs() << MessagesStr.str();
+ return false;
+ case ReturnStatusAction:
+ MessagesStr << "compilation terminated.\n";
+ return true;
+ }
+ }
+
+
+ // Verification methods...
+ void visitGlobalValue(GlobalValue &GV);
+ void visitGlobalVariable(GlobalVariable &GV);
+ void visitGlobalAlias(GlobalAlias &GA);
+ void visitNamedMDNode(NamedMDNode &NMD);
+ void visitMDNode(MDNode &MD, Function *F);
+ void visitFunction(Function &F);
+ void visitBasicBlock(BasicBlock &BB);
+ using InstVisitor<Verifier>::visit;
+
+ void visit(Instruction &I);
+
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitPHINode(PHINode &PN);
+ void visitBinaryOperator(BinaryOperator &B);
+ void visitICmpInst(ICmpInst &IC);
+ void visitFCmpInst(FCmpInst &FC);
+ void visitExtractElementInst(ExtractElementInst &EI);
+ void visitInsertElementInst(InsertElementInst &EI);
+ void visitShuffleVectorInst(ShuffleVectorInst &EI);
+ void visitVAArgInst(VAArgInst &VAA) { visitInstruction(VAA); }
+ void visitCallInst(CallInst &CI);
+ void visitInvokeInst(InvokeInst &II);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitInstruction(Instruction &I);
+ void visitTerminatorInst(TerminatorInst &I);
+ void visitBranchInst(BranchInst &BI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitSwitchInst(SwitchInst &SI);
+ void visitIndirectBrInst(IndirectBrInst &BI);
+ void visitSelectInst(SelectInst &SI);
+ void visitUserOp1(Instruction &I);
+ void visitUserOp2(Instruction &I) { visitUserOp1(I); }
+ void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ void visitAllocaInst(AllocaInst &AI);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ void VerifyCallSite(CallSite CS);
+ bool PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix);
+ void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+ unsigned RetNum, unsigned ParamNum, ...);
+ void VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+ bool isReturnValue, const Value *V);
+ void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
+ const Value *V);
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ void WriteType(const Type *T) {
+ if (!T) return;
+ MessagesStr << ' ' << *T;
+ }
+
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Value *V1,
+ const Type *T2, const Value *V3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Type *T1,
+ const Type *T2 = 0, const Type *T3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ Broken = true;
+ }
+ };
+} // End anonymous namespace
+
+char Verifier::ID = 0;
+INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
+INITIALIZE_PASS_DEPENDENCY(PreVerifier)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
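+
+// Typical usage, sketched here for orientation only; the entry points are the
+// ones declared in "llvm/Analysis/Verifier.h" (error handling elided):
+//
+//   std::string Err;
+//   if (verifyModule(M, ReturnStatusAction, &Err))  // true => module is broken
+//     errs() << Err;
+//
+//   PM.add(createVerifierPass());                   // or run it in a pipeline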
+
+// Assert - We know that cond should be true; if not, print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+void Verifier::visit(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Assert1(I.getOperand(i) != 0, "Operand is null", &I);
+ InstVisitor<Verifier>::visit(I);
+}
+
+
+void Verifier::visitGlobalValue(GlobalValue &GV) {
+ Assert1(!GV.isDeclaration() ||
+ GV.isMaterializable() ||
+ GV.hasExternalLinkage() ||
+ GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage() ||
+ (isa<GlobalAlias>(GV) &&
+ (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
+ "Global is external, but doesn't have external or dllimport or weak linkage!",
+ &GV);
+
+ Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
+ "Global is marked as dllimport, but not external", &GV);
+
+ Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
+
+ if (GV.hasAppendingLinkage()) {
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
+ Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
+ }
+
+ Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(),
+ "linker_private_weak_def_auto can only have default visibility!",
+ &GV);
+}
+
+void Verifier::visitGlobalVariable(GlobalVariable &GV) {
+ if (GV.hasInitializer()) {
+ Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!", &GV);
+
+ // If the global has common linkage, it must have a zero initializer and
+ // cannot be constant.
+ if (GV.hasCommonLinkage()) {
+ Assert1(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ }
+ } else {
+ Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
+ }
+
+ if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
+ GV.getName() == "llvm.global_dtors")) {
+ Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
+ // Don't worry about emitting an error for it not being an array,
+ // visitGlobalValue will complain on appending non-array.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
+ const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ const PointerType *FuncPtrTy =
+ FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+ Assert1(STy && STy->getNumElements() == 2 &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
+ }
+ }
+
+ visitGlobalValue(GV);
+}
+
+void Verifier::visitGlobalAlias(GlobalAlias &GA) {
+ Assert1(!GA.getName().empty(),
+ "Alias name cannot be empty!", &GA);
+ Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
+ GA.hasWeakLinkage(),
+ "Alias should have external or external weak linkage!", &GA);
+ Assert1(GA.getAliasee(),
+ "Aliasee cannot be NULL!", &GA);
+ Assert1(GA.getType() == GA.getAliasee()->getType(),
+ "Alias and aliasee types should match!", &GA);
+ Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
+
+ if (!isa<GlobalValue>(GA.getAliasee())) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
+ Assert1(CE &&
+ (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ isa<GlobalValue>(CE->getOperand(0)),
+ "Aliasee should be either GlobalValue or bitcast of GlobalValue",
+ &GA);
+ }
+
+ const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false);
+ Assert1(Aliasee,
+ "Aliasing chain should end with function or global variable", &GA);
+
+ visitGlobalValue(GA);
+}
+
+void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
+ MDNode *MD = NMD.getOperand(i);
+ if (!MD)
+ continue;
+
+ Assert1(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", MD);
+ visitMDNode(*MD, 0);
+ }
+}
+
+void Verifier::visitMDNode(MDNode &MD, Function *F) {
+ // Only visit each node once. Metadata can be mutually recursive, so this
+ // avoids infinite recursion here, as well as being an optimization.
+ if (!MDNodes.insert(&MD))
+ return;
+
+ for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
+ Value *Op = MD.getOperand(i);
+ if (!Op)
+ continue;
+ if (isa<Constant>(Op) || isa<MDString>(Op))
+ continue;
+ if (MDNode *N = dyn_cast<MDNode>(Op)) {
+ Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
+ "Global metadata operand cannot be function local!", &MD, N);
+ visitMDNode(*N, F);
+ continue;
+ }
+ Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op);
+
+ // If this was an instruction, bb, or argument, verify that it is in the
+ // function that we expect.
+ Function *ActualF = 0;
+ if (Instruction *I = dyn_cast<Instruction>(Op))
+ ActualF = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
+ ActualF = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(Op))
+ ActualF = A->getParent();
+ assert(ActualF && "Unimplemented function local metadata case!");
+
+ Assert2(ActualF == F, "function-local metadata used in wrong function",
+ &MD, Op);
+ }
+}
+
+// VerifyParameterAttrs - Check the given attributes for an argument or return
+// value of the specified type. The value V is printed in error messages.
+void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+ bool isReturnValue, const Value *V) {
+ if (Attrs == Attribute::None)
+ return;
+
+ Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
+ Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
+ " only applies to the function!", V);
+
+ if (isReturnValue) {
+ Attributes RetI = Attrs & Attribute::ParameterOnly;
+ Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) +
+ " does not apply to return values!", V);
+ }
+
+ for (unsigned i = 0;
+ i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
+ Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
+ Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Attribute::getAsString(MutI) + " are incompatible!", V);
+ }
+
+ Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty);
+ Assert1(!TypeI, "Wrong type for attribute " +
+ Attribute::getAsString(TypeI), V);
+
+ Attributes ByValI = Attrs & Attribute::ByVal;
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ Assert1(!ByValI || PTy->getElementType()->isSized(),
+ "Attribute " + Attribute::getAsString(ByValI) +
+ " does not support unsized types!", V);
+ } else {
+ Assert1(!ByValI,
+ "Attribute " + Attribute::getAsString(ByValI) +
+ " only applies to parameters with pointer type!", V);
+ }
+}
+
+// VerifyFunctionAttrs - Check parameter attributes against a function type.
+// The value V is printed in error messages.
+void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
+ const AttrListPtr &Attrs,
+ const Value *V) {
+ if (Attrs.isEmpty())
+ return;
+
+ bool SawNest = false;
+
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ const AttributeWithIndex &Attr = Attrs.getSlot(i);
+
+ const Type *Ty;
+ if (Attr.Index == 0)
+ Ty = FT->getReturnType();
+ else if (Attr.Index-1 < FT->getNumParams())
+ Ty = FT->getParamType(Attr.Index-1);
+ else
+ break; // VarArgs attributes, verified elsewhere.
+
+ VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
+
+ if (Attr.Attrs & Attribute::Nest) {
+ Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ SawNest = true;
+ }
+
+ if (Attr.Attrs & Attribute::StructRet)
+ Assert1(Attr.Index == 1, "Attribute sret not on first parameter!", V);
+ }
+
+ Attributes FAttrs = Attrs.getFnAttributes();
+ Attributes NotFn = FAttrs & (~Attribute::FunctionOnly);
+ Assert1(!NotFn, "Attribute " + Attribute::getAsString(NotFn) +
+ " does not apply to the function!", V);
+
+ for (unsigned i = 0;
+ i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
+ Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
+ Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Attribute::getAsString(MutI) + " are incompatible!", V);
+ }
+}
+
+static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
+ if (Attrs.isEmpty())
+ return true;
+
+ unsigned LastSlot = Attrs.getNumSlots() - 1;
+ unsigned LastIndex = Attrs.getSlot(LastSlot).Index;
+ if (LastIndex <= Params
+ || (LastIndex == (unsigned)~0
+ && (LastSlot == 0 || Attrs.getSlot(LastSlot - 1).Index <= Params)))
+ return true;
+
+ return false;
+}
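+
+// Worked example (illustrative only): for a function type with two parameters
+// the attribute slots are indexed as
+//
+//   Index 0   -> return value attributes
+//   Index 1-2 -> parameter attributes
+//   Index ~0U -> function attributes
+//
+// so the last slot's index must be <= 2, or ~0U with the preceding slot
+// (if any) still <= 2.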
+
+// visitFunction - Verify that a function is ok.
+//
+void Verifier::visitFunction(Function &F) {
+ // Check function arguments.
+ const FunctionType *FT = F.getFunctionType();
+ unsigned NumArgs = F.arg_size();
+
+ Assert1(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
+
+ Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Assert2(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!",
+ &F, FT);
+ Assert1(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() ||
+ F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
+
+ Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
+
+ const AttrListPtr &Attrs = F.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attributes after last parameter!", &F);
+
+ // Check function attributes.
+ VerifyFunctionAttrs(FT, Attrs, &F);
+
+ // Check that this function meets the restrictions on this calling convention.
+ switch (F.getCallingConv()) {
+ default:
+ break;
+ case CallingConv::C:
+ break;
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::PTX_Kernel:
+ case CallingConv::PTX_Device:
+ Assert1(!F.isVarArg(),
+ "Varargs functions must have C calling conventions!", &F);
+ break;
+ }
+
+ bool isLLVMdotName = F.getName().size() >= 5 &&
+ F.getName().substr(0, 5) == "llvm.";
+
+ // Check that the argument values match the function type for this function...
+ unsigned i = 0;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++i) {
+ Assert2(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!",
+ I, FT->getParamType(i));
+ Assert1(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
+ if (!isLLVMdotName)
+ Assert2(!I->getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", I, &F);
+ }
+
+ if (F.isMaterializable()) {
+ // Function has a body somewhere we can't see.
+ } else if (F.isDeclaration()) {
+ Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() ||
+ F.hasExternalWeakLinkage(),
+ "invalid linkage type for function declaration", &F);
+ } else {
+ // Verify that this function (which has a body) is not named "llvm.*". It
+ // is not legal to define intrinsics.
+ Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+
+ // Check the entry node
+ BasicBlock *Entry = &F.getEntryBlock();
+ Assert1(pred_begin(Entry) == pred_end(Entry),
+ "Entry block to function must not have predecessors!", Entry);
+
+ // The address of the entry block cannot be taken, unless it is dead.
+ if (Entry->hasAddressTaken()) {
+ Assert1(!BlockAddress::get(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
+ }
+ }
+
+ // If this function is actually an intrinsic, verify that it is only used in
+ // direct call/invokes, never having its "address taken".
+ if (F.getIntrinsicID()) {
+ const User *U;
+ if (F.hasAddressTaken(&U))
+ Assert1(0, "Invalid user of intrinsic instruction!", U);
+ }
+}
+
+// visitBasicBlock - Verify that a basic block is well formed...
+//
+void Verifier::visitBasicBlock(BasicBlock &BB) {
+ InstsInThisBlock.clear();
+
+ // Ensure that basic blocks have terminators!
+ Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+
+ // Check constraints that this basic block imposes on all of the PHI nodes in
+ // it.
+ if (isa<PHINode>(BB.front())) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
+ std::sort(Preds.begin(), Preds.end());
+ PHINode *PN;
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
+ // Ensure that PHI nodes have at least one entry!
+ Assert1(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!", PN);
+ Assert1(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!", PN);
+
+ // Get and sort all incoming values in the PHI node...
+ Values.clear();
+ Values.reserve(PN->getNumIncomingValues());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Values.push_back(std::make_pair(PN->getIncomingBlock(i),
+ PN->getIncomingValue(i)));
+ std::sort(Values.begin(), Values.end());
+
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ // Check to make sure that if there is more than one entry for a
+ // particular basic block in this PHI node, that the incoming values are
+ // all identical.
+ //
+ Assert4(i == 0 || Values[i].first != Values[i-1].first ||
+ Values[i].second == Values[i-1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!", PN, Values[i].first,
+ Values[i].second, Values[i-1].second);
+
+ // Check to make sure that the predecessors and PHI node entries are
+ // matched up.
+ Assert3(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
+ }
+ }
+ }
+}
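+
+// Illustrative IR fragment (hypothetical labels, not taken from any test):
+// with predecessors %a and %b, each PHI needs exactly one entry per
+// predecessor.
+//
+//   merge:
+//     %v = phi i32 [ 0, %a ], [ 1, %b ]   ; accepted
+//     %w = phi i32 [ 0, %a ]              ; rejected: no entry for %b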
+
+void Verifier::visitTerminatorInst(TerminatorInst &I) {
+ // Ensure that terminators only exist at the end of the basic block.
+ Assert1(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
+ visitInstruction(I);
+}
+
+void Verifier::visitBranchInst(BranchInst &BI) {
+ if (BI.isConditional()) {
+ Assert2(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ }
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitReturnInst(ReturnInst &RI) {
+ Function *F = RI.getParent()->getParent();
+ unsigned N = RI.getNumOperands();
+ if (F->getReturnType()->isVoidTy())
+ Assert2(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!", &RI, F->getReturnType());
+ else
+ Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+
+ // Check to make sure that the return value has necessary properties for
+ // terminators...
+ visitTerminatorInst(RI);
+}
+
+void Verifier::visitSwitchInst(SwitchInst &SI) {
+ // Check to make sure that all of the constants in the switch instruction
+ // have the same type as the switched-on value.
+ const Type *SwitchTy = SI.getCondition()->getType();
+ SmallPtrSet<ConstantInt*, 32> Constants;
+ for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
+ Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert2(Constants.insert(SI.getCaseValue(i)),
+ "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ }
+
+ visitTerminatorInst(SI);
+}
+
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+ Assert1(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
+ for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+ Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
+
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitSelectInst(SelectInst &SI) {
+ Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
+
+ Assert1(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
+ visitInstruction(SI);
+}
+
+/// visitUserOp1 - User defined operators shouldn't live beyond the lifetime of
+/// a pass; if any exist, it's an error.
+///
+void Verifier::visitUserOp1(Instruction &I) {
+ Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+}
+
+void Verifier::visitTruncInst(TruncInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitZExtInst(ZExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSExtInst(SExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPTruncInst(FPTruncInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither",&I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPExtInst(FPExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitUIToFPInst(UIToFPInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "UIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSIToFPInst(SIToFPInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "SIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToUIInst(FPToUIInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToSIInst(FPToSIInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(),
+ "FPToSI source must be FP or FP vector", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitBitCastInst(BitCastInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
+ "Bitcast requires both operands to be pointer or neither", &I);
+ Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
+
+ // Disallow aggregates.
+ Assert1(!SrcTy->isAggregateType(),
+ "Bitcast operand must not be aggregate", &I);
+ Assert1(!DestTy->isAggregateType(),
+ "Bitcast type must not be aggregate", &I);
+
+ visitInstruction(I);
+}
+
+/// visitPHINode - Ensure that a PHI node is well formed.
+///
+void Verifier::visitPHINode(PHINode &PN) {
+ // Ensure that the PHI nodes are all grouped together at the top of the block.
+ // This can be tested by checking whether the instruction before this is
+ // either nonexistent (because this is begin()) or is a PHI node. If not,
+ // then there is some other instruction before a PHI.
+ Assert2(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!",
+ &PN, PN.getParent());
+
+ // Check that all of the values of the PHI node have the same type as the
+ // result, and that the incoming blocks are really basic blocks.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
+ }
+
+ // All other PHI node constraints are checked in the visitBasicBlock method.
+
+ visitInstruction(PN);
+}
+
+void Verifier::VerifyCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+
+ Assert1(CS.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", I);
+ const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+
+ Assert1(FPTy->getElementType()->isFunctionTy(),
+ "Called function is not pointer to function type!", I);
+ const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+
+ // Verify that the correct number of arguments are being passed
+ if (FTy->isVarArg())
+ Assert1(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",I);
+ else
+ Assert1(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
+
+ // Verify that all arguments to the call match the function type.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
+
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attributes after last parameter!", I);
+
+ // Verify call attributes.
+ VerifyFunctionAttrs(FTy, Attrs, I);
+
+ if (FTy->isVarArg())
+ // Check attributes on the varargs part.
+ for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
+ Attributes Attr = Attrs.getParamAttributes(Idx);
+
+ VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
+
+ Attributes VArgI = Attr & Attribute::VarArgsIncompatible;
+ Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) +
+ " cannot be used for vararg call arguments!", I);
+ }
+
+ // Verify that there's no metadata unless it's a direct call to an intrinsic.
+ if (CS.getCalledFunction() == 0 ||
+ !CS.getCalledFunction()->getName().startswith("llvm.")) {
+ for (FunctionType::param_iterator PI = FTy->param_begin(),
+ PE = FTy->param_end(); PI != PE; ++PI)
+ Assert1(!(*PI)->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
+ }
+
+ visitInstruction(*I);
+}
+
+void Verifier::visitCallInst(CallInst &CI) {
+ VerifyCallSite(&CI);
+
+ if (Function *F = CI.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ visitIntrinsicFunctionCall(ID, CI);
+}
+
+void Verifier::visitInvokeInst(InvokeInst &II) {
+ VerifyCallSite(&II);
+ visitTerminatorInst(II);
+}
+
+/// visitBinaryOperator - Check that both arguments to the binary operator are
+/// of the same type!
+///
+void Verifier::visitBinaryOperator(BinaryOperator &B) {
+ Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
+
+ switch (B.getOpcode()) {
+ // Check that integer arithmetic operators are only used with
+ // integral operands.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ Assert1(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that logical operators are only used with integral operands.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
+ break;
+ default:
+ llvm_unreachable("Unknown BinaryOperator opcode!");
+ }
+
+ visitInstruction(B);
+}
+
+void Verifier::visitICmpInst(ICmpInst &IC) {
+ // Check that the operands are the same type
+ const Type *Op0Ty = IC.getOperand(0)->getType();
+ const Type *Op1Ty = IC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+ "Invalid operand types for ICmp instruction", &IC);
+ // Check that the predicate is valid.
+ Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
+
+ visitInstruction(IC);
+}
+
+void Verifier::visitFCmpInst(FCmpInst &FC) {
+ // Check that the operands are the same type
+ const Type *Op0Ty = FC.getOperand(0)->getType();
+ const Type *Op1Ty = FC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isFPOrFPVectorTy(),
+ "Invalid operand types for FCmp instruction", &FC);
+ // Check that the predicate is valid.
+ Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
+
+ visitInstruction(FC);
+}
+
+void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
+ Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
+ EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
+ visitInstruction(EI);
+}
+
+void Verifier::visitInsertElementInst(InsertElementInst &IE) {
+ Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
+ IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
+ visitInstruction(IE);
+}
+
+void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
+ Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
+ visitInstruction(SV);
+}
+
+void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
+ const Type *ElTy =
+ GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
+ Idxs.begin(), Idxs.end());
+ Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+ Assert2(GEP.getType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ visitInstruction(GEP);
+}
+
+void Verifier::visitLoadInst(LoadInst &LI) {
+ const PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+ Assert1(PTy, "Load operand must be a pointer.", &LI);
+ const Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == LI.getType(),
+ "Load result type does not match pointer operand type!", &LI, ElTy);
+ visitInstruction(LI);
+}
+
+void Verifier::visitStoreInst(StoreInst &SI) {
+ const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+ Assert1(PTy, "Store operand must be a pointer.", &SI);
+ const Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &SI, ElTy);
+ visitInstruction(SI);
+}
+
+void Verifier::visitAllocaInst(AllocaInst &AI) {
+ const PointerType *PTy = AI.getType();
+ Assert1(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
+ &AI);
+ Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ visitInstruction(AI);
+}
+
+void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
+ Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) ==
+ EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
+
+ visitInstruction(EVI);
+}
+
+void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
+ Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
+
+ visitInstruction(IVI);
+}
+
+/// visitInstruction - Verify that an instruction is well formed.
+///
+void Verifier::visitInstruction(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ Assert1(BB, "Instruction not embedded in basic block!", &I);
+
+ if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
+ }
+
+ // Check that void typed values don't have names
+ Assert1(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
+
+ // Check that the return value of the instruction is either void or a legal
+ // value type.
+ Assert1(I.getType()->isVoidTy() ||
+ I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
+
+ // Check that the instruction doesn't produce metadata. Calls are already
+ // checked against the callee type.
+ Assert1(!I.getType()->isMetadataTy() ||
+ isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
+
+ // Check that all uses of the instruction, if they are instructions
+ // themselves, actually have parent basic blocks. If the use is not an
+ // instruction, it is an error!
+ for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (Instruction *Used = dyn_cast<Instruction>(*UI))
+ Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
+ " embedded in a basic block!", &I, Used);
+ else {
+ CheckFailed("Use of instruction is not an instruction!", *UI);
+ return;
+ }
+ }
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
+
+ // Check to make sure that only first-class-values are operands to
+ // instructions.
+ if (!I.getOperand(i)->getType()->isFirstClassType()) {
+ Assert1(0, "Instruction operands must be first-class values!", &I);
+ }
+
+ if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // Check to make sure that the "address of" an intrinsic function is never
+ // taken.
+ Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert1(F->getParent() == Mod, "Referencing function in another module!",
+ &I);
+ } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
+ Assert1(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
+ } else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
+ Assert1(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
+ Assert1(GV->getParent() == Mod, "Referencing global in another module!",
+ &I);
+ } else if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+ BasicBlock *OpBlock = Op->getParent();
+
+ // Check that a definition dominates all of its uses.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+
+ Assert2(NormalDest != II->getUnwindDest(),
+ "No uses of invoke possible due to dominance structure!",
+ Op, &I);
+
+ // PHI nodes differ from other nodes because they actually "use" the
+ // value in the predecessor basic blocks they correspond to.
+ BasicBlock *UseBlock = BB;
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ UseBlock = PN->getIncomingBlock(j);
+ }
+ Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
+ Op, &I);
+
+ if (isa<PHINode>(I) && UseBlock == OpBlock) {
+ // Special case of a phi node in the normal destination or the unwind
+ // destination.
+ Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
+ "Invoke result not available in the unwind destination!",
+ Op, &I);
+ } else {
+ Assert2(DT->dominates(NormalDest, UseBlock) ||
+ !DT->isReachableFromEntry(UseBlock),
+ "Invoke result does not dominate all uses!", Op, &I);
+
+ // If the normal successor of an invoke instruction has multiple
+ // predecessors, then the normal edge from the invoke is critical,
+ // so the invoke value can only be live if the destination block
+ // dominates all of its predecessors (other than the invoke).
+ if (!NormalDest->getSinglePredecessor() &&
+ DT->isReachableFromEntry(UseBlock))
+ // If it is used by something non-phi, then the other case is that
+ // 'NormalDest' dominates all of its predecessors other than the
+ // invoke. In this case, the invoke value can still be used.
+ for (pred_iterator PI = pred_begin(NormalDest),
+ E = pred_end(NormalDest); PI != E; ++PI)
+ if (*PI != II->getParent() && !DT->dominates(NormalDest, *PI) &&
+ DT->isReachableFromEntry(*PI)) {
+ CheckFailed("Invoke result does not dominate all uses!", Op,&I);
+ return;
+ }
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ // PHI nodes are more difficult than other nodes because they actually
+ // "use" the value in the predecessor basic blocks they correspond to.
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ BasicBlock *PredBB = PN->getIncomingBlock(j);
+ Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
+ !DT->isReachableFromEntry(PredBB)),
+ "Instruction does not dominate all uses!", Op, &I);
+ } else {
+ if (OpBlock == BB) {
+ // If they are in the same basic block, make sure that the definition
+ // comes before the use.
+ Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB),
+ "Instruction does not dominate all uses!", Op, &I);
+ }
+
+ // Definition must dominate use unless use is unreachable!
+ Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, &I) ||
+ !DT->isReachableFromEntry(BB),
+ "Instruction does not dominate all uses!", Op, &I);
+ }
+ } else if (isa<InlineAsm>(I.getOperand(i))) {
+ Assert1((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
+ }
+ }
+ InstsInThisBlock.insert(&I);
+}
+
+// Flags used by TableGen to mark intrinsic parameters with the
+// LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes.
+static const unsigned ExtendedElementVectorType = 0x40000000;
+static const unsigned TruncatedElementVectorType = 0x20000000;
+
+/// visitIntrinsicFunction - Allow intrinsics to be verified in different ways.
+///
+void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
+ Function *IF = CI.getCalledFunction();
+ Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
+
+#define GET_INTRINSIC_VERIFIER
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_VERIFIER
+
+ // If the intrinsic takes MDNode arguments, verify that they are either global
+ // or are local to *this* function.
+ for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
+ visitMDNode(*MD, CI.getParent()->getParent());
+
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::dbg_declare: { // llvm.dbg.declare
+ Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", &CI);
+ MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
+ Assert1(MD->getNumOperands() == 1,
+ "invalid llvm.dbg.declare intrinsic call 2", &CI);
+ } break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
+ "isvolatile argument of memory intrinsics must be a constant int",
+ &CI);
+ break;
+ case Intrinsic::gcroot:
+ case Intrinsic::gcwrite:
+ case Intrinsic::gcread:
+ if (ID == Intrinsic::gcroot) {
+ AllocaInst *AI =
+ dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
+ Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
+ Assert1(isa<Constant>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
+ if (!AI->getType()->getElementType()->isPointerTy()) {
+ Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.", &CI);
+ }
+ }
+
+ Assert1(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
+ break;
+ case Intrinsic::init_trampoline:
+ Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
+ break;
+ case Intrinsic::prefetch:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch",
+ &CI);
+ break;
+ case Intrinsic::stackprotector:
+ Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.",
+ &CI);
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
+ break;
+ case Intrinsic::invariant_end:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ break;
+ }
+}
+
+/// Produce a string to identify an intrinsic parameter or return value.
+/// The ArgNo value numbers the return values from 0 to NumRets-1 and the
+/// parameters beginning with NumRets.
+///
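+/// For example, with NumRets == 2, ArgNo 0 and 1 produce "Intrinsic result
+/// type #0" and "#1", while ArgNo 2 produces "Intrinsic parameter #0".
+///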
+static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
+ if (ArgNo >= NumRets)
+ return "Intrinsic parameter #" + utostr(ArgNo - NumRets);
+ if (NumRets == 1)
+ return "Intrinsic result type";
+ return "Intrinsic result type #" + utostr(ArgNo);
+}
+
+bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix) {
+ const FunctionType *FTy = F->getFunctionType();
+
+ unsigned NumElts = 0;
+ const Type *EltTy = Ty;
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (VTy) {
+ EltTy = VTy->getElementType();
+ NumElts = VTy->getNumElements();
+ }
+
+ const Type *RetTy = FTy->getReturnType();
+ const StructType *ST = dyn_cast<StructType>(RetTy);
+ unsigned NumRetVals;
+ if (RetTy->isVoidTy())
+ NumRetVals = 0;
+ else if (ST)
+ NumRetVals = ST->getNumElements();
+ else
+ NumRetVals = 1;
+
+ if (VT < 0) {
+ int Match = ~VT;
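+ // A negative VT encodes a reference to another slot: ~VT is the index of
+ // the return value (if less than NumRetVals) or parameter (otherwise) whose
+ // type this one must match, optionally OR'd with the vector-element flags
+ // defined above.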
+
+ // Check flags that indicate a type that is an integral vector type with
+ // elements that are larger or smaller than the elements of the matched
+ // type.
+ if ((Match & (ExtendedElementVectorType |
+ TruncatedElementVectorType)) != 0) {
+ const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
+ if (!VTy || !IEltTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
+ "an integral vector type.", F);
+ return false;
+ }
+ // Adjust the current Ty (in the opposite direction) rather than
+ // the type being matched against.
+ if ((Match & ExtendedElementVectorType) != 0) {
+ if ((IEltTy->getBitWidth() & 1) != 0) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " vector "
+ "element bit-width is odd.", F);
+ return false;
+ }
+ Ty = VectorType::getTruncatedElementVectorType(VTy);
+ } else
+ Ty = VectorType::getExtendedElementVectorType(VTy);
+ Match &= ~(ExtendedElementVectorType | TruncatedElementVectorType);
+ }
+
+ if (Match <= static_cast<int>(NumRetVals - 1)) {
+ if (ST)
+ RetTy = ST->getElementType(Match);
+
+ if (Ty != RetTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not "
+ "match return type.", F);
+ return false;
+ }
+ } else {
+ if (Ty != FTy->getParamType(Match - NumRetVals)) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not "
+ "match parameter %" + utostr(Match - NumRetVals) + ".", F);
+ return false;
+ }
+ }
+ } else if (VT == MVT::iAny) {
+ if (!EltTy->isIntegerTy()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
+ "an integer type.", F);
+ return false;
+ }
+
+ unsigned GotBits = cast<IntegerType>(EltTy)->getBitWidth();
+ Suffix += ".";
+
+ if (EltTy != Ty)
+ Suffix += "v" + utostr(NumElts);
+
+ Suffix += "i" + utostr(GotBits);
+
+ // Check some constraints on various intrinsics.
+ switch (ID) {
+ default: break; // Not everything needs to be checked.
+ case Intrinsic::bswap:
+ if (GotBits < 16 || GotBits % 16 != 0) {
+ CheckFailed("Intrinsic requires even byte width argument", F);
+ return false;
+ }
+ break;
+ }
+ } else if (VT == MVT::fAny) {
+ if (!EltTy->isFloatingPointTy()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
+ "a floating-point type.", F);
+ return false;
+ }
+
+ Suffix += ".";
+
+ if (EltTy != Ty)
+ Suffix += "v" + utostr(NumElts);
+
+ Suffix += EVT::getEVT(EltTy).getEVTString();
+ } else if (VT == MVT::vAny) {
+ if (!VTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a vector type.",
+ F);
+ return false;
+ }
+ Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString();
+ } else if (VT == MVT::iPTR) {
+ if (!Ty->isPointerTy()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
+ "pointer and a pointer is required.", F);
+ return false;
+ }
+ } else if (VT == MVT::iPTRAny) {
+ // Outside of TableGen, we don't distinguish iPTRAny (a pointer to any
+ // address space) from iPTR. In the verifier we cannot tell which case we
+ // have, so allow either to be legal.
+ if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
+ EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
+ if (PointeeVT == MVT::Other) {
+ CheckFailed("Intrinsic has pointer to complex type.");
+ return false;
+ }
+ Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
+ PointeeVT.getEVTString();
+ } else {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
+ "pointer and a pointer is required.", F);
+ return false;
+ }
+ } else if (EVT((MVT::SimpleValueType)VT).isVector()) {
+ EVT VVT = EVT((MVT::SimpleValueType)VT);
+
+ // If this is a vector argument, verify the number and type of elements.
+ if (VVT.getVectorElementType() != EVT::getEVT(EltTy)) {
+ CheckFailed("Intrinsic prototype has incorrect vector element type!", F);
+ return false;
+ }
+
+ if (VVT.getVectorNumElements() != NumElts) {
+ CheckFailed("Intrinsic prototype has incorrect number of "
+ "vector elements!", F);
+ return false;
+ }
+ } else if (EVT((MVT::SimpleValueType)VT).getTypeForEVT(Ty->getContext()) !=
+ EltTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is wrong!", F);
+ return false;
+ } else if (EltTy != Ty) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is a vector "
+ "and a scalar is required.", F);
+ return false;
+ }
+
+ return true;
+}
+
+/// VerifyIntrinsicPrototype - TableGen emits calls to this function into
+/// Intrinsics.gen. It walks the variadic list of expected types and checks
+/// each return value and parameter of the intrinsic's prototype against them.
+void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+ unsigned NumRetVals,
+ unsigned NumParams, ...) {
+ va_list VA;
+ va_start(VA, NumParams);
+ const FunctionType *FTy = F->getFunctionType();
+
+ // For overloaded intrinsics, the Suffix of the function name must match the
+ // types of the arguments. This variable keeps track of the expected
+ // suffix, to be checked at the end.
+ std::string Suffix;
+
+ if (FTy->getNumParams() + FTy->isVarArg() != NumParams) {
+ CheckFailed("Intrinsic prototype has incorrect number of arguments!", F);
+ return;
+ }
+
+ const Type *Ty = FTy->getReturnType();
+ const StructType *ST = dyn_cast<StructType>(Ty);
+
+ if (NumRetVals == 0 && !Ty->isVoidTy()) {
+ CheckFailed("Intrinsic should return void", F);
+ return;
+ }
+
+ // Verify the return types.
+ if (ST && ST->getNumElements() != NumRetVals) {
+ CheckFailed("Intrinsic prototype has incorrect number of return types!", F);
+ return;
+ }
+
+ for (unsigned ArgNo = 0; ArgNo != NumRetVals; ++ArgNo) {
+ int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
+
+ if (ST) Ty = ST->getElementType(ArgNo);
+ if (!PerformTypeCheck(ID, F, Ty, VT, ArgNo, Suffix))
+ break;
+ }
+
+ // Verify the parameter types.
+ for (unsigned ArgNo = 0; ArgNo != NumParams; ++ArgNo) {
+ int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
+
+ if (VT == MVT::isVoid && ArgNo > 0) {
+ if (!FTy->isVarArg())
+ CheckFailed("Intrinsic prototype has no '...'!", F);
+ break;
+ }
+
+ if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT,
+ ArgNo + NumRetVals, Suffix))
+ break;
+ }
+
+ va_end(VA);
+
+ // For intrinsics without pointer arguments, a non-empty Suffix means the
+ // intrinsic is overloaded, so the function name must be correct: we append
+ // the suffix to the base intrinsic name and compare against the given
+ // function name. If they differ, the name is invalid. This keeps
+ // overloading of intrinsics on a sane, consistent naming convention. Note
+ // that intrinsics with pointer arguments may or may not be overloaded, so
+ // the check must allow for both a suffixed and an unsuffixed name.
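+ // For example, an integer-overloaded intrinsic instantiated at i32 computes
+ // the suffix ".i32" above, so a declaration such as llvm.bswap must actually
+ // be named llvm.bswap.i32.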
+ if (!Suffix.empty()) {
+ std::string Name(Intrinsic::getName(ID));
+ if (Name + Suffix != F->getName()) {
+ CheckFailed("Overloaded intrinsic has incorrect suffix: '" +
+ F->getName().substr(Name.length()) + "'. It should be '" +
+ Suffix + "'", F);
+ }
+ }
+
+ // Check parameter attributes.
+ Assert1(F->getAttributes() == Intrinsic::getAttributes(ID),
+ "Intrinsic has wrong parameter attributes!", F);
+}
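+
+// Illustrative only (not copied from Intrinsics.gen): for a hypothetical
+// overloaded intrinsic with one integer-any result and a single parameter
+// required to match that result, the generated check would have the shape
+//   VerifyIntrinsicPrototype(ID, IF, /*NumRetVals=*/1, /*NumParams=*/1,
+//                            MVT::iAny, ~0);
+// where the negative code ~0 asks PerformTypeCheck to match result #0.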
+
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
+ return new Verifier(action);
+}
+
+
+/// verifyFunction - Check a function for errors, printing messages on stderr.
+/// Return true if the function is corrupt.
+///
+bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot verify external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Verifier *V = new Verifier(action);
+ FPM.add(V);
+ FPM.run(F);
+ return V->Broken;
+}
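+
+// Typical use of verifyFunction (illustrative; assumes the PrintMessageAction
+// failure mode and an already-defined Function *F):
+//   if (verifyFunction(*F, PrintMessageAction))
+//     errs() << "function failed verification\n";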
+
+/// verifyModule - Check a module for errors, printing messages on stderr.
+/// Return true if the module is corrupt.
+///
+bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
+ std::string *ErrorInfo) {
+ PassManager PM;
+ Verifier *V = new Verifier(action);
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+
+ if (ErrorInfo && V->Broken)
+ *ErrorInfo = V->MessagesStr.str();
+ return V->Broken;
+}
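+
+// Typical use of verifyModule (illustrative): run the verifier with
+// ReturnStatusAction and report the collected messages on failure:
+//   std::string Err;
+//   if (verifyModule(*M, ReturnStatusAction, &Err))
+//     errs() << Err;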